bugfix skip test split when empty
@@ -97,7 +97,7 @@ class BasePyTorchClassifier(BasePyTorchModel):
         """
 
         target_column_name = dk.label_list[0]
-        for split in ["train", "test"]:
+        for split in self.splits:
             label_df = data_dictionary[f"{split}_labels"]
             self.assert_valid_class_names(label_df[target_column_name], class_names)
             label_df[target_column_name] = list(

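The hunk above replaces the classifier's hard-coded ["train", "test"] label-encoding loop with the instance's actual splits. A minimal runnable sketch of that loop in isolation, assuming a hypothetical target column "&-action" and a plain dict standing in for FreqAI's data_dictionary:

import pandas as pd

# assumed stand-ins for the real FreqAI objects
class_names = ["down", "up"]
encoder = {name: index for index, name in enumerate(class_names)}
splits = ["train"]  # "test" is absent when test_size == 0
data_dictionary = {"train_labels": pd.DataFrame({"&-action": ["up", "down", "up"]})}

for split in splits:  # iterating only existing splits avoids a KeyError on "test_labels"
    label_df = data_dictionary[f"{split}_labels"]
    label_df["&-action"] = list(map(lambda x: encoder[x], label_df["&-action"]))
print(data_dictionary["train_labels"])  # labels now encoded as 1, 0, 1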
@@ -22,6 +22,8 @@ class BasePyTorchModel(IFreqaiModel):
         super().__init__(config=kwargs["config"])
         self.dd.model_type = "pytorch"
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size')
+        self.splits = ["train", "test"] if test_size != 0 else ["train"]
 
     def train(
         self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs

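A short sketch of what the two new constructor lines compute, assuming a minimal freqai_info dict. One subtlety worth noting: when test_size is missing entirely, .get() returns None, and None != 0 is true, so both splits are kept by default:

freqai_info = {"data_split_parameters": {"test_size": 0}}
test_size = freqai_info.get("data_split_parameters", {}).get("test_size")
splits = ["train", "test"] if test_size != 0 else ["train"]
print(splits)  # ['train'] -- the test split is skipped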
@@ -76,5 +76,5 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
             squeeze_target_tensor=True,
             **self.trainer_kwargs,
         )
-        trainer.fit(data_dictionary)
+        trainer.fit(data_dictionary, self.splits)
         return trainer

@@ -72,5 +72,5 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
             target_tensor_type=torch.float,
             **self.trainer_kwargs,
         )
-        trainer.fit(data_dictionary)
+        trainer.fit(data_dictionary, self.splits)
         return trainer

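Both MLP models now forward self.splits into the trainer. For reference, a hedged example of the config fragment that drives this behavior; the freqai.data_split_parameters.test_size path matches the trainer docstring further below, while the surrounding structure is assumed:

config = {
    "freqai": {
        "data_split_parameters": {
            "test_size": 0,  # 0 disables the test split entirely
        },
    },
}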
@@ -1,7 +1,7 @@
 import logging
 import math
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 
 import pandas as pd
 import torch
@@ -43,7 +43,6 @@ class PyTorchModelTrainer:
             self.optimizer.step(). used to calculate n_epochs.
         :param batch_size: The size of the batches to use during training.
         :param max_n_eval_batches: The maximum number of batches to use for evaluation.
-
         """
         self.model = model
         self.optimizer = optimizer
@@ -58,21 +57,27 @@ class PyTorchModelTrainer:
         if init_model:
             self.load_from_checkpoint(init_model)
 
-    def fit(self, data_dictionary: Dict[str, pd.DataFrame]):
+    def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]):
         """
         :param data_dictionary: the dictionary constructed by DataHandler to hold
         all the training and test data/labels.
+        :param splits: splits to use in training; must contain "train". An optional
+        "test" split can be added by setting freqai.data_split_parameters.test_size > 0
+        in the config file.
 
          - Calculates the predicted output for the batch using the PyTorch model.
          - Calculates the loss between the predicted and actual output using a loss function.
          - Computes the gradients of the loss with respect to the model's parameters using
           backpropagation.
          - Updates the model's parameters using an optimizer.
         """
-        data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary)
+        data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits)
         epochs = self.calc_n_epochs(
             n_obs=len(data_dictionary["train_features"]),
             batch_size=self.batch_size,
             n_iters=self.max_iters
         )
-        for epoch in range(epochs):
+        for epoch in range(1, epochs+1):
             # training
             losses = []
             for i, batch_data in enumerate(data_loaders_dictionary["train"]):
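calc_n_epochs itself is not part of this diff. As an assumption only, one plausible implementation consistent with the call above (n_obs observations, a fixed batch size, and a target number of optimizer steps) could look like this; the repository's actual version may differ:

import math

def calc_n_epochs(n_obs: int, batch_size: int, n_iters: int) -> int:
    n_batches = math.ceil(n_obs / batch_size)  # optimizer steps per epoch
    return math.ceil(n_iters / n_batches)      # epochs needed to reach ~n_iters steps

print(calc_n_epochs(n_obs=1000, batch_size=64, n_iters=100))  # 7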
@@ -87,13 +92,18 @@ class PyTorchModelTrainer:
                 self.optimizer.step()
                 losses.append(loss.item())
             train_loss = sum(losses) / len(losses)
+            log_message = f"epoch {epoch}/{epochs}: train loss {train_loss:.4f}"
 
             # evaluation
-            test_loss = self.estimate_loss(data_loaders_dictionary, self.max_n_eval_batches, "test")
-            logger.info(
-                f"epoch {epoch}/{epochs}:"
-                f" train loss {train_loss:.4f} ; test loss {test_loss:.4f}"
-            )
+            if "test" in splits:
+                test_loss = self.estimate_loss(
+                    data_loaders_dictionary,
+                    self.max_n_eval_batches,
+                    "test"
+                )
+                log_message += f" ; test loss {test_loss:.4f}"
+
+            logger.info(log_message)
 
     @torch.no_grad()
     def estimate_loss(
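The rewritten evaluation block builds the log line incrementally, so the test loss is appended only when a test split exists. A self-contained toy version of the pattern, with made-up loss values:

splits = ["train"]
epoch, epochs, train_loss = 1, 10, 0.1234
log_message = f"epoch {epoch}/{epochs}: train loss {train_loss:.4f}"
if "test" in splits:
    test_loss = 0.2345  # would come from estimate_loss(...) in the real code
    log_message += f" ; test loss {test_loss:.4f}"
print(log_message)  # epoch 1/10: train loss 0.1234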
@@ -122,13 +132,14 @@ class PyTorchModelTrainer:
 
     def create_data_loaders_dictionary(
             self,
-            data_dictionary: Dict[str, pd.DataFrame]
+            data_dictionary: Dict[str, pd.DataFrame],
+            splits: List[str]
     ) -> Dict[str, DataLoader]:
         """
         Converts the input data to PyTorch tensors using a data loader.
         """
         data_loader_dictionary = {}
-        for split in ["train", "test"]:
+        for split in splits:
             x = torch.from_numpy(data_dictionary[f"{split}_features"].values).float()
             y = torch.from_numpy(data_dictionary[f"{split}_labels"].values)\
                 .to(self.target_tensor_type)

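Finally, a runnable sketch of the per-split DataLoader construction above, with a tiny fabricated data_dictionary; dtypes, shapes, and batch size are illustrative only:

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset

data_dictionary = {
    "train_features": pd.DataFrame(np.random.rand(8, 3)),
    "train_labels": pd.DataFrame(np.random.randint(0, 2, size=(8, 1))),
}
splits = ["train"]  # no "test_features"/"test_labels" keys exist, so "test" must not be iterated

data_loader_dictionary = {}
for split in splits:
    x = torch.from_numpy(data_dictionary[f"{split}_features"].values).float()
    y = torch.from_numpy(data_dictionary[f"{split}_labels"].values).long()
    data_loader_dictionary[split] = DataLoader(TensorDataset(x, y), batch_size=4)
print(next(iter(data_loader_dictionary["train"])))  # one (features, labels) batch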