diff --git a/freqtrade/freqai/base_models/BasePyTorchClassifier.py b/freqtrade/freqai/base_models/BasePyTorchClassifier.py
index c08142876..7795b37ce 100644
--- a/freqtrade/freqai/base_models/BasePyTorchClassifier.py
+++ b/freqtrade/freqai/base_models/BasePyTorchClassifier.py
@@ -97,7 +97,7 @@ class BasePyTorchClassifier(BasePyTorchModel):
         """
         target_column_name = dk.label_list[0]
-        for split in ["train", "test"]:
+        for split in self.splits:
             label_df = data_dictionary[f"{split}_labels"]
             self.assert_valid_class_names(label_df[target_column_name], class_names)
             label_df[target_column_name] = list(
diff --git a/freqtrade/freqai/base_models/BasePyTorchModel.py b/freqtrade/freqai/base_models/BasePyTorchModel.py
index d6372fa36..189f7d906 100644
--- a/freqtrade/freqai/base_models/BasePyTorchModel.py
+++ b/freqtrade/freqai/base_models/BasePyTorchModel.py
@@ -22,6 +22,8 @@ class BasePyTorchModel(IFreqaiModel):
         super().__init__(config=kwargs["config"])
         self.dd.model_type = "pytorch"
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size')
+        self.splits = ["train", "test"] if test_size != 0 else ["train"]
 
     def train(
         self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
diff --git a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
index 20c0b0c65..389aa6155 100644
--- a/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
+++ b/freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py
@@ -76,5 +76,5 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
             squeeze_target_tensor=True,
             **self.trainer_kwargs,
         )
-        trainer.fit(data_dictionary)
+        trainer.fit(data_dictionary, self.splits)
         return trainer
diff --git a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
index df149ffbf..ca6a13f6e 100644
--- a/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
+++ b/freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py
@@ -72,5 +72,5 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
             target_tensor_type=torch.float,
             **self.trainer_kwargs,
         )
-        trainer.fit(data_dictionary)
+        trainer.fit(data_dictionary, self.splits)
         return trainer
diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index 2ef4b57c9..609e19eda 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -1,7 +1,7 @@
 import logging
 import math
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 
 import pandas as pd
 import torch
@@ -43,7 +43,6 @@ class PyTorchModelTrainer:
            self.optimizer.step(). used to calculate n_epochs.
         :param batch_size: The size of the batches to use during training.
         :param max_n_eval_batches: The maximum number of batches to use for evaluation.
-
         """
         self.model = model
         self.optimizer = optimizer
@@ -58,21 +57,27 @@ class PyTorchModelTrainer:
         if init_model:
             self.load_from_checkpoint(init_model)
 
-    def fit(self, data_dictionary: Dict[str, pd.DataFrame]):
+    def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]):
         """
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+        all the training and test data/labels.
+        :param splits: splits to use in training; must contain "train". The optional
+        "test" split is enabled by setting freqai.data_split_parameters.test_size > 0
+        in the config file.
+
         - Calculates the predicted output for the batch using the PyTorch model.
         - Calculates the loss between the predicted and actual output using a loss function.
         - Computes the gradients of the loss with respect to the model's parameters using
           backpropagation.
         - Updates the model's parameters using an optimizer.
         """
-        data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary)
+        data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits)
         epochs = self.calc_n_epochs(
             n_obs=len(data_dictionary["train_features"]),
             batch_size=self.batch_size,
             n_iters=self.max_iters
         )
-        for epoch in range(epochs):
+        for epoch in range(1, epochs + 1):
             # training
             losses = []
             for i, batch_data in enumerate(data_loaders_dictionary["train"]):
@@ -87,13 +92,18 @@ class PyTorchModelTrainer:
                 self.optimizer.step()
                 losses.append(loss.item())
             train_loss = sum(losses) / len(losses)
+            log_message = f"epoch {epoch}/{epochs}: train loss {train_loss:.4f}"
 
             # evaluation
-            test_loss = self.estimate_loss(data_loaders_dictionary, self.max_n_eval_batches, "test")
-            logger.info(
-                f"epoch {epoch}/{epochs}:"
-                f" train loss {train_loss:.4f} ; test loss {test_loss:.4f}"
-            )
+            if "test" in splits:
+                test_loss = self.estimate_loss(
+                    data_loaders_dictionary,
+                    self.max_n_eval_batches,
+                    "test"
+                )
+                log_message += f" ; test loss {test_loss:.4f}"
+
+            logger.info(log_message)
 
     @torch.no_grad()
     def estimate_loss(
@@ -122,13 +132,14 @@
     def create_data_loaders_dictionary(
             self,
-            data_dictionary: Dict[str, pd.DataFrame]
+            data_dictionary: Dict[str, pd.DataFrame],
+            splits: List[str]
     ) -> Dict[str, DataLoader]:
         """
         Converts the input data to PyTorch tensors using a data loader.
         """
         data_loader_dictionary = {}
-        for split in ["train", "test"]:
+        for split in splits:
             x = torch.from_numpy(data_dictionary[f"{split}_features"].values).float()
             y = torch.from_numpy(data_dictionary[f"{split}_labels"].values)\
                 .to(self.target_tensor_type)
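
As a minimal sketch of the behavior introduced above: the snippet below mirrors the new split-selection logic from BasePyTorchModel.__init__. The resolve_splits helper is hypothetical and written only for illustration; the config keys follow the documented FreqAI layout (freqai.data_split_parameters.test_size).

    from typing import Dict, List

    def resolve_splits(freqai_info: Dict) -> List[str]:
        # Hypothetical helper mirroring BasePyTorchModel.__init__:
        # an explicit test_size of 0 disables the "test" split, so the
        # trainer builds only a "train" DataLoader and skips test-loss
        # evaluation in fit().
        test_size = freqai_info.get("data_split_parameters", {}).get("test_size")
        return ["train", "test"] if test_size != 0 else ["train"]

    # Test split disabled explicitly in the config:
    assert resolve_splits({"data_split_parameters": {"test_size": 0}}) == ["train"]

    # Unset test_size yields None, and None != 0, so both splits are kept:
    assert resolve_splits({}) == ["train", "test"]

Note that only an explicit test_size of 0 drops the test split; leaving the key unset preserves the previous train/test behavior.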