Merge pull request #8297 from Yinon-Polak/feat/add-pytorch-model-support
Feat/add pytorch model support
freqtrade/freqai/base_models/BasePyTorchClassifier.py (new file, 147 lines)
@@ -0,0 +1,147 @@
import logging
from typing import Dict, List, Tuple

import numpy as np
import numpy.typing as npt
import pandas as pd
import torch
from pandas import DataFrame
from torch.nn import functional as F

from freqtrade.exceptions import OperationalException
from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


logger = logging.getLogger(__name__)


class BasePyTorchClassifier(BasePyTorchModel):
    """
    A PyTorch implementation of a classifier.
    Users must implement the fit method.

    Important!

    - The user must declare the target class names in the strategy,
    under the IStrategy.set_freqai_targets method.

    For example, in your strategy:
    ```
        def set_freqai_targets(self, dataframe: DataFrame, metadata: Dict, **kwargs):
            self.freqai.class_names = ["down", "up"]
            dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-100) >
                                                  dataframe["close"], 'up', 'down')

            return dataframe
    ```
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.class_name_to_index = None
        self.index_to_class_name = None

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        :raises ValueError: if 'class_names' doesn't exist in model meta_data.
        """

        class_names = self.model.model_meta_data.get("class_names", None)
        if not class_names:
            raise ValueError(
                "Missing class names. "
                "self.model.model_meta_data['class_names'] is None."
            )

        if not self.class_name_to_index:
            self.init_class_names_to_index_mapping(class_names)

        dk.find_features(unfiltered_df)
        filtered_df, _ = dk.filter_features(
            unfiltered_df, dk.training_features_list, training_filter=False
        )
        filtered_df = dk.normalize_data_from_metadata(filtered_df)
        dk.data_dictionary["prediction_features"] = filtered_df
        self.data_cleaning_predict(dk)
        x = self.data_convertor.convert_x(
            dk.data_dictionary["prediction_features"],
            device=self.device
        )
        logits = self.model.model(x)
        probs = F.softmax(logits, dim=-1)
        predicted_classes = torch.argmax(probs, dim=-1)
        predicted_classes_str = self.decode_class_names(predicted_classes)
        pred_df_prob = DataFrame(probs.detach().numpy(), columns=class_names)
        pred_df = DataFrame(predicted_classes_str, columns=[dk.label_list[0]])
        pred_df = pd.concat([pred_df, pred_df_prob], axis=1)
        return (pred_df, dk.do_predict)

    def encode_class_names(
            self,
            data_dictionary: Dict[str, pd.DataFrame],
            dk: FreqaiDataKitchen,
            class_names: List[str],
    ):
        """
        Encode class names, str -> int.
        Assumes the first column of each *_labels dataframe is the target column
        containing the class names.
        """

        target_column_name = dk.label_list[0]
        for split in self.splits:
            label_df = data_dictionary[f"{split}_labels"]
            self.assert_valid_class_names(label_df[target_column_name], class_names)
            label_df[target_column_name] = list(
                map(lambda x: self.class_name_to_index[x], label_df[target_column_name])
            )

    @staticmethod
    def assert_valid_class_names(
            target_column: pd.Series,
            class_names: List[str]
    ):
        non_defined_labels = set(target_column) - set(class_names)
        if len(non_defined_labels) != 0:
            raise OperationalException(
                f"Found undefined labels: {non_defined_labels}, "
                f"expecting labels: {class_names}"
            )

    def decode_class_names(self, class_ints: torch.Tensor) -> List[str]:
        """
        Decode class names, int -> str.
        """

        return list(map(lambda x: self.index_to_class_name[x.item()], class_ints))

    def init_class_names_to_index_mapping(self, class_names):
        self.class_name_to_index = {s: i for i, s in enumerate(class_names)}
        self.index_to_class_name = {i: s for i, s in enumerate(class_names)}
        logger.info(f"encoded class name to index: {self.class_name_to_index}")

    def convert_label_column_to_int(
            self,
            data_dictionary: Dict[str, pd.DataFrame],
            dk: FreqaiDataKitchen,
            class_names: List[str]
    ):
        self.init_class_names_to_index_mapping(class_names)
        self.encode_class_names(data_dictionary, dk, class_names)

    def get_class_names(self) -> List[str]:
        if not self.class_names:
            raise ValueError(
                "self.class_names is empty, "
                "set self.freqai.class_names = ['class a', 'class b', 'class c'] "
                "inside IStrategy.set_freqai_targets method."
            )

        return self.class_names
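The class-name bookkeeping above is a plain two-way index lookup. A standalone sketch of the round trip (hypothetical values, outside freqtrade):

```python
import torch

class_names = ["down", "up"]
class_name_to_index = {s: i for i, s in enumerate(class_names)}  # {'down': 0, 'up': 1}
index_to_class_name = {i: s for i, s in enumerate(class_names)}  # {0: 'down', 1: 'up'}

# encode_class_names(): str -> int on the label column before training
encoded = [class_name_to_index[x] for x in ["up", "down", "up"]]  # [1, 0, 1]

# decode_class_names(): int -> str on the argmax of the softmax output
predicted_classes = torch.tensor([0, 1, 1])
decoded = [index_to_class_name[i.item()] for i in predicted_classes]  # ['down', 'up', 'up']
```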
freqtrade/freqai/base_models/BasePyTorchModel.py (new file, 83 lines)
@@ -0,0 +1,83 @@
import logging
from abc import ABC, abstractmethod
from time import time
from typing import Any

import torch
from pandas import DataFrame

from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.freqai.torch.PyTorchDataConvertor import PyTorchDataConvertor


logger = logging.getLogger(__name__)


class BasePyTorchModel(IFreqaiModel, ABC):
    """
    Base class for PyTorch type models.
    Users *must* inherit from this class and implement fit(), predict(),
    and the data_convertor property.
    """

    def __init__(self, **kwargs):
        super().__init__(config=kwargs["config"])
        self.dd.model_type = "pytorch"
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size')
        self.splits = ["train", "test"] if test_size != 0 else ["train"]

    def train(
        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
    ) -> Any:
        """
        Filter the training data and train a model on it. Train makes heavy use of the
        datakitchen for storing, saving, loading, and analyzing the data.
        :param unfiltered_df: Full dataframe for the current training period
        :return:
        :model: Trained model which can be used for inference (self.predict)
        """

        logger.info(f"-------------------- Starting training {pair} --------------------")

        start_time = time()

        features_filtered, labels_filtered = dk.filter_features(
            unfiltered_df,
            dk.training_features_list,
            dk.label_list,
            training_filter=True,
        )

        # split data into train/test data.
        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
        if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
            dk.fit_labels()
        # normalize all data based on train_dataset only
        data_dictionary = dk.normalize_data(data_dictionary)

        # optional additional data cleaning/analysis
        self.data_cleaning_train(dk)

        logger.info(
            f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
        )
        logger.info(f"Training model on {len(data_dictionary['train_features'])} data points")

        model = self.fit(data_dictionary, dk)
        end_time = time()

        logger.info(f"-------------------- Done training {pair} "
                    f"({end_time - start_time:.2f} secs) --------------------")

        return model

    @property
    @abstractmethod
    def data_convertor(self) -> PyTorchDataConvertor:
        """
        A class responsible for converting `*_features` & `*_labels` pandas dataframes
        to pytorch tensors.
        """
        raise NotImplementedError("Abstract property")
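Note how `splits` is derived in `__init__`: an unset `test_size` evaluates to `None`, which is `!= 0`, so both splits are kept unless the user explicitly sets `test_size` to `0`. A minimal sketch of that logic with example config values:

```python
# mirrors the splits derivation in BasePyTorchModel.__init__ (example values)
freqai_info = {"data_split_parameters": {"test_size": 0.33}}

test_size = freqai_info.get("data_split_parameters", {}).get("test_size")
splits = ["train", "test"] if test_size != 0 else ["train"]
# -> ['train', 'test']; with "test_size": 0 only ['train'] remains
```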
freqtrade/freqai/base_models/BasePyTorchRegressor.py (new file, 49 lines)
@@ -0,0 +1,49 @@
import logging
from typing import Tuple

import numpy as np
import numpy.typing as npt
from pandas import DataFrame

from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


logger = logging.getLogger(__name__)


class BasePyTorchRegressor(BasePyTorchModel):
    """
    A PyTorch implementation of a regressor.
    Users must implement the fit method.
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        dk.find_features(unfiltered_df)
        filtered_df, _ = dk.filter_features(
            unfiltered_df, dk.training_features_list, training_filter=False
        )
        filtered_df = dk.normalize_data_from_metadata(filtered_df)
        dk.data_dictionary["prediction_features"] = filtered_df

        self.data_cleaning_predict(dk)
        x = self.data_convertor.convert_x(
            dk.data_dictionary["prediction_features"],
            device=self.device
        )
        y = self.model.model(x)
        pred_df = DataFrame(y.detach().numpy(), columns=[dk.label_list[0]])
        return (pred_df, dk.do_predict)
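The regressor needs no class-name bookkeeping; the strategy just declares a continuous target. A hedged strategy-side sketch (the label name `&-s_close` and the 10-candle horizon are illustrative, not required by this class):

```python
def set_freqai_targets(self, dataframe: DataFrame, metadata: Dict, **kwargs):
    # illustrative continuous target: mean close over the next 10 candles,
    # expressed relative to the current close
    dataframe["&-s_close"] = (
        dataframe["close"].shift(-10).rolling(10).mean() / dataframe["close"] - 1
    )
    return dataframe
```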
freqtrade/freqai/data_drawer.py
@@ -446,7 +446,7 @@ class FreqaiDataDrawer:
             dump(model, save_path / f"{dk.model_filename}_model.joblib")
         elif self.model_type == 'keras':
             model.save(save_path / f"{dk.model_filename}_model.h5")
-        elif 'stable_baselines' in self.model_type or 'sb3_contrib' == self.model_type:
+        elif self.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
             model.save(save_path / f"{dk.model_filename}_model.zip")

         if dk.svm_model is not None:
@@ -496,7 +496,7 @@ class FreqaiDataDrawer:
            dk.training_features_list = dk.data["training_features_list"]
            dk.label_list = dk.data["label_list"]

-    def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:
+    def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:  # noqa: C901
        """
        loads all data required to make a prediction on a sub-train time range
        :returns:
@@ -537,6 +537,11 @@ class FreqaiDataDrawer:
                self.model_type, self.freqai_info['rl_config']['model_type'])
            MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type'])
            model = MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
+        elif self.model_type == 'pytorch':
+            import torch
+            zip = torch.load(dk.data_path / f"{dk.model_filename}_model.zip")
+            model = zip["pytrainer"]
+            model = model.load_from_checkpoint(zip)

        if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file():
            dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib")

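The `'pytorch'` branch expects the `.zip` written on the save side to be a `torch.save` dictionary holding the trainer itself under `"pytrainer"`. A sketch of a save method consistent with this loader (an assumption about the trainer's checkpoint layout, not a verbatim quote of it):

```python
import torch
from pathlib import Path

def save(self, path: Path):
    # checkpoint layout assumed by the load branch above
    torch.save({
        "model_state_dict": self.model.state_dict(),
        "optimizer_state_dict": self.optimizer.state_dict(),
        "model_meta_data": self.model_meta_data,
        "pytrainer": self,
    }, path)
```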
freqtrade/freqai/freqai_interface.py
@@ -83,6 +83,7 @@ class IFreqaiModel(ABC):
        self.CONV_WIDTH = self.freqai_info.get('conv_width', 1)
        if self.ft_params.get("inlier_metric_window", 0):
            self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
+        self.class_names: List[str] = []  # used in classification subclasses
        self.pair_it = 0
        self.pair_it_train = 0
        self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
@@ -571,8 +572,9 @@ class IFreqaiModel(ABC):
            file_type = ".joblib"
        elif self.dd.model_type == 'keras':
            file_type = ".h5"
-        elif 'stable_baselines' in self.dd.model_type or 'sb3_contrib' == self.dd.model_type:
+        elif self.dd.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
            file_type = ".zip"
+
        path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model{file_type}")
        file_exists = path_to_modelfile.is_file()
        if file_exists:

freqtrade/freqai/prediction_models/CatboostClassifier.py
@@ -14,16 +14,20 @@ logger = logging.getLogger(__name__)

class CatboostClassifier(BaseClassifierModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        train_data = Pool(

freqtrade/freqai/prediction_models/CatboostClassifierMultiTarget.py
@@ -15,16 +15,20 @@ logger = logging.getLogger(__name__)

class CatboostClassifierMultiTarget(BaseClassifierModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        cbc = CatBoostClassifier(

freqtrade/freqai/prediction_models/CatboostRegressor.py
@@ -14,16 +14,20 @@ logger = logging.getLogger(__name__)

class CatboostRegressor(BaseRegressionModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        train_data = Pool(

freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py
@@ -15,16 +15,20 @@ logger = logging.getLogger(__name__)

class CatboostRegressorMultiTarget(BaseRegressionModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        cbr = CatBoostRegressor(

freqtrade/freqai/prediction_models/LightGBMClassifier.py
@@ -12,16 +12,20 @@ logger = logging.getLogger(__name__)

class LightGBMClassifier(BaseClassifierModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:

freqtrade/freqai/prediction_models/LightGBMClassifierMultiTarget.py
@@ -13,16 +13,20 @@ logger = logging.getLogger(__name__)

class LightGBMClassifierMultiTarget(BaseClassifierModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        lgb = LGBMClassifier(**self.model_training_parameters)

freqtrade/freqai/prediction_models/LightGBMRegressor.py
@@ -12,18 +12,20 @@ logger = logging.getLogger(__name__)

class LightGBMRegressor(BaseRegressionModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
-        Most regressors use the same function names and arguments e.g. user
-        can drop in LGBMRegressor in place of CatBoostRegressor and all data
-        management will be properly handled by Freqai.
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        User sets up the training and test data to fit their desired model here
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:

freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py
@@ -13,16 +13,20 @@ logger = logging.getLogger(__name__)

class LightGBMRegressorMultiTarget(BaseRegressionModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        lgb = LGBMRegressor(**self.model_training_parameters)

freqtrade/freqai/prediction_models/PyTorchMLPClassifier.py (new file, 89 lines)
@@ -0,0 +1,89 @@
from typing import Any, Dict

import torch

from freqtrade.freqai.base_models.BasePyTorchClassifier import BasePyTorchClassifier
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.torch.PyTorchDataConvertor import (DefaultPyTorchDataConvertor,
                                                         PyTorchDataConvertor)
from freqtrade.freqai.torch.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchModelTrainer


class PyTorchMLPClassifier(BasePyTorchClassifier):
    """
    This class implements the fit method of IFreqaiModel.
    In the fit method we initialize the model and trainer objects.
    The only requirement of the model is to be aligned with the
    BasePyTorchClassifier predict method, which expects the model to
    predict a tensor of type long.

    Parameters are passed via `model_training_parameters` under the freqai
    section in the config file, e.g.:
    {
        ...
        "freqai": {
            ...
            "model_training_parameters": {
                "learning_rate": 3e-4,
                "trainer_kwargs": {
                    "max_iters": 5000,
                    "batch_size": 64,
                    "max_n_eval_batches": null
                },
                "model_kwargs": {
                    "hidden_dim": 512,
                    "dropout_percent": 0.2,
                    "n_layer": 1
                }
            }
        }
    }
    """

    @property
    def data_convertor(self) -> PyTorchDataConvertor:
        return DefaultPyTorchDataConvertor(
            target_tensor_type=torch.long,
            squeeze_target_tensor=True
        )

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        config = self.freqai_info.get("model_training_parameters", {})
        self.learning_rate: float = config.get("learning_rate", 3e-4)
        self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
        self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary holding all data for train, test,
            labels, weights
        :param dk: The datakitchen object for the current coin/model
        :raises ValueError: If self.class_names is not defined in the parent class.
        """

        class_names = self.get_class_names()
        self.convert_label_column_to_int(data_dictionary, dk, class_names)
        n_features = data_dictionary["train_features"].shape[-1]
        model = PyTorchMLPModel(
            input_dim=n_features,
            output_dim=len(class_names),
            **self.model_kwargs
        )
        model.to(self.device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
        criterion = torch.nn.CrossEntropyLoss()
        init_model = self.get_init_model(dk.pair)
        trainer = PyTorchModelTrainer(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            model_meta_data={"class_names": class_names},
            device=self.device,
            init_model=init_model,
            data_convertor=self.data_convertor,
            **self.trainer_kwargs,
        )
        trainer.fit(data_dictionary, self.splits)
        return trainer
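The convertor choice here follows directly from the loss function: `CrossEntropyLoss` wants raw logits of shape `(batch, n_classes)` and a 1-D `long` tensor of class indices, which is why this class asks for `target_tensor_type=torch.long` and `squeeze_target_tensor=True`. A minimal illustration:

```python
import torch

criterion = torch.nn.CrossEntropyLoss()
logits = torch.randn(4, 2)            # (batch_size, n_classes), as PyTorchMLPModel outputs
targets = torch.tensor([0, 1, 1, 0])  # 1-D long tensor of encoded class indices
loss = criterion(logits, targets)     # an unsqueezed (4, 1) target would raise, hence the squeeze
```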
freqtrade/freqai/prediction_models/PyTorchMLPRegressor.py (new file, 83 lines)
@@ -0,0 +1,83 @@
from typing import Any, Dict

import torch

from freqtrade.freqai.base_models.BasePyTorchRegressor import BasePyTorchRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.torch.PyTorchDataConvertor import (DefaultPyTorchDataConvertor,
                                                         PyTorchDataConvertor)
from freqtrade.freqai.torch.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchModelTrainer


class PyTorchMLPRegressor(BasePyTorchRegressor):
    """
    This class implements the fit method of IFreqaiModel.
    In the fit method we initialize the model and trainer objects.
    The only requirement of the model is to be aligned with the
    BasePyTorchRegressor predict method, which expects the model to
    predict a tensor of type float.
    The trainer defines the training loop.

    Parameters are passed via `model_training_parameters` under the freqai
    section in the config file, e.g.:
    {
        ...
        "freqai": {
            ...
            "model_training_parameters": {
                "learning_rate": 3e-4,
                "trainer_kwargs": {
                    "max_iters": 5000,
                    "batch_size": 64,
                    "max_n_eval_batches": null
                },
                "model_kwargs": {
                    "hidden_dim": 512,
                    "dropout_percent": 0.2,
                    "n_layer": 1
                }
            }
        }
    }
    """

    @property
    def data_convertor(self) -> PyTorchDataConvertor:
        return DefaultPyTorchDataConvertor(target_tensor_type=torch.float)

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        config = self.freqai_info.get("model_training_parameters", {})
        self.learning_rate: float = config.get("learning_rate", 3e-4)
        self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
        self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary holding all data for train, test,
            labels, weights
        :param dk: The datakitchen object for the current coin/model
        """

        n_features = data_dictionary["train_features"].shape[-1]
        model = PyTorchMLPModel(
            input_dim=n_features,
            output_dim=1,
            **self.model_kwargs
        )
        model.to(self.device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
        criterion = torch.nn.MSELoss()
        init_model = self.get_init_model(dk.pair)
        trainer = PyTorchModelTrainer(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            device=self.device,
            init_model=init_model,
            data_convertor=self.data_convertor,
            **self.trainer_kwargs,
        )
        trainer.fit(data_dictionary, self.splits)
        return trainer
freqtrade/freqai/prediction_models/XGBoostClassifier.py
@@ -18,16 +18,20 @@ logger = logging.getLogger(__name__)

class XGBoostClassifier(BaseClassifierModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        X = data_dictionary["train_features"].to_numpy()

freqtrade/freqai/prediction_models/XGBoostRFClassifier.py
@@ -18,16 +18,20 @@ logger = logging.getLogger(__name__)

class XGBoostRFClassifier(BaseClassifierModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-            all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        X = data_dictionary["train_features"].to_numpy()

freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
@@ -12,16 +12,20 @@ logger = logging.getLogger(__name__)

class XGBoostRFRegressor(BaseRegressionModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        X = data_dictionary["train_features"]

freqtrade/freqai/prediction_models/XGBoostRegressor.py
@@ -12,16 +12,20 @@ logger = logging.getLogger(__name__)

class XGBoostRegressor(BaseRegressionModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        X = data_dictionary["train_features"]

freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py
@@ -13,16 +13,20 @@ logger = logging.getLogger(__name__)

class XGBoostRegressorMultiTarget(BaseRegressionModel):
    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
+    User created prediction model. The class inherits IFreqaiModel, which
+    means it has full access to all Frequency AI functionality. Typically,
+    users would use this to override the common `fit()`, `train()`, or
+    `predict()` methods to add their custom data handling tools or change
+    various aspects of the training that cannot be configured via the
+    top level config.json file.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :param data_dictionary: the dictionary constructed by DataHandler to hold
-                                all the training and test data/labels.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
        """

        xgb = XGBRegressor(**self.model_training_parameters)

freqtrade/freqai/torch/PyTorchDataConvertor.py (new file, 67 lines)
@@ -0,0 +1,67 @@
from abc import ABC, abstractmethod
from typing import List, Optional

import pandas as pd
import torch


class PyTorchDataConvertor(ABC):
    """
    This class is responsible for converting `*_features` & `*_labels` pandas dataframes
    to pytorch tensors.
    """

    @abstractmethod
    def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
        """
        :param df: "*_features" dataframe.
        :param device: The device to use for training (e.g. 'cpu', 'cuda').
        """

    @abstractmethod
    def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
        """
        :param df: "*_labels" dataframe.
        :param device: The device to use for training (e.g. 'cpu', 'cuda').
        """


class DefaultPyTorchDataConvertor(PyTorchDataConvertor):
    """
    A default conversion that keeps the features dataframe shape.
    """

    def __init__(
            self,
            target_tensor_type: Optional[torch.dtype] = None,
            squeeze_target_tensor: bool = False
    ):
        """
        :param target_tensor_type: type of the target tensor; for classification use
            torch.long, for regression use torch.float or torch.double.
        :param squeeze_target_tensor: controls the target shape, used for loss functions
            that require a 0D or 1D target.
        """
        self._target_tensor_type = target_tensor_type
        self._squeeze_target_tensor = squeeze_target_tensor

    def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
        x = torch.from_numpy(df.values).float()
        if device:
            x = x.to(device)

        return [x]

    def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
        y = torch.from_numpy(df.values)

        if self._target_tensor_type:
            y = y.to(self._target_tensor_type)

        if self._squeeze_target_tensor:
            y = y.squeeze()

        if device:
            y = y.to(device)

        return [y]
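A minimal sketch of the convertor round trip with classifier-style settings (toy data):

```python
import pandas as pd
import torch

convertor = DefaultPyTorchDataConvertor(
    target_tensor_type=torch.long,
    squeeze_target_tensor=True,
)

features = pd.DataFrame({"f0": [0.1, 0.2], "f1": [1.0, 2.0]})
labels = pd.DataFrame({"&s-up_or_down": [0, 1]})  # labels already encoded str -> int

x = convertor.convert_x(features)  # [tensor of shape (2, 2), dtype float32]
y = convertor.convert_y(labels)    # [tensor of shape (2,), dtype int64]
```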
freqtrade/freqai/torch/PyTorchMLPModel.py (new file, 97 lines)
@@ -0,0 +1,97 @@
import logging
from typing import List

import torch
import torch.nn as nn


logger = logging.getLogger(__name__)


class PyTorchMLPModel(nn.Module):
    """
    A multi-layer perceptron (MLP) model implemented using PyTorch.

    This class mainly serves as a simple example for the integration of PyTorch models
    into freqai. It is not optimized at all and should not be used for production purposes.

    :param input_dim: The number of input features. This parameter specifies the number
        of features in the input data that the MLP will use to make predictions.
    :param output_dim: The number of output classes. This parameter specifies the number
        of classes that the MLP will predict.
    :param hidden_dim: The number of hidden units in each layer. This parameter controls
        the complexity of the MLP and determines how many nonlinear relationships the MLP
        can represent. Increasing the number of hidden units can increase the capacity of
        the MLP to model complex patterns, but it also increases the risk of overfitting
        the training data. Default: 256
    :param dropout_percent: The dropout rate for regularization. This parameter specifies
        the probability of dropping out a neuron during training to prevent overfitting.
        The dropout rate should be tuned carefully to balance between underfitting and
        overfitting. Default: 0.2
    :param n_layer: The number of layers in the MLP. This parameter specifies the number
        of layers in the MLP architecture. Adding more layers to the MLP can increase its
        capacity to model complex patterns, but it also increases the risk of overfitting
        the training data. Default: 1

    :returns: The output of the MLP, with shape (batch_size, output_dim)
    """

    def __init__(self, input_dim: int, output_dim: int, **kwargs):
        super().__init__()
        hidden_dim: int = kwargs.get("hidden_dim", 256)
        dropout_percent: float = kwargs.get("dropout_percent", 0.2)
        n_layer: int = kwargs.get("n_layer", 1)
        self.input_layer = nn.Linear(input_dim, hidden_dim)
        self.blocks = nn.Sequential(*[Block(hidden_dim, dropout_percent) for _ in range(n_layer)])
        self.output_layer = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_percent)

    def forward(self, tensors: List[torch.Tensor]) -> torch.Tensor:
        x: torch.Tensor = tensors[0]
        x = self.relu(self.input_layer(x))
        x = self.dropout(x)
        x = self.blocks(x)
        x = self.output_layer(x)
        return x


class Block(nn.Module):
    """
    A building block for a multi-layer perceptron (MLP).

    :param hidden_dim: The number of hidden units in the feedforward network.
    :param dropout_percent: The dropout rate for regularization.

    :returns: torch.Tensor with shape (batch_size, hidden_dim)
    """

    def __init__(self, hidden_dim: int, dropout_percent: float):
        super().__init__()
        self.ff = FeedForward(hidden_dim)
        self.dropout = nn.Dropout(p=dropout_percent)
        self.ln = nn.LayerNorm(hidden_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.ff(self.ln(x))
        x = self.dropout(x)
        return x


class FeedForward(nn.Module):
    """
    A simple fully-connected feedforward neural network block.

    :param hidden_dim: The number of hidden units in the block.
    :return: torch.Tensor with shape (batch_size, hidden_dim)
    """

    def __init__(self, hidden_dim: int):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x)
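For orientation, a toy forward pass; note the model takes a *list* of tensors, matching what `PyTorchDataConvertor.convert_x()` returns (dimensions are arbitrary example values):

```python
import torch

model = PyTorchMLPModel(input_dim=10, output_dim=2, hidden_dim=64, n_layer=2)
x = torch.randn(8, 10)   # (batch_size, input_dim)
logits = model([x])      # shape (8, 2): raw logits; softmax is applied later in predict()
```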
freqtrade/freqai/torch/PyTorchModelTrainer.py (new file, 208 lines)
@@ -0,0 +1,208 @@
| import logging | ||||
| import math | ||||
| from pathlib import Path | ||||
| from typing import Any, Dict, List, Optional | ||||
|  | ||||
| import pandas as pd | ||||
| import torch | ||||
| from torch import nn | ||||
| from torch.optim import Optimizer | ||||
| from torch.utils.data import DataLoader, TensorDataset | ||||
|  | ||||
| from freqtrade.freqai.torch.PyTorchDataConvertor import PyTorchDataConvertor | ||||
| from freqtrade.freqai.torch.PyTorchTrainerInterface import PyTorchTrainerInterface | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| class PyTorchModelTrainer(PyTorchTrainerInterface): | ||||
|     def __init__( | ||||
|             self, | ||||
|             model: nn.Module, | ||||
|             optimizer: Optimizer, | ||||
|             criterion: nn.Module, | ||||
|             device: str, | ||||
|             init_model: Dict, | ||||
|             data_convertor: PyTorchDataConvertor, | ||||
|             model_meta_data: Dict[str, Any] = {}, | ||||
|             **kwargs | ||||
|     ): | ||||
|         """ | ||||
|         :param model: The PyTorch model to be trained. | ||||
|         :param optimizer: The optimizer to use for training. | ||||
|         :param criterion: The loss function to use for training. | ||||
|         :param device: The device to use for training (e.g. 'cpu', 'cuda'). | ||||
|         :param init_model: A dictionary containing the initial model/optimizer | ||||
|             state_dicts and model_meta_data saved by the self.save() method. | ||||
|         :param model_meta_data: Additional metadata about the model (optional). | ||||
|         :param data_convertor: converter from pd.DataFrame to torch.Tensor. | ||||
|         :param max_iters: The number of training iterations to run; an iteration | ||||
|             is one call to self.optimizer.step(). Used to calculate n_epochs. | ||||
|         :param batch_size: The size of the batches to use during training. | ||||
|         :param max_n_eval_batches: The maximum number of batches to use for evaluation. | ||||
|         """ | ||||
|         self.model = model | ||||
|         self.optimizer = optimizer | ||||
|         self.criterion = criterion | ||||
|         self.model_meta_data = model_meta_data or {} | ||||
|         self.device = device | ||||
|         self.max_iters: int = kwargs.get("max_iters", 100) | ||||
|         self.batch_size: int = kwargs.get("batch_size", 64) | ||||
|         self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None) | ||||
|         self.data_convertor = data_convertor | ||||
|         if init_model: | ||||
|             self.load_from_checkpoint(init_model) | ||||
|  | ||||
|     def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]): | ||||
|         """ | ||||
|         :param data_dictionary: the dictionary constructed by DataHandler to hold | ||||
|         all the training and test data/labels. | ||||
|         :param splits: splits to use in training; must contain "train", and an | ||||
|         optional "test" split can be added by setting | ||||
|         freqai.data_split_parameters.test_size > 0 in the config file. | ||||
|  | ||||
|          - Calculates the predicted output for the batch using the PyTorch model. | ||||
|          - Calculates the loss between the predicted and actual output using a loss function. | ||||
|          - Computes the gradients of the loss with respect to the model's parameters using | ||||
|            backpropagation. | ||||
|          - Updates the model's parameters using an optimizer. | ||||
|         """ | ||||
|         data_loaders_dictionary = self.create_data_loaders_dictionary(data_dictionary, splits) | ||||
|         epochs = self.calc_n_epochs( | ||||
|             n_obs=len(data_dictionary["train_features"]), | ||||
|             batch_size=self.batch_size, | ||||
|             n_iters=self.max_iters | ||||
|         ) | ||||
|         for epoch in range(1, epochs + 1): | ||||
|             # training | ||||
|             losses = [] | ||||
|             for i, batch_data in enumerate(data_loaders_dictionary["train"]): | ||||
|  | ||||
|                 # Tensor.to() is not in-place; rebind so the batch actually | ||||
|                 # moves to the target device. | ||||
|                 batch_data = [t.to(self.device) for t in batch_data] | ||||
|  | ||||
|                 xb = batch_data[:-1] | ||||
|                 yb = batch_data[-1] | ||||
|                 yb_pred = self.model(xb) | ||||
|                 loss = self.criterion(yb_pred, yb) | ||||
|  | ||||
|                 self.optimizer.zero_grad(set_to_none=True) | ||||
|                 loss.backward() | ||||
|                 self.optimizer.step() | ||||
|                 losses.append(loss.item()) | ||||
|             train_loss = sum(losses) / len(losses) | ||||
|             log_message = f"epoch {epoch}/{epochs}: train loss {train_loss:.4f}" | ||||
|  | ||||
|             # evaluation | ||||
|             if "test" in splits: | ||||
|                 test_loss = self.estimate_loss( | ||||
|                     data_loaders_dictionary, | ||||
|                     self.max_n_eval_batches, | ||||
|                     "test" | ||||
|                 ) | ||||
|                 log_message += f" ; test loss {test_loss:.4f}" | ||||
|  | ||||
|             logger.info(log_message) | ||||
|  | ||||
|     @torch.no_grad() | ||||
|     def estimate_loss( | ||||
|             self, | ||||
|             data_loader_dictionary: Dict[str, DataLoader], | ||||
|             max_n_eval_batches: Optional[int], | ||||
|             split: str, | ||||
|     ) -> float: | ||||
|         self.model.eval() | ||||
|         losses = [] | ||||
|         for i, batch_data in enumerate(data_loader_dictionary[split]): | ||||
|             if max_n_eval_batches is not None and i >= max_n_eval_batches: | ||||
|                 break | ||||
|  | ||||
|             # Tensor.to() is not in-place; rebind to move the batch to the device. | ||||
|             batch_data = [t.to(self.device) for t in batch_data] | ||||
|  | ||||
|             xb = batch_data[:-1] | ||||
|             yb = batch_data[-1] | ||||
|             yb_pred = self.model(xb) | ||||
|             loss = self.criterion(yb_pred, yb) | ||||
|             losses.append(loss.item()) | ||||
|  | ||||
|         self.model.train() | ||||
|         return sum(losses) / len(losses) | ||||
|  | ||||
|     def create_data_loaders_dictionary( | ||||
|             self, | ||||
|             data_dictionary: Dict[str, pd.DataFrame], | ||||
|             splits: List[str] | ||||
|     ) -> Dict[str, DataLoader]: | ||||
|         """ | ||||
|         Converts the input data to PyTorch tensors and wraps them in DataLoaders. | ||||
|         Each batch yielded is a list of tensors; fit() unpacks it as | ||||
|         xb = batch_data[:-1] and yb = batch_data[-1], matching TensorDataset(*x, *y). | ||||
|         """ | ||||
|         data_loader_dictionary = {} | ||||
|         for split in splits: | ||||
|             x = self.data_convertor.convert_x(data_dictionary[f"{split}_features"]) | ||||
|             y = self.data_convertor.convert_y(data_dictionary[f"{split}_labels"]) | ||||
|             dataset = TensorDataset(*x, *y) | ||||
|             data_loader = DataLoader( | ||||
|                 dataset, | ||||
|                 batch_size=self.batch_size, | ||||
|                 shuffle=True, | ||||
|                 drop_last=True, | ||||
|                 num_workers=0, | ||||
|             ) | ||||
|             data_loader_dictionary[split] = data_loader | ||||
|  | ||||
|         return data_loader_dictionary | ||||
|  | ||||
|     @staticmethod | ||||
|     def calc_n_epochs(n_obs: int, batch_size: int, n_iters: int) -> int: | ||||
|         """ | ||||
|         Calculates the number of epochs required to reach the maximum number | ||||
|         of iterations specified in the model training parameters. | ||||
|  | ||||
|         The motivation is that `max_iters` is easier to tune and keep stable | ||||
|         across different values of n_obs - the number of data points. | ||||
|         """ | ||||
|  | ||||
|         # Use true division: `//` already floors, so wrapping it in math.ceil | ||||
|         # would silently under-count batches and epochs. | ||||
|         n_batches = math.ceil(n_obs / batch_size) | ||||
|         epochs = math.ceil(n_iters / n_batches) | ||||
|         if epochs <= 10: | ||||
|             logger.warning("User set `max_iters` in such a way that the trainer will only perform " | ||||
|                            f"{epochs} epochs. Please consider increasing this value accordingly.") | ||||
|             if epochs <= 1: | ||||
|                 logger.warning("Epochs set to 1. Please review your `max_iters` value.") | ||||
|                 epochs = 1 | ||||
|         return epochs | ||||
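|     # Worked example for calc_n_epochs (illustrative numbers, not from this commit): | ||||
|     # n_obs=1000, batch_size=64 -> n_batches = ceil(1000 / 64) = 16; | ||||
|     # max_iters=100 -> epochs = ceil(100 / 16) = 7. | ||||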
|  | ||||
|     def save(self, path: Path): | ||||
|         """ | ||||
|         - Saves the model and optimizer state_dicts. | ||||
|         - Saves model_meta_data, a dict that should contain any additional data the | ||||
|           user needs to store, e.g. class_names for classification models. | ||||
|         """ | ||||
|  | ||||
|         torch.save({ | ||||
|             "model_state_dict": self.model.state_dict(), | ||||
|             "optimizer_state_dict": self.optimizer.state_dict(), | ||||
|             "model_meta_data": self.model_meta_data, | ||||
|             "pytrainer": self | ||||
|         }, path) | ||||
|  | ||||
|     def load(self, path: Path): | ||||
|         checkpoint = torch.load(path) | ||||
|         return self.load_from_checkpoint(checkpoint) | ||||
|  | ||||
|     def load_from_checkpoint(self, checkpoint: Dict): | ||||
|         """ | ||||
|         When using continual_learning, the DataDrawer loads the checkpoint dictionary | ||||
|         (containing the state_dicts and model_meta_data) by calling torch.load(path). | ||||
|         This dict can be accessed from any class that inherits IFreqaiModel by calling | ||||
|         the get_init_model method. | ||||
|         """ | ||||
|         self.model.load_state_dict(checkpoint["model_state_dict"]) | ||||
|         self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) | ||||
|         self.model_meta_data = checkpoint["model_meta_data"] | ||||
|         return self | ||||
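For orientation, a minimal sketch of how the trainer is wired together. The model, optimizer, `MyDataConvertor` and `data_dictionary` below are illustrative stand-ins (any concrete PyTorchDataConvertor implementation would do); none of them are part of this commit:

```
from pathlib import Path

import torch
from torch import nn

from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchModelTrainer

# MyDataConvertor is a hypothetical PyTorchDataConvertor implementation.
model = nn.Linear(10, 1)
trainer = PyTorchModelTrainer(
    model=model,
    optimizer=torch.optim.AdamW(model.parameters(), lr=3e-4),
    criterion=nn.MSELoss(),
    device="cpu",
    init_model=None,                  # no checkpoint -> train from scratch
    data_convertor=MyDataConvertor(),
    max_iters=100,
    batch_size=64,
)
trainer.fit(data_dictionary, splits=["train"])  # data_dictionary from FreqaiDataKitchen
trainer.save(Path("model.zip"))
```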
freqtrade/freqai/torch/PyTorchTrainerInterface.py (53 lines, new file)
							| @@ -0,0 +1,53 @@ | ||||
| from abc import ABC, abstractmethod | ||||
| from pathlib import Path | ||||
| from typing import Dict, List | ||||
|  | ||||
| import pandas as pd | ||||
| import torch | ||||
|  | ||||
|  | ||||
| class PyTorchTrainerInterface(ABC): | ||||
|  | ||||
|     @abstractmethod | ||||
|     def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]) -> None: | ||||
|         """ | ||||
|         :param data_dictionary: the dictionary constructed by DataHandler to hold | ||||
|         all the training and test data/labels. | ||||
|         :param splits: splits to use in training; must contain "train", and an | ||||
|         optional "test" split can be added by setting | ||||
|         freqai.data_split_parameters.test_size > 0 in the config file. | ||||
|  | ||||
|          - Calculates the predicted output for the batch using the PyTorch model. | ||||
|          - Calculates the loss between the predicted and actual output using a loss function. | ||||
|          - Computes the gradients of the loss with respect to the model's parameters using | ||||
|            backpropagation. | ||||
|          - Updates the model's parameters using an optimizer. | ||||
|         """ | ||||
|  | ||||
|     @abstractmethod | ||||
|     def save(self, path: Path) -> None: | ||||
|         """ | ||||
|         - Saves the model and optimizer state_dicts. | ||||
|         - Saves model_meta_data, a dict that should contain any additional data the | ||||
|           user needs to store, e.g. class_names for classification models. | ||||
|         """ | ||||
|  | ||||
|     def load(self, path: Path) -> "PyTorchTrainerInterface": | ||||
|         """ | ||||
|         :param path: path to the checkpoint file saved by self.save(). | ||||
|         :returns: the trainer itself, with model/optimizer state restored. | ||||
|         """ | ||||
|         """ | ||||
|         checkpoint = torch.load(path) | ||||
|         return self.load_from_checkpoint(checkpoint) | ||||
|  | ||||
|     @abstractmethod | ||||
|     def load_from_checkpoint(self, checkpoint: Dict) -> "PyTorchTrainerInterface": | ||||
|         """ | ||||
|         When using continual_learning, the DataDrawer loads the checkpoint dictionary | ||||
|         (containing the state_dicts and model_meta_data) by calling torch.load(path). | ||||
|         This dict can be accessed from any class that inherits IFreqaiModel by calling | ||||
|         the get_init_model method. | ||||
|         :param checkpoint: dict containing the model & optimizer state_dicts, | ||||
|         model_meta_data, etc. | ||||
|         """ | ||||
freqtrade/freqai/torch/__init__.py (0 lines, new file)
							| @@ -223,6 +223,7 @@ class FreqaiExampleHybridStrategy(IStrategy): | ||||
|         :param metadata: metadata of current pair | ||||
|         usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"] | ||||
|         """ | ||||
|         self.freqai.class_names = ["down", "up"] | ||||
|         dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-50) > | ||||
|                                               dataframe["close"], 'up', 'down') | ||||
|  | ||||
|   | ||||