From fea63fba12ffc0a52bf0eae89b509035ecf2fd7b Mon Sep 17 00:00:00 2001
From: Robert Caulk
Date: Tue, 12 Jul 2022 10:12:50 +0200
Subject: [PATCH 1/3] Fix saving/loading historic predictions

---
Review notes (not part of the commit message):
- The existence check in the load path now tests for historic_predictions.pkl,
  the same file that is opened right after it. It previously tested for the
  .json name, which this patch stops writing.
- Only save_historic_predictions_to_disk() is switched to pickle. The method
  defined just above it (its def line is outside the hunk) keeps writing
  pair_dictionary.json; the submitted hunk swapped the two bodies.
- pickle.load() can execute arbitrary code. Only load files the bot wrote
  itself.
- The post-image blob id on the data_drawer.py index line predates these
  adjustments.

 freqtrade/freqai/data_drawer.py                    | 14 +++++++-------
 .../prediction_models/BaseRegressionModel.py       |  3 ++-
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py
index 2278cf310..b65d8623a 100644
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -1,13 +1,13 @@
 import collections
 import json
 import logging
+import pickle
 import re
 import shutil
 import threading
 from pathlib import Path
 from typing import Any, Dict, Tuple
 
-# import pickle as pk
 import numpy as np
 import pandas as pd
 from pandas import DataFrame
@@ -78,8 +78,8 @@ class FreqaiDataDrawer:
         """
-        exists = Path(self.full_path / str("historic_predictions.json")).resolve().exists()
+        exists = Path(self.full_path / str("historic_predictions.pkl")).resolve().exists()
         if exists:
-            with open(self.full_path / str("historic_predictions.json"), "r") as fp:
-                self.pair_dict = json.load(fp)
+            with open(self.full_path / str("historic_predictions.pkl"), "rb") as fp:
+                self.historic_predictions = pickle.load(fp)
             logger.info(f"Found existing historic predictions at {self.full_path}, but beware of "
                         "that statistics may be inaccurate if the bot has been offline for "
                         "an extended period of time.")
@@ -103,9 +103,9 @@ class FreqaiDataDrawer:
     def save_historic_predictions_to_disk(self):
         """
-        Save data drawer full of all pair model metadata in present model folder.
+        Save the historic predictions to disk in the present model folder.
         """
-        with open(self.full_path / str("historic_predictions.json"), "w") as fp:
-            json.dump(self.historic_predictions, fp, default=self.np_encoder)
+        with open(self.full_path / str("historic_predictions.pkl"), "wb") as fp:
+            pickle.dump(self.historic_predictions, fp, protocol=pickle.HIGHEST_PROTOCOL)
 
     def save_follower_dict_to_disk(self):
         """
diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/prediction_models/BaseRegressionModel.py
index 2097cb556..260e24182 100644
--- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py
+++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py
@@ -71,7 +71,8 @@ class BaseRegressionModel(IFreqaiModel):
                 data_dictionary['train_features'], model, dk, pair)
         elif self.freqai_info.get('fit_live_predictions_candles', 0):
             dk.fit_live_predictions()
-            self.dd.save_historic_predictions_to_disk()
+
+        self.dd.save_historic_predictions_to_disk()
 
         logger.info(f"--------------------done training {pair}--------------------")
 

From ef409dd34550ba56c4c3f2122a44185042f08bfb Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Tue, 12 Jul 2022 18:09:17 +0200
Subject: [PATCH 2/3] Add ground work for TensorFlow models, add protections from common mistakes

---
 freqtrade/freqai/data_kitchen.py              | 16 +++++++---
 freqtrade/freqai/freqai_interface.py          | 32 ++++++++++++-------
 .../prediction_models/BaseRegressionModel.py  |  6 ++--
 freqtrade/resolvers/freqaimodel_resolver.py   | 11 +++++--
 4 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 1f78df0f8..56c1a67ed 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -57,6 +57,7 @@ class FreqaiDataKitchen:
         self.live = live
         self.pair = pair
         self.svm_model: linear_model.SGDOneClassSVM = None
+        self.keras = self.freqai_config.get("keras", False)
         self.set_all_pairs()
         if not self.live:
             self.full_timerange = self.create_fulltimerange(
@@ -92,7 +93,7 @@ class FreqaiDataKitchen:
 
         return
 
-    def
save_data(self, model: Any, coin: str = "", keras_model=False, label=None) -> None: + def save_data(self, model: Any, coin: str = "", label=None) -> None: """ Saves all data associated with a model for a single sub-train time range :params: @@ -106,7 +107,7 @@ class FreqaiDataKitchen: save_path = Path(self.data_path) # Save the trained model - if not keras_model: + if not self.keras: dump(model, save_path / f"{self.model_filename}_model.joblib") else: model.save(save_path / f"{self.model_filename}_model.h5") @@ -140,7 +141,7 @@ class FreqaiDataKitchen: return - def load_data(self, coin: str = "", keras_model=False) -> Any: + def load_data(self, coin: str = "") -> Any: """ loads all data required to make a prediction on a sub-train time range :returns: @@ -174,7 +175,7 @@ class FreqaiDataKitchen: # try to access model in memory instead of loading object from disk to save time if self.live and self.model_filename in self.dd.model_dictionary: model = self.dd.model_dictionary[self.model_filename] - elif not keras_model: + elif not self.keras: model = load(self.data_path / str(self.model_filename + "_model.joblib")) else: from tensorflow import keras @@ -559,6 +560,13 @@ class FreqaiDataKitchen: predict: bool = If true, inference an existing SVM model, else construct one """ + if self.keras: + logger.warning("SVM outlier removal not currently supported for Keras based models. 
" + "Skipping user requested function.") + if predict: + self.do_predict = np.ones(len(self.data_dictionary["prediction_features"])) + return + if predict: assert self.svm_model, "No svm model available for outlier removal" y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"]) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index b03b1f3b0..56a179dc3 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -69,6 +69,9 @@ class IFreqaiModel(ABC): self.ready_to_scan = False self.first = True self.keras = self.freqai_info.get("keras", False) + if self.keras and self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0): + self.freqai_info["feature_parameters"]["DI_threshold"] = 0 + logger.warning("DI threshold is not configured for Keras models yet. Deactivating.") self.CONV_WIDTH = self.freqai_info.get("conv_width", 2) def assert_config(self, config: Dict[str, Any]) -> None: @@ -197,9 +200,9 @@ class IFreqaiModel(ABC): self.model = self.train(dataframe_train, metadata["pair"], dk) self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = trained_timestamp.stopts dk.set_new_model_names(metadata["pair"], trained_timestamp) - dk.save_data(self.model, metadata["pair"], keras_model=self.keras) + dk.save_data(self.model, metadata["pair"]) else: - self.model = dk.load_data(metadata["pair"], keras_model=self.keras) + self.model = dk.load_data(metadata["pair"]) self.check_if_feature_list_matches_strategy(dataframe_train, dk) @@ -276,7 +279,7 @@ class IFreqaiModel(ABC): ) # load the model and associated data into the data kitchen - self.model = dk.load_data(coin=metadata["pair"], keras_model=self.keras) + self.model = dk.load_data(coin=metadata["pair"]) if not self.model: logger.warning( @@ -353,13 +356,15 @@ class IFreqaiModel(ABC): of how outlier data points are dropped from the dataframe used for training. 
""" - if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"): + if self.freqai_info.get("feature_parameters", {}).get( + "principal_component_analysis", False + ): dk.principal_component_analysis() - if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers"): + if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers", False): dk.use_SVM_to_remove_outliers(predict=False) - if self.freqai_info.get("feature_parameters", {}).get("DI_threshold"): + if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0): dk.data["avg_mean_dist"] = dk.compute_distances() # if self.feature_parameters["determine_statistical_distributions"]: @@ -378,13 +383,15 @@ class IFreqaiModel(ABC): of how the do_predict vector is modified. do_predict is ultimately passed back to strategy for buy signals. """ - if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"): + if self.freqai_info.get("feature_parameters", {}).get( + "principal_component_analysis", False + ): dk.pca_transform(dataframe) - if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers"): + if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers", False): dk.use_SVM_to_remove_outliers(predict=True) - if self.freqai_info.get("feature_parameters", {}).get("DI_threshold"): + if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0): dk.check_if_pred_in_training_spaces() # if self.feature_parameters["determine_statistical_distributions"]: @@ -479,14 +486,15 @@ class IFreqaiModel(ABC): if self.dd.pair_dict[pair]["priority"] == 1 and self.scanning: with self.lock: self.dd.pair_to_end_of_training_queue(pair) - dk.save_data(model, coin=pair, keras_model=self.keras) + dk.save_data(model, coin=pair) if self.freqai_info.get("purge_old_models", False): self.dd.purge_old_models() # self.retrain = False - def set_initial_historic_predictions(self, df: 
DataFrame, model: Any, - dk: FreqaiDataKitchen, pair: str) -> None: + def set_initial_historic_predictions( + self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str + ) -> None: trained_predictions = model.predict(df) pred_df = DataFrame(trained_predictions, columns=dk.label_list) for label in dk.label_list: diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/prediction_models/BaseRegressionModel.py index 260e24182..f9a9bb69f 100644 --- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py +++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py @@ -12,9 +12,9 @@ logger = logging.getLogger(__name__) class BaseRegressionModel(IFreqaiModel): """ - User created prediction model. The class needs to override three necessary - functions, predict(), train(), fit(). The class inherits ModelHandler which - has its own DataHandler where data is held, saved, loaded, and managed. + Base class for regression type models (e.g. Catboost, LightGBM, XGboost etc.). + User *must* inherit from this class and set fit() and predict(). See example scripts + such as prediction_models/CatboostPredictionModel.py for guidance. 
""" def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: diff --git a/freqtrade/resolvers/freqaimodel_resolver.py b/freqtrade/resolvers/freqaimodel_resolver.py index e666b462c..0fcfca363 100644 --- a/freqtrade/resolvers/freqaimodel_resolver.py +++ b/freqtrade/resolvers/freqaimodel_resolver.py @@ -24,8 +24,9 @@ class FreqaiModelResolver(IResolver): object_type = IFreqaiModel object_type_str = "FreqaiModel" user_subdir = USERPATH_FREQAIMODELS - initial_search_path = Path(__file__).parent.parent.joinpath( - "freqai/prediction_models").resolve() + initial_search_path = ( + Path(__file__).parent.parent.joinpath("freqai/prediction_models").resolve() + ) @staticmethod def load_freqaimodel(config: Dict) -> IFreqaiModel: @@ -33,6 +34,7 @@ class FreqaiModelResolver(IResolver): Load the custom class from config parameter :param config: configuration dictionary """ + disallowed_models = ["BaseRegressionModel", "BaseTensorFlowModel"] freqaimodel_name = config.get("freqaimodel") if not freqaimodel_name: @@ -40,6 +42,11 @@ class FreqaiModelResolver(IResolver): "No freqaimodel set. Please use `--freqaimodel` to " "specify the FreqaiModel class to use.\n" ) + if freqaimodel_name in disallowed_models: + raise OperationalException( + f"{freqaimodel_name} is a baseclass and cannot be used directly. 
User must choose "
+                "an existing child class or inherit from this baseclass.\n"
+            )
         freqaimodel = FreqaiModelResolver.load_object(
             freqaimodel_name,
             config,

From 4141d165ff6ee7dab0d141c680cf956d18e142a2 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Tue, 12 Jul 2022 19:10:09 +0200
Subject: [PATCH 3/3] add BaseTensorFlowModel class

---
Review notes (not part of the commit message):
- train() is annotated as returning Any: it returns the object produced by
  self.fit(), not a tuple of DataFrames.
- The train() docstring now lists the parameters the method actually takes
  (unfiltered_dataframe, pair, dk).
- train() reads 'fit_live_predictions' in one place and
  'fit_live_predictions_candles' in another; left unchanged and flagged with
  a NOTE(review) comment for the author to confirm.
- The new-file blob id on the index line predates these adjustments.

 .../prediction_models/BaseTensorFlowModel.py  | 82 +++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 freqtrade/freqai/prediction_models/BaseTensorFlowModel.py

diff --git a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py
new file mode 100644
index 000000000..098ff24dd
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py
@@ -0,0 +1,82 @@
+import logging
+from typing import Any
+
+from pandas import DataFrame
+
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+from freqtrade.freqai.freqai_interface import IFreqaiModel
+
+
+logger = logging.getLogger(__name__)
+
+
+class BaseTensorFlowModel(IFreqaiModel):
+    """
+    Base class for TensorFlow type models.
+    User *must* inherit from this class and set fit() and predict().
+    """
+
+    def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
+        """
+        User uses this function to add any additional return values to the dataframe.
+        e.g.
+        dataframe['volatility'] = dk.volatility_values
+        """
+
+        return dataframe
+
+    def train(
+        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
+    ) -> Any:
+        """
+        Filter the training data and train a model to it. Train makes heavy use of the datakitchen
+        for storing, saving, loading, and analyzing the data.
+        :params:
+        :unfiltered_dataframe: Full dataframe for the current training period
+        :pair: pair the model is trained for
+        :dk: data kitchen used here to filter, split and normalize the training data
+        :returns:
+        :model: Trained model which can be used to inference (self.predict)
+        """
+
+        logger.info("--------------------Starting training " f"{pair} --------------------")
+
+        # filter the features requested by user in the configuration file and elegantly handle NaNs
+        features_filtered, labels_filtered = dk.filter_features(
+            unfiltered_dataframe,
+            dk.training_features_list,
+            dk.label_list,
+            training_filter=True,
+        )
+
+        # split data into train/test data.
+        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
+        # NOTE(review): this reads 'fit_live_predictions' while the branch after fit()
+        # reads 'fit_live_predictions_candles' - confirm which key is intended.
+        if not self.freqai_info.get('fit_live_predictions', 0):
+            dk.fit_labels()
+        # normalize all data based on train_dataset only
+        data_dictionary = dk.normalize_data(data_dictionary)
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_train(dk)
+
+        logger.info(
+            f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
+        )
+        logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
+
+        model = self.fit(data_dictionary)
+
+        # a pair without historic predictions gets them initialized from the training features
+        if pair not in self.dd.historic_predictions:
+            self.set_initial_historic_predictions(
+                data_dictionary['train_features'], model, dk, pair)
+        elif self.freqai_info.get('fit_live_predictions_candles', 0):
+            dk.fit_live_predictions()
+
+        self.dd.save_historic_predictions_to_disk()
+
+        logger.info(f"--------------------done training {pair}--------------------")
+
+        return model