From 6193205012f681d233933fee50df66f33b63ddcc Mon Sep 17 00:00:00 2001 From: robcaulk Date: Thu, 26 May 2022 21:07:50 +0200 Subject: [PATCH] fix bug for target_mean/std array merging in backtesting --- config_examples/config_freqai.example.json | 19 ++- docs/freqai.md | 106 +++++++++++--- freqtrade/freqai/data_kitchen.py | 131 ++++++++++-------- freqtrade/freqai/freqai_interface.py | 23 +-- .../CatboostPredictionModel.py | 13 +- freqtrade/templates/FreqaiExampleStrategy.py | 4 +- 6 files changed, 186 insertions(+), 110 deletions(-) diff --git a/config_examples/config_freqai.example.json b/config_examples/config_freqai.example.json index 7582afef0..b6c7ba7d8 100644 --- a/config_examples/config_freqai.example.json +++ b/config_examples/config_freqai.example.json @@ -1,7 +1,7 @@ { "max_open_trades": 1, "stake_currency": "USDT", - "stake_amount": 800, + "stake_amount": 900, "tradable_balance_ratio": 1, "fiat_display_currency": "USD", "dry_run": true, @@ -24,8 +24,7 @@ "rateLimit": 200 }, "pair_whitelist": [ - "BTC/USDT", - "ETH/USDT" + "BTC/USDT" ], "pair_blacklist": [] }, @@ -55,7 +54,7 @@ "15m" ], "train_period": 30, - "backtest_period": 10, + "backtest_period": 7, "identifier": "example", "live_trained_timestamp": 0, "corr_pairlist": [ @@ -64,16 +63,16 @@ "DOT/USDT" ], "feature_parameters": { - "period": 12, + "period": 24, "shift": 1, - "DI_threshold": 1, - "weight_factor": 0, + "DI_threshold": 0, + "weight_factor": 0.9, "principal_component_analysis": false, - "use_SVM_to_remove_outliers": false, - "stratify": 0 + "use_SVM_to_remove_outliers": true, + "stratify": 3 }, "data_split_parameters": { - "test_size": 0.25, + "test_size": 0.33, "random_state": 1 }, "model_training_parameters": { diff --git a/docs/freqai.md b/docs/freqai.md index 403145525..821f42258 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -221,33 +221,43 @@ This way, the user can return to using any model they wish by simply changing th ### Building a freqai strategy -The Freqai strategy 
requires the user to include the following lines of code in `populate_ any _indicators()` +The Freqai strategy requires the user to include the following lines of code in the strategy: ```python - from freqtrade.freqai.strategy_bridge import CustomModel + from freqtrade.freqai.strategy_bridge import CustomModel - def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: - # the configuration file parameters are stored here - self.freqai_info = self.config['freqai'] + def informative_pairs(self): + whitelist_pairs = self.dp.current_whitelist() + corr_pairs = self.config["freqai"]["corr_pairlist"] + informative_pairs = [] + for tf in self.config["freqai"]["timeframes"]: + for pair in whitelist_pairs: + informative_pairs.append((pair, tf)) + for pair in corr_pairs: + if pair in whitelist_pairs: + continue # avoid duplication + informative_pairs.append((pair, tf)) + return informative_pairs - # the model is instantiated here - self.model = CustomModel(self.config) + def bot_start(self): + self.model = CustomModel(self.config) - print('Populating indicators...') + def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + self.freqai_info = self.config['freqai'] - # the following loops are necessary for building the features - # indicated by the user in the configuration file. - for tf in self.freqai_info['timeframes']: - for i in self.freqai_info['corr_pairlist']: - dataframe = self.populate_any_indicators(i, - dataframe.copy(), tf, coin=i.split("/")[0]+'-') + # the following loops are necessary for building the features + # indicated by the user in the configuration file. 
+ for tf in self.freqai_info['timeframes']: + for i in self.freqai_info['corr_pairlist']: + dataframe = self.populate_any_indicators(i, + dataframe.copy(), tf, coin=i.split("/")[0]+'-') - # the model will return 4 values, its prediction, an indication of whether or not the prediction - # should be accepted, the target mean/std values from the labels used during each training period. - (dataframe['prediction'], dataframe['do_predict'], - dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata) + # the model will return 4 values, its prediction, an indication of whether or not the prediction + # should be accepted, the target mean/std values from the labels used during each training period. + (dataframe['prediction'], dataframe['do_predict'], + dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata) - return dataframe + return dataframe ``` The user should also include `populate_any_indicators()` from `templates/FreqaiExampleStrategy.py` which builds @@ -314,7 +324,7 @@ data point and all other training data points: $$ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} $$ -where $d_{ab}$ is the distance between the standardized points $a$ and $b$. $p$ +where $d_{ab}$ is the distance between the normalized points $a$ and $b$. $p$ is the number of features i.e. the length of the vector $X$. The characteristic distance, $\overline{d}$ for a set of training data points is simply the mean of the average distances: @@ -392,13 +402,63 @@ The user can stratify the training/testing data using: which will split the data chronolocially so that every X data points is a testing data point. In the present example, the user is asking for every third data point in the dataframe to be used for -testing, the other points are used for training. +testing, the other points are used for training. 
+ + + + ## Additional information -### Feature standardization +### Feature normalization -The feature set created by the user is automatically standardized to the training +The feature set created by the user is automatically normalized to the training data only. This includes all test data and unseen prediction data (dry/live/backtest). ### File structure diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index eafb9cc46..b5f1f6edb 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -141,9 +141,9 @@ class FreqaiDataKitchen: :model: User trained model which can be inferenced for new predictions """ - # if self.live: - self.model_filename = self.data_drawer.pair_dict[coin]['model_filename'] - self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path']) + if self.live: + self.model_filename = self.data_drawer.pair_dict[coin]['model_filename'] + self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path']) with open(self.data_path / str(self.model_filename + "_metadata.json"), "r") as fp: self.data = json.load(fp) @@ -329,42 +329,6 @@ class FreqaiDataKitchen: :data_dictionary: updated dictionary with standardized values. 
""" # standardize the data by training stats - train_mean = data_dictionary["train_features"].mean() - train_std = data_dictionary["train_features"].std() - data_dictionary["train_features"] = ( - data_dictionary["train_features"] - train_mean - ) / train_std - data_dictionary["test_features"] = ( - data_dictionary["test_features"] - train_mean - ) / train_std - - train_labels_std = data_dictionary["train_labels"].std() - train_labels_mean = data_dictionary["train_labels"].mean() - data_dictionary["train_labels"] = ( - data_dictionary["train_labels"] - train_labels_mean - ) / train_labels_std - data_dictionary["test_labels"] = ( - data_dictionary["test_labels"] - train_labels_mean - ) / train_labels_std - - for item in train_std.keys(): - self.data[item + "_std"] = train_std[item] - self.data[item + "_mean"] = train_mean[item] - - self.data["labels_std"] = train_labels_std - self.data["labels_mean"] = train_labels_mean - - return data_dictionary - - def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: - """ - Standardize all data in the data_dictionary according to the training dataset - :params: - :data_dictionary: dictionary containing the cleaned and split training/test data/labels - :returns: - :data_dictionary: updated dictionary with standardized values. - """ - # standardize the data by training stats train_max = data_dictionary["train_features"].max() train_min = data_dictionary["train_features"].min() data_dictionary["train_features"] = 2 * ( @@ -392,9 +356,9 @@ class FreqaiDataKitchen: return data_dictionary - def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame: + def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame: """ - Standardizes a set of data using the mean and standard deviation from + Normalize a set of data using the mean and standard deviation from the associated training data. 
:params: :df: Dataframe to be standardized @@ -406,19 +370,6 @@ class FreqaiDataKitchen: return df - def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame: - """ - Normalizes a set of data using the mean and standard deviation from - the associated training data. - :params: - :df: Dataframe to be standardized - """ - - for item in df.keys(): - df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"] - - return df - def split_timerange( self, tr: str, train_split: int = 28, bt_split: int = 7 ) -> Tuple[list, list]: @@ -657,12 +608,12 @@ class FreqaiDataKitchen: """ ones = np.ones(len_dataframe) - s_mean, s_std = ones * self.data["s_mean"], ones * self.data["s_std"] + target_mean, target_std = ones * self.data["target_mean"], ones * self.data["target_std"] self.full_predictions = np.append(self.full_predictions, predictions) self.full_do_predict = np.append(self.full_do_predict, do_predict) - self.full_target_mean = np.append(self.full_target_mean, s_mean) - self.full_target_std = np.append(self.full_target_std, s_std) + self.full_target_mean = np.append(self.full_target_mean, target_mean) + self.full_target_std = np.append(self.full_target_std, target_std) return @@ -827,6 +778,23 @@ class FreqaiDataKitchen: return dataframe + def fit_labels(self) -> None: + import scipy as spy + + f = spy.stats.norm.fit(self.data_dictionary["train_labels"]) + + # KEEPME incase we want to let user start to grab quantiles. 
+ # upper_q = spy.stats.norm.ppf(self.freqai_config['feature_parameters'][ + # 'target_quantile'], *f) + # lower_q = spy.stats.norm.ppf(1 - self.freqai_config['feature_parameters'][ + # 'target_quantile'], *f) + + self.data["target_mean"], self.data["target_std"] = f[0], f[1] + # self.data["upper_quantile"] = upper_q + # self.data["lower_quantile"] = lower_q + + return + def np_encoder(self, object): if isinstance(object, np.generic): return object.item() @@ -968,3 +936,52 @@ class FreqaiDataKitchen: # ) # return + + # def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: + # """ + # standardize all data in the data_dictionary according to the training dataset + # :params: + # :data_dictionary: dictionary containing the cleaned and split training/test data/labels + # :returns: + # :data_dictionary: updated dictionary with standardized values. + # """ + # # standardize the data by training stats + # train_mean = data_dictionary["train_features"].mean() + # train_std = data_dictionary["train_features"].std() + # data_dictionary["train_features"] = ( + # data_dictionary["train_features"] - train_mean + # ) / train_std + # data_dictionary["test_features"] = ( + # data_dictionary["test_features"] - train_mean + # ) / train_std + + # train_labels_std = data_dictionary["train_labels"].std() + # train_labels_mean = data_dictionary["train_labels"].mean() + # data_dictionary["train_labels"] = ( + # data_dictionary["train_labels"] - train_labels_mean + # ) / train_labels_std + # data_dictionary["test_labels"] = ( + # data_dictionary["test_labels"] - train_labels_mean + # ) / train_labels_std + + # for item in train_std.keys(): + # self.data[item + "_std"] = train_std[item] + # self.data[item + "_mean"] = train_mean[item] + + # self.data["labels_std"] = train_labels_std + # self.data["labels_mean"] = train_labels_mean + + # return data_dictionary + + # def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame: + # """ + # Normalizes a set of data 
using the mean and standard deviation from + # the associated training data. + # :params: + # :df: Dataframe to be standardized + # """ + + # for item in df.keys(): + # df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"] + + # return df diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index d7bbc549a..68d21ecdc 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -158,12 +158,7 @@ class IFreqaiModel(ABC): else: self.model = dh.load_data(metadata['pair']) - # strategy_provided_features = self.dh.find_features(dataframe_train) - # # FIXME doesnt work with PCA - # if strategy_provided_features != self.dh.training_features_list: - # logger.info("User changed input features, retraining model.") - # self.model = self.train(dataframe_train, metadata) - # self.dh.save_data(self.model) + self.check_if_feature_list_matches_strategy(dataframe_train, dh) preds, do_preds = self.predict(dataframe_backtest, dh) @@ -220,16 +215,23 @@ class IFreqaiModel(ABC): self.model = dh.load_data(coin=metadata['pair']) - # FIXME - # strategy_provided_features = dh.find_features(dataframe) - # if strategy_provided_features != dh.training_features_list: - # self.train_model_in_series(new_trained_timerange, metadata, strategy) + self.check_if_feature_list_matches_strategy(dataframe, dh) preds, do_preds = self.predict(dataframe, dh) dh.append_predictions(preds, do_preds, len(dataframe)) return dh + def check_if_feature_list_matches_strategy(self, dataframe: DataFrame, + dh: FreqaiDataKitchen) -> None: + strategy_provided_features = dh.find_features(dataframe) + if strategy_provided_features != dh.training_features_list: + raise OperationalException("Trying to access pretrained model with `identifier` " + "but found different features furnished by current strategy." 
+ " Change `identifier` to train from scratch, or ensure the " + "strategy is furnishing the same features as the pretrained " + "model") + def data_cleaning_train(self, dh: FreqaiDataKitchen) -> None: """ Base data cleaning method for train @@ -237,6 +239,7 @@ class IFreqaiModel(ABC): based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example of how outlier data points are dropped from the dataframe used for training. """ + if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'): dh.principal_component_analysis() diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py index 73ea46032..3f70400d8 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -33,10 +33,6 @@ class CatboostPredictionModel(IFreqaiModel): / dataframe["close"] - 1 ) - dh.data["s_mean"] = dataframe["s"].mean() - dh.data["s_std"] = dataframe["s"].std() - - # logger.info("label mean", dh.data["s_mean"], "label std", dh.data["s_std"]) return dataframe["s"] @@ -68,8 +64,9 @@ class CatboostPredictionModel(IFreqaiModel): # split data into train/test data. 
data_dictionary = dh.make_train_test_datasets(features_filtered, labels_filtered) - # standardize all data based on train_dataset only - data_dictionary = dh.standardize_data(data_dictionary) + dh.fit_labels() # fit labels to a normal distribution so we know what to expect in strategy + # normalize all data based on train_dataset only + data_dictionary = dh.normalize_data(data_dictionary) # optional additional data cleaning/analysis self.data_cleaning_train(dh) @@ -128,7 +125,7 @@ class CatboostPredictionModel(IFreqaiModel): filtered_dataframe, _ = dh.filter_features( unfiltered_dataframe, original_feature_list, training_filter=False ) - filtered_dataframe = dh.standardize_data_from_metadata(filtered_dataframe) + filtered_dataframe = dh.normalize_data_from_metadata(filtered_dataframe) dh.data_dictionary["prediction_features"] = filtered_dataframe # optional additional data cleaning/analysis @@ -136,7 +133,7 @@ class CatboostPredictionModel(IFreqaiModel): predictions = self.model.predict(dh.data_dictionary["prediction_features"]) - # compute the non-standardized predictions + # compute the non-normalized predictions dh.predictions = (predictions + 1) * (dh.data["labels_max"] - dh.data["labels_min"]) / 2 + dh.data["labels_min"] diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index d2eb2c306..ed7c828cc 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -178,8 +178,8 @@ class FreqaiExampleStrategy(IStrategy): dataframe["target_std"], ) = self.model.bridge.start(dataframe, metadata, self) - dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.5 - dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1 + dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] + dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] return dataframe def populate_buy_trend(self, 
dataframe: DataFrame, metadata: dict) -> DataFrame: