From 81f227cd9e82d321db4f0f70a54476576b246b8f Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 9 May 2022 17:01:49 +0200 Subject: [PATCH] create more flexible whitelist, avoid duplicating whitelist features into corr_pairlist, update docs --- config_examples/config_freqai.example.json | 5 +- docs/freqai.md | 82 ++++++++++++++++--- freqtrade/freqai/data_kitchen.py | 47 +++++++---- freqtrade/freqai/freqai_interface.py | 12 +-- freqtrade/templates/ExamplePredictionModel.py | 7 +- freqtrade/templates/FreqaiExampleStrategy.py | 5 +- 6 files changed, 119 insertions(+), 39 deletions(-) diff --git a/config_examples/config_freqai.example.json b/config_examples/config_freqai.example.json index 351585d17..d89c835b1 100644 --- a/config_examples/config_freqai.example.json +++ b/config_examples/config_freqai.example.json @@ -24,7 +24,8 @@ "rateLimit": 200 }, "pair_whitelist": [ - "BTC/USDT" + "BTC/USDT", + "ETH/USDT" ], "pair_blacklist": [] }, @@ -55,7 +56,7 @@ ], "train_period": 30, "backtest_period": 7, - "identifier": "livetest5", + "identifier": "new_corrlist", "live_trained_timerange": "20220330-20220429", "live_full_backtestrange": "20220302-20220501", "base_features": [ diff --git a/docs/freqai.md b/docs/freqai.md index 844881613..431705dd9 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -65,8 +65,6 @@ config setup includes: "feature_parameters" : { "period": 24, "shift": 2, - "drop_features": false, - "DI_threshold": 1, "weight_factor": 0, }, "data_split_parameters" : { @@ -79,8 +77,7 @@ config setup includes: "learning_rate": 0.02, "task_type": "CPU", }, - }, - + } ``` ### Building the feature set @@ -153,8 +150,6 @@ The Freqai strategy requires the user to include the following lines of code in # the following loops are necessary for building the features # indicated by the user in the configuration file. 
for tf in self.freqai_info['timeframes']: - dataframe = self.populate_any_indicators(metadata['pair'], - dataframe.copy(), tf) for i in self.freqai_info['corr_pairlist']: dataframe = self.populate_any_indicators(i, dataframe.copy(), tf, coin=i.split("/")[0]+'-') @@ -177,8 +172,36 @@ and `make_labels()` to let them customize various aspects of their training proc ### Running the model live -TODO: Freqai is not automated for live yet. +Freqai can be run dry/live using the following command +```bash +freqtrade trade --strategy FreqaiExampleStrategy --config config_freqai.example.json --freqaimodel ExamplePredictionModel +``` + +By default, Freqai will not find any existing models and will start by training a new one +given the user configuration settings. Following training, it will use that model to predict for the +duration of `backtest_period`. After a full `backtest_period` has elapsed, Freqai will auto retrain +a new model, and begin making predictions with the updated model. + +If the user wishes to start dry/live from a saved model, the following configuration +parameters need to be set: + +```json + "freqai": { + "identifier": "example", + "live_trained_timerange": "20220330-20220429", + "live_full_backtestrange": "20220302-20220501" + } +``` + +Where the `identifier` is the same identifier which was set during the backtesting/training. Meanwhile, +the `live_trained_timerange` is the sub-trained timerange (the training window) which was set +during backtesting/training. These are available to the user inside `user_data/models/*/sub-train-*`. +`live_full_backtestrange` was the full data range associated with the backtest/training (the full time +window that the training window and backtesting windows slide through). These values can be located +inside the `user_data/models/` directory. 
In this case, although Freqai will initiate with a +pretrained model, if a full `backtest_period` has elapsed since the end of the user set +`live_trained_timerange`, it will self retrain. ## Data anylsis techniques ### Controlling the model learning process @@ -226,12 +249,49 @@ $$ DI_k = d_k/\overline{d} $$ Equity and crypto markets suffer from a high level of non-patterned noise in the form of outlier data points. The dissimilarity index allows predictions which are outliers and not existent in the model feature space, to be thrown out due -to low levels of certainty. The user can tweak the DI with `DI_threshold` to increase -or decrease the extrapolation of the trained model. +to low levels of certainty. Activating the Dissimilarity Index can be achieved with: + +```json + "freqai": { + "feature_parameters" : { + "DI_threshold": 1 + } + } +``` + +The user can tweak the DI with `DI_threshold` to increase or decrease the extrapolation of the +trained model. ### Reducing data dimensionality with Principal Component Analysis -TO BE WRITTEN +Users can reduce the dimensionality of their features by activating the `principal_component_analysis`: + +```json + "freqai": { + "feature_parameters" : { + "principal_component_analysis": true + } + } +``` + +Which will perform PCA on the features and reduce the dimensionality of the data so that the explained +variance of the data set is >= 0.999. + +### Removing outliers based on feature statistical distributions + +The user can tell Freqai to remove outlier data points from the training/test data sets by setting: + +```json + "freqai": { + "feature_parameters" : { + "remove_outliers": true + } + } +``` + +Freqai will check the statistical distributions of each feature (or component if the user activated +`principal_component_analysis`) and remove any data point that sits more than 3 standard deviations away +from the mean. ## Additional information ### Feature standardization @@ -242,5 +302,5 @@ data only. 
This includes all test data and unseen prediction data (dry/live/back ### File structure `user_data_dir/models/` contains all the data associated with the trainings and -backtestings. This file structure is heavily controlled and read by the `DataHandler()` +backtestings. This file structure is heavily controlled and read by the `FreqaiDataKitchen()` and should thus not be modified. diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 7b6a65a59..961f26e57 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -485,7 +485,7 @@ class FreqaiDataKitchen: return - def build_feature_list(self, config: dict) -> list: + def build_feature_list(self, config: dict, metadata: dict) -> list: """ Build the list of features that will be used to filter the full dataframe. Feature list is construced from the @@ -501,8 +501,10 @@ class FreqaiDataKitchen: shift = "" if n > 0: shift = "_shift-" + str(n) - # features.append(ft + shift + "_" + tf) + features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf) for p in config["freqai"]["corr_pairlist"]: + if metadata['pair'] in p: + continue # avoid duplicate features features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf) # logger.info("number of features %s", len(features)) @@ -640,9 +642,10 @@ class FreqaiDataKitchen: exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'], self.config, validate=False) - pairs = self.freqai_config['corr_pairlist'] + [metadata['pair']] + pairs = self.freqai_config['corr_pairlist'] + if metadata['pair'] not in pairs: + pairs += metadata['pair'] # dont include pair twice timerange = TimeRange.parse_timerange(new_timerange) - # data_handler = get_datahandler(datadir, data_format) refresh_backtest_ohlcv_data( exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'], @@ -656,33 +659,45 @@ class FreqaiDataKitchen: def load_pairs_histories(self, new_timerange: str, metadata: dict) -> 
Tuple[Dict[Any, Any], DataFrame]: corr_dataframes: Dict[Any, Any] = {} - # pair_dataframes: Dict[Any, Any] = {} + base_dataframes: Dict[Any, Any] = {} pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']] timerange = TimeRange.parse_timerange(new_timerange) - for p in pairs: - corr_dataframes[p] = {} - for tf in self.freqai_config['timeframes']: + for tf in self.freqai_config['timeframes']: + base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'], + timeframe=tf, + pair=metadata['pair'], timerange=timerange) + for p in pairs: + if metadata['pair'] in p: + continue # dont repeat anything from whitelist + corr_dataframes[p] = {} corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'], timeframe=tf, pair=p, timerange=timerange) - base_dataframe = [dataframe for key, dataframe in corr_dataframes.items() - if metadata['pair'] in key] + # base_dataframe = [dataframe for key, dataframe in corr_dataframes.items() + # if metadata['pair'] in key] # [0] indexes the lowest tf for the basepair - return corr_dataframes, base_dataframe[0][self.config['timeframe']] + return corr_dataframes, base_dataframes - def use_strategy_to_populate_indicators(self, strategy: IStrategy, metadata: dict, + def use_strategy_to_populate_indicators(self, strategy: IStrategy, corr_dataframes: dict, - dataframe: DataFrame) -> DataFrame: + base_dataframes: dict, + metadata: dict) -> DataFrame: - # dataframe = pair_dataframes[0] # this is the base tf pair df + dataframe = base_dataframes[self.config['timeframe']] for tf in self.freqai_config["timeframes"]: - # dataframe = strategy.populate_any_indicators(metadata["pair"], dataframe.copy, - # tf, pair_dataframes[tf]) + dataframe = strategy.populate_any_indicators(metadata['pair'], + dataframe.copy(), + tf, + base_dataframes[tf], + coin=metadata['pair'].split("/")[0] + "-" + ) for i in self.freqai_config["corr_pairlist"]: + if metadata['pair'] in i: + continue # dont repeat anything from whitelist dataframe 
= strategy.populate_any_indicators(i, dataframe.copy(), tf, diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 222061e2a..e019eb842 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -93,7 +93,7 @@ class IFreqaiModel(ABC): else: self.model = self.dh.load_data() - preds, do_preds = self.predict(dataframe_backtest) + preds, do_preds = self.predict(dataframe_backtest, metadata) self.dh.append_predictions(preds, do_preds, len(dataframe_backtest)) print('predictions', len(self.dh.full_predictions), @@ -120,13 +120,13 @@ class IFreqaiModel(ABC): if retrain or not file_exists: self.dh.download_new_data_for_retraining(new_trained_timerange, metadata) # dataframe = download-data - corr_dataframes, pair_dataframes = self.dh.load_pairs_histories(new_trained_timerange, + corr_dataframes, base_dataframes = self.dh.load_pairs_histories(new_trained_timerange, metadata) unfiltered_dataframe = self.dh.use_strategy_to_populate_indicators(strategy, - metadata, corr_dataframes, - pair_dataframes) + base_dataframes, + metadata) self.model = self.train(unfiltered_dataframe, metadata) self.dh.save_data(self.model) @@ -134,7 +134,7 @@ class IFreqaiModel(ABC): self.freqai_info self.model = self.dh.load_data() - preds, do_preds = self.predict(dataframe) + preds, do_preds = self.predict(dataframe, metadata) self.dh.append_predictions(preds, do_preds, len(dataframe)) # dataframe should have len 1 here @@ -175,7 +175,7 @@ class IFreqaiModel(ABC): return @abstractmethod - def predict(self, dataframe: DataFrame) -> Tuple[npt.ArrayLike, npt.ArrayLike]: + def predict(self, dataframe: DataFrame, metadata: dict) -> Tuple[npt.ArrayLike, npt.ArrayLike]: """ Filter the prediction features data and predict with it. :param: unfiltered_dataframe: Full dataframe for the current backtest period. 
diff --git a/freqtrade/templates/ExamplePredictionModel.py b/freqtrade/templates/ExamplePredictionModel.py index 08f9d2ba9..3db8d3aeb 100644 --- a/freqtrade/templates/ExamplePredictionModel.py +++ b/freqtrade/templates/ExamplePredictionModel.py @@ -53,7 +53,7 @@ class ExamplePredictionModel(IFreqaiModel): logger.info("--------------------Starting training--------------------") # create the full feature list based on user config info - self.dh.training_features_list = self.dh.build_feature_list(self.config) + self.dh.training_features_list = self.dh.build_feature_list(self.config, metadata) unfiltered_labels = self.make_labels(unfiltered_dataframe) # filter the features requested by user in the configuration file and elegantly handle NaNs @@ -114,7 +114,8 @@ class ExamplePredictionModel(IFreqaiModel): return model - def predict(self, unfiltered_dataframe: DataFrame) -> Tuple[DataFrame, DataFrame]: + def predict(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Tuple[DataFrame, + DataFrame]: """ Filter the prediction features data and predict with it. :param: unfiltered_dataframe: Full dataframe for the current backtest period. @@ -126,7 +127,7 @@ class ExamplePredictionModel(IFreqaiModel): # logger.info("--------------------Starting prediction--------------------") - original_feature_list = self.dh.build_feature_list(self.config) + original_feature_list = self.dh.build_feature_list(self.config, metadata) filtered_dataframe, _ = self.dh.filter_features( unfiltered_dataframe, original_feature_list, training_filter=False ) diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index 13309d8c3..45526e2ac 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -142,8 +142,11 @@ class FreqaiExampleStrategy(IStrategy): # the following loops are necessary for building the features # indicated by the user in the configuration file. 
for tf in self.freqai_info["timeframes"]: - # dataframe = self.populate_any_indicators(metadata["pair"], dataframe.copy(), tf) + dataframe = self.populate_any_indicators(self.pair, dataframe.copy(), tf, + coin=self.pair.split("/")[0] + "-") for pair in self.freqai_info["corr_pairlist"]: + if metadata['pair'] in pair: + continue # do not include whitelisted pair twice if it is in corr_pairlist dataframe = self.populate_any_indicators( pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-" )