From 8227b4aafe51b30e5942d293e8d0052c968442dd Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Tue, 27 Dec 2022 11:37:01 -0300 Subject: [PATCH 01/19] freqAI Strategy - improve user experience --- freqtrade/freqai/data_kitchen.py | 183 ++++++++++++++++++- freqtrade/strategy/interface.py | 40 ++++ freqtrade/templates/FreqaiExampleStrategy.py | 90 ++++++++- 3 files changed, 306 insertions(+), 7 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 9c8158c8a..c3e5929de 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1,4 +1,5 @@ import copy +import inspect import logging import shutil from datetime import datetime, timezone @@ -23,6 +24,7 @@ from freqtrade.constants import Config from freqtrade.data.converter import reduce_dataframe_footprint from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds +from freqtrade.strategy import merge_informative_pair from freqtrade.strategy.interface import IStrategy @@ -1176,6 +1178,103 @@ class FreqaiDataKitchen: return dataframe + def get_pair_data_for_features(self, + pair: str, + tf: str, + strategy: IStrategy, + corr_dataframes: dict = {}, + base_dataframes: dict = {}, + is_corr_pairs: bool = False) -> DataFrame: + """ + Get the data for the pair. 
If it's not in the dictionary, get it from the data provider + :param pair: str = pair to get data for + :param tf: str = timeframe to get data for + :param strategy: IStrategy = user defined strategy object + :param corr_dataframes: dict = dict containing the df pair dataframes + (for user defined timeframes) + :param base_dataframes: dict = dict containing the current pair dataframes + (for user defined timeframes) + :param is_corr_pairs: bool = whether the pair is a corr pair or not + :return: dataframe = dataframe containing the pair data + """ + if is_corr_pairs: + dataframe = corr_dataframes[pair][tf] + if not dataframe.empty: + return dataframe + else: + dataframe = strategy.dp.get_pair_dataframe(pair=pair, timeframe=tf) + return dataframe + else: + dataframe = base_dataframes[tf] + if not dataframe.empty: + return dataframe + else: + dataframe = strategy.dp.get_pair_dataframe(pair=pair, timeframe=tf) + return dataframe + + def merge_features(self, df_main: DataFrame, df_to_merge: DataFrame, + tf: str, timeframe_inf: str, suffix: str) -> DataFrame: + """ + Merge the features of the dataframe and remove HLCV and date added columns + :param df_main: DataFrame = main dataframe + :param df_to_merge: DataFrame = dataframe to merge + :param tf: str = timeframe of the main dataframe + :param timeframe_inf: str = timeframe of the dataframe to merge + :param suffix: str = suffix to add to the columns of the dataframe to merge + :return: dataframe = merged dataframe + """ + dataframe = merge_informative_pair(df_main, df_to_merge, tf, timeframe_inf=timeframe_inf, + append_timeframe=False, suffix=suffix, ffill=True) + skip_columns = [ + (f"{s}_{suffix}") for s in ["date", "open", "high", "low", "close", "volume"] + ] + dataframe = dataframe.drop(columns=skip_columns) + return dataframe + + def populate_features(self, dataframe: DataFrame, pair: str, strategy: IStrategy, + corr_dataframes: dict, base_dataframes: dict, + is_corr_pairs: bool = False) -> DataFrame: + """ + 
Use the user defined strategy functions for populating features + :param dataframe: DataFrame = dataframe to populate + :param pair: str = pair to populate + :param strategy: IStrategy = user defined strategy object + :param corr_dataframes: dict = dict containing the df pair dataframes + :param base_dataframes: dict = dict containing the current pair dataframes + :param is_corr_pairs: bool = whether the pair is a corr pair or not + :return: dataframe = populated dataframe + """ + tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes") + + for tf in tfs: + informative_df = self.get_pair_data_for_features( + pair, tf, strategy, corr_dataframes, base_dataframes, is_corr_pairs) + informative_copy = informative_df.copy() + + for t in self.freqai_config["feature_parameters"]["indicator_periods_candles"]: + df_features = strategy.freqai_feature_engineering_indicator_periods( + informative_copy.copy(), t) + suffix = f"{t}" + informative_df = self.merge_features(informative_df, df_features, tf, tf, suffix) + + generic_df = strategy.freqai_feature_engineering_generic(informative_copy.copy()) + suffix = "gen" + + informative_df = self.merge_features(informative_df, generic_df, tf, tf, suffix) + + indicators = [col for col in informative_df if col.startswith("%")] + for n in range(self.freqai_config["feature_parameters"]["include_shifted_candles"] + 1): + if n == 0: + continue + df_shift = informative_df[indicators].shift(n) + df_shift = df_shift.add_suffix("_shift-" + str(n)) + informative_df = pd.concat((informative_df, df_shift), axis=1) + + dataframe = self.merge_features(dataframe.copy(), informative_df, + self.config["timeframe"], tf, f'{pair}_{tf}') + + return dataframe + def use_strategy_to_populate_indicators( self, strategy: IStrategy, @@ -1188,7 +1287,88 @@ class FreqaiDataKitchen: """ Use the user defined strategy for populating indicators during retrain :param strategy: IStrategy = user defined strategy object - :param corr_dataframes: 
dict = dict containing the informative pair dataframes + :param corr_dataframes: dict = dict containing the df pair dataframes + (for user defined timeframes) + :param base_dataframes: dict = dict containing the current pair dataframes + (for user defined timeframes) + :param pair: str = pair to populate + :param prediction_dataframe: DataFrame = dataframe containing the pair data + used for prediction + :param do_corr_pairs: bool = whether to populate corr pairs or not + :return: + dataframe: DataFrame = dataframe containing populated indicators + """ + + # this is a hack to check if the user is using the populate_any_indicators function + new_version = inspect.getsource(strategy.populate_any_indicators) == ( + inspect.getsource(IStrategy.populate_any_indicators)) + + if new_version: + tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes") + pairs: List[str] = self.freqai_config["feature_parameters"].get( + "include_corr_pairlist", []) + + if not prediction_dataframe.empty: + dataframe = prediction_dataframe.copy() + for tf in tfs: + base_dataframes[tf] = pd.DataFrame() + for p in pairs: + if p not in corr_dataframes: + corr_dataframes[p] = {} + corr_dataframes[p][tf] = pd.DataFrame() + else: + dataframe = base_dataframes[self.config["timeframe"]].copy() + + corr_pairs: List[str] = self.freqai_config["feature_parameters"].get( + "include_corr_pairlist", []) + dataframe = self.populate_features(dataframe.copy(), pair, strategy, + corr_dataframes, base_dataframes) + + # ensure corr pairs are always last + for corr_pair in corr_pairs: + if pair == corr_pair: + continue # dont repeat anything from whitelist + if corr_pairs and do_corr_pairs: + dataframe = self.populate_features(dataframe.copy(), corr_pair, strategy, + corr_dataframes, base_dataframes, True) + + dataframe = strategy.freqai_feature_engineering_generalized_indicators(dataframe.copy()) + dataframe = strategy.freqai_set_targets(dataframe.copy()) + + 
self.get_unique_classes_from_labels(dataframe) + + dataframe = self.remove_special_chars_from_feature_names(dataframe) + + if self.config.get('reduce_df_footprint', False): + dataframe = reduce_dataframe_footprint(dataframe) + + return dataframe + + else: + # the user is using the populate_any_indicators functions which is deprecated + logger.warning("DEPRECATION WARNING: " + "You are using the deprecated populate_any_indicators function. " + "Please update your strategy to use " + "the new feature_engineering functions.") + + df = self.use_strategy_to_populate_indicators_old_version( + strategy, corr_dataframes, base_dataframes, pair, + prediction_dataframe, do_corr_pairs) + return df + + def use_strategy_to_populate_indicators_old_version( + self, + strategy: IStrategy, + corr_dataframes: dict = {}, + base_dataframes: dict = {}, + pair: str = "", + prediction_dataframe: DataFrame = pd.DataFrame(), + do_corr_pairs: bool = True, + ) -> DataFrame: + """ + Use the user defined strategy for populating indicators during retrain + :param strategy: IStrategy = user defined strategy object + :param corr_dataframes: dict = dict containing the df pair dataframes (for user defined timeframes) :param base_dataframes: dict = dict containing the current pair dataframes (for user defined timeframes) @@ -1212,6 +1392,7 @@ class FreqaiDataKitchen: corr_dataframes[p][tf] = None else: dataframe = base_dataframes[self.config["timeframe"]].copy() + # dataframe = strategy.dp.get_pair_dataframe(pair, self.config["timeframe"]) sgi = False for tf in tfs: diff --git a/freqtrade/strategy/interface.py b/freqtrade/strategy/interface.py index 781ae6c5c..6bcc2a23f 100644 --- a/freqtrade/strategy/interface.py +++ b/freqtrade/strategy/interface.py @@ -598,6 +598,7 @@ class IStrategy(ABC, HyperStrategyMixin): informative: DataFrame = None, set_generalized_indicators: bool = False) -> DataFrame: """ + DEPRECATED - USE FEATURE ENGINEERING FUNCTIONS INSTEAD Function designed to automatically 
generate, name and merge features from user indicated timeframes in the configuration file. User can add additional features here, but must follow the naming convention. @@ -610,6 +611,45 @@ class IStrategy(ABC, HyperStrategyMixin): """ return df + def freqai_feature_engineering_indicator_periods(self, dataframe: DataFrame, + period: int, **kwargs): + """ + This function will be called for all include_timeframes in each indicator_periods_candles + (including corr_pairs). + After that, the features will be shifted by the number of candles in the + include_shifted_candles. + :param df: strategy dataframe which will receive the features + :param period: period of the indicator - usage example: + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) + """ + return dataframe + + def freqai_feature_engineering_generic(self, dataframe: DataFrame, **kwargs): + """ + This optional function will be called for all include_timeframes (including corr_pairs). + After that, the features will be shifted by the number of candles in the + include_shifted_candles. + :param df: strategy dataframe which will receive the features + dataframe["%-pct-change"] = dataframe["close"].pct_change() + """ + return dataframe + + def freqai_feature_engineering_generalized_indicators(self, dataframe: DataFrame, **kwargs): + """ + This optional function will be called once with the dataframe of the main timeframe. + :param df: strategy dataframe which will receive the features + usage example: dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 + """ + return dataframe + + def freqai_set_targets(self, dataframe, **kwargs): + """ + Required function to set the targets for the model. 
+ :param df: strategy dataframe which will receive the targets + usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"] + """ + return dataframe + ### # END - Intended to be overridden by strategy ### diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index fc39b0ab4..323919a47 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -47,16 +47,94 @@ class FreqaiExampleStrategy(IStrategy): std_dev_multiplier_sell = CategoricalParameter( [0.75, 1, 1.25, 1.5, 1.75], space="sell", default=1.25, optimize=True) - def populate_any_indicators( + def freqai_feature_engineering_indicator_periods(self, dataframe, period, **kwargs): + """ + This function will be called for all include_timeframes in each indicator_periods_candles + (including corr_pairs). + After that, the features will be shifted by the number of candles in the + include_shifted_candles. + :param df: strategy dataframe which will receive the features + :param period: period of the indicator - usage example: + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) + """ + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) + dataframe["%-mfi-period"] = ta.MFI(dataframe, timeperiod=period) + dataframe["%-adx-period"] = ta.ADX(dataframe, timeperiod=period) + dataframe["%-sma-period"] = ta.SMA(dataframe, timeperiod=period) + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) + + bollinger = qtpylib.bollinger_bands( + qtpylib.typical_price(dataframe), window=period, stds=2.2 + ) + dataframe["bb_lowerband-period"] = bollinger["lower"] + dataframe["bb_middleband-period"] = bollinger["mid"] + dataframe["bb_upperband-period"] = bollinger["upper"] + + dataframe["%-bb_width-period"] = ( + dataframe["bb_upperband-period"] + - dataframe["bb_lowerband-period"] + ) / dataframe["bb_middleband-period"] + dataframe["%-close-bb_lower-period"] = ( + 
dataframe["close"] / dataframe["bb_lowerband-period"] + ) + + dataframe["%-roc-period"] = ta.ROC(dataframe, timeperiod=period) + + dataframe["%-relative_volume-period"] = ( + dataframe["volume"] / dataframe["volume"].rolling(period).mean() + ) + + return dataframe + + def freqai_feature_engineering_generic(self, dataframe, **kwargs): + """ + This optional function will be called for all include_timeframes (including corr_pairs). + After that, the features will be shifted by the number of candles in the + include_shifted_candles. + :param df: strategy dataframe which will receive the features + dataframe["%-pct-change"] = dataframe["close"].pct_change() + """ + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-raw_volume"] = dataframe["volume"] + dataframe["%-raw_price"] = dataframe["close"] + return dataframe + + def freqai_feature_engineering_generalized_indicators(self, dataframe, **kwargs): + """ + This optional function will be called once with the dataframe of the main timeframe. + :param df: strategy dataframe which will receive the features + usage example: dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 + """ + dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 + dataframe["%-hour_of_day"] = (dataframe["date"].dt.hour + 1) / 25 + return dataframe + + def freqai_set_targets(self, dataframe, **kwargs): + """ + Required function to set the targets for the model. 
+ :param df: strategy dataframe which will receive the targets + usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"] + """ + dataframe["&-s_close"] = ( + dataframe["close"] + .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + .mean() + / dataframe["close"] + - 1 + ) + return dataframe + + def populate_any_indicators_old( self, pair, df, tf, informative=None, set_generalized_indicators=False ): """ + DEPRECATED - USE FEATURE ENGINEERING FUNCTIONS INSTEAD Function designed to automatically generate, name and merge features - from user indicated timeframes in the configuration file. User controls the indicators - passed to the training/prediction by prepending indicators with `f'%-{pair}` - (see convention below). I.e. user should not prepend any supporting metrics - (e.g. bb_lowerband below) with % unless they explicitly want to pass that metric to the - model. + from user indicated timeframes in the configuration file. User can add + additional features here, but must follow the naming convention. + This method is *only* used in FreqaiDataKitchen class and therefore + it is only called if FreqAI is active. 
:param pair: pair to be used as informative :param df: strategy dataframe which will receive merges from informatives :param tf: timeframe of the dataframe which will modify the feature names From c2936d551b8ad6ccf7b57e2ac6cb55d8550622cf Mon Sep 17 00:00:00 2001 From: robcaulk Date: Wed, 28 Dec 2022 13:25:40 +0100 Subject: [PATCH 02/19] improve doc, update test strats, change function names --- docs/freqai-configuration.md | 175 +++++++------- docs/freqai-feature-engineering.md | 194 +++++++++------ docs/freqai-parameter-table.md | 8 +- docs/freqai-reinforcement-learning.md | 58 ++--- .../RL/BaseReinforcementLearningModel.py | 4 +- freqtrade/freqai/data_kitchen.py | 8 +- freqtrade/strategy/interface.py | 79 +++++- freqtrade/templates/FreqaiExampleStrategy.py | 228 ++++++++---------- tests/freqai/test_freqai_datakitchen.py | 2 +- tests/strategy/strats/freqai_rl_test_strat.py | 60 ++--- .../strategy/strats/freqai_test_classifier.py | 61 ++--- ...freqai_test_multimodel_classifier_strat.py | 66 ++--- .../strats/freqai_test_multimodel_strat.py | 94 +++----- tests/strategy/strats/freqai_test_strat.py | 72 ++---- 14 files changed, 515 insertions(+), 594 deletions(-) diff --git a/docs/freqai-configuration.md b/docs/freqai-configuration.md index 10f5838c9..9d89800be 100644 --- a/docs/freqai-configuration.md +++ b/docs/freqai-configuration.md @@ -43,116 +43,113 @@ The FreqAI strategy requires including the following lines of code in the standa def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: - # the model will return all labels created by user in `populate_any_indicators` + # the model will return all labels created by user in `set_freqai_targets()` # (& appended targets), an indication of whether or not the prediction should be accepted, # the target mean/std values for each of the labels created by user in - # `populate_any_indicators()` for each training period. + # `set_freqai_targets()` for each training period.
dataframe = self.freqai.start(dataframe, metadata, self) return dataframe - def populate_any_indicators( - self, pair, df, tf, informative=None, set_generalized_indicators=False - ): + def feature_engineering_expand_all(self, dataframe, period, **kwargs): """ - Function designed to automatically generate, name and merge features - from user indicated timeframes in the configuration file. User controls the indicators - passed to the training/prediction by prepending indicators with `'%-' + pair ` - (see convention below). I.e. user should not prepend any supporting metrics - (e.g. bb_lowerband below) with % unless they explicitly want to pass that metric to the - model. - :param pair: pair to be used as informative - :param df: strategy dataframe which will receive merges from informatives - :param tf: timeframe of the dataframe which will modify the feature names - :param informative: the dataframe associated with the informative pair + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `indicator_periods_candles`, `include_timeframes`, `include_shifted_candles`, and + `include_corr_pairs`. In other words, a single feature defined in this function + will automatically expand to a total of + `indicator_periods_candles` * `include_timeframes` * `include_shifted_candles` * + `include_corr_pairs` numbers of features added to the model. + + All features must be prepended with `%` to be recognized by FreqAI internals. 
+ + :param df: strategy dataframe which will receive the features + :param period: period of the indicator - usage example: + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) """ - if informative is None: - informative = self.dp.get_pair_dataframe(pair, tf) + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) + dataframe["%-mfi-period"] = ta.MFI(dataframe, timeperiod=period) + dataframe["%-adx-period"] = ta.ADX(dataframe, timeperiod=period) + dataframe["%-sma-period"] = ta.SMA(dataframe, timeperiod=period) + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) - # first loop is automatically duplicating indicators for time periods - for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: - t = int(t) - informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) - informative[f"%-{pair}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) - informative[f"%-{pair}adx-period_{t}"] = ta.ADX(informative, window=t) + return dataframe - indicators = [col for col in informative if col.startswith("%")] - # This loop duplicates and shifts all indicators to add a sense of recency to data - for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): - if n == 0: - continue - informative_shift = informative[indicators].shift(n) - informative_shift = informative_shift.add_suffix("_shift-" + str(n)) - informative = pd.concat((informative, informative_shift), axis=1) + def feature_engineering_expand_basic(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`. + In other words, a single feature defined in this function + will automatically expand to a total of + `include_timeframes` * `include_shifted_candles` * `include_corr_pairs` + numbers of features added to the model. 
- df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) - skip_columns = [ - (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] - ] - df = df.drop(columns=skip_columns) + Features defined here will *not* be automatically duplicated on user defined + `indicator_periods_candles` - # Add generalized indicators here (because in live, it will call this - # function to populate indicators during training). Notice how we ensure not to - # add them multiple times - if set_generalized_indicators: + All features must be prepended with `%` to be recognized by FreqAI internals. - # user adds targets here by prepending them with &- (see convention below) - # If user wishes to use multiple targets, a multioutput prediction model - # needs to be used such as templates/CatboostPredictionMultiModel.py - df["&-s_close"] = ( - df["close"] - .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) - .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) - .mean() - / df["close"] - - 1 + :param df: strategy dataframe which will receive the features + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-ema-200"] = ta.EMA(dataframe, timeperiod=200) + """ + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-raw_volume"] = dataframe["volume"] + dataframe["%-raw_price"] = dataframe["close"] + return dataframe + + def feature_engineering_standard(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + This optional function will be called once with the dataframe of the base timeframe. + This is the final function to be called, which means that the dataframe entering this + function will contain all the features and columns created by all other + feature_engineering_* functions. + + This function is a good place to do custom exotic feature extractions (e.g. tsfresh).
+ This function is a good place for any feature that should not be auto-expanded upon + (e.g. day of the week). + + All features must be prepended with `%` to be recognized by FreqAI internals. + + :param df: strategy dataframe which will receive the features + usage example: dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 + """ + dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 + dataframe["%-hour_of_day"] = (dataframe["date"].dt.hour + 1) / 25 + return dataframe + + def set_freqai_targets(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + Required function to set the targets for the model. + All targets must be prepended with `&` to be recognized by the FreqAI internals. + + :param df: strategy dataframe which will receive the targets + usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"] + """ + dataframe["&-s_close"] = ( + dataframe["close"] + .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + .mean() + / dataframe["close"] + - 1 ) - - return df - - ``` -Notice how the `populate_any_indicators()` is where [features](freqai-feature-engineering.md#feature-engineering) and labels/targets are added. A full example strategy is available in `templates/FreqaiExampleStrategy.py`. - -Notice also the location of the labels under `if set_generalized_indicators:` at the bottom of the example. This is where single features and labels/targets should be added to the feature set to avoid duplication of them from various configuration parameters that multiply the feature set, such as `include_timeframes`. +Notice how the `feature_engineering_*()` is where [features](freqai-feature-engineering.md#feature-engineering) are added. Meanwhile `set_freqai_targets()` adds the labels/targets. A full example strategy is available in `templates/FreqaiExampleStrategy.py`. !!! 
Note The `self.freqai.start()` function cannot be called outside the `populate_indicators()`. !!! Note - Features **must** be defined in `populate_any_indicators()`. Defining FreqAI features in `populate_indicators()` - will cause the algorithm to fail in live/dry mode. In order to add generalized features that are not associated with a specific pair or timeframe, the following structure inside `populate_any_indicators()` should be used - (as exemplified in `freqtrade/templates/FreqaiExampleStrategy.py`): - - ```python - def populate_any_indicators(self, pair, df, tf, informative=None, set_generalized_indicators=False): - - ... - - # Add generalized indicators here (because in live, it will call only this function to populate - # indicators for retraining). Notice how we ensure not to add them multiple times by associating - # these generalized indicators to the basepair/timeframe - if set_generalized_indicators: - df['%-day_of_week'] = (df["date"].dt.dayofweek + 1) / 7 - df['%-hour_of_day'] = (df['date'].dt.hour + 1) / 25 - - # user adds targets here by prepending them with &- (see convention below) - # If user wishes to use multiple targets, a multioutput prediction model - # needs to be used such as templates/CatboostPredictionMultiModel.py - df["&-s_close"] = ( - df["close"] - .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) - .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) - .mean() - / df["close"] - - 1 - ) - ``` - - Please see the example script located in `freqtrade/templates/FreqaiExampleStrategy.py` for a full example of `populate_any_indicators()`. + Features **must** be defined in `feature_engineering_*()`. Defining FreqAI features in `populate_indicators()` + will cause the algorithm to fail in live/dry mode. 
In order to add generalized features that are not associated with a specific pair or timeframe, you should use `feature_engineering_standard()` + (as exemplified in `freqtrade/templates/FreqaiExampleStrategy.py`). ## Important dataframe key patterns @@ -160,11 +157,11 @@ Below are the values you can expect to include/use inside a typical strategy dat | DataFrame Key | Description | |------------|-------------| -| `df['&*']` | Any dataframe column prepended with `&` in `populate_any_indicators()` is treated as a training target (label) inside FreqAI (typically following the naming convention `&-s*`). For example, to predict the close price 40 candles into the future, you would set `df['&-s_close'] = df['close'].shift(-self.freqai_info["feature_parameters"]["label_period_candles"])` with `"label_period_candles": 40` in the config. FreqAI makes the predictions and gives them back under the same key (`df['&-s_close']`) to be used in `populate_entry/exit_trend()`.
**Datatype:** Depends on the output of the model. +| `df['&*']` | Any dataframe column prepended with `&` in `set_freqai_targets()` is treated as a training target (label) inside FreqAI (typically following the naming convention `&-s*`). For example, to predict the close price 40 candles into the future, you would set `df['&-s_close'] = df['close'].shift(-self.freqai_info["feature_parameters"]["label_period_candles"])` with `"label_period_candles": 40` in the config. FreqAI makes the predictions and gives them back under the same key (`df['&-s_close']`) to be used in `populate_entry/exit_trend()`.
**Datatype:** Depends on the output of the model. | `df['&*_std/mean']` | Standard deviation and mean values of the defined labels during training (or live tracking with `fit_live_predictions_candles`). Commonly used to understand the rarity of a prediction (use the z-score as shown in `templates/FreqaiExampleStrategy.py` and explained [here](#creating-a-dynamic-target-threshold) to evaluate how often a particular prediction was observed during training or historically with `fit_live_predictions_candles`).
**Datatype:** Float. | `df['do_predict']` | Indication of an outlier data point. The return value is integer between -2 and 2, which lets you know if the prediction is trustworthy or not. `do_predict==1` means that the prediction is trustworthy. If the Dissimilarity Index (DI, see details [here](freqai-feature-engineering.md#identifying-outliers-with-the-dissimilarity-index-di)) of the input data point is above the threshold defined in the config, FreqAI will subtract 1 from `do_predict`, resulting in `do_predict==0`. If `use_SVM_to_remove_outliers()` is active, the Support Vector Machine (SVM, see details [here](freqai-feature-engineering.md#identifying-outliers-using-a-support-vector-machine-svm)) may also detect outliers in training and prediction data. In this case, the SVM will also subtract 1 from `do_predict`. If the input data point was considered an outlier by the SVM but not by the DI, or vice versa, the result will be `do_predict==0`. If both the DI and the SVM considers the input data point to be an outlier, the result will be `do_predict==-1`. As with the SVM, if `use_DBSCAN_to_remove_outliers` is active, DBSCAN (see details [here](freqai-feature-engineering.md#identifying-outliers-with-dbscan)) may also detect outliers and subtract 1 from `do_predict`. Hence, if both the SVM and DBSCAN are active and identify a datapoint that was above the DI threshold as an outlier, the result will be `do_predict==-2`. A particular case is when `do_predict == 2`, which means that the model has expired due to exceeding `expired_hours`.
**Datatype:** Integer between -2 and 2. | `df['DI_values']` | Dissimilarity Index (DI) values are proxies for the level of confidence FreqAI has in the prediction. A lower DI means the prediction is close to the training data, i.e., higher prediction confidence. See details about the DI [here](freqai-feature-engineering.md#identifying-outliers-with-the-dissimilarity-index-di).
**Datatype:** Float. -| `df['%*']` | Any dataframe column prepended with `%` in `populate_any_indicators()` is treated as a training feature. For example, you can include the RSI in the training feature set (similar to in `templates/FreqaiExampleStrategy.py`) by setting `df['%-rsi']`. See more details on how this is done [here](freqai-feature-engineering.md).
**Note:** Since the number of features prepended with `%` can multiply very quickly (10s of thousands of features are easily engineered using the multiplictative functionality of, e.g., `include_shifted_candles` and `include_timeframes` as described in the [parameter table](freqai-parameter-table.md)), these features are removed from the dataframe that is returned from FreqAI to the strategy. To keep a particular type of feature for plotting purposes, you would prepend it with `%%`.
**Datatype:** Depends on the output of the model. +| `df['%*']` | Any dataframe column prepended with `%` in `feature_engineering_*()` is treated as a training feature. For example, you can include the RSI in the training feature set (similar to in `templates/FreqaiExampleStrategy.py`) by setting `df['%-rsi']`. See more details on how this is done [here](freqai-feature-engineering.md).
**Note:** Since the number of features prepended with `%` can multiply very quickly (10s of thousands of features are easily engineered using the multiplicative functionality of, e.g., `include_shifted_candles` and `include_timeframes` as described in the [parameter table](freqai-parameter-table.md)), these features are removed from the dataframe that is returned from FreqAI to the strategy. To keep a particular type of feature for plotting purposes, you would prepend it with `%%`.
**Datatype:** Depends on the output of the model. ## Setting the `startup_candle_count` diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index 3462955cc..e2089d947 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -2,96 +2,132 @@ ## Defining the features -Low level feature engineering is performed in the user strategy within a function called `populate_any_indicators()`. That function sets the `base features` such as, `RSI`, `MFI`, `EMA`, `SMA`, time of day, volume, etc. The `base features` can be custom indicators or they can be imported from any technical-analysis library that you can find. One important syntax rule is that all `base features` string names are prepended with `%-{pair}`, while labels/targets are prepended with `&`. +Low level feature engineering is performed in the user strategy within a set of functions called `feature_engineering_*`. These functions set the `base features` such as, `RSI`, `MFI`, `EMA`, `SMA`, time of day, volume, etc. The `base features` can be custom indicators or they can be imported from any technical-analysis library that you can find. One important syntax rule is that all `base features` string names defined within `feature_engineering_*` functions must be prepended with `%-{pair}`. FreqAI is equipped with a set of functions to simplify rapid large-scale feature engineering: + +| Function | Description | +|---------------|-------------| +| `feature_engineering_expand_all()` | This optional function will automatically expand the defined features on the config defined `indicator_periods_candles`, `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`. +| `feature_engineering_expand_basic()` | This optional function will automatically expand the defined features on the config defined `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`. Note: this function does *not* expand across `indicator_periods_candles`. 
+| `feature_engineering_standard()` | This optional function will be called once with the dataframe of the base timeframe. This is the final function to be called, which means that the dataframe entering this function will contain all the features and columns created by all other `feature_engineering_expand` functions. This function is a good place to do custom exotic feature extractions (e.g. tsfresh). This function is also a good place for any feature that should not be auto-expanded upon (e.g. day of the week). +| `set_freqai_targets()` | Required function to set the targets for the model. All targets must be prepended with `&` to be recognized by the FreqAI internals. + !!! Note Adding the full pair string, e.g. XYZ/USD, in the feature name enables improved performance for dataframe caching on the backend. If you decide *not* to add the full pair string in the feature string, FreqAI will operate in a reduced performance mode. Meanwhile, high level feature engineering is handled within `"feature_parameters":{}` in the FreqAI config. Within this file, it is possible to decide large scale feature expansions on top of the `base_features` such as "including correlated pairs" or "including informative timeframes" or even "including recent candles." -It is advisable to start from the template `populate_any_indicators()` in the source provided example strategy (found in `templates/FreqaiExampleStrategy.py`) to ensure that the feature definitions are following the correct conventions. Here is an example of how to set the indicators and labels in the strategy: +It is advisable to start from the template `feature_engineering_*` functions in the source provided example strategy (found in `templates/FreqaiExampleStrategy.py`) to ensure that the feature definitions are following the correct conventions. 
Here is an example of how to set the indicators and labels in the strategy: ```python - def populate_any_indicators( - self, pair, df, tf, informative=None, set_generalized_indicators=False - ): + def feature_engineering_expand_all(self, dataframe, period, **kwargs): """ - Function designed to automatically generate, name, and merge features - from user-indicated timeframes in the configuration file. The user controls the indicators - passed to the training/prediction by prepending indicators with `'%-' + pair ` - (see convention below). I.e., the user should not prepend any supporting metrics - (e.g., bb_lowerband below) with % unless they explicitly want to pass that metric to the - model. - :param pair: pair to be used as informative - :param df: strategy dataframe which will receive merges from informatives - :param tf: timeframe of the dataframe which will modify the feature names - :param informative: the dataframe associated with the informative pair + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `indicator_periods_candles`, `include_timeframes`, `include_shifted_candles`, and + `include_corr_pairs`. In other words, a single feature defined in this function + will automatically expand to a total of + `indicator_periods_candles` * `include_timeframes` * `include_shifted_candles` * + `include_corr_pairs` numbers of features added to the model. + + All features must be prepended with `%` to be recognized by FreqAI internals. 
+ + :param df: strategy dataframe which will receive the features + :param period: period of the indicator - usage example: + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) """ - if informative is None: - informative = self.dp.get_pair_dataframe(pair, tf) + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) + dataframe["%-mfi-period"] = ta.MFI(dataframe, timeperiod=period) + dataframe["%-adx-period"] = ta.ADX(dataframe, timeperiod=period) + dataframe["%-sma-period"] = ta.SMA(dataframe, timeperiod=period) + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) - # first loop is automatically duplicating indicators for time periods - for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: - t = int(t) - informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) - informative[f"%-{pair}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) - informative[f"%-{pair}adx-period_{t}"] = ta.ADX(informative, window=t) + bollinger = qtpylib.bollinger_bands( + qtpylib.typical_price(dataframe), window=period, stds=2.2 + ) + dataframe["bb_lowerband-period"] = bollinger["lower"] + dataframe["bb_middleband-period"] = bollinger["mid"] + dataframe["bb_upperband-period"] = bollinger["upper"] - bollinger = qtpylib.bollinger_bands( - qtpylib.typical_price(informative), window=t, stds=2.2 + dataframe["%-bb_width-period"] = ( + dataframe["bb_upperband-period"] + - dataframe["bb_lowerband-period"] + ) / dataframe["bb_middleband-period"] + dataframe["%-close-bb_lower-period"] = ( + dataframe["close"] / dataframe["bb_lowerband-period"] + ) + + dataframe["%-roc-period"] = ta.ROC(dataframe, timeperiod=period) + + dataframe["%-relative_volume-period"] = ( + dataframe["volume"] / dataframe["volume"].rolling(period).mean() + ) + + return dataframe + + def feature_engineering_expand_basic(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + This function will automatically expand the 
defined features on the config defined + `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`. + In other words, a single feature defined in this function + will automatically expand to a total of + `include_timeframes` * `include_shifted_candles` * `include_corr_pairs` + numbers of features added to the model. + + Features defined here will *not* be automatically duplicated on user defined + `indicator_periods_candles` + + All features must be prepended with `%` to be recognized by FreqAI internals. + + :param df: strategy dataframe which will receive the features + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-ema-200"] = ta.EMA(dataframe, timeperiod=200) + """ + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-raw_volume"] = dataframe["volume"] + dataframe["%-raw_price"] = dataframe["close"] + return dataframe + + def feature_engineering_standard(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + This optional function will be called once with the dataframe of the base timeframe. + This is the final function to be called, which means that the dataframe entering this + function will contain all the features and columns created by all other + freqai_feature_engineering_* functions. + + This function is a good place to do custom exotic feature extractions (e.g. tsfresh). + This function is a good place for any feature that should not be auto-expanded upon + (e.g. day of the week). + + All features must be prepended with `%` to be recognized by FreqAI internals. 
+ + :param df: strategy dataframe which will receive the features + usage example: dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 + """ + dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 + dataframe["%-hour_of_day"] = (dataframe["date"].dt.hour + 1) / 25 + return dataframe + + def set_freqai_targets(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + Required function to set the targets for the model. + All targets must be prepended with `&` to be recognized by the FreqAI internals. + + :param df: strategy dataframe which will receive the targets + usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"] + """ + dataframe["&-s_close"] = ( + dataframe["close"] + .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + .mean() + / dataframe["close"] + - 1 ) - informative[f"{pair}bb_lowerband-period_{t}"] = bollinger["lower"] - informative[f"{pair}bb_middleband-period_{t}"] = bollinger["mid"] - informative[f"{pair}bb_upperband-period_{t}"] = bollinger["upper"] - - informative[f"%-{pair}bb_width-period_{t}"] = ( - informative[f"{pair}bb_upperband-period_{t}"] - - informative[f"{pair}bb_lowerband-period_{t}"] - ) / informative[f"{pair}bb_middleband-period_{t}"] - informative[f"%-{pair}close-bb_lower-period_{t}"] = ( - informative["close"] / informative[f"{pair}bb_lowerband-period_{t}"] - ) - - informative[f"%-{pair}relative_volume-period_{t}"] = ( - informative["volume"] / informative["volume"].rolling(t).mean() - ) - - indicators = [col for col in informative if col.startswith("%")] - # This loop duplicates and shifts all indicators to add a sense of recency to data - for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): - if n == 0: - continue - informative_shift = informative[indicators].shift(n) - informative_shift = 
informative_shift.add_suffix("_shift-" + str(n)) - informative = pd.concat((informative, informative_shift), axis=1) - - df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) - skip_columns = [ - (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] - ] - df = df.drop(columns=skip_columns) - - # Add generalized indicators here (because in live, it will call this - # function to populate indicators during training). Notice how we ensure not to - # add them multiple times - if set_generalized_indicators: - df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 - df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 - - # user adds targets here by prepending them with &- (see convention below) - # If user wishes to use multiple targets, a multioutput prediction model - # needs to be used such as templates/CatboostPredictionMultiModel.py - df["&-s_close"] = ( - df["close"] - .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) - .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) - .mean() - / df["close"] - - 1 - ) - - return df ``` In the presented example, the user does not wish to pass the `bb_lowerband` as a feature to the model, @@ -118,13 +154,13 @@ After having defined the `base features`, the next step is to expand upon them u } ``` -The `include_timeframes` in the config above are the timeframes (`tf`) of each call to `populate_any_indicators()` in the strategy. In the presented case, the user is asking for the `5m`, `15m`, and `4h` timeframes of the `rsi`, `mfi`, `roc`, and `bb_width` to be included in the feature set. +The `include_timeframes` in the config above are the timeframes (`tf`) of each call to `feature_engineering_expand_*()` in the strategy. In the presented case, the user is asking for the `5m`, `15m`, and `4h` timeframes of the `rsi`, `mfi`, `roc`, and `bb_width` to be included in the feature set. 
-You can ask for each of the defined features to be included also for informative pairs using the `include_corr_pairlist`. This means that the feature set will include all the features from `populate_any_indicators` on all the `include_timeframes` for each of the correlated pairs defined in the config (`ETH/USD`, `LINK/USD`, and `BNB/USD` in the presented example). +You can ask for each of the defined features to be included also for informative pairs using the `include_corr_pairlist`. This means that the feature set will include all the features from `feature_engineering_expand_*()` on all the `include_timeframes` for each of the correlated pairs defined in the config (`ETH/USD`, `LINK/USD`, and `BNB/USD` in the presented example). `include_shifted_candles` indicates the number of previous candles to include in the feature set. For example, `include_shifted_candles: 2` tells FreqAI to include the past 2 candles for each of the features in the feature set. -In total, the number of features the user of the presented example strat has created is: length of `include_timeframes` * no. features in `populate_any_indicators()` * length of `include_corr_pairlist` * no. `include_shifted_candles` * length of `indicator_periods_candles` +In total, the number of features the user of the presented example strat has created is: length of `include_timeframes` * no. features in `feature_engineering_expand_*()` * length of `include_corr_pairlist` * no. `include_shifted_candles` * length of `indicator_periods_candles` $= 3 * 3 * 3 * 2 * 2 = 108$. 
### Returning additional info from training diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md index d05ce80f3..aa7f5cc64 100644 --- a/docs/freqai-parameter-table.md +++ b/docs/freqai-parameter-table.md @@ -29,12 +29,12 @@ Mandatory parameters are marked as **Required** and have to be set in one of the |------------|-------------| | | **Feature parameters within the `freqai.feature_parameters` sub dictionary** | `feature_parameters` | A dictionary containing the parameters used to engineer the feature set. Details and examples are shown [here](freqai-feature-engineering.md).
**Datatype:** Dictionary. -| `include_timeframes` | A list of timeframes that all indicators in `populate_any_indicators` will be created for. The list is added as features to the base indicators dataset.
**Datatype:** List of timeframes (strings). -| `include_corr_pairlist` | A list of correlated coins that FreqAI will add as additional features to all `pair_whitelist` coins. All indicators set in `populate_any_indicators` during feature engineering (see details [here](freqai-feature-engineering.md)) will be created for each correlated coin. The correlated coins features are added to the base indicators dataset.
**Datatype:** List of assets (strings). -| `label_period_candles` | Number of candles into the future that the labels are created for. This is used in `populate_any_indicators` (see `templates/FreqaiExampleStrategy.py` for detailed usage). You can create custom labels and choose whether to make use of this parameter or not.
**Datatype:** Positive integer. +| `include_timeframes` | A list of timeframes that all indicators in `feature_engineering_expand_*()` will be created for. The list is added as features to the base indicators dataset.
**Datatype:** List of timeframes (strings). +| `include_corr_pairlist` | A list of correlated coins that FreqAI will add as additional features to all `pair_whitelist` coins. All indicators set in `feature_engineering_expand_*()` during feature engineering (see details [here](freqai-feature-engineering.md)) will be created for each correlated coin. The correlated coins features are added to the base indicators dataset.
**Datatype:** List of assets (strings). +| `label_period_candles` | Number of candles into the future that the labels are created for. This is used in `feature_engineering_expand_all()` (see `templates/FreqaiExampleStrategy.py` for detailed usage). You can create custom labels and choose whether to make use of this parameter or not.
**Datatype:** Positive integer. | `include_shifted_candles` | Add features from previous candles to subsequent candles with the intent of adding historical information. If used, FreqAI will duplicate and shift all features from the `include_shifted_candles` previous candles so that the information is available for the subsequent candle.
**Datatype:** Positive integer. | `weight_factor` | Weight training data points according to their recency (see details [here](freqai-feature-engineering.md#weighting-features-for-temporal-importance)).
**Datatype:** Positive float (typically < 1). -| `indicator_max_period_candles` | **No longer used (#7325)**. Replaced by `startup_candle_count` which is set in the [strategy](freqai-configuration.md#building-a-freqai-strategy). `startup_candle_count` is timeframe independent and defines the maximum *period* used in `populate_any_indicators()` for indicator creation. FreqAI uses this parameter together with the maximum timeframe in `include_time_frames` to calculate how many data points to download such that the first data point does not include a NaN.
**Datatype:** Positive integer. +| `indicator_max_period_candles` | **No longer used (#7325)**. Replaced by `startup_candle_count` which is set in the [strategy](freqai-configuration.md#building-a-freqai-strategy). `startup_candle_count` is timeframe independent and defines the maximum *period* used in `feature_engineering_*()` for indicator creation. FreqAI uses this parameter together with the maximum timeframe in `include_time_frames` to calculate how many data points to download such that the first data point does not include a NaN.
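**Datatype:** Positive integer. +| `indicator_max_period_candles` | **No longer used (#7325)**. Replaced by `startup_candle_count` which is set in the [strategy](freqai-configuration.md#building-a-freqai-strategy). `startup_candle_count` is timeframe independent and defines the maximum *period* used in `feature_engineering_*()` for indicator creation. FreqAI uses this parameter together with the maximum timeframe in `include_timeframes` to calculate how many data points to download such that the first data point does not include a NaN.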
**Datatype:** List of positive integers. | `principal_component_analysis` | Automatically reduce the dimensionality of the data set using Principal Component Analysis. See details about how it works [here](#reducing-data-dimensionality-with-principal-component-analysis)
**Datatype:** Boolean.
Default: `False`. | `plot_feature_importances` | Create a feature importance plot for each model for the top/bottom `plot_feature_importances` number of features. Plot is stored in `user_data/models//sub-train-_.html`.
**Datatype:** Integer.
Default: `0`. diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md index 22772c2ec..a09b4c5d0 100644 --- a/docs/freqai-reinforcement-learning.md +++ b/docs/freqai-reinforcement-learning.md @@ -34,55 +34,25 @@ Setting up and running a Reinforcement Learning model is the same as running a R freqtrade trade --freqaimodel ReinforcementLearner --strategy MyRLStrategy --config config.json ``` -where `ReinforcementLearner` will use the templated `ReinforcementLearner` from `freqai/prediction_models/ReinforcementLearner` (or a custom user defined one located in `user_data/freqaimodels`). The strategy, on the other hand, follows the same base [feature engineering](freqai-feature-engineering.md) with `populate_any_indicators` as a typical Regressor: +where `ReinforcementLearner` will use the templated `ReinforcementLearner` from `freqai/prediction_models/ReinforcementLearner` (or a custom user defined one located in `user_data/freqaimodels`). The strategy, on the other hand, follows the same base [feature engineering](freqai-feature-engineering.md) with `feature_engineering_*` as a typical Regressor. The difference lies in the creation of the targets: Reinforcement Learning doesn't require them. However, FreqAI requires a default (neutral) value to be set in the action column: ```python - def populate_any_indicators( - self, pair, df, tf, informative=None, set_generalized_indicators=False - ): + def set_freqai_targets(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + Required function to set the targets for the model. + All targets must be prepended with `&` to be recognized by the FreqAI internals. 
- if informative is None: - informative = self.dp.get_pair_dataframe(pair, tf) + More details about feature engineering available: - # first loop is automatically duplicating indicators for time periods - for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: + https://www.freqtrade.io/en/latest/freqai-feature-engineering - t = int(t) - informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) - informative[f"%-{pair}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) - informative[f"%-{pair}adx-period_{t}"] = ta.ADX(informative, window=t) - - # The following raw price values are necessary for RL models - informative[f"%-{pair}raw_close"] = informative["close"] - informative[f"%-{pair}raw_open"] = informative["open"] - informative[f"%-{pair}raw_high"] = informative["high"] - informative[f"%-{pair}raw_low"] = informative["low"] - - indicators = [col for col in informative if col.startswith("%")] - # This loop duplicates and shifts all indicators to add a sense of recency to data - for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): - if n == 0: - continue - informative_shift = informative[indicators].shift(n) - informative_shift = informative_shift.add_suffix("_shift-" + str(n)) - informative = pd.concat((informative, informative_shift), axis=1) - - df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) - skip_columns = [ - (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] - ] - df = df.drop(columns=skip_columns) - - # Add generalized indicators here (because in live, it will call this - # function to populate indicators during training). Notice how we ensure not to - # add them multiple times - if set_generalized_indicators: - - # For RL, there are no direct targets to set. This is filler (neutral) - # until the agent sends an action. 
- df["&-action"] = 0 - - return df + :param df: strategy dataframe which will receive the targets + usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"] + """ + # For RL, there are no direct targets to set. This is filler (neutral) + # until the agent sends an action. + dataframe["&-action"] = 0 ``` Most of the function remains the same as for typical Regressors, however, the function above shows how the strategy must pass the raw price data to the agent so that it has access to raw OHLCV in the training environment: diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index af0726c0b..c091ce451 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -290,8 +290,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): prices_train = train_df.filter(ohlc_list, axis=1) if prices_train.empty: raise OperationalException('Reinforcement learning module didnt find the raw prices ' - 'assigned in populate_any_indicators. Please assign them ' - 'with:\n' + 'assigned in feature_engineering_standard(). 
' + 'Please assign them with:\n' 'informative[f"%-{pair}raw_close"] = informative["close"]\n' 'informative[f"%-{pair}raw_open"] = informative["open"]\n' 'informative[f"%-{pair}raw_high"] = informative["high"]\n' diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index c3e5929de..3e9a8fed2 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1252,12 +1252,12 @@ class FreqaiDataKitchen: informative_copy = informative_df.copy() for t in self.freqai_config["feature_parameters"]["indicator_periods_candles"]: - df_features = strategy.freqai_feature_engineering_indicator_periods( + df_features = strategy.feature_engineering_expand_all( informative_copy.copy(), t) suffix = f"{t}" informative_df = self.merge_features(informative_df, df_features, tf, tf, suffix) - generic_df = strategy.freqai_feature_engineering_generic(informative_copy.copy()) + generic_df = strategy.feature_engineering_expand_basic(informative_copy.copy()) suffix = "gen" informative_df = self.merge_features(informative_df, generic_df, tf, tf, suffix) @@ -1332,8 +1332,8 @@ class FreqaiDataKitchen: dataframe = self.populate_features(dataframe.copy(), corr_pair, strategy, corr_dataframes, base_dataframes, True) - dataframe = strategy.freqai_feature_engineering_generalized_indicators(dataframe.copy()) - dataframe = strategy.freqai_set_targets(dataframe.copy()) + dataframe = strategy.feature_engineering_standard(dataframe.copy()) + dataframe = strategy.set_freqai_targets(dataframe.copy()) self.get_unique_classes_from_labels(dataframe) diff --git a/freqtrade/strategy/interface.py b/freqtrade/strategy/interface.py index 6bcc2a23f..50ae2341e 100644 --- a/freqtrade/strategy/interface.py +++ b/freqtrade/strategy/interface.py @@ -611,40 +611,93 @@ class IStrategy(ABC, HyperStrategyMixin): """ return df - def freqai_feature_engineering_indicator_periods(self, dataframe: DataFrame, - period: int, **kwargs): + def feature_engineering_expand_all(self, 
dataframe: DataFrame, + period: int, **kwargs): """ - This function will be called for all include_timeframes in each indicator_periods_candles - (including corr_pairs). - After that, the features will be shifted by the number of candles in the - include_shifted_candles. + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `indicator_periods_candles`, `include_timeframes`, `include_shifted_candles`, and + `include_corr_pairs`. In other words, a single feature defined in this function + will automatically expand to a total of + `indicator_periods_candles` * `include_timeframes` * `include_shifted_candles` * + `include_corr_pairs` numbers of features added to the model. + + All features must be prepended with `%` to be recognized by FreqAI internals. + + More details on how these config defined parameters accelerate feature engineering + in the documentation at: + + https://www.freqtrade.io/en/latest/freqai-parameter-table/#feature-parameters + + https://www.freqtrade.io/en/latest/freqai-feature-engineering/#defining-the-features + :param df: strategy dataframe which will receive the features :param period: period of the indicator - usage example: dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) """ return dataframe - def freqai_feature_engineering_generic(self, dataframe: DataFrame, **kwargs): + def feature_engineering_expand_basic(self, dataframe: DataFrame, **kwargs): """ - This optional function will be called for all include_timeframes (including corr_pairs). - After that, the features will be shifted by the number of candles in the - include_shifted_candles. + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`. 
+ In other words, a single feature defined in this function + will automatically expand to a total of + `include_timeframes` * `include_shifted_candles` * `include_corr_pairs` + numbers of features added to the model. + + Features defined here will *not* be automatically duplicated on user defined + `indicator_periods_candles` + + All features must be prepended with `%` to be recognized by FreqAI internals. + + More details on how these config defined parameters accelerate feature engineering + in the documentation at: + + https://www.freqtrade.io/en/latest/freqai-parameter-table/#feature-parameters + + https://www.freqtrade.io/en/latest/freqai-feature-engineering/#defining-the-features + :param df: strategy dataframe which will receive the features dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-ema-200"] = ta.EMA(dataframe, timeperiod=200) """ return dataframe - def freqai_feature_engineering_generalized_indicators(self, dataframe: DataFrame, **kwargs): + def feature_engineering_standard(self, dataframe: DataFrame, **kwargs): """ - This optional function will be called once with the dataframe of the main timeframe. + *Only functional with FreqAI enabled strategies* + This optional function will be called once with the dataframe of the base timeframe. + This is the final function to be called, which means that the dataframe entering this + function will contain all the features and columns created by all other + freqai_feature_engineering_* functions. + + This function is a good place to do custom exotic feature extractions (e.g. tsfresh). + This function is a good place for any feature that should not be auto-expanded upon + (e.g. day of the week). + + All features must be prepended with `%` to be recognized by FreqAI internals. 
+ + More details about feature engineering available: + + https://www.freqtrade.io/en/latest/freqai-feature-engineering + :param df: strategy dataframe which will receive the features usage example: dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 """ return dataframe - def freqai_set_targets(self, dataframe, **kwargs): + def set_freqai_targets(self, dataframe, **kwargs): """ + *Only functional with FreqAI enabled strategies* Required function to set the targets for the model. + All targets must be prepended with `&` to be recognized by the FreqAI internals. + + More details about feature engineering available: + + https://www.freqtrade.io/en/latest/freqai-feature-engineering + :param df: strategy dataframe which will receive the targets usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"] """ diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index 323919a47..0c5d74ca8 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -1,12 +1,11 @@ import logging from functools import reduce -import pandas as pd import talib.abstract as ta from pandas import DataFrame from technical import qtpylib -from freqtrade.strategy import CategoricalParameter, IStrategy, merge_informative_pair +from freqtrade.strategy import CategoricalParameter, IStrategy logger = logging.getLogger(__name__) @@ -18,8 +17,8 @@ class FreqaiExampleStrategy(IStrategy): IFreqaiModel to the strategy. Namely, the user uses: self.freqai.start(dataframe, metadata) - to make predictions on their data. populate_any_indicators() automatically - generates the variety of features indicated by the user in the + to make predictions on their data. feature_engineering_*() automatically + generate the variety of features indicated by the user in the canonical freqtrade configuration file under config['freqai']. 
""" @@ -47,16 +46,30 @@ class FreqaiExampleStrategy(IStrategy): std_dev_multiplier_sell = CategoricalParameter( [0.75, 1, 1.25, 1.5, 1.75], space="sell", default=1.25, optimize=True) - def freqai_feature_engineering_indicator_periods(self, dataframe, period, **kwargs): + def feature_engineering_expand_all(self, dataframe, period, **kwargs): """ - This function will be called for all include_timeframes in each indicator_periods_candles - (including corr_pairs). - After that, the features will be shifted by the number of candles in the - include_shifted_candles. + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `indicator_periods_candles`, `include_timeframes`, `include_shifted_candles`, and + `include_corr_pairs`. In other words, a single feature defined in this function + will automatically expand to a total of + `indicator_periods_candles` * `include_timeframes` * `include_shifted_candles` * + `include_corr_pairs` numbers of features added to the model. + + All features must be prepended with `%` to be recognized by FreqAI internals. 
+ + More details on how these config defined parameters accelerate feature engineering + in the documentation at: + + https://www.freqtrade.io/en/latest/freqai-parameter-table/#feature-parameters + + https://www.freqtrade.io/en/latest/freqai-feature-engineering/#defining-the-features + :param df: strategy dataframe which will receive the features :param period: period of the indicator - usage example: dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) """ + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) dataframe["%-mfi-period"] = ta.MFI(dataframe, timeperiod=period) dataframe["%-adx-period"] = ta.ADX(dataframe, timeperiod=period) @@ -86,32 +99,72 @@ class FreqaiExampleStrategy(IStrategy): return dataframe - def freqai_feature_engineering_generic(self, dataframe, **kwargs): + def feature_engineering_expand_basic(self, dataframe, **kwargs): """ - This optional function will be called for all include_timeframes (including corr_pairs). - After that, the features will be shifted by the number of candles in the - include_shifted_candles. + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`. + In other words, a single feature defined in this function + will automatically expand to a total of + `include_timeframes` * `include_shifted_candles` * `include_corr_pairs` + numbers of features added to the model. + + Features defined here will *not* be automatically duplicated on user defined + `indicator_periods_candles` + + All features must be prepended with `%` to be recognized by FreqAI internals. 
+ + More details on how these config defined parameters accelerate feature engineering + in the documentation at: + + https://www.freqtrade.io/en/latest/freqai-parameter-table/#feature-parameters + + https://www.freqtrade.io/en/latest/freqai-feature-engineering/#defining-the-features + :param df: strategy dataframe which will receive the features dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-ema-200"] = ta.EMA(dataframe, timeperiod=200) """ dataframe["%-pct-change"] = dataframe["close"].pct_change() dataframe["%-raw_volume"] = dataframe["volume"] dataframe["%-raw_price"] = dataframe["close"] return dataframe - def freqai_feature_engineering_generalized_indicators(self, dataframe, **kwargs): + def feature_engineering_standard(self, dataframe, **kwargs): """ - This optional function will be called once with the dataframe of the main timeframe. + *Only functional with FreqAI enabled strategies* + This optional function will be called once with the dataframe of the base timeframe. + This is the final function to be called, which means that the dataframe entering this + function will contain all the features and columns created by all other + freqai_feature_engineering_* functions. + + This function is a good place to do custom exotic feature extractions (e.g. tsfresh). + This function is a good place for any feature that should not be auto-expanded upon + (e.g. day of the week). + + All features must be prepended with `%` to be recognized by FreqAI internals. 
+ + More details about feature engineering available: + + https://www.freqtrade.io/en/latest/freqai-feature-engineering + :param df: strategy dataframe which will receive the features usage example: dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 """ - dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 - dataframe["%-hour_of_day"] = (dataframe["date"].dt.hour + 1) / 25 + dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek + dataframe["%-hour_of_day"] = dataframe["date"].dt.hour return dataframe - def freqai_set_targets(self, dataframe, **kwargs): + def set_freqai_targets(self, dataframe, **kwargs): """ + *Only functional with FreqAI enabled strategies* Required function to set the targets for the model. + All targets must be prepended with `&` to be recognized by the FreqAI internals. + + More details about feature engineering available: + + https://www.freqtrade.io/en/latest/freqai-feature-engineering + :param df: strategy dataframe which will receive the targets usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"] """ @@ -123,128 +176,41 @@ class FreqaiExampleStrategy(IStrategy): / dataframe["close"] - 1 ) + + # Classifiers are typically set up with strings as targets: + # df['&s-up_or_down'] = np.where( df["close"].shift(-100) > + # df["close"], 'up', 'down') + + # If user wishes to use multiple targets, they can add more by + # appending more columns with '&'. 
User should keep in mind that multi targets + # requires a multioutput prediction model such as + # templates/CatboostPredictionMultiModel.py, + + # df["&-s_range"] = ( + # df["close"] + # .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + # .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + # .max() + # - + # df["close"] + # .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + # .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + # .min() + # ) + return dataframe - def populate_any_indicators_old( - self, pair, df, tf, informative=None, set_generalized_indicators=False - ): - """ - DEPRECATED - USE FEATURE ENGINEERING FUNCTIONS INSTEAD - Function designed to automatically generate, name and merge features - from user indicated timeframes in the configuration file. User can add - additional features here, but must follow the naming convention. - This method is *only* used in FreqaiDataKitchen class and therefore - it is only called if FreqAI is active. 
- :param pair: pair to be used as informative - :param df: strategy dataframe which will receive merges from informatives - :param tf: timeframe of the dataframe which will modify the feature names - :param informative: the dataframe associated with the informative pair - """ - - if informative is None: - informative = self.dp.get_pair_dataframe(pair, tf) - - # first loop is automatically duplicating indicators for time periods - for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: - - t = int(t) - informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) - informative[f"%-{pair}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) - informative[f"%-{pair}adx-period_{t}"] = ta.ADX(informative, timeperiod=t) - informative[f"%-{pair}sma-period_{t}"] = ta.SMA(informative, timeperiod=t) - informative[f"%-{pair}ema-period_{t}"] = ta.EMA(informative, timeperiod=t) - - bollinger = qtpylib.bollinger_bands( - qtpylib.typical_price(informative), window=t, stds=2.2 - ) - informative[f"{pair}bb_lowerband-period_{t}"] = bollinger["lower"] - informative[f"{pair}bb_middleband-period_{t}"] = bollinger["mid"] - informative[f"{pair}bb_upperband-period_{t}"] = bollinger["upper"] - - informative[f"%-{pair}bb_width-period_{t}"] = ( - informative[f"{pair}bb_upperband-period_{t}"] - - informative[f"{pair}bb_lowerband-period_{t}"] - ) / informative[f"{pair}bb_middleband-period_{t}"] - informative[f"%-{pair}close-bb_lower-period_{t}"] = ( - informative["close"] / informative[f"{pair}bb_lowerband-period_{t}"] - ) - - informative[f"%-{pair}roc-period_{t}"] = ta.ROC(informative, timeperiod=t) - - informative[f"%-{pair}relative_volume-period_{t}"] = ( - informative["volume"] / informative["volume"].rolling(t).mean() - ) - - informative[f"%-{pair}pct-change"] = informative["close"].pct_change() - informative[f"%-{pair}raw_volume"] = informative["volume"] - informative[f"%-{pair}raw_price"] = informative["close"] - - indicators = [col for col in 
informative if col.startswith("%")] - # This loop duplicates and shifts all indicators to add a sense of recency to data - for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): - if n == 0: - continue - informative_shift = informative[indicators].shift(n) - informative_shift = informative_shift.add_suffix("_shift-" + str(n)) - informative = pd.concat((informative, informative_shift), axis=1) - - df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) - skip_columns = [ - (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] - ] - df = df.drop(columns=skip_columns) - - # Add generalized indicators here (because in live, it will call this - # function to populate indicators during training). Notice how we ensure not to - # add them multiple times - if set_generalized_indicators: - df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 - df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 - - # user adds targets here by prepending them with &- (see convention below) - df["&-s_close"] = ( - df["close"] - .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) - .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) - .mean() - / df["close"] - - 1 - ) - - # Classifiers are typically set up with strings as targets: - # df['&s-up_or_down'] = np.where( df["close"].shift(-100) > - # df["close"], 'up', 'down') - - # If user wishes to use multiple targets, they can add more by - # appending more columns with '&'. 
User should keep in mind that multi targets - # requires a multioutput prediction model such as - # templates/CatboostPredictionMultiModel.py, - - # df["&-s_range"] = ( - # df["close"] - # .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) - # .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) - # .max() - # - - # df["close"] - # .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) - # .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) - # .min() - # ) - - return df - def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: - # All indicators must be populated by populate_any_indicators() for live functionality - # to work correctly. + # All indicators must be populated by feature_engineering_*() functions - # the model will return all labels created by user in `populate_any_indicators` + # the model will return all labels created by user in `feature_engineering_*` # (& appended targets), an indication of whether or not the prediction should be accepted, # the target mean/std values for each of the labels created by user in - # `populate_any_indicators()` for each training period. + # `set_freqai_targets()` for each training period. 
dataframe = self.freqai.start(dataframe, metadata, self) + for val in self.std_dev_multiplier_buy.range: dataframe[f'target_roi_{val}'] = ( dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * val diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 0dc897916..f322e4165 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -82,7 +82,7 @@ def test_compute_distances(mocker, freqai_conf): freqai = make_data_dictionary(mocker, freqai_conf) freqai_conf['freqai']['feature_parameters'].update({"DI_threshold": 1}) avg_mean_dist = freqai.dk.compute_distances() - assert round(avg_mean_dist, 2) == 1.99 + assert round(avg_mean_dist, 2) == 1.98 def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, caplog): diff --git a/tests/strategy/strats/freqai_rl_test_strat.py b/tests/strategy/strats/freqai_rl_test_strat.py index f32a4adca..7f8872d8b 100644 --- a/tests/strategy/strats/freqai_rl_test_strat.py +++ b/tests/strategy/strats/freqai_rl_test_strat.py @@ -1,11 +1,10 @@ import logging from functools import reduce -import pandas as pd import talib.abstract as ta from pandas import DataFrame -from freqtrade.strategy import IStrategy, merge_informative_pair +from freqtrade.strategy import IStrategy logger = logging.getLogger(__name__) @@ -25,49 +24,36 @@ class freqai_rl_test_strat(IStrategy): startup_candle_count: int = 30 can_short = False - def populate_any_indicators( - self, pair, df, tf, informative=None, set_generalized_indicators=False - ): + def feature_engineering_expand_all(self, dataframe, period, **kwargs): - if informative is None: - informative = self.dp.get_pair_dataframe(pair, tf) + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) - # first loop is automatically duplicating indicators for time periods - for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: + return dataframe - t = int(t) - 
informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) + def feature_engineering_expand_basic(self, dataframe: DataFrame, **kwargs): - # The following columns are necessary for RL models. - informative[f"%-{pair}raw_close"] = informative["close"] - informative[f"%-{pair}raw_open"] = informative["open"] - informative[f"%-{pair}raw_high"] = informative["high"] - informative[f"%-{pair}raw_low"] = informative["low"] + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-raw_volume"] = dataframe["volume"] - indicators = [col for col in informative if col.startswith("%")] - # This loop duplicates and shifts all indicators to add a sense of recency to data - for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): - if n == 0: - continue - informative_shift = informative[indicators].shift(n) - informative_shift = informative_shift.add_suffix("_shift-" + str(n)) - informative = pd.concat((informative, informative_shift), axis=1) + dataframe["%-raw_close"] = dataframe["close"] + dataframe["%-raw_open"] = dataframe["open"] + dataframe["%-raw_high"] = dataframe["high"] + dataframe["%-raw_low"] = dataframe["low"] - df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) - skip_columns = [ - (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] - ] - df = df.drop(columns=skip_columns) + return dataframe - # Add generalized indicators here (because in live, it will call this - # function to populate indicators during training). Notice how we ensure not to - # add them multiple times - if set_generalized_indicators: - # For RL, there are no direct targets to set. This is filler (neutral) - # until the agent sends an action. 
- df["&-action"] = 0 + def feature_engineering_standard(self, dataframe, **kwargs): - return df + dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek + dataframe["%-hour_of_day"] = dataframe["date"].dt.hour + + return dataframe + + def set_freqai_targets(self, dataframe, **kwargs): + + dataframe["&-action"] = 0 + + return dataframe def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: diff --git a/tests/strategy/strats/freqai_test_classifier.py b/tests/strategy/strats/freqai_test_classifier.py index a1e8cb6bf..02427ab59 100644 --- a/tests/strategy/strats/freqai_test_classifier.py +++ b/tests/strategy/strats/freqai_test_classifier.py @@ -2,11 +2,10 @@ import logging from functools import reduce import numpy as np -import pandas as pd import talib.abstract as ta from pandas import DataFrame -from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair +from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy logger = logging.getLogger(__name__) @@ -57,55 +56,35 @@ class freqai_test_classifier(IStrategy): informative_pairs.append((pair, tf)) return informative_pairs - def populate_any_indicators( - self, pair, df, tf, informative=None, set_generalized_indicators=False - ): + def feature_engineering_expand_all(self, dataframe, period, **kwargs): - coin = pair.split('/')[0] + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) + dataframe["%-mfi-period"] = ta.MFI(dataframe, timeperiod=period) + dataframe["%-adx-period"] = ta.ADX(dataframe, timeperiod=period) - if informative is None: - informative = self.dp.get_pair_dataframe(pair, tf) + return dataframe - # first loop is automatically duplicating indicators for time periods - for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: + def feature_engineering_expand_basic(self, dataframe: DataFrame, **kwargs): - t = int(t) - informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) - 
informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) - informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, window=t) + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-raw_volume"] = dataframe["volume"] + dataframe["%-raw_price"] = dataframe["close"] - informative[f"%-{coin}pct-change"] = informative["close"].pct_change() - informative[f"%-{coin}raw_volume"] = informative["volume"] - informative[f"%-{coin}raw_price"] = informative["close"] + return dataframe - indicators = [col for col in informative if col.startswith("%")] - # This loop duplicates and shifts all indicators to add a sense of recency to data - for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): - if n == 0: - continue - informative_shift = informative[indicators].shift(n) - informative_shift = informative_shift.add_suffix("_shift-" + str(n)) - informative = pd.concat((informative, informative_shift), axis=1) + def feature_engineering_standard(self, dataframe, **kwargs): - df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) - skip_columns = [ - (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] - ] - df = df.drop(columns=skip_columns) + dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek + dataframe["%-hour_of_day"] = dataframe["date"].dt.hour - # Add generalized indicators here (because in live, it will call this - # function to populate indicators during training). 
Notice how we ensure not to - # add them multiple times - if set_generalized_indicators: - df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 - df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 + return dataframe - # user adds targets here by prepending them with &- (see convention below) - # If user wishes to use multiple targets, a multioutput prediction model - # needs to be used such as templates/CatboostPredictionMultiModel.py - df['&s-up_or_down'] = np.where(df["close"].shift(-100) > df["close"], 'up', 'down') + def set_freqai_targets(self, dataframe, **kwargs): - return df + dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-100) > + dataframe["close"], 'up', 'down') + + return dataframe def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: diff --git a/tests/strategy/strats/freqai_test_multimodel_classifier_strat.py b/tests/strategy/strats/freqai_test_multimodel_classifier_strat.py index 9188fa331..65f2e4540 100644 --- a/tests/strategy/strats/freqai_test_multimodel_classifier_strat.py +++ b/tests/strategy/strats/freqai_test_multimodel_classifier_strat.py @@ -2,11 +2,10 @@ import logging from functools import reduce import numpy as np -import pandas as pd import talib.abstract as ta from pandas import DataFrame -from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair +from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy logger = logging.getLogger(__name__) @@ -44,59 +43,38 @@ class freqai_test_multimodel_classifier_strat(IStrategy): ) max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True) - def populate_any_indicators( - self, pair, df, tf, informative=None, set_generalized_indicators=False - ): + def feature_engineering_expand_all(self, dataframe, period, **kwargs): - coin = pair.split('/')[0] + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) + dataframe["%-mfi-period"] = ta.MFI(dataframe, 
timeperiod=period) + dataframe["%-adx-period"] = ta.ADX(dataframe, timeperiod=period) - if informative is None: - informative = self.dp.get_pair_dataframe(pair, tf) + return dataframe - # first loop is automatically duplicating indicators for time periods - for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: + def feature_engineering_expand_basic(self, dataframe: DataFrame, **kwargs): - t = int(t) - informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) - informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) - informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, window=t) + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-raw_volume"] = dataframe["volume"] + dataframe["%-raw_price"] = dataframe["close"] - informative[f"%-{coin}pct-change"] = informative["close"].pct_change() - informative[f"%-{coin}raw_volume"] = informative["volume"] - informative[f"%-{coin}raw_price"] = informative["close"] + return dataframe - indicators = [col for col in informative if col.startswith("%")] - # This loop duplicates and shifts all indicators to add a sense of recency to data - for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): - if n == 0: - continue - informative_shift = informative[indicators].shift(n) - informative_shift = informative_shift.add_suffix("_shift-" + str(n)) - informative = pd.concat((informative, informative_shift), axis=1) + def feature_engineering_standard(self, dataframe, **kwargs): - df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) - skip_columns = [ - (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] - ] - df = df.drop(columns=skip_columns) + dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek + dataframe["%-hour_of_day"] = dataframe["date"].dt.hour - # Add generalized indicators here (because in live, it will call this - # function to populate 
indicators during training). Notice how we ensure not to - # add them multiple times - if set_generalized_indicators: - df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 - df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 + return dataframe - # user adds targets here by prepending them with &- (see convention below) - # If user wishes to use multiple targets, a multioutput prediction model - # needs to be used such as templates/CatboostPredictionMultiModel.py - df['&s-up_or_down'] = np.where(df["close"].shift(-50) > - df["close"], 'up', 'down') + def set_freqai_targets(self, dataframe, **kwargs): - df['&s-up_or_down2'] = np.where(df["close"].shift(-50) > - df["close"], 'up2', 'down2') + dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-50) > + dataframe["close"], 'up', 'down') - return df + dataframe['&s-up_or_down2'] = np.where(dataframe["close"].shift(-50) > + dataframe["close"], 'up2', 'down2') + + return dataframe def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: diff --git a/tests/strategy/strats/freqai_test_multimodel_strat.py b/tests/strategy/strats/freqai_test_multimodel_strat.py index ada4b25f0..5c9712629 100644 --- a/tests/strategy/strats/freqai_test_multimodel_strat.py +++ b/tests/strategy/strats/freqai_test_multimodel_strat.py @@ -1,11 +1,10 @@ import logging from functools import reduce -import pandas as pd import talib.abstract as ta from pandas import DataFrame -from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair +from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy logger = logging.getLogger(__name__) @@ -43,74 +42,53 @@ class freqai_test_multimodel_strat(IStrategy): ) max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True) - def populate_any_indicators( - self, pair, df, tf, informative=None, set_generalized_indicators=False - ): + def feature_engineering_expand_all(self, dataframe, period, 
**kwargs): - coin = pair.split('/')[0] + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) + dataframe["%-mfi-period"] = ta.MFI(dataframe, timeperiod=period) + dataframe["%-adx-period"] = ta.ADX(dataframe, timeperiod=period) - if informative is None: - informative = self.dp.get_pair_dataframe(pair, tf) + return dataframe - # first loop is automatically duplicating indicators for time periods - for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: + def feature_engineering_expand_basic(self, dataframe: DataFrame, **kwargs): - t = int(t) - informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) - informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) - informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, window=t) + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-raw_volume"] = dataframe["volume"] + dataframe["%-raw_price"] = dataframe["close"] - informative[f"%-{coin}pct-change"] = informative["close"].pct_change() - informative[f"%-{coin}raw_volume"] = informative["volume"] - informative[f"%-{coin}raw_price"] = informative["close"] + return dataframe - indicators = [col for col in informative if col.startswith("%")] - # This loop duplicates and shifts all indicators to add a sense of recency to data - for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): - if n == 0: - continue - informative_shift = informative[indicators].shift(n) - informative_shift = informative_shift.add_suffix("_shift-" + str(n)) - informative = pd.concat((informative, informative_shift), axis=1) + def feature_engineering_standard(self, dataframe, **kwargs): - df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) - skip_columns = [ - (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] - ] - df = df.drop(columns=skip_columns) + dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek + 
dataframe["%-hour_of_day"] = dataframe["date"].dt.hour - # Add generalized indicators here (because in live, it will call this - # function to populate indicators during training). Notice how we ensure not to - # add them multiple times - if set_generalized_indicators: - df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 - df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 + return dataframe - # user adds targets here by prepending them with &- (see convention below) - # If user wishes to use multiple targets, a multioutput prediction model - # needs to be used such as templates/CatboostPredictionMultiModel.py - df["&-s_close"] = ( - df["close"] - .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) - .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) - .mean() - / df["close"] - - 1 + def set_freqai_targets(self, dataframe, **kwargs): + + dataframe["&-s_close"] = ( + dataframe["close"] + .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + .mean() + / dataframe["close"] + - 1 ) - df["&-s_range"] = ( - df["close"] - .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) - .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) - .max() - - - df["close"] - .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) - .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) - .min() - ) + dataframe["&-s_range"] = ( + dataframe["close"] + .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + .max() + - + dataframe["close"] + .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + .min() + ) - return df + return dataframe def populate_indicators(self, dataframe: DataFrame, metadata: 
dict) -> DataFrame: diff --git a/tests/strategy/strats/freqai_test_strat.py b/tests/strategy/strats/freqai_test_strat.py index cdfb7f4d0..b52c95908 100644 --- a/tests/strategy/strats/freqai_test_strat.py +++ b/tests/strategy/strats/freqai_test_strat.py @@ -1,11 +1,10 @@ import logging from functools import reduce -import pandas as pd import talib.abstract as ta from pandas import DataFrame -from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair +from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy logger = logging.getLogger(__name__) @@ -43,62 +42,41 @@ class freqai_test_strat(IStrategy): ) max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True) - def populate_any_indicators( - self, pair, df, tf, informative=None, set_generalized_indicators=False - ): + def feature_engineering_expand_all(self, dataframe, period, **kwargs): - coin = pair.split('/')[0] + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) + dataframe["%-mfi-period"] = ta.MFI(dataframe, timeperiod=period) + dataframe["%-adx-period"] = ta.ADX(dataframe, timeperiod=period) - if informative is None: - informative = self.dp.get_pair_dataframe(pair, tf) + return dataframe - # first loop is automatically duplicating indicators for time periods - for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: + def feature_engineering_expand_basic(self, dataframe: DataFrame, **kwargs): - t = int(t) - informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) - informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) - informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, window=t) + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-raw_volume"] = dataframe["volume"] + dataframe["%-raw_price"] = dataframe["close"] - informative[f"%-{coin}pct-change"] = informative["close"].pct_change() - 
informative[f"%-{coin}raw_volume"] = informative["volume"] - informative[f"%-{coin}raw_price"] = informative["close"] + return dataframe - indicators = [col for col in informative if col.startswith("%")] - # This loop duplicates and shifts all indicators to add a sense of recency to data - for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): - if n == 0: - continue - informative_shift = informative[indicators].shift(n) - informative_shift = informative_shift.add_suffix("_shift-" + str(n)) - informative = pd.concat((informative, informative_shift), axis=1) + def feature_engineering_standard(self, dataframe, **kwargs): - df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) - skip_columns = [ - (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] - ] - df = df.drop(columns=skip_columns) + dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek + dataframe["%-hour_of_day"] = dataframe["date"].dt.hour - # Add generalized indicators here (because in live, it will call this - # function to populate indicators during training). 
Notice how we ensure not to - # add them multiple times - if set_generalized_indicators: - df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 - df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 + return dataframe - # user adds targets here by prepending them with &- (see convention below) - # If user wishes to use multiple targets, a multioutput prediction model - # needs to be used such as templates/CatboostPredictionMultiModel.py - df["&-s_close"] = ( - df["close"] - .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) - .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) - .mean() - / df["close"] - - 1 + def set_freqai_targets(self, dataframe, **kwargs): + + dataframe["&-s_close"] = ( + dataframe["close"] + .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + .mean() + / dataframe["close"] + - 1 ) - return df + return dataframe def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: From 6f7eb71bbb35c73b38ec1332d38e622f1d74d5ff Mon Sep 17 00:00:00 2001 From: robcaulk Date: Wed, 28 Dec 2022 14:52:33 +0100 Subject: [PATCH 03/19] ensure RL works with new naming scheme --- .../RL/BaseReinforcementLearningModel.py | 20 +++++++++++-------- tests/freqai/test_freqai_datakitchen.py | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index c091ce451..4ef2ca9bf 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -280,22 +280,26 @@ class BaseReinforcementLearningModel(IFreqaiModel): train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] + # %-raw_volume_gen_shift-2_ETH/USDT_1h # price data for model training and evaluation tf = self.config['timeframe'] - ohlc_list = 
[f'%-{pair}raw_open_{tf}', f'%-{pair}raw_low_{tf}', - f'%-{pair}raw_high_{tf}', f'%-{pair}raw_close_{tf}'] - rename_dict = {f'%-{pair}raw_open_{tf}': 'open', f'%-{pair}raw_low_{tf}': 'low', - f'%-{pair}raw_high_{tf}': ' high', f'%-{pair}raw_close_{tf}': 'close'} + ohlc_list = [f'%-raw_open_gen_{pair}_{tf}', f'%-raw_low_gen_{pair}_{tf}', + f'%-raw_high_gen_{pair}_{tf}', f'%-raw_close_gen_{pair}_{tf}'] + rename_dict = {f'%-raw_open_gen_{pair}_{tf}': 'open', + f'%-raw_low_gen_{pair}_{tf}': 'low', + f'%-raw_high_gen_{pair}_{tf}': ' high', + f'%-raw_close_gen_{pair}_{tf}': 'close'} prices_train = train_df.filter(ohlc_list, axis=1) if prices_train.empty: raise OperationalException('Reinforcement learning module didnt find the raw prices ' 'assigned in feature_engineering_standard(). ' 'Please assign them with:\n' - 'informative[f"%-{pair}raw_close"] = informative["close"]\n' - 'informative[f"%-{pair}raw_open"] = informative["open"]\n' - 'informative[f"%-{pair}raw_high"] = informative["high"]\n' - 'informative[f"%-{pair}raw_low"] = informative["low"]\n') + 'dataframe["%-raw_close"] = dataframe["close"]\n' + 'dataframe["%-raw_open"] = dataframe["open"]\n' + 'dataframe["%-raw_high"] = dataframe["high"]\n' + 'dataframe["%-raw_low"] = dataframe["low"]\n' + 'inside `feature_engineering_expand_basic()`') prices_train.rename(columns=rename_dict, inplace=True) prices_train.reset_index(drop=True) diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index f322e4165..95665a775 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -90,7 +90,7 @@ def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 0.1}) freqai.dk.use_SVM_to_remove_outliers(predict=False) assert log_has_re( - "SVM detected 7.36%", + "SVM detected 7.83%", caplog, ) From 2b89f643b7bd74332be399238012ddedc60acba9 Mon Sep 17 
00:00:00 2001 From: Wagner Costa Date: Wed, 28 Dec 2022 19:03:41 -0300 Subject: [PATCH 04/19] adjust backtest to new feature engineering functions --- freqtrade/freqai/freqai_interface.py | 32 ++++++++++++++++++---------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 9025f358a..accd3373f 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -149,12 +149,9 @@ class IFreqaiModel(ABC): # the concatenated results for the full backtesting period back to the strategy. elif not self.follow_mode: self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) - dataframe = self.dk.use_strategy_to_populate_indicators( - strategy, prediction_dataframe=dataframe, pair=metadata["pair"] - ) if not self.config.get("freqai_backtest_live_models", False): logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") - dk = self.start_backtesting(dataframe, metadata, self.dk) + dk = self.start_backtesting(dataframe, metadata, self.dk, strategy) dataframe = dk.remove_features_from_df(dk.return_dataframe) else: logger.info( @@ -255,7 +252,7 @@ class IFreqaiModel(ABC): self.dd.save_metric_tracker_to_disk() def start_backtesting( - self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen + self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen, strategy: IStrategy ) -> FreqaiDataKitchen: """ The main broad execution for backtesting. 
For backtesting, each pair enters and then gets @@ -267,12 +264,14 @@ class IFreqaiModel(ABC): :param dataframe: DataFrame = strategy passed dataframe :param metadata: Dict = pair metadata :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only + :param strategy: Strategy to train on :return: FreqaiDataKitchen = Data management/analysis tool associated to present pair only """ self.pair_it += 1 train_it = 0 + populate_indicators = True # Loop enforcing the sliding window training/backtesting paradigm # tr_train is the training time range e.g. 1 historical month # tr_backtest is the backtesting time range e.g. the week directly @@ -301,14 +300,26 @@ class IFreqaiModel(ABC): dk.set_new_model_names(pair, timestamp_model_id) if dk.check_if_backtest_prediction_is_valid(len_backtest_df): - self.dd.load_metadata(dk) - dk.find_features(dataframe) - self.check_if_feature_list_matches_strategy(dk) + # self.dd.load_metadata(dk) + # dk.find_features(dataframe) + # self.check_if_feature_list_matches_strategy(dk) append_df = dk.get_backtesting_prediction() dk.append_predictions(append_df) else: - dataframe_train = dk.slice_dataframe(tr_train, dataframe) - dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) + if populate_indicators: + dataframe = self.dk.use_strategy_to_populate_indicators( + strategy, prediction_dataframe=dataframe, pair=metadata["pair"] + ) + populate_indicators = False + + dataframe_base_train = dataframe.loc[dataframe["date"] < tr_train.stopdt, :] + dataframe_base_train = strategy.set_freqai_targets(dataframe_base_train) + dataframe_base_backtest = dataframe.loc[dataframe["date"] < tr_backtest.stopdt, :] + dataframe_base_backtest = strategy.set_freqai_targets(dataframe_base_backtest) + + dataframe_train = dk.slice_dataframe(tr_train, dataframe_base_train) + dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe_base_backtest) + if not self.model_exists(dk): dk.find_features(dataframe_train) 
dk.find_labels(dataframe_train) @@ -913,7 +924,6 @@ class IFreqaiModel(ABC): dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop)) dk.return_dataframe = pd.merge( dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred") - # dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0) return dk # Following methods which are overridden by user made prediction models. From 2e30bdb9b23594579e85a41966007043c0a09da1 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Thu, 29 Dec 2022 16:35:11 -0300 Subject: [PATCH 05/19] freqai bt - fix tests --- freqtrade/freqai/data_kitchen.py | 9 +++++--- freqtrade/freqai/freqai_interface.py | 19 ++++++++++++++--- tests/freqai/test_freqai_interface.py | 30 ++++++++++++--------------- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 3e9a8fed2..3ddc0892f 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1308,14 +1308,17 @@ class FreqaiDataKitchen: pairs: List[str] = self.freqai_config["feature_parameters"].get( "include_corr_pairlist", []) - if not prediction_dataframe.empty: - dataframe = prediction_dataframe.copy() - for tf in tfs: + for tf in tfs: + if tf not in base_dataframes: base_dataframes[tf] = pd.DataFrame() + if not corr_dataframes.keys(): for p in pairs: if p not in corr_dataframes: corr_dataframes[p] = {} corr_dataframes[p][tf] = pd.DataFrame() + + if not prediction_dataframe.empty: + dataframe = prediction_dataframe.copy() else: dataframe = base_dataframes[self.config["timeframe"]].copy() diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index accd3373f..df4317095 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -271,14 +271,19 @@ class IFreqaiModel(ABC): self.pair_it += 1 train_it = 0 + pair = metadata["pair"] + populate_indicators = True + 
timerange = TimeRange.parse_timerange(self.dk.full_timerange) + self.dd.load_all_pair_histories(timerange, self.dk) + corr_df, base_df = self.dd.get_base_and_corr_dataframes(timerange, pair, dk) + # Loop enforcing the sliding window training/backtesting paradigm # tr_train is the training time range e.g. 1 historical month # tr_backtest is the backtesting time range e.g. the week directly # following tr_train. Both of these windows slide through the # entire backtest for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges): - pair = metadata["pair"] (_, _, _) = self.dd.get_pair_dict_info(pair) train_it += 1 total_trains = len(dk.backtesting_timeranges) @@ -308,7 +313,8 @@ class IFreqaiModel(ABC): else: if populate_indicators: dataframe = self.dk.use_strategy_to_populate_indicators( - strategy, prediction_dataframe=dataframe, pair=metadata["pair"] + strategy, prediction_dataframe=dataframe, pair=metadata["pair"], + corr_dataframes=corr_df, base_dataframes=base_df ) populate_indicators = False @@ -323,7 +329,14 @@ class IFreqaiModel(ABC): if not self.model_exists(dk): dk.find_features(dataframe_train) dk.find_labels(dataframe_train) - self.model = self.train(dataframe_train, pair, dk) + + try: + self.model = self.train(dataframe_train, pair, dk) + except Exception as msg: + logger.warning( + f"Training {pair} raised exception {msg.__class__.__name__}. 
" + f"Message: {msg}, skipping.") + self.dd.pair_dict[pair]["trained_timestamp"] = int( tr_train.stopts) if self.plot_features: diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index af104f3d2..ac155b1f6 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -232,15 +232,14 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog) timerange = TimeRange.parse_timerange("20180110-20180130") freqai.dd.load_all_pair_histories(timerange, freqai.dk) sub_timerange = TimeRange.parse_timerange("20180110-20180130") - corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) + _, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) + df = base_df[freqai_conf["timeframe"]] - df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") - df = freqai.cache_corr_pairlist_dfs(df, freqai.dk) for i in range(5): df[f'%-constant_{i}'] = i metadata = {"pair": "LTC/BTC"} - freqai.start_backtesting(df, metadata, freqai.dk) + freqai.start_backtesting(df, metadata, freqai.dk, strategy) model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()] assert len(model_folders) == num_files @@ -271,12 +270,11 @@ def test_start_backtesting_subdaily_backtest_period(mocker, freqai_conf): timerange = TimeRange.parse_timerange("20180110-20180130") freqai.dd.load_all_pair_histories(timerange, freqai.dk) sub_timerange = TimeRange.parse_timerange("20180110-20180130") - corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) - - df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + _, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) + df = base_df[freqai_conf["timeframe"]] metadata = {"pair": "LTC/BTC"} - freqai.start_backtesting(df, metadata, freqai.dk) + 
freqai.start_backtesting(df, metadata, freqai.dk, strategy) model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()] assert len(model_folders) == 9 @@ -297,14 +295,13 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): timerange = TimeRange.parse_timerange("20180110-20180130") freqai.dd.load_all_pair_histories(timerange, freqai.dk) sub_timerange = TimeRange.parse_timerange("20180110-20180130") - corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) - - df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + _, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) + df = base_df[freqai_conf["timeframe"]] pair = "ADA/BTC" metadata = {"pair": pair} freqai.dk.pair = pair - freqai.start_backtesting(df, metadata, freqai.dk) + freqai.start_backtesting(df, metadata, freqai.dk, strategy) model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()] assert len(model_folders) == 2 @@ -322,14 +319,13 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): timerange = TimeRange.parse_timerange("20180110-20180130") freqai.dd.load_all_pair_histories(timerange, freqai.dk) sub_timerange = TimeRange.parse_timerange("20180110-20180130") - corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) - - df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + _, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) + df = base_df[freqai_conf["timeframe"]] pair = "ADA/BTC" metadata = {"pair": pair} freqai.dk.pair = pair - freqai.start_backtesting(df, metadata, freqai.dk) + freqai.start_backtesting(df, metadata, freqai.dk, strategy) assert log_has_re( "Found backtesting prediction file ", @@ -339,7 +335,7 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): pair = "ETH/BTC" 
metadata = {"pair": pair} freqai.dk.pair = pair - freqai.start_backtesting(df, metadata, freqai.dk) + freqai.start_backtesting(df, metadata, freqai.dk, strategy) path = (freqai.dd.full_path / freqai.dk.backtest_predictions_folder) prediction_files = [x for x in path.iterdir() if x.is_file()] From b2bab68fbaa1126d366c397218b327a8397c5010 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 30 Dec 2022 13:02:39 +0100 Subject: [PATCH 06/19] move price assignment to feature_engineering_standard() to reduce un-requested feature additions in RL. Ensure old method of price assignment still works, add deprecation warning to help users migrate their strategies --- .../RL/BaseReinforcementLearningModel.py | 40 +++++++++++-------- tests/strategy/strats/freqai_rl_test_strat.py | 10 ++--- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 4ef2ca9bf..3a4d0d0e6 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -283,27 +283,33 @@ class BaseReinforcementLearningModel(IFreqaiModel): # %-raw_volume_gen_shift-2_ETH/USDT_1h # price data for model training and evaluation tf = self.config['timeframe'] - ohlc_list = [f'%-raw_open_gen_{pair}_{tf}', f'%-raw_low_gen_{pair}_{tf}', - f'%-raw_high_gen_{pair}_{tf}', f'%-raw_close_gen_{pair}_{tf}'] - rename_dict = {f'%-raw_open_gen_{pair}_{tf}': 'open', - f'%-raw_low_gen_{pair}_{tf}': 'low', - f'%-raw_high_gen_{pair}_{tf}': ' high', - f'%-raw_close_gen_{pair}_{tf}': 'close'} + rename_dict = {'%-raw_open': 'open', '%-raw_low': 'low', + '%-raw_high': ' high', '%-raw_close': 'close'} + rename_dict_old = {f'%-{pair}raw_open_{tf}': 'open', f'%-{pair}raw_low_{tf}': 'low', + f'%-{pair}raw_high_{tf}': ' high', f'%-{pair}raw_close_{tf}': 'close'} + + prices_train = train_df.filter(rename_dict.keys(), axis=1) + prices_train_old = 
train_df.filter(rename_dict_old.keys(), axis=1) + if prices_train.empty or not prices_train_old.empty: + if not prices_train_old.empty: + prices_train = prices_train_old + rename_dict = rename_dict_old + logger.warning('Reinforcement learning module didnt find the correct raw prices ' + 'assigned in feature_engineering_standard(). ' + 'Please assign them with:\n' + 'dataframe["%-raw_close"] = dataframe["close"]\n' + 'dataframe["%-raw_open"] = dataframe["open"]\n' + 'dataframe["%-raw_high"] = dataframe["high"]\n' + 'dataframe["%-raw_low"] = dataframe["low"]\n' + 'inside `feature_engineering_standard()') + elif prices_train.empty: + raise OperationalException("No prices found, please follow log warning " + "instructions to correct the strategy.") - prices_train = train_df.filter(ohlc_list, axis=1) - if prices_train.empty: - raise OperationalException('Reinforcement learning module didnt find the raw prices ' - 'assigned in feature_engineering_standard(). ' - 'Please assign them with:\n' - 'dataframe["%-raw_close"] = dataframe["close"]\n' - 'dataframe["%-raw_open"] = dataframe["open"]\n' - 'dataframe["%-raw_high"] = dataframe["high"]\n' - 'dataframe["%-raw_low"] = dataframe["low"]\n' - 'inside `feature_engineering_expand_basic()`') prices_train.rename(columns=rename_dict, inplace=True) prices_train.reset_index(drop=True) - prices_test = test_df.filter(ohlc_list, axis=1) + prices_test = test_df.filter(rename_dict.keys(), axis=1) prices_test.rename(columns=rename_dict, inplace=True) prices_test.reset_index(drop=True) diff --git a/tests/strategy/strats/freqai_rl_test_strat.py b/tests/strategy/strats/freqai_rl_test_strat.py index 7f8872d8b..7d0297691 100644 --- a/tests/strategy/strats/freqai_rl_test_strat.py +++ b/tests/strategy/strats/freqai_rl_test_strat.py @@ -35,11 +35,6 @@ class freqai_rl_test_strat(IStrategy): dataframe["%-pct-change"] = dataframe["close"].pct_change() dataframe["%-raw_volume"] = dataframe["volume"] - dataframe["%-raw_close"] = dataframe["close"] - 
dataframe["%-raw_open"] = dataframe["open"] - dataframe["%-raw_high"] = dataframe["high"] - dataframe["%-raw_low"] = dataframe["low"] - return dataframe def feature_engineering_standard(self, dataframe, **kwargs): @@ -47,6 +42,11 @@ class freqai_rl_test_strat(IStrategy): dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek dataframe["%-hour_of_day"] = dataframe["date"].dt.hour + dataframe["%-raw_close"] = dataframe["close"] + dataframe["%-raw_open"] = dataframe["open"] + dataframe["%-raw_high"] = dataframe["high"] + dataframe["%-raw_low"] = dataframe["low"] + return dataframe def set_freqai_targets(self, dataframe, **kwargs): From b39fc6b924e99d0264d2bc08eee10b9013b75488 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Fri, 30 Dec 2022 10:42:31 -0300 Subject: [PATCH 07/19] remove add pair to column from docs, fix keyerror bug and adjust hybrid strategy example --- docs/freqai-feature-engineering.md | 6 +- docs/freqai-running.md | 2 +- freqtrade/freqai/data_kitchen.py | 8 +- freqtrade/freqai/freqai_interface.py | 7 +- .../templates/FreqaiExampleHybridStrategy.py | 165 ++++++++++++------ 5 files changed, 125 insertions(+), 63 deletions(-) diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index e2089d947..0fa47ba73 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -2,7 +2,7 @@ ## Defining the features -Low level feature engineering is performed in the user strategy within a set of functions called `feature_engineering_*`. These function set the `base features` such as, `RSI`, `MFI`, `EMA`, `SMA`, time of day, volume, etc. The `base features` can be custom indicators or they can be imported from any technical-analysis library that you can find. One important syntax rule is that all `base features` string names defined within `feature_engineering_*` functions must be prepended with `%-{pair}`. 
FreqAI is equipped with a set of functions to simplify rapid large-scale feature engineering: +Low level feature engineering is performed in the user strategy within a set of functions called `feature_engineering_*`. These function set the `base features` such as, `RSI`, `MFI`, `EMA`, `SMA`, time of day, volume, etc. The `base features` can be custom indicators or they can be imported from any technical-analysis library that you can find. FreqAI is equipped with a set of functions to simplify rapid large-scale feature engineering: | Function | Description | |---------------|-------------| @@ -11,10 +11,6 @@ Low level feature engineering is performed in the user strategy within a set of | `feature_engineering_standard()` | This optional function will be called once with the dataframe of the base timeframe. This is the final function to be called, which means that the dataframe entering this function will contain all the features and columns created by all other `feature_engineering_expand` functions. This function is a good place to do custom exotic feature extractions (e.g. tsfresh). This function is also a good place for any feature that should not be auto-expanded upon (e.g. day of the week). | `set_freqai_targets()` | Required function to set the targets for the model. All targets must be prepended with `&` to be recognized by the FreqAI internals. - -!!! Note - Adding the full pair string, e.g. XYZ/USD, in the feature name enables improved performance for dataframe caching on the backend. If you decide *not* to add the full pair string in the feature string, FreqAI will operate in a reduced performance mode. - Meanwhile, high level feature engineering is handled within `"feature_parameters":{}` in the FreqAI config. Within this file, it is possible to decide large scale feature expansions on top of the `base_features` such as "including correlated pairs" or "including informative timeframes" or even "including recent candles." 
It is advisable to start from the template `feature_engineering_*` functions in the source provided example strategy (found in `templates/FreqaiExampleStrategy.py`) to ensure that the feature definitions are following the correct conventions. Here is an example of how to set the indicators and labels in the strategy: diff --git a/docs/freqai-running.md b/docs/freqai-running.md index b046e7bb8..bb84bd533 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -135,7 +135,7 @@ freqtrade hyperopt --hyperopt-loss SharpeHyperOptLoss --strategy FreqaiExampleSt `hyperopt` requires you to have the data pre-downloaded in the same fashion as if you were doing [backtesting](#backtesting). In addition, you must consider some restrictions when trying to hyperopt FreqAI strategies: - The `--analyze-per-epoch` hyperopt parameter is not compatible with FreqAI. -- It's not possible to hyperopt indicators in the `populate_any_indicators()` function. This means that you cannot optimize model parameters using hyperopt. Apart from this exception, it is possible to optimize all other [spaces](hyperopt.md#running-hyperopt-with-smaller-search-space). +- It's not possible to hyperopt indicators in the `feature_engineering_*()` and `set_freqai_targets()` functions. This means that you cannot optimize model parameters using hyperopt. Apart from this exception, it is possible to optimize all other [spaces](hyperopt.md#running-hyperopt-with-smaller-search-space). - The backtesting instructions also apply to hyperopt. The best method for combining hyperopt and FreqAI is to focus on hyperopting entry/exit thresholds/criteria. You need to focus on hyperopting parameters that are not used in your features. For example, you should not try to hyperopt rolling window lengths in the feature creation, or any part of the FreqAI config which changes predictions. In order to efficiently hyperopt the FreqAI strategy, FreqAI stores predictions as dataframes and reuses them. 
Hence the requirement to hyperopt entry/exit thresholds/criteria only. diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 3ddc0892f..c85ecdca3 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1311,10 +1311,10 @@ class FreqaiDataKitchen: for tf in tfs: if tf not in base_dataframes: base_dataframes[tf] = pd.DataFrame() - if not corr_dataframes.keys(): - for p in pairs: - if p not in corr_dataframes: - corr_dataframes[p] = {} + for p in pairs: + if p not in corr_dataframes: + corr_dataframes[p] = {} + if tf not in corr_dataframes[p]: corr_dataframes[p][tf] = pd.DataFrame() if not prediction_dataframe.empty: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index df4317095..ea596e798 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -272,11 +272,7 @@ class IFreqaiModel(ABC): self.pair_it += 1 train_it = 0 pair = metadata["pair"] - populate_indicators = True - timerange = TimeRange.parse_timerange(self.dk.full_timerange) - self.dd.load_all_pair_histories(timerange, self.dk) - corr_df, base_df = self.dd.get_base_and_corr_dataframes(timerange, pair, dk) # Loop enforcing the sliding window training/backtesting paradigm # tr_train is the training time range e.g. 
1 historical month @@ -312,6 +308,9 @@ class IFreqaiModel(ABC): dk.append_predictions(append_df) else: if populate_indicators: + timerange = TimeRange.parse_timerange(self.dk.full_timerange) + self.dd.load_all_pair_histories(timerange, self.dk) + corr_df, base_df = self.dd.get_base_and_corr_dataframes(timerange, pair, dk) dataframe = self.dk.use_strategy_to_populate_indicators( strategy, prediction_dataframe=dataframe, pair=metadata["pair"], corr_dataframes=corr_df, base_dataframes=base_df diff --git a/freqtrade/templates/FreqaiExampleHybridStrategy.py b/freqtrade/templates/FreqaiExampleHybridStrategy.py index 9d1842cd7..c5dbe8dbd 100644 --- a/freqtrade/templates/FreqaiExampleHybridStrategy.py +++ b/freqtrade/templates/FreqaiExampleHybridStrategy.py @@ -95,65 +95,132 @@ class FreqaiExampleHybridStrategy(IStrategy): short_rsi = IntParameter(low=51, high=100, default=70, space='sell', optimize=True, load=True) exit_short_rsi = IntParameter(low=1, high=50, default=30, space='buy', optimize=True, load=True) - # FreqAI required function, user can add or remove indicators, but general structure - # must stay the same. - def populate_any_indicators( - self, pair, df, tf, informative=None, set_generalized_indicators=False - ): + def feature_engineering_expand_all(self, dataframe, period, **kwargs): """ - User feeds these indicators to FreqAI to train a classifier to decide - if the market will go up or down. + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `indicator_periods_candles`, `include_timeframes`, `include_shifted_candles`, and + `include_corr_pairs`. In other words, a single feature defined in this function + will automatically expand to a total of + `indicator_periods_candles` * `include_timeframes` * `include_shifted_candles` * + `include_corr_pairs` numbers of features added to the model. 
- :param pair: pair to be used as informative - :param df: strategy dataframe which will receive merges from informatives - :param tf: timeframe of the dataframe which will modify the feature names - :param informative: the dataframe associated with the informative pair + All features must be prepended with `%` to be recognized by FreqAI internals. + + More details on how these config defined parameters accelerate feature engineering + in the documentation at: + + https://www.freqtrade.io/en/latest/freqai-parameter-table/#feature-parameters + + https://www.freqtrade.io/en/latest/freqai-feature-engineering/#defining-the-features + + :param df: strategy dataframe which will receive the features + :param period: period of the indicator - usage example: + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) """ - if informative is None: - informative = self.dp.get_pair_dataframe(pair, tf) + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) + dataframe["%-mfi-period"] = ta.MFI(dataframe, timeperiod=period) + dataframe["%-adx-period"] = ta.ADX(dataframe, timeperiod=period) + dataframe["%-sma-period"] = ta.SMA(dataframe, timeperiod=period) + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) - # first loop is automatically duplicating indicators for time periods - for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: + bollinger = qtpylib.bollinger_bands( + qtpylib.typical_price(dataframe), window=period, stds=2.2 + ) + dataframe["bb_lowerband-period"] = bollinger["lower"] + dataframe["bb_middleband-period"] = bollinger["mid"] + dataframe["bb_upperband-period"] = bollinger["upper"] - t = int(t) - informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) - informative[f"%-{pair}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) - informative[f"%-{pair}adx-period_{t}"] = ta.ADX(informative, timeperiod=t) - informative[f"%-{pair}sma-period_{t}"] = ta.SMA(informative, timeperiod=t) - 
informative[f"%-{pair}ema-period_{t}"] = ta.EMA(informative, timeperiod=t) - informative[f"%-{pair}roc-period_{t}"] = ta.ROC(informative, timeperiod=t) - informative[f"%-{pair}relative_volume-period_{t}"] = ( - informative["volume"] / informative["volume"].rolling(t).mean() - ) + dataframe["%-bb_width-period"] = ( + dataframe["bb_upperband-period"] + - dataframe["bb_lowerband-period"] + ) / dataframe["bb_middleband-period"] + dataframe["%-close-bb_lower-period"] = ( + dataframe["close"] / dataframe["bb_lowerband-period"] + ) - # FreqAI needs the following lines in order to detect features and automatically - # expand upon them. - indicators = [col for col in informative if col.startswith("%")] - # This loop duplicates and shifts all indicators to add a sense of recency to data - for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): - if n == 0: - continue - informative_shift = informative[indicators].shift(n) - informative_shift = informative_shift.add_suffix("_shift-" + str(n)) - informative = pd.concat((informative, informative_shift), axis=1) + dataframe["%-roc-period"] = ta.ROC(dataframe, timeperiod=period) - df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) - skip_columns = [ - (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] - ] - df = df.drop(columns=skip_columns) + dataframe["%-relative_volume-period"] = ( + dataframe["volume"] / dataframe["volume"].rolling(period).mean() + ) - # User can set the "target" here (in present case it is the - # "up" or "down") - if set_generalized_indicators: - # User "looks into the future" here to figure out if the future - # will be "up" or "down". 
This same column name is available to - # the user - df['&s-up_or_down'] = np.where(df["close"].shift(-50) > - df["close"], 'up', 'down') + return dataframe - return df + def feature_engineering_expand_basic(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`. + In other words, a single feature defined in this function + will automatically expand to a total of + `include_timeframes` * `include_shifted_candles` * `include_corr_pairs` + numbers of features added to the model. + + Features defined here will *not* be automatically duplicated on user defined + `indicator_periods_candles` + + All features must be prepended with `%` to be recognized by FreqAI internals. + + More details on how these config defined parameters accelerate feature engineering + in the documentation at: + + https://www.freqtrade.io/en/latest/freqai-parameter-table/#feature-parameters + + https://www.freqtrade.io/en/latest/freqai-feature-engineering/#defining-the-features + + :param df: strategy dataframe which will receive the features + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-ema-200"] = ta.EMA(dataframe, timeperiod=200) + """ + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-raw_volume"] = dataframe["volume"] + dataframe["%-raw_price"] = dataframe["close"] + return dataframe + + def feature_engineering_standard(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + This optional function will be called once with the dataframe of the base timeframe. + This is the final function to be called, which means that the dataframe entering this + function will contain all the features and columns created by all other + freqai_feature_engineering_* functions. 
+ + This function is a good place to do custom exotic feature extractions (e.g. tsfresh). + This function is a good place for any feature that should not be auto-expanded upon + (e.g. day of the week). + + All features must be prepended with `%` to be recognized by FreqAI internals. + + More details about feature engineering available: + + https://www.freqtrade.io/en/latest/freqai-feature-engineering + + :param df: strategy dataframe which will receive the features + usage example: dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 + """ + dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek + dataframe["%-hour_of_day"] = dataframe["date"].dt.hour + return dataframe + + def set_freqai_targets(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + Required function to set the targets for the model. + All targets must be prepended with `&` to be recognized by the FreqAI internals. + + More details about feature engineering available: + + https://www.freqtrade.io/en/latest/freqai-feature-engineering + + :param df: strategy dataframe which will receive the targets + usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"] + """ + dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-50) > + dataframe["close"], 'up', 'down') + + return dataframe # flake8: noqa: C901 def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: From c8aa7720a2ce5ac7bff3e871121be9b5a6eea0d3 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Fri, 30 Dec 2022 11:16:35 -0300 Subject: [PATCH 08/19] added again feature check in BT from pred files --- freqtrade/freqai/freqai_interface.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index ea596e798..7d302b9b8 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -273,7 +273,7 @@ 
class IFreqaiModel(ABC): train_it = 0 pair = metadata["pair"] populate_indicators = True - + check_features = True # Loop enforcing the sliding window training/backtesting paradigm # tr_train is the training time range e.g. 1 historical month # tr_backtest is the backtesting time range e.g. the week directly @@ -301,9 +301,14 @@ class IFreqaiModel(ABC): dk.set_new_model_names(pair, timestamp_model_id) if dk.check_if_backtest_prediction_is_valid(len_backtest_df): - # self.dd.load_metadata(dk) - # dk.find_features(dataframe) - # self.check_if_feature_list_matches_strategy(dk) + if check_features: + self.dd.load_metadata(dk) + dataframe_dummy_features = self.dk.use_strategy_to_populate_indicators( + strategy, prediction_dataframe=dataframe.tail(1), pair=metadata["pair"] + ) + dk.find_features(dataframe_dummy_features) + self.check_if_feature_list_matches_strategy(dk) + check_features = False append_df = dk.get_backtesting_prediction() dk.append_predictions(append_df) else: From 52dfb0452ca76c78feb8f79bfb03eb585b7b9b52 Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Mon, 2 Jan 2023 16:06:54 +0100 Subject: [PATCH 09/19] Update freqai-feature-engineering.md --- docs/freqai-feature-engineering.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index 0fa47ba73..6b8636e28 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -124,6 +124,8 @@ It is advisable to start from the template `feature_engineering_*` functions in / dataframe["close"] - 1 ) + + return dataframe ``` In the presented example, the user does not wish to pass the `bb_lowerband` as a feature to the model, From 73114b93c2dc8a4637c54c956899301255028d02 Mon Sep 17 00:00:00 2001 From: paranoidandy Date: Tue, 3 Jan 2023 15:11:46 +0000 Subject: [PATCH 10/19] Update FreqaiExampleStrategy.py Change can_short to True to enable shorting --- freqtrade/templates/FreqaiExampleStrategy.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index 0c5d74ca8..0ec59c77f 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -39,7 +39,7 @@ class FreqaiExampleStrategy(IStrategy): use_exit_signal = True # this is the maximum period fed to talib (timeframe independent) startup_candle_count: int = 40 - can_short = False + can_short = True std_dev_multiplier_buy = CategoricalParameter( [0.75, 1, 1.25, 1.5, 1.75], default=1.25, space="buy", optimize=True) From 314c0925bff3e5648baef12617446987b7d51d76 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Tue, 3 Jan 2023 14:02:42 -0300 Subject: [PATCH 11/19] fix get dataframe data to include startup_candle --- freqtrade/freqai/freqai_interface.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 7d302b9b8..03f458e47 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -313,7 +313,10 @@ class IFreqaiModel(ABC): dk.append_predictions(append_df) else: if populate_indicators: - timerange = TimeRange.parse_timerange(self.dk.full_timerange) + timerange_str_from_main_df = ( + dataframe["date"].min().strftime("%Y%m%d") + "-" + + dataframe["date"].max().strftime("%Y%m%d")) + timerange = TimeRange.parse_timerange(timerange_str_from_main_df) self.dd.load_all_pair_histories(timerange, self.dk) corr_df, base_df = self.dd.get_base_and_corr_dataframes(timerange, pair, dk) dataframe = self.dk.use_strategy_to_populate_indicators( From 801ab39a248fcc0241dedced8c622e11c66bd246 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Wed, 4 Jan 2023 10:36:19 -0300 Subject: [PATCH 12/19] fix get dataframe data to include startup_candle --- freqtrade/freqai/freqai_interface.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git 
a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 03f458e47..a0f29a301 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -313,10 +313,9 @@ class IFreqaiModel(ABC): dk.append_predictions(append_df) else: if populate_indicators: - timerange_str_from_main_df = ( - dataframe["date"].min().strftime("%Y%m%d") + "-" + - dataframe["date"].max().strftime("%Y%m%d")) - timerange = TimeRange.parse_timerange(timerange_str_from_main_df) + tr_from_main_df = (f'{dataframe["date"].min().strftime("%Y%m%d")}' + f'-{dataframe["date"].max().strftime("%Y%m%d")}') + timerange = TimeRange.parse_timerange(tr_from_main_df) self.dd.load_all_pair_histories(timerange, self.dk) corr_df, base_df = self.dd.get_base_and_corr_dataframes(timerange, pair, dk) dataframe = self.dk.use_strategy_to_populate_indicators( From ed99e7f85750e1c7569722488c346d5e411d1d97 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Wed, 4 Jan 2023 14:21:37 -0300 Subject: [PATCH 13/19] fix corr_pairs startup candle count bug --- docs/freqai-running.md | 6 ++++++ freqtrade/freqai/freqai_interface.py | 8 +------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/freqai-running.md b/docs/freqai-running.md index bb84bd533..42c56c06d 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -67,6 +67,12 @@ Backtesting mode requires [downloading the necessary data](#downloading-data-to- *want* to retrain a new model with the same config file, you should simply change the `identifier`. This way, you can return to using any model you wish by simply specifying the `identifier`. +!!! Note + Backtesting calls the `set_freqai_targets()` function for every window defined in `backtest_period_days` parameter + to better simulate the dry/run live behavior, but it's analyzes the whole time-range at once in `feature_engineering_*()` for performance reasons. 
+ Because of this, strategy authors need to make sure that strategies do not look-ahead into the future at `feature_engineering_*()` functions. + Strategy authors should carefully read the [Common Mistakes](strategy-customization.md#common-mistakes-when-developing-strategies) + --- ### Saving prediction data diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index a0f29a301..c4e87176c 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -313,14 +313,8 @@ class IFreqaiModel(ABC): dk.append_predictions(append_df) else: if populate_indicators: - tr_from_main_df = (f'{dataframe["date"].min().strftime("%Y%m%d")}' - f'-{dataframe["date"].max().strftime("%Y%m%d")}') - timerange = TimeRange.parse_timerange(tr_from_main_df) - self.dd.load_all_pair_histories(timerange, self.dk) - corr_df, base_df = self.dd.get_base_and_corr_dataframes(timerange, pair, dk) dataframe = self.dk.use_strategy_to_populate_indicators( - strategy, prediction_dataframe=dataframe, pair=metadata["pair"], - corr_dataframes=corr_df, base_dataframes=base_df + strategy, prediction_dataframe=dataframe, pair=metadata["pair"] ) populate_indicators = False From d91ac8b6698d23068abb9d33d377ee468ae6b30a Mon Sep 17 00:00:00 2001 From: robcaulk Date: Thu, 5 Jan 2023 20:13:48 +0100 Subject: [PATCH 14/19] improve wording in freqai doc --- docs/freqai-running.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/freqai-running.md b/docs/freqai-running.md index 42c56c06d..a75e30e83 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -68,10 +68,8 @@ Backtesting mode requires [downloading the necessary data](#downloading-data-to- This way, you can return to using any model you wish by simply specifying the `identifier`. !!! 
Note - Backtesting calls the `set_freqai_targets()` function for every window defined in `backtest_period_days` parameter - to better simulate the dry/run live behavior, but it's analyzes the whole time-range at once in `feature_engineering_*()` for performance reasons. - Because of this, strategy authors need to make sure that strategies do not look-ahead into the future at `feature_engineering_*()` functions. - Strategy authors should carefully read the [Common Mistakes](strategy-customization.md#common-mistakes-when-developing-strategies) + Backtesting calls `set_freqai_targets()` one time for each backtest window (where the number of windows is the full backtest timerange divided by the `backtest_period_days` parameter). Doing this means that the targets simulate dry/live behavior without look ahead bias. However, the definition of the features in `feature_engineering_*()` is performed once on the entire backtest timerange. This means that you should be sure that features do not look-ahead into the future. + More details about look-ahead bias can be found in [Common Mistakes](strategy-customization.md#common-mistakes-when-developing-strategies).
--- From abdeb72eb0f2f46dc969c672eb45957b290481e1 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Thu, 5 Jan 2023 17:54:56 -0300 Subject: [PATCH 15/19] fix tests --- tests/freqai/test_freqai_interface.py | 9 ++++++++- tests/strategy/strats/freqai_rl_test_strat.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index ac155b1f6..4ef99720a 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -222,6 +222,9 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog) if 'test_4ac' in model: freqai_conf["freqaimodel_path"] = str(Path(__file__).parents[1] / "freqai" / "test_models") + freqai_conf.get("freqai", {}).get("feature_parameters", {}).update( + {"indicator_periods_candles": [2]}) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf) strategy.dp = DataProvider(freqai_conf, exchange) @@ -260,6 +263,8 @@ def test_start_backtesting_subdaily_backtest_period(mocker, freqai_conf): freqai_conf.update({"timerange": "20180120-20180124"}) freqai_conf.get("freqai", {}).update({"backtest_period_days": 0.5}) freqai_conf.get("freqai", {}).update({"save_backtest_models": True}) + freqai_conf.get("freqai", {}).get("feature_parameters", {}).update( + {"indicator_periods_candles": [2]}) strategy = get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf) strategy.dp = DataProvider(freqai_conf, exchange) @@ -285,6 +290,8 @@ def test_start_backtesting_subdaily_backtest_period(mocker, freqai_conf): def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): freqai_conf.update({"timerange": "20180120-20180130"}) freqai_conf.get("freqai", {}).update({"save_backtest_models": True}) + freqai_conf.get("freqai", {}).get("feature_parameters", {}).update( + {"indicator_periods_candles": [2]}) strategy = 
get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf) strategy.dp = DataProvider(freqai_conf, exchange) @@ -294,7 +301,7 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): freqai.dk = FreqaiDataKitchen(freqai_conf) timerange = TimeRange.parse_timerange("20180110-20180130") freqai.dd.load_all_pair_histories(timerange, freqai.dk) - sub_timerange = TimeRange.parse_timerange("20180110-20180130") + sub_timerange = TimeRange.parse_timerange("20180101-20180130") _, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) df = base_df[freqai_conf["timeframe"]] diff --git a/tests/strategy/strats/freqai_rl_test_strat.py b/tests/strategy/strats/freqai_rl_test_strat.py index 7d0297691..6fa926fc9 100644 --- a/tests/strategy/strats/freqai_rl_test_strat.py +++ b/tests/strategy/strats/freqai_rl_test_strat.py @@ -21,7 +21,7 @@ class freqai_rl_test_strat(IStrategy): process_only_new_candles = True stoploss = -0.05 use_exit_signal = True - startup_candle_count: int = 30 + startup_candle_count: int = 300 can_short = False def feature_engineering_expand_all(self, dataframe, period, **kwargs): From a61274ae18541788d8d39756c433c2e90fb76011 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 9 Jan 2023 20:04:36 +0100 Subject: [PATCH 16/19] ensure cached corr-pairs works with new framework --- docs/freqai-feature-engineering.md | 2 +- freqtrade/freqai/data_kitchen.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/freqai-feature-engineering.md b/docs/freqai-feature-engineering.md index 6b8636e28..6c8c5bb46 100644 --- a/docs/freqai-feature-engineering.md +++ b/docs/freqai-feature-engineering.md @@ -8,7 +8,7 @@ Low level feature engineering is performed in the user strategy within a set of |---------------|-------------| | `feature_engineering__expand_all()` | This optional function will automatically expand the defined features on the config defined 
`indicator_periods_candles`, `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`. | `feature_engineering__expand_basic()` | This optional function will automatically expand the defined features on the config defined `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`. Note: this function does *not* expand across `include_periods_candles`. -| `feature_engineering_standard()` | This optional function will be called once with the dataframe of the base timeframe. This is the final function to be called, which means that the dataframe entering this function will contain all the features and columns created by all other `feature_engineering_expand` functions. This function is a good place to do custom exotic feature extractions (e.g. tsfresh). This function is also a good place for any feature that should not be auto-expanded upon (e.g. day of the week). +| `feature_engineering_standard()` | This optional function will be called once with the dataframe of the base timeframe. This is the final function to be called, which means that the dataframe entering this function will contain all the features and columns from the base asset created by the other `feature_engineering_expand` functions. This function is a good place to do custom exotic feature extractions (e.g. tsfresh). This function is also a good place for any feature that should not be auto-expanded upon (e.g. day of the week). | `set_freqai_targets()` | Required function to set the targets for the model. All targets must be prepended with `&` to be recognized by the FreqAI internals. Meanwhile, high level feature engineering is handled within `"feature_parameters":{}` in the FreqAI config. Within this file, it is possible to decide large scale feature expansions on top of the `base_features` such as "including correlated pairs" or "including informative timeframes" or even "including recent candles." 
diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index c85ecdca3..3eb0906b1 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1147,9 +1147,9 @@ class FreqaiDataKitchen: for pair in pairs: pair = pair.replace(':', '') # lightgbm doesnt like colons - valid_strs = [f"%-{pair}", f"%{pair}", f"%_{pair}"] - pair_cols = [col for col in dataframe.columns if - any(substr in col for substr in valid_strs)] + pair_cols = [col for col in dataframe.columns if col.startswith("%") + and f"{pair}_" in col] + if pair_cols: pair_cols.insert(0, 'date') corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1) @@ -1327,6 +1327,7 @@ class FreqaiDataKitchen: dataframe = self.populate_features(dataframe.copy(), pair, strategy, corr_dataframes, base_dataframes) + dataframe = strategy.feature_engineering_standard(dataframe.copy()) # ensure corr pairs are always last for corr_pair in corr_pairs: if pair == corr_pair: @@ -1335,7 +1336,6 @@ class FreqaiDataKitchen: dataframe = self.populate_features(dataframe.copy(), corr_pair, strategy, corr_dataframes, base_dataframes, True) - dataframe = strategy.feature_engineering_standard(dataframe.copy()) dataframe = strategy.set_freqai_targets(dataframe.copy()) self.get_unique_classes_from_labels(dataframe) From 93aff9325e083954121617fa4f5cf42cce83dd1a Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 9 Jan 2023 20:15:03 +0100 Subject: [PATCH 17/19] improve deprecation note --- freqtrade/freqai/data_kitchen.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 3eb0906b1..719504122 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1351,8 +1351,11 @@ class FreqaiDataKitchen: # the user is using the populate_any_indicators functions which is deprecated logger.warning("DEPRECATION WARNING: " "You are using the deprecated populate_any_indicators 
function. " - "Please update your strategy to use " - "the new feature_engineering functions.") + "This function will raise an error on March 1 2023. " + "Please update your strategy by using " + "the new feature_engineering functions. See \n" + "https://www.freqtrade.io/en/latest/freqai-feature-engineering/" + "for details.") df = self.use_strategy_to_populate_indicators_old_version( strategy, corr_dataframes, base_dataframes, pair, @@ -1395,7 +1398,6 @@ class FreqaiDataKitchen: corr_dataframes[p][tf] = None else: dataframe = base_dataframes[self.config["timeframe"]].copy() - # dataframe = strategy.dp.get_pair_dataframe(pair, self.config["timeframe"]) sgi = False for tf in tfs: From 67495530b7e4d91e5a14f00f719d3bba3fd18f61 Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 10 Jan 2023 07:22:28 +0100 Subject: [PATCH 18/19] Add FreqAI migration documentation --- docs/strategy_migration.md | 251 +++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 252 insertions(+) diff --git a/docs/strategy_migration.md b/docs/strategy_migration.md index f93efd067..22e3d2c22 100644 --- a/docs/strategy_migration.md +++ b/docs/strategy_migration.md @@ -477,3 +477,254 @@ after: "ignore_buying_expired_candle_after": 120 } ``` + +## FreqAI strategy + +The `populate_any_indicators()` method has been split into `feature_engineering_expand_all()`, `feature_engineering_expand_basic()`, `feature_engineering_standard()` and `set_freqai_targets()`. + +For each new function, the pair (and timeframe where necessary) will be automatically added to the column. +As such, the definition of features becomes much simpler with the new logic.
+ +For a full explanation of each method, please go to the corresponding [freqAI documentation page](freqai-feature-engineering.md#defining-the-features) + +``` python linenums="1" hl_lines="12-37 39-42 63-65 67-75" + +def populate_any_indicators( + self, pair, df, tf, informative=None, set_generalized_indicators=False + ): + + if informative is None: + informative = self.dp.get_pair_dataframe(pair, tf) + + # first loop is automatically duplicating indicators for time periods + for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: + + t = int(t) + informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) + informative[f"%-{pair}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) + informative[f"%-{pair}adx-period_{t}"] = ta.ADX(informative, timeperiod=t) + informative[f"%-{pair}sma-period_{t}"] = ta.SMA(informative, timeperiod=t) + informative[f"%-{pair}ema-period_{t}"] = ta.EMA(informative, timeperiod=t) + + bollinger = qtpylib.bollinger_bands( + qtpylib.typical_price(informative), window=t, stds=2.2 + ) + informative[f"{pair}bb_lowerband-period_{t}"] = bollinger["lower"] + informative[f"{pair}bb_middleband-period_{t}"] = bollinger["mid"] + informative[f"{pair}bb_upperband-period_{t}"] = bollinger["upper"] + + informative[f"%-{pair}bb_width-period_{t}"] = ( + informative[f"{pair}bb_upperband-period_{t}"] + - informative[f"{pair}bb_lowerband-period_{t}"] + ) / informative[f"{pair}bb_middleband-period_{t}"] + informative[f"%-{pair}close-bb_lower-period_{t}"] = ( + informative["close"] / informative[f"{pair}bb_lowerband-period_{t}"] + ) + + informative[f"%-{pair}roc-period_{t}"] = ta.ROC(informative, timeperiod=t) + + informative[f"%-{pair}relative_volume-period_{t}"] = ( + informative["volume"] / informative["volume"].rolling(t).mean() + ) # (1) + + informative[f"%-{pair}pct-change"] = informative["close"].pct_change() + informative[f"%-{pair}raw_volume"] = informative["volume"] + informative[f"%-{pair}raw_price"] = 
informative["close"] + # (2) + + indicators = [col for col in informative if col.startswith("%")] + # This loop duplicates and shifts all indicators to add a sense of recency to data + for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): + if n == 0: + continue + informative_shift = informative[indicators].shift(n) + informative_shift = informative_shift.add_suffix("_shift-" + str(n)) + informative = pd.concat((informative, informative_shift), axis=1) + + df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) + skip_columns = [ + (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] + ] + df = df.drop(columns=skip_columns) + + # Add generalized indicators here (because in live, it will call this + # function to populate indicators during training). Notice how we ensure not to + # add them multiple times + if set_generalized_indicators: + df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 + df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 + # (3) + + # user adds targets here by prepending them with &- (see convention below) + df["&-s_close"] = ( + df["close"] + .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + .mean() + / df["close"] + - 1 + ) # (4) + + return df +``` + +1. Features - Move to `feature_engineering_expand_all` +2. Basic features, not expanded across `indicator_periods_candles` - move to `feature_engineering_expand_basic()`. +3. Standard features which should not be expanded - move to `feature_engineering_standard()`. +4. Targets - Move this part to `set_freqai_targets()`. + +### freqai - feature engineering expand all + +Features will now expand automatically. As such, the expansion loops, as well as the `{pair}` / `{timeframe}` parts will need to be removed.
+ +``` python linenums="1" + def feature_engineering_expand_all(self, dataframe, period, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `indicator_periods_candles`, `include_timeframes`, `include_shifted_candles`, and + `include_corr_pairs`. In other words, a single feature defined in this function + will automatically expand to a total of + `indicator_periods_candles` * `include_timeframes` * `include_shifted_candles` * + `include_corr_pairs` numbers of features added to the model. + + All features must be prepended with `%` to be recognized by FreqAI internals. + + More details on how these config defined parameters accelerate feature engineering + in the documentation at: + + https://www.freqtrade.io/en/latest/freqai-parameter-table/#feature-parameters + + https://www.freqtrade.io/en/latest/freqai-feature-engineering/#defining-the-features + + :param df: strategy dataframe which will receive the features + :param period: period of the indicator - usage example: + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) + """ + + dataframe["%-rsi-period"] = ta.RSI(dataframe, timeperiod=period) + dataframe["%-mfi-period"] = ta.MFI(dataframe, timeperiod=period) + dataframe["%-adx-period"] = ta.ADX(dataframe, timeperiod=period) + dataframe["%-sma-period"] = ta.SMA(dataframe, timeperiod=period) + dataframe["%-ema-period"] = ta.EMA(dataframe, timeperiod=period) + + bollinger = qtpylib.bollinger_bands( + qtpylib.typical_price(dataframe), window=period, stds=2.2 + ) + dataframe["bb_lowerband-period"] = bollinger["lower"] + dataframe["bb_middleband-period"] = bollinger["mid"] + dataframe["bb_upperband-period"] = bollinger["upper"] + + dataframe["%-bb_width-period"] = ( + dataframe["bb_upperband-period"] + - dataframe["bb_lowerband-period"] + ) / dataframe["bb_middleband-period"] + dataframe["%-close-bb_lower-period"] = ( + dataframe["close"] / 
dataframe["bb_lowerband-period"] + ) + + dataframe["%-roc-period"] = ta.ROC(dataframe, timeperiod=period) + + dataframe["%-relative_volume-period"] = ( + dataframe["volume"] / dataframe["volume"].rolling(period).mean() + ) + + return dataframe + +``` + +### Freqai - feature engineering basic + +Basic features. Make sure to remove the `{pair}` part from your features. + +``` python linenums="1" + def feature_engineering_expand_basic(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + This function will automatically expand the defined features on the config defined + `include_timeframes`, `include_shifted_candles`, and `include_corr_pairs`. + In other words, a single feature defined in this function + will automatically expand to a total of + `include_timeframes` * `include_shifted_candles` * `include_corr_pairs` + numbers of features added to the model. + + Features defined here will *not* be automatically duplicated on user defined + `indicator_periods_candles` + + All features must be prepended with `%` to be recognized by FreqAI internals. 
+ + More details on how these config defined parameters accelerate feature engineering + in the documentation at: + + https://www.freqtrade.io/en/latest/freqai-parameter-table/#feature-parameters + + https://www.freqtrade.io/en/latest/freqai-feature-engineering/#defining-the-features + + :param df: strategy dataframe which will receive the features + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-ema-200"] = ta.EMA(dataframe, timeperiod=200) + """ + dataframe["%-pct-change"] = dataframe["close"].pct_change() + dataframe["%-raw_volume"] = dataframe["volume"] + dataframe["%-raw_price"] = dataframe["close"] + return dataframe +``` + +### FreqAI - feature engineering standard + +``` python linenums="1" + def feature_engineering_standard(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + This optional function will be called once with the dataframe of the base timeframe. + This is the final function to be called, which means that the dataframe entering this + function will contain all the features and columns created by all other + freqai_feature_engineering_* functions. + + This function is a good place to do custom exotic feature extractions (e.g. tsfresh). + This function is a good place for any feature that should not be auto-expanded upon + (e.g. day of the week). + + All features must be prepended with `%` to be recognized by FreqAI internals. + + More details about feature engineering available: + + https://www.freqtrade.io/en/latest/freqai-feature-engineering + + :param df: strategy dataframe which will receive the features + usage example: dataframe["%-day_of_week"] = (dataframe["date"].dt.dayofweek + 1) / 7 + """ + dataframe["%-day_of_week"] = dataframe["date"].dt.dayofweek + dataframe["%-hour_of_day"] = dataframe["date"].dt.hour + return dataframe +``` + +### FreqAI - set Targets + +Targets now get their own, dedicated method. 
+ +``` python linenums="1" + def set_freqai_targets(self, dataframe, **kwargs): + """ + *Only functional with FreqAI enabled strategies* + Required function to set the targets for the model. + All targets must be prepended with `&` to be recognized by the FreqAI internals. + + More details about feature engineering available: + + https://www.freqtrade.io/en/latest/freqai-feature-engineering + + :param df: strategy dataframe which will receive the targets + usage example: dataframe["&-target"] = dataframe["close"].shift(-1) / dataframe["close"] + """ + dataframe["&-s_close"] = ( + dataframe["close"] + .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + .mean() + / dataframe["close"] + - 1 + ) + + return dataframe +``` diff --git a/mkdocs.yml b/mkdocs.yml index c44e4640e..b56a3404b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -59,6 +59,7 @@ theme: favicon: "images/logo.png" custom_dir: "docs/overrides" features: + - content.code.annotate - search.share palette: - scheme: default From 2241f2429042e394f989f028a25567992c9292bd Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Tue, 10 Jan 2023 09:10:30 -0300 Subject: [PATCH 19/19] moved deprecated warning to start function --- freqtrade/freqai/data_kitchen.py | 7 ------- freqtrade/freqai/freqai_interface.py | 27 ++++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 719504122..9fdc2c98e 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1349,13 +1349,6 @@ class FreqaiDataKitchen: else: # the user is using the populate_any_indicators functions which is deprecated - logger.warning("DEPRECATION WARNING: " - "You are using the deprecated populate_any_indicators function. " - "This function will raise an error on March 1 2023. 
" - "Please update your strategy by using " - "the new feature_engineering functions. See \n" - "https://www.freqtrade.io/en/latest/freqai-feature-engineering/" - "for details.") df = self.use_strategy_to_populate_indicators_old_version( strategy, corr_dataframes, base_dataframes, pair, diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index c4e87176c..830970ba0 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -1,3 +1,4 @@ +import inspect import logging import threading import time @@ -106,6 +107,8 @@ class IFreqaiModel(ABC): self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1) self.can_short = True # overridden in start() with strategy.can_short + self.warned_deprecated_populate_any_indicators = False + record_params(config, self.full_path) def __getstate__(self): @@ -136,6 +139,9 @@ class IFreqaiModel(ABC): self.data_provider = strategy.dp self.can_short = strategy.can_short + # check if the strategy has deprecated populate_any_indicators function + self.check_deprecated_populate_any_indicators(strategy) + if self.live: self.inference_timer('start') self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) @@ -373,7 +379,6 @@ class IFreqaiModel(ABC): :returns: dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only """ - # update follower if self.follow_mode: self.dd.update_follower_metadata() @@ -939,6 +944,26 @@ class IFreqaiModel(ABC): dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred") return dk + def check_deprecated_populate_any_indicators(self, strategy: IStrategy): + """ + Check and warn if the deprecated populate_any_indicators function is used. 
+ :param strategy: strategy object + """ + + if not self.warned_deprecated_populate_any_indicators: + self.warned_deprecated_populate_any_indicators = True + old_version = inspect.getsource(strategy.populate_any_indicators) != ( + inspect.getsource(IStrategy.populate_any_indicators)) + + if old_version: + logger.warning("DEPRECATION WARNING: " + "You are using the deprecated populate_any_indicators function. " + "This function will raise an error on March 1 2023. " + "Please update your strategy by using " + "the new feature_engineering functions. See \n" + "https://www.freqtrade.io/en/latest/freqai-feature-engineering/" + "for details.") + # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModel.py for an example.