auto populate features based on a prepended % in the strategy (remove feature assignment from config). Update doc/constants/example strategy to reflect change

2022-05-17 18:15:03 +02:00 · 2022-05-17 18:15:03 +02:00 · d1d451c27e
commit d1d451c27e
parent 8664e8f9a3
6 changed files with 80 additions and 69 deletions
--- a/config_examples/config_freqai.example.json
+++ b/config_examples/config_freqai.example.json
@ -56,20 +56,9 @@
        ],
        "train_period": 30,
        "backtest_period": 7,
-        "identifier": "new_corrlist",
+        "identifier": "example",
        "live_trained_timerange": "20220330-20220429",
        "live_full_backtestrange": "20220302-20220501",
        "base_features": [
            "rsi",
            "close_over_20sma",
            "relative_volume",
            "bb_width",
            "mfi",
            "roc",
            "pct-change",
            "adx",
            "macd"
        ],
        "corr_pairlist": [
            "BTC/USDT",
            "ETH/USDT",
--- a/docs/freqai.md
+++ b/docs/freqai.md
@ -72,11 +72,6 @@ config setup includes:
                "train_period" : 30,
                "backtest_period" : 7,
                "identifier" :  "unique-id",
                "base_features": [
                        "rsi",
                        "mfi",
                        "roc",
                ],
                "corr_pairlist": [
                        "ETH/USD",
                        "LINK/USD",
@ -102,11 +97,31 @@ config setup includes:
 ### Building the feature set
-Most of these parameters are controlling the feature data set. The `base_features`
+Most of these parameters are controlling the feature data set. Features are added by the user 
-indicates the basic indicators the user wishes to include in the feature set.
+inside the `populate_any_indicators()` method of the strategy by prepending indicators with `%`:
-The `timeframes` are the timeframes of each base_feature that the user wishes to
+
-include in the feature set. In the present case, the user is asking for the
+```python
-`5m`, `15m`, and `4h` timeframes of the `rsi`, `mfi`, `roc`, etc. to be included
+    def populate_any_indicators(self, pair, df, tf, informative=None, coin=""):
        informative['%-' + coin + "rsi"] = ta.RSI(informative, timeperiod=14)
        informative['%-' + coin + "mfi"] = ta.MFI(informative, timeperiod=25)
        informative['%-' + coin + "adx"] = ta.ADX(informative, window=20)
        bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=14, stds=2.2)
        informative[coin + "bb_lowerband"] = bollinger["lower"]
        informative[coin + "bb_middleband"] = bollinger["mid"]
        informative[coin + "bb_upperband"] = bollinger["upper"]
        informative['%-' + coin + "bb_width"] = (
            informative[coin + "bb_upperband"] - informative[coin + "bb_lowerband"]
        ) / informative[coin + "bb_middleband"]
 ```
 The user of the present example does not want to pass the `bb_lowerband` as a feature to the model, 
 and has therefore not prepended it with `%`. The user does, however, wish to pass `bb_width` to the
 model for training/prediction and has therefore prepended it with `%`.
 (Please see the example script located in `freqtrade/templates/FreqaiExampleStrategy.py` for a full example of `populate_any_indicators()`)
 The `timeframes` from the example config above are the timeframes at which each metric defined in
 `populate_any_indicators()` is computed for inclusion in the feature set. In the present case, the user is asking for the
 `5m`, `15m`, and `4h` timeframes of the `rsi`, `mfi`, `roc`, and `bb_width` to be included
 in the feature set.
 In addition, the user can ask for each of these features to be included from
--- a/freqtrade/constants.py
+++ b/freqtrade/constants.py
@ -442,7 +442,6 @@ CONF_SCHEMA = {
                "identifier": {"type": "str", "default": "example"},
                "live_trained_timerange": {"type": "str"},
                "live_full_backtestrange": {"type": "str"},
                "base_features": {"type": "list"},
                "corr_pairlist": {"type": "list"},
                "feature_parameters": {
                    "type": "object",
@ -537,4 +536,4 @@ TradeList = List[List]
 LongShort = Literal['long', 'short']
 EntryExit = Literal['entry', 'exit']
-BuySell = Literal['buy', 'sell']
+BuySell = Literal['buy', 'sell']
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@ -483,31 +483,38 @@ class FreqaiDataKitchen:
        return
-    def build_feature_list(self, config: dict, metadata: dict) -> list:
+    def find_features(self, dataframe: DataFrame) -> list:
-        """
+        column_names = dataframe.columns
-        Build the list of features that will be used to filter
+        features = [c for c in column_names if '%' in c]
-        the full dataframe. Feature list is construced from the
+        assert features, ("Could not find any features!")
        user configuration file.
        :params:
        :config: Canonical freqtrade config file containing all
        user defined input in config['freqai] dictionary.
        """
        features = []
        for tf in config["freqai"]["timeframes"]:
            for ft in config["freqai"]["base_features"]:
                for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
                    shift = ""
                    if n > 0:
                        shift = "_shift-" + str(n)
                    features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
                    for p in config["freqai"]["corr_pairlist"]:
                        if metadata['pair'] in p:
                            continue  # avoid duplicate features
                        features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
        # logger.info("number of features %s", len(features))
        return features
    # def build_feature_list(self, config: dict, metadata: dict) -> list:
    #     """
    #     SUPERSEDED BY self.find_features()
    #     Build the list of features that will be used to filter
    #     the full dataframe. Feature list is constructed from the
    #     user configuration file.
    #     :params:
    #     :config: Canonical freqtrade config file containing all
    #     user defined input in config['freqai'] dictionary.
    #     """
    #     features = []
    #     for tf in config["freqai"]["timeframes"]:
    #         for ft in config["freqai"]["base_features"]:
    #             for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
    #                 shift = ""
    #                 if n > 0:
    #                     shift = "_shift-" + str(n)
    #                 features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
    #                 for p in config["freqai"]["corr_pairlist"]:
    #                     if metadata['pair'] in p:
    #                         continue  # avoid duplicate features
    #                     features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
    #     # logger.info("number of features %s", len(features))
    #     return features
    def check_if_pred_in_training_spaces(self) -> None:
        """
        Compares the distance from each prediction point to each training data
--- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
@ -53,9 +53,8 @@ class CatboostPredictionModel(IFreqaiModel):
        logger.info("--------------------Starting training--------------------")
        # create the full feature list based on user config info
-        self.dh.training_features_list = self.dh.build_feature_list(self.config, metadata)
+        self.dh.training_features_list = self.dh.find_features(unfiltered_dataframe)
        unfiltered_labels = self.make_labels(unfiltered_dataframe)
        # filter the features requested by user in the configuration file and elegantly handle NaNs
        features_filtered, labels_filtered = self.dh.filter_features(
            unfiltered_dataframe,
@ -127,7 +126,7 @@ class CatboostPredictionModel(IFreqaiModel):
        # logger.info("--------------------Starting prediction--------------------")
-        original_feature_list = self.dh.build_feature_list(self.config, metadata)
+        original_feature_list = self.dh.find_features(unfiltered_dataframe)
        filtered_dataframe, _ = self.dh.filter_features(
            unfiltered_dataframe, original_feature_list, training_filter=False
        )
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@ -62,8 +62,11 @@ class FreqaiExampleStrategy(IStrategy):
    def populate_any_indicators(self, pair, df, tf, informative=None, coin=""):
        """
        Function designed to automatically generate, name and merge features
-        from user indicated timeframes in the configuration file. User can add
+        from user indicated timeframes in the configuration file. User controls the indicators
-        additional features here, but must follow the naming convention.
+        passed to the training/prediction by prepending indicators with `'%-' + coin `
        (see convention below). I.e. user should not prepend any supporting metrics
        (e.g. bb_lowerband below) with % unless they explicitly want to pass that metric to the
        model.
        :params:
        :pair: pair to be used as informative
        :df: strategy dataframe which will receive merges from informatives
@ -74,49 +77,50 @@ class FreqaiExampleStrategy(IStrategy):
        if informative is None:
            informative = self.dp.get_pair_dataframe(pair, tf)
-        informative[coin + "rsi"] = ta.RSI(informative, timeperiod=14)
+        informative['%-' + coin + "rsi"] = ta.RSI(informative, timeperiod=14)
-        informative[coin + "mfi"] = ta.MFI(informative, timeperiod=25)
+        informative['%-' + coin + "mfi"] = ta.MFI(informative, timeperiod=25)
-        informative[coin + "adx"] = ta.ADX(informative, window=20)
+        informative['%-' + coin + "adx"] = ta.ADX(informative, window=20)
        informative[coin + "20sma"] = ta.SMA(informative, timeperiod=20)
        informative[coin + "21ema"] = ta.EMA(informative, timeperiod=21)
-        informative[coin + "bmsb"] = np.where(
+        informative['%-' + coin + "bmsb"] = np.where(
            informative[coin + "20sma"].lt(informative[coin + "21ema"]), 1, 0
        )
-        informative[coin + "close_over_20sma"] = informative["close"] / informative[coin + "20sma"]
+        informative['%-' + coin + "close_over_20sma"] = informative["close"] / informative[
                                                                                    coin + "20sma"]
-        informative[coin + "mfi"] = ta.MFI(informative, timeperiod=25)
+        informative['%-' + coin + "mfi"] = ta.MFI(informative, timeperiod=25)
        informative[coin + "ema21"] = ta.EMA(informative, timeperiod=21)
        informative[coin + "sma20"] = ta.SMA(informative, timeperiod=20)
        stoch = ta.STOCHRSI(informative, 15, 20, 2, 2)
-        informative[coin + "srsi-fk"] = stoch["fastk"]
+        informative['%-' + coin + "srsi-fk"] = stoch["fastk"]
-        informative[coin + "srsi-fd"] = stoch["fastd"]
+        informative['%-' + coin + "srsi-fd"] = stoch["fastd"]
        bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=14, stds=2.2)
        informative[coin + "bb_lowerband"] = bollinger["lower"]
        informative[coin + "bb_middleband"] = bollinger["mid"]
        informative[coin + "bb_upperband"] = bollinger["upper"]
-        informative[coin + "bb_width"] = (
+        informative['%-' + coin + "bb_width"] = (
            informative[coin + "bb_upperband"] - informative[coin + "bb_lowerband"]
        ) / informative[coin + "bb_middleband"]
-        informative[coin + "close-bb_lower"] = (
+        informative['%-' + coin + "close-bb_lower"] = (
            informative["close"] / informative[coin + "bb_lowerband"]
        )
-        informative[coin + "roc"] = ta.ROC(informative, timeperiod=3)
+        informative['%-' + coin + "roc"] = ta.ROC(informative, timeperiod=3)
-        informative[coin + "adx"] = ta.ADX(informative, window=14)
+        informative['%-' + coin + "adx"] = ta.ADX(informative, window=14)
        macd = ta.MACD(informative)
-        informative[coin + "macd"] = macd["macd"]
+        informative['%-' + coin + "macd"] = macd["macd"]
        informative[coin + "pct-change"] = informative["close"].pct_change()
-        informative[coin + "relative_volume"] = (
+        informative['%-' + coin + "relative_volume"] = (
            informative["volume"] / informative["volume"].rolling(10).mean()
        )
        informative[coin + "pct-change"] = informative["close"].pct_change()
-        indicators = [col for col in informative if col.startswith(coin)]
+        indicators = [col for col in informative if col.startswith('%')]
        for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
            if n == 0:
@ -154,7 +158,6 @@ class FreqaiExampleStrategy(IStrategy):
                    pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
                )
        print('dataframe_built')
        # the model will return 4 values, its prediction, an indication of whether or not the
        # prediction should be accepted, the target mean/std values from the labels used during
        # each training period.
@ -181,7 +184,6 @@ class FreqaiExampleStrategy(IStrategy):
        return dataframe
    def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        # sell_goal = eval('self.'+metadata['pair'].split("/")[0]+'_sell_goal.value')
        sell_conditions = [
            (dataframe["prediction"] < dataframe["sell_roi"]) & (dataframe["do_predict"] == 1)
        ]