Ensure the full pair string is used for caching dataframes; if it is missing from the column names, fall back to the old (uncached) behavior. Update docs.

2022-10-29 22:26:49 +02:00
parent a9db668082
commit 650bb8b7d7
5 changed files with 92 additions and 74 deletions
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -1133,17 +1133,19 @@ class FreqaiDataKitchen:
        """
        Find the columns of the dataframe corresponding to the corr_pairlist, save them
        in a dictionary to be reused and attached to other pairs.
-        :params:
-        :dataframe: fully populated dataframe (current pair + corr_pairs)
-        :return:
-        :corr_dataframes: dictionary of dataframes to be attached to other pairs in same candle.
+
+        :param dataframe: fully populated dataframe (current pair + corr_pairs)
+        :return: corr_dataframes, dictionary of dataframes to be attached
+                 to other pairs in same candle.
        """
        corr_dataframes: Dict[str, DataFrame] = {}
        pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])

        for pair in pairs:
-            coin = pair.split('/')[0]
-            pair_cols = [col for col in dataframe.columns if coin in col]
+            valid_strs = [f"%-{pair}", f"%{pair}", f"%_{pair}"]
+            pair_cols = [col for col in dataframe.columns if
+                         any(substr in col for substr in valid_strs)]
+            pair_cols.insert(0, 'date')
            corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1)

        return corr_dataframes
@@ -1153,10 +1155,10 @@ class FreqaiDataKitchen:
                                 current_pair: str) -> DataFrame:
        """
        Attach the existing corr_pair dataframes to the current pair dataframe before training
-        :params:
-        :dataframe: current pair strategy dataframe, indicators populated already
-        :corr_dataframes: dictionary of saved dataframes from earlier in the same candle
-        :current_pair: current pair to which we will attach corr pair dataframe
+
+        :param dataframe: current pair strategy dataframe, indicators populated already
+        :param corr_dataframes: dictionary of saved dataframes from earlier in the same candle
+        :param current_pair: current pair to which we will attach corr pair dataframe
        :return:
        :dataframe: current pair dataframe of populated indicators, concatenated with corr_pairs
                    ready for training
@@ -1164,8 +1166,8 @@ class FreqaiDataKitchen:
        pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])

        for pair in pairs:
-            if current_pair not in pair:
-                dataframe = pd.concat([dataframe, corr_dataframes[pair]], axis=1)
+            if current_pair != pair:
+                dataframe = dataframe.merge(corr_dataframes[pair], how='left', on='date')

        return dataframe

@@ -1186,15 +1188,15 @@ class FreqaiDataKitchen:
        :param base_dataframes: dict = dict containing the current pair dataframes
                                (for user defined timeframes)
        :param metadata: dict = strategy furnished pair metadata
-        :returns:
+        :return:
        dataframe: DataFrame = dataframe containing populated indicators
        """

        # for prediction dataframe creation, we let dataprovider handle everything in the strategy
        # so we create empty dictionaries, which allows us to pass None to
        # `populate_any_indicators()`. Signaling we want the dp to give us the live dataframe.
-        tfs = self.freqai_config["feature_parameters"].get("include_timeframes")
-        pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
+        tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes")
+        pairs: List[str] = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
        if not prediction_dataframe.empty:
            dataframe = prediction_dataframe.copy()
            for tf in tfs:
@@ -1219,16 +1221,16 @@ class FreqaiDataKitchen:
            )

        # ensure corr pairs are always last
-        for i in pairs:
-            if pair in i:
+        for corr_pair in pairs:
+            if pair == corr_pair:
                continue  # dont repeat anything from whitelist
            for tf in tfs:
                if pairs and do_corr_pairs:
                    dataframe = strategy.populate_any_indicators(
-                        i,
+                        corr_pair,
                        dataframe.copy(),
                        tf,
-                        informative=corr_dataframes[i][tf]
+                        informative=corr_dataframes[corr_pair][tf]
                    )

        self.get_unique_classes_from_labels(dataframe)
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -72,6 +72,7 @@ class IFreqaiModel(ABC):
        self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
        self.scanning = False
        self.ft_params = self.freqai_info["feature_parameters"]
+        self.corr_pairlist = self.ft_params.get("include_corr_pairlist", [])
        self.keras: bool = self.freqai_info.get("keras", False)
        if self.keras and self.ft_params.get("DI_threshold", 0):
            self.ft_params["DI_threshold"] = 0
@@ -375,12 +376,8 @@ class IFreqaiModel(ABC):
            self.dd.return_null_values_to_strategy(dataframe, dk)
            return dk

-        if self.get_corr_dataframes:
-            self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
-            self.get_corr_dataframes = False
-        else:
-            dataframe = dk.attach_corr_pair_columns(
-                dataframe, self.corr_dataframes, metadata["pair"])
+        if self.corr_pairlist:
+            dataframe = self.cache_corr_pairlist_dfs(dataframe, dk)

        dk.find_labels(dataframe)

@@ -687,7 +684,8 @@ class IFreqaiModel(ABC):
                                   " avoid blinding open trades and degrading performance.")
                self.pair_it = 0
                self.inference_time = 0
-                self.get_corr_dataframes = True
+                if self.corr_pairlist:
+                    self.get_corr_dataframes = True
        return

    def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
@@ -746,6 +744,29 @@ class IFreqaiModel(ABC):
                    f'Best approximation queue: {best_queue}')
        return best_queue

+    def cache_corr_pairlist_dfs(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
+        """
+        Cache the corr_pairlist dfs to speed up performance for subsequent pairs during the
+        current candle.
+        :param dataframe: strategy fed dataframe
+        :param dk: datakitchen object for current asset
+        :return: dataframe, with the cached corr_pair dfs attached when a cache already exists.
+        """
+
+        if self.get_corr_dataframes:
+            self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
+            if not self.corr_dataframes:
+                logger.warning("Couldn't cache corr_pair dataframes for improved performance. "
+                               "Consider ensuring that the full coin/stake, e.g. XYZ/USD, "
+                               "is included in the column names when you are creating features "
+                               "in `populate_any_indicators()`.")
+            self.get_corr_dataframes = not bool(self.corr_dataframes)
+        else:
+            dataframe = dk.attach_corr_pair_columns(
+                dataframe, self.corr_dataframes, dk.pair)
+
+        return dataframe
+
    # Following methods which are overridden by user made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example.

--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -53,7 +53,7 @@ class FreqaiExampleStrategy(IStrategy):
        """
        Function designed to automatically generate, name and merge features
        from user indicated timeframes in the configuration file. User controls the indicators
-        passed to the training/prediction by prepending indicators with `'%-' + coin `
+        passed to the training/prediction by prepending indicators with `f'%-{pair}'`
        (see convention below). I.e. user should not prepend any supporting metrics
        (e.g. bb_lowerband below) with % unless they explicitly want to pass that metric to the
        model.
@@ -63,8 +63,6 @@ class FreqaiExampleStrategy(IStrategy):
        :param informative: the dataframe associated with the informative pair
        """

-        coin = pair.split('/')[0]
-
        if informative is None:
            informative = self.dp.get_pair_dataframe(pair, tf)

@@ -72,36 +70,36 @@ class FreqaiExampleStrategy(IStrategy):
        for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:

            t = int(t)
-            informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
-            informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
-            informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, timeperiod=t)
-            informative[f"%-{coin}sma-period_{t}"] = ta.SMA(informative, timeperiod=t)
-            informative[f"%-{coin}ema-period_{t}"] = ta.EMA(informative, timeperiod=t)
+            informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
+            informative[f"%-{pair}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
+            informative[f"%-{pair}adx-period_{t}"] = ta.ADX(informative, timeperiod=t)
+            informative[f"%-{pair}sma-period_{t}"] = ta.SMA(informative, timeperiod=t)
+            informative[f"%-{pair}ema-period_{t}"] = ta.EMA(informative, timeperiod=t)

            bollinger = qtpylib.bollinger_bands(
                qtpylib.typical_price(informative), window=t, stds=2.2
            )
-            informative[f"{coin}bb_lowerband-period_{t}"] = bollinger["lower"]
-            informative[f"{coin}bb_middleband-period_{t}"] = bollinger["mid"]
-            informative[f"{coin}bb_upperband-period_{t}"] = bollinger["upper"]
+            informative[f"{pair}bb_lowerband-period_{t}"] = bollinger["lower"]
+            informative[f"{pair}bb_middleband-period_{t}"] = bollinger["mid"]
+            informative[f"{pair}bb_upperband-period_{t}"] = bollinger["upper"]

-            informative[f"%-{coin}bb_width-period_{t}"] = (
-                informative[f"{coin}bb_upperband-period_{t}"]
-                - informative[f"{coin}bb_lowerband-period_{t}"]
-            ) / informative[f"{coin}bb_middleband-period_{t}"]
-            informative[f"%-{coin}close-bb_lower-period_{t}"] = (
-                informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"]
+            informative[f"%-{pair}bb_width-period_{t}"] = (
+                informative[f"{pair}bb_upperband-period_{t}"]
+                - informative[f"{pair}bb_lowerband-period_{t}"]
+            ) / informative[f"{pair}bb_middleband-period_{t}"]
+            informative[f"%-{pair}close-bb_lower-period_{t}"] = (
+                informative["close"] / informative[f"{pair}bb_lowerband-period_{t}"]
            )

-            informative[f"%-{coin}roc-period_{t}"] = ta.ROC(informative, timeperiod=t)
+            informative[f"%-{pair}roc-period_{t}"] = ta.ROC(informative, timeperiod=t)

-            informative[f"%-{coin}relative_volume-period_{t}"] = (
+            informative[f"%-{pair}relative_volume-period_{t}"] = (
                informative["volume"] / informative["volume"].rolling(t).mean()
            )

-        informative[f"%-{coin}pct-change"] = informative["close"].pct_change()
-        informative[f"%-{coin}raw_volume"] = informative["volume"]
-        informative[f"%-{coin}raw_price"] = informative["close"]
+        informative[f"%-{pair}pct-change"] = informative["close"].pct_change()
+        informative[f"%-{pair}raw_volume"] = informative["volume"]
+        informative[f"%-{pair}raw_price"] = informative["close"]

        indicators = [col for col in informative if col.startswith("%")]
        # This loop duplicates and shifts all indicators to add a sense of recency to data