Merge pull request #7325 from wagnercosta/develop

Improve Freqai backtesting performance
2022-09-03 19:28:04 +02:00
parent 80b5f035ab 599c1c79fb
commit 63514b0443
6 changed files with 121 additions and 47 deletions
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -69,6 +69,8 @@ class FreqaiDataKitchen:
        self.label_list: List = []
        self.training_features_list: List = []
        self.model_filename: str = ""
+        self.backtesting_results_path = Path()
+        self.backtest_predictions_folder: str = "backtesting_predictions"
        self.live = live
        self.pair = pair

@@ -778,9 +780,10 @@ class FreqaiDataKitchen:
        weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
        return weights

-    def append_predictions(self, predictions: DataFrame, do_predict: npt.ArrayLike) -> None:
+    def get_predictions_to_append(self, predictions: DataFrame,
+                                  do_predict: npt.ArrayLike) -> DataFrame:
        """
-        Append backtest prediction from current backtest period to all previous periods
+        Get backtest prediction from current backtest period
        """

        append_df = DataFrame()
@@ -795,13 +798,18 @@ class FreqaiDataKitchen:
        if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
            append_df["DI_values"] = self.DI_values

+        return append_df
+
+    def append_predictions(self, append_df: DataFrame) -> None:
+        """
+        Append backtest prediction from current backtest period to all previous periods
+        """
+
        if self.full_df.empty:
            self.full_df = append_df
        else:
            self.full_df = pd.concat([self.full_df, append_df], axis=0)

-        return
-
    def fill_predictions(self, dataframe):
        """
        Back fill values to before the backtesting range so that the dataframe matches size
@@ -1060,3 +1068,50 @@ class FreqaiDataKitchen:
        if self.unique_classes:
            for label in self.unique_classes:
                self.unique_class_list += list(self.unique_classes[label])
+
+    def save_backtesting_prediction(
+        self, append_df: DataFrame
+    ) -> None:
+
+        """
+        Save prediction dataframe from backtesting to h5 file format
+        :param append_df: dataframe for backtesting period
+        """
+        full_predictions_folder = Path(self.full_path / self.backtest_predictions_folder)
+        if not full_predictions_folder.is_dir():
+            full_predictions_folder.mkdir(parents=True, exist_ok=True)
+
+        append_df.to_hdf(self.backtesting_results_path, key='append_df', mode='w')
+
+    def get_backtesting_prediction(
+        self
+    ) -> DataFrame:
+
+        """
+        Get prediction dataframe from h5 file format
+        """
+        append_df = pd.read_hdf(self.backtesting_results_path)
+        return append_df
+
+    def check_if_backtest_prediction_exists(
+        self
+    ) -> bool:
+        """
+        Check if a backtesting prediction already exists
+        :param dk: FreqaiDataKitchen
+        :return:
+        :boolean: whether the prediction file exists or not.
+        """
+        path_to_predictionfile = Path(self.full_path /
+                                      self.backtest_predictions_folder /
+                                      f"{self.model_filename}_prediction.h5")
+        self.backtesting_results_path = path_to_predictionfile
+
+        file_exists = path_to_predictionfile.is_file()
+        if file_exists:
+            logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
+        else:
+            logger.info(
+                f"Could not find backtesting prediction file at {path_to_predictionfile}"
+            )
+        return file_exists
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -71,6 +71,9 @@ class IFreqaiModel(ABC):
        self.first = True
        self.set_full_path()
        self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
+        self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", False)
+        if self.save_backtest_models:
+            logger.info('Backtesting module configured to save all models.')
        self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
        self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
        self.scanning = False
@@ -124,10 +127,9 @@ class IFreqaiModel(ABC):
        elif not self.follow_mode:
            self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
            logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
-            with self.analysis_lock:
-                dataframe = self.dk.use_strategy_to_populate_indicators(
-                    strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
-                )
+            dataframe = self.dk.use_strategy_to_populate_indicators(
+                strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
+            )
            dk = self.start_backtesting(dataframe, metadata, self.dk)

        dataframe = dk.remove_features_from_df(dk.return_dataframe)
@@ -224,28 +226,39 @@ class IFreqaiModel(ABC):
                "trains"
            )

+            trained_timestamp_int = int(trained_timestamp.stopts)
            dk.data_path = Path(
                dk.full_path
                /
-                f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}"
+                f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
                )
-            if not self.model_exists(
-                metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts)
-            ):
-                dk.find_features(dataframe_train)
-                self.model = self.train(dataframe_train, metadata["pair"], dk)
-                self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
-                    trained_timestamp.stopts)
-                dk.set_new_model_names(metadata["pair"], trained_timestamp)
-                self.dd.save_data(self.model, metadata["pair"], dk)
+
+            dk.set_new_model_names(metadata["pair"], trained_timestamp)
+
+            if dk.check_if_backtest_prediction_exists():
+                append_df = dk.get_backtesting_prediction()
+                dk.append_predictions(append_df)
            else:
-                self.model = self.dd.load_data(metadata["pair"], dk)
+                if not self.model_exists(
+                    metadata["pair"], dk, trained_timestamp=trained_timestamp_int
+                ):
+                    dk.find_features(dataframe_train)
+                    self.model = self.train(dataframe_train, metadata["pair"], dk)
+                    self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
+                        trained_timestamp.stopts)

-            self.check_if_feature_list_matches_strategy(dataframe_train, dk)
+                    if self.save_backtest_models:
+                        logger.info('Saving backtest model to disk.')
+                        self.dd.save_data(self.model, metadata["pair"], dk)
+                else:
+                    self.model = self.dd.load_data(metadata["pair"], dk)

-            pred_df, do_preds = self.predict(dataframe_backtest, dk)
+                self.check_if_feature_list_matches_strategy(dataframe_train, dk)

-            dk.append_predictions(pred_df, do_preds)
+                pred_df, do_preds = self.predict(dataframe_backtest, dk)
+                append_df = dk.get_predictions_to_append(pred_df, do_preds)
+                dk.append_predictions(append_df)
+                dk.save_backtesting_prediction(append_df)

        dk.fill_predictions(dataframe)

@@ -456,11 +469,6 @@ class IFreqaiModel(ABC):
        :return:
        :boolean: whether the model file exists or not.
        """
-        coin, _ = pair.split("/")
-
-        if not self.live:
-            dk.model_filename = model_filename = f"cb_{coin.lower()}_{trained_timestamp}"
-
        path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib")
        file_exists = path_to_modelfile.is_file()
        if file_exists and not scanning: