refactoring freqai backtesting

2022-08-31 11:23:48 -03:00 · 2022-08-31 11:23:48 -03:00 · df51da22ee
commit df51da22ee
parent 4aec2db14d
2 changed files with 119 additions and 16 deletions
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@ -1,6 +1,7 @@
 import copy
 import datetime
 import logging
 import os
 import shutil
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
@ -780,9 +781,10 @@ class FreqaiDataKitchen:
        weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
        return weights
-    def append_predictions(self, predictions: DataFrame, do_predict: npt.ArrayLike) -> None:
+    def get_predictions_to_append(self, predictions: DataFrame,
                                  do_predict: npt.ArrayLike) -> DataFrame:
        """
-        Append backtest prediction from current backtest period to all previous periods
+        Get backtest prediction from current backtest period
        """
        append_df = DataFrame()
@ -797,12 +799,19 @@ class FreqaiDataKitchen:
        if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
            append_df["DI_values"] = self.DI_values
        return append_df
    def append_predictions(self, append_df: DataFrame) -> None:
        """
        Append backtest prediction from current backtest period to all previous periods
        """
        if self.full_df.empty:
            self.full_df = append_df
        else:
            self.full_df = pd.concat([self.full_df, append_df], axis=0)
-        return
+        return append_df
    def fill_predictions(self, dataframe):
        """
@ -1089,3 +1098,25 @@ class FreqaiDataKitchen:
        if self.unique_classes:
            for label in self.unique_classes:
                self.unique_class_list += list(self.unique_classes[label])
    def save_backtesting_prediction(
        self, file_name: str, root_folder: str, append_df: DataFrame
    ) -> None:
        """
        Write the prediction dataframe of one backtesting period to an h5 file.
        The destination folder is created beforehand when it is missing.
        :param file_name: path of the h5 file to write
        :param root_folder: folder that is created (if needed) before writing
        :param append_df: prediction dataframe, stored under the key 'append_df'
        """
        # exist_ok: calling this again for the same folder must not raise
        os.makedirs(root_folder, exist_ok=True)
        # mode 'w' starts a fresh file instead of adding to an existing one
        append_df.to_hdf(file_name, mode='w', key='append_df')
    def get_backtesting_prediction(self, prediction_file_name: str) -> DataFrame:
        """
        Retrieve a saved backtesting prediction dataframe from disk
        :param prediction_file_name: full path of the h5 prediction file
        :return:
        :Dataframe: content read from the given prediction file
        """
        return pd.read_hdf(prediction_file_name)
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@ -224,28 +224,50 @@ class IFreqaiModel(ABC):
                "trains"
            )
            trained_timestamp_int = int(trained_timestamp.stopts)
            dk.data_path = Path(
                dk.full_path
                /
-                f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}"
+                f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
                )
-            if not self.model_exists(
+
-                metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts)
+            if self.backtest_prediction_exists(
                metadata["pair"], dk, trained_timestamp=trained_timestamp_int
            ):
-                dk.find_features(dataframe_train)
+                prediction_filename, _ = self.get_backtesting_prediction_file_name(
-                self.model = self.train(dataframe_train, metadata["pair"], dk)
+                    metadata["pair"],
-                self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
+                    dk,
-                    trained_timestamp.stopts)
+                    trained_timestamp=int(trained_timestamp.stopts))
-                dk.set_new_model_names(metadata["pair"], trained_timestamp)
+
-                self.dd.save_data(self.model, metadata["pair"], dk)
+                append_df = dk.get_backtesting_prediction(prediction_filename)
                dk.append_predictions(append_df)
            else:
-                self.model = self.dd.load_data(metadata["pair"], dk)
+                if not self.model_exists(
                    metadata["pair"], dk, trained_timestamp=trained_timestamp_int
                ):
                    dk.find_features(dataframe_train)
                    self.model = self.train(dataframe_train, metadata["pair"], dk)
                    self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
                        trained_timestamp.stopts)
                    dk.set_new_model_names(metadata["pair"], trained_timestamp)
                    self.dd.save_data(self.model, metadata["pair"], dk)
                else:
                    self.model = self.dd.load_data(metadata["pair"], dk)
-            self.check_if_feature_list_matches_strategy(dataframe_train, dk)
+                self.check_if_feature_list_matches_strategy(dataframe_train, dk)
-            pred_df, do_preds = self.predict(dataframe_backtest, dk)
+                pred_df, do_preds = self.predict(dataframe_backtest, dk)
                append_df = dk.get_predictions_to_append(pred_df, do_preds)
                dk.append_predictions(append_df)
-            dk.append_predictions(pred_df, do_preds)
+                prediction_file_name, root_prediction = self.get_backtesting_prediction_file_name(
                    metadata["pair"],
                    dk,
                    trained_timestamp_int)
                dk.save_backtesting_prediction(prediction_file_name,
                                               root_prediction,
                                               append_df)
        dk.fill_predictions(dataframe)
@ -643,6 +665,56 @@ class IFreqaiModel(ABC):
                self.train_time = 0
        return
    def backtest_prediction_exists(
        self,
        pair: str,
        dk: FreqaiDataKitchen,
        trained_timestamp: int,
        scanning: bool = False,
    ) -> bool:
        """
        Tell whether a backtesting prediction file is already on disk for the
        given pair and trained timestamp. Always returns False when self.live is set.
        :param pair: pair e.g. BTC/USD
        :param dk: FreqaiDataKitchen, used to resolve the prediction file path
        :param trained_timestamp: timestamp that is part of the prediction file name
        :param scanning: when True, the outcome of the lookup is not logged
        :return:
        :boolean: whether the prediction file exists or not.
        """
        # Guard clause: in live mode no lookup on disk is done at all.
        if self.live:
            return False

        candidate, _ = self.get_backtesting_prediction_file_name(
            pair, dk, trained_timestamp
        )
        found = Path(candidate).is_file()

        # Report the outcome once, unless the caller is only scanning.
        if not scanning:
            if found:
                logger.info("Found backtesting prediction file at %s", candidate)
            else:
                logger.info(
                    "Could not find backtesting prediction file at %s", candidate
                )
        return found
    def get_backtesting_prediction_file_name(
        self, pair: str, dk: FreqaiDataKitchen, trained_timestamp: int
    ):
        """
        Given a pair, a data kitchen and a trained timestamp,
        returns the path and name of the predictions file
        :param pair: pair e.g. BTC/USD
        :param dk: FreqaiDataKitchen, its full_path is the base directory
        :param trained_timestamp: current backtesting timestamp period
        :return:
        :str: prediction file name
        :str: prediction root path
        """
        # Use the part before the first "/", the same way the sub-train model
        # folder name is derived from the pair. Tuple-unpacking the split would
        # raise ValueError for any pair that does not contain exactly one "/".
        coin = pair.split("/")[0]
        root_prediction = f'{dk.full_path}/backtesting_predictions'
        prediction_base_filename = f"{coin.lower()}_{trained_timestamp}"
        prediction_file_name = f"{root_prediction}/{prediction_base_filename}_predictions.h5"
        return prediction_file_name, root_prediction
    # The following methods are overridden by user-made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example.