update code to use historic_predictions for freqai_backtest_live_models

2022-11-19 14:15:58 -03:00
parent 3d3195847c
commit 80d070e9ee
8 changed files with 86 additions and 174 deletions
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -15,7 +15,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> **Datatype:** Positive integer. <br> Default: `0` (models never expire).
 | `purge_old_models` | Delete obsolete models. <br> **Datatype:** Boolean. <br> Default: `False` (all historic models remain on disk).
 | `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`. <br> **Datatype:** Boolean. <br> Default: `False` (no models are saved).
-| `save_live_data_backtest` | Save live dataframe during dry/live runs to reuse in backtesting with [Backtest live models](freqai-running.md#backtest_live_models)) option.
+| `backtest_using_historic_predictions` | Reuse `historic_predictions` in backtesting with the [Backtest live models](freqai-running.md#backtest_live_models) option. <br> **Datatype:** Boolean. <br> Default: `True`.
 | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer.
 | `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models. <br> **Datatype:** Boolean. <br> Default: `False`.
 | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`.
--- a/docs/freqai-running.md
+++ b/docs/freqai-running.md
@@ -83,8 +83,8 @@ To save the models generated during a particular backtest so that you can start

 FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study. For that, you have 2 options:

-1. Set `"save_live_data_backtest"` to `True` in the config. With this option, FreqAI will save the live dataframe for reuse in backtesting. This option requires less disk space and backtesting will run faster.
-2. Set `"purge_old_models"` to `False` and `"save_live_data_backtest"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time.
+1. Set `"backtest_using_historic_predictions"` to `True` in the config. With this option, FreqAI will reuse `historic_predictions` in backtesting. This option requires less disk space and backtesting will run faster.
+2. Set `"purge_old_models"` to `False` and `"backtest_using_historic_predictions"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time.

 The `--timerange` parameter must not be informed, as it will be automatically calculated through the training end dates of the models.

--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -81,6 +81,7 @@ class FreqaiDataDrawer:
        self.historic_predictions_bkp_path = Path(
            self.full_path / "historic_predictions.backup.pkl")
        self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
+        self.global_metadata_path = Path(self.full_path / "global_metadata.json")
        self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
        self.follow_mode = follow_mode
        if follow_mode:
@@ -125,6 +126,17 @@ class FreqaiDataDrawer:
        self.update_metric_tracker('cpu_load5min', load5 / cpus, pair)
        self.update_metric_tracker('cpu_load15min', load15 / cpus, pair)

+    def load_global_metadata_from_disk(self):
+        """
+        Load the global metadata json from the present model folder; return {} if it is absent.
+        """
+        exists = self.global_metadata_path.is_file()
+        if exists:
+            with open(self.global_metadata_path, "r") as fp:
+                metatada_dict = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
+                return metatada_dict
+        return {}
+
    def load_drawer_from_disk(self):
        """
        Locate and load a previously saved data drawer full of all pair model metadata in
@@ -225,6 +237,15 @@ class FreqaiDataDrawer:
            rapidjson.dump(self.follower_dict, fp, default=self.np_encoder,
                           number_mode=rapidjson.NM_NATIVE)

+    def save_global_metadata_to_disk(self, metadata: Dict[str, Any]):
+        """
+        Save the given global metadata dict to disk as json while holding save_lock
+        """
+        with self.save_lock:
+            with open(self.global_metadata_path, 'w') as fp:
+                rapidjson.dump(metadata, fp, default=self.np_encoder,
+                               number_mode=rapidjson.NM_NATIVE)
+
    def create_follower_dict(self):
        """
        Create or dictionary for each follower to maintain unique persistent prediction targets
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -9,7 +9,7 @@ from typing import Any, Dict, List, Tuple
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
-from pandas import DataFrame, read_feather
+from pandas import DataFrame
 from scipy import stats
 from sklearn import linear_model
 from sklearn.cluster import DBSCAN
@@ -74,9 +74,6 @@ class FreqaiDataKitchen:
        self.training_features_list: List = []
        self.model_filename: str = ""
        self.backtesting_results_path = Path()
-        self.backtesting_live_model_folder_path = Path()
-        self.backtesting_live_model_path = Path()
-        self.backtesting_live_model_bkp_path = Path()
        self.backtest_predictions_folder: str = "backtesting_predictions"
        self.live = live
        self.pair = pair
@@ -90,7 +87,9 @@ class FreqaiDataKitchen:
            self.full_path = self.get_full_models_path(self.config)

            if self.backtest_live_models:
-                if self.pair:
+                if self.pair and not (
+                    self.freqai_config.get("backtest_using_historic_predictions", True)
+                ):
                    self.set_timerange_from_ready_models()
                    (self.training_timeranges,
                     self.backtesting_timeranges) = self.split_timerange_live_models()
@@ -1488,101 +1487,30 @@ class FreqaiDataKitchen:

        return dataframe

-    def set_backtesting_live_dataframe_folder_path(
-        self
-    ) -> None:
-        """
-        Set live backtesting dataframe path
-        :param pair: current pair
-        """
-        self.backtesting_live_model_folder_path = Path(
-            self.full_path / self.backtest_predictions_folder / "live_data")
-
-    def set_backtesting_live_dataframe_path(
-        self, pair: str
-    ) -> None:
-        """
-        Set live backtesting dataframe path
-        :param pair: current pair
-        """
-        self.set_backtesting_live_dataframe_folder_path()
-        if not self.backtesting_live_model_folder_path.is_dir():
-            self.backtesting_live_model_folder_path.mkdir(parents=True, exist_ok=True)
-
-        pair_path = pair.split(":")[0].replace("/", "_").lower()
-        file_name = f"live_backtesting_{pair_path}.feather"
-        self.backtesting_live_model_path = Path(
-            self.full_path /
-            self.backtesting_live_model_folder_path /
-            file_name)
-        self.backtesting_live_model_bkp_path = Path(
-            self.full_path /
-            self.backtesting_live_model_folder_path /
-            file_name.replace(".feather", ".backup.feather"))
-
-    def save_backtesting_live_dataframe(
-        self, dataframe: DataFrame, pair: str
-    ) -> None:
-        """
-        Save live backtesting dataframe to feather file format
-        :param dataframe: current live dataframe
-        :param pair: current pair
-        """
-        self.set_backtesting_live_dataframe_path(pair)
-        last_row_df = dataframe.tail(1)
-        if self.backtesting_live_model_path.is_file():
-            saved_dataframe = self.get_backtesting_live_dataframe()
-            concat_dataframe = pd.concat([saved_dataframe, last_row_df])
-            self.save_backtesting_live_dataframe_to_feather(concat_dataframe)
-        else:
-            self.save_backtesting_live_dataframe_to_feather(last_row_df)
-
-        shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path)
-
-    def save_backtesting_live_dataframe_to_feather(self, dataframe: DataFrame):
-        dataframe.reset_index(drop=True).to_feather(
-            self.backtesting_live_model_path, compression_level=9, compression='lz4')
-
-    def get_backtesting_live_dataframe(
-        self
-    ) -> DataFrame:
-        """
-        Get live backtesting dataframe from feather file format
-        return: saved dataframe from previous dry/run or live
-        """
-        if self.backtesting_live_model_path.is_file():
-            saved_dataframe = DataFrame()
-            try:
-                saved_dataframe = read_feather(self.backtesting_live_model_path)
-            except Exception:
-                saved_dataframe = read_feather(self.backtesting_live_model_bkp_path)
-            return saved_dataframe
-        else:
-            raise OperationalException(
-                "Saved live backtesting dataframe file not found."
-            )
-
    def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange:
        """
-        Returns timerange information based on live backtesting dataframe file
+        Returns timerange information based on historic predictions file
        :return: timerange calculated from saved live data
        """
-        all_assets_start_dates = []
-        all_assets_end_dates = []
-        self.set_backtesting_live_dataframe_folder_path()
-        if not self.backtesting_live_model_folder_path.is_dir():
+        from freqtrade.freqai.data_drawer import FreqaiDataDrawer
+        dd = FreqaiDataDrawer(Path(self.full_path), self.config)
+        if not dd.historic_predictions_path.is_file():
            raise OperationalException(
-                'Saved live data not found. Saved lived data is required '
+                'Historic predictions not found. Historic predictions data is required '
                'to run backtest with the freqai-backtest-live-models option '
-                'and save_live_data_backtest config option as true'
+                'and backtest_using_historic_predictions config option as true'
            )
-        for file_in_dir in self.backtesting_live_model_folder_path.iterdir():
-            if file_in_dir.is_file() and "backup" not in file_in_dir.name:
-                saved_dataframe = read_feather(file_in_dir)
-                all_assets_start_dates.append(saved_dataframe.date.min())
-                all_assets_end_dates.append(saved_dataframe.date.max())
-        start_date = min(all_assets_start_dates)
-        end_date = max(all_assets_end_dates)
+
+        dd.load_historic_predictions_from_disk()
+
+        all_pairs_end_dates = []
+        for pair in dd.historic_predictions:
+            pair_historic_data = dd.historic_predictions[pair]
+            all_pairs_end_dates.append(pair_historic_data.date_pred.max())
+
+        global_metadata = dd.load_global_metadata_from_disk()
+        start_date = datetime.fromtimestamp(int(global_metadata["start_dry_live_date"]))
+        end_date = max(all_pairs_end_dates)
        # add 1 day to string timerange to ensure BT module will load all dataframe data
        end_date = end_date + timedelta(days=1)
        backtesting_timerange = TimeRange(
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -53,6 +53,7 @@ class IFreqaiModel(ABC):
    def __init__(self, config: Config) -> None:

        self.config = config
+        self.metadata: Dict[str, Any] = {}
        self.assert_config(self.config)
        self.freqai_info: Dict[str, Any] = config["freqai"]
        self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get(
@@ -67,10 +68,10 @@ class IFreqaiModel(ABC):
        self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
        if self.save_backtest_models:
            logger.info('Backtesting module configured to save all models.')
-        self.save_live_data_backtest: bool = self.freqai_info.get(
-            "save_live_data_backtest", False)
-        if self.save_live_data_backtest:
-            logger.info('Live configured to save data for backtest.')
+        self.backtest_using_historic_predictions: bool = self.freqai_info.get(
+            "backtest_using_historic_predictions", True)
+        if self.backtest_using_historic_predictions:
+            logger.info('Backtesting live models configured to use historic predictions.')

        self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
        # set current candle to arbitrary historical date
@@ -103,6 +104,7 @@ class IFreqaiModel(ABC):
        self.get_corr_dataframes: bool = True
        self._threads: List[threading.Thread] = []
        self._stop_event = threading.Event()
+        self.metadata = self.dd.load_global_metadata_from_disk()

        record_params(config, self.full_path)

@@ -136,6 +138,7 @@ class IFreqaiModel(ABC):
            self.inference_timer('start')
            self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
            dk = self.start_live(dataframe, metadata, strategy, self.dk)
+            dataframe = dk.remove_features_from_df(dk.return_dataframe)

        # For backtesting, each pair enters and then gets trained for each window along the
        # sliding window defined by "train_period_days" (training window) and "live_retrain_hours"
@@ -145,14 +148,19 @@ class IFreqaiModel(ABC):
        elif not self.follow_mode:
            self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
            if self.dk.backtest_live_models:
-                logger.info(
-                    f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (live models)")
+                if self.backtest_using_historic_predictions:
+                    logger.info(
+                        "Backtesting using historic predictions (live models)")
+                else:
+                    logger.info(
+                        f"Backtesting {len(self.dk.backtesting_timeranges)} "
+                        "timeranges (live models)")
            else:
                logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
            dataframe = self.dk.use_strategy_to_populate_indicators(
                strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
            )
-            if not self.save_live_data_backtest:
+            if not self.backtest_using_historic_predictions:
                dk = self.start_backtesting(dataframe, metadata, self.dk)
                dataframe = dk.remove_features_from_df(dk.return_dataframe)
            else:
@@ -163,8 +171,7 @@ class IFreqaiModel(ABC):
        self.clean_up()
        if self.live:
            self.inference_timer('stop', metadata["pair"])
-            if self.save_live_data_backtest:
-                dk.save_backtesting_live_dataframe(dataframe, metadata["pair"])
+            self.set_start_dry_live_date(dataframe)

        return dataframe

@@ -335,14 +342,12 @@ class IFreqaiModel(ABC):
        """
        pair = metadata["pair"]
        dk.return_dataframe = dataframe
-        self.dk.set_backtesting_live_dataframe_path(pair)
-        saved_dataframe = self.dk.get_backtesting_live_dataframe()
-        columns_to_drop = list(set(dk.return_dataframe.columns).difference(
-            ["date", "open", "high", "low", "close", "volume"]))
-        saved_dataframe = saved_dataframe.drop(
-            columns=["open", "high", "low", "close", "volume"])
+        saved_dataframe = self.dd.historic_predictions[pair]
+        columns_to_drop = list(set(saved_dataframe.columns).intersection(
+            dk.return_dataframe.columns))
        dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
-        dk.return_dataframe = pd.merge(dk.return_dataframe, saved_dataframe, how='left', on='date')
+        dk.return_dataframe = pd.merge(
+            dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred")
        # dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0)
        return dk

@@ -886,6 +891,22 @@ class IFreqaiModel(ABC):

        return

+    def update_metadata(self, metadata: Dict[str, Any]):
+        """
+        Save the new global metadata json to disk, then keep it as self.metadata
+        :param metadata: new global metadata dict
+        """
+        self.dd.save_global_metadata_to_disk(metadata)
+        self.metadata = metadata
+
+    def set_start_dry_live_date(self, live_dataframe: DataFrame):
+        key_name = "start_dry_live_date"
+        if key_name not in self.metadata:  # stored once; later calls keep the saved value
+            metadata = self.metadata  # same dict object as self.metadata, not a copy
+            metadata[key_name] = int(  # epoch seconds of the last row's "date"
+                pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp())
+            self.update_metadata(metadata)
+
    # Following methods which are overridden by user made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example.

--- a/freqtrade/freqai/utils.py
+++ b/freqtrade/freqai/utils.py
@@ -230,7 +230,7 @@ def get_timerange_backtest_live_models(config: Config) -> str:
    dk = FreqaiDataKitchen(config)
    models_path = dk.get_full_models_path(config)
    timerange: TimeRange = TimeRange()
-    if not config.get("save_live_data_backtest", False):
+    if not config.get("freqai", {}).get("backtest_using_historic_predictions", True):
        timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path)
    else:
        timerange = dk.get_timerange_from_backtesting_live_dataframe()
--- a/tests/freqai/test_freqai_datakitchen.py
+++ b/tests/freqai/test_freqai_datakitchen.py
@@ -261,45 +261,18 @@ def test_get_full_model_path(mocker, freqai_conf, model):
    assert model_path.is_dir() is True


-def test_save_backtesting_live_dataframe(mocker, freqai_conf):
-    freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf)
-    dataframe_without_last_candle = dataframe.copy()
-    dataframe_without_last_candle.drop(dataframe.tail(1).index, inplace=True)
-    freqai_conf.update({"save_live_data_backtest": True})
-    freqai.dk.save_backtesting_live_dataframe(dataframe_without_last_candle, "ADA/BTC")
-    saved_dataframe = freqai.dk.get_backtesting_live_dataframe()
-    assert len(saved_dataframe) == 1
-    assert saved_dataframe.iloc[-1, 0] == dataframe_without_last_candle.iloc[-1, 0]
-    freqai.dk.save_backtesting_live_dataframe(dataframe, "ADA/BTC")
-    saved_dataframe = freqai.dk.get_backtesting_live_dataframe()
-    assert len(saved_dataframe) == 2
-    assert saved_dataframe.iloc[-1, 0] == dataframe.iloc[-1, 0]
-    assert saved_dataframe.iloc[-2, 0] == dataframe.iloc[-2, 0]
-
-
 def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf):
    freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf)
-    freqai_conf.update({"save_live_data_backtest": True})
-    freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC")
-    freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe)
+    freqai_conf.update({"backtest_using_historic_predictions": True})
    timerange = freqai.dk.get_timerange_from_backtesting_live_dataframe()
    assert timerange.startts == 1516406400
    assert timerange.stopts == 1517356500


-def test_get_timerange_from_backtesting_live_dataframe_folder_not_found(mocker, freqai_conf):
+def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_conf):
    freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf)
    with pytest.raises(
            OperationalException,
-            match=r'Saved live data not found.*'
+            match=r'Historic predictions not found.*'
            ):
        freqai.dk.get_timerange_from_backtesting_live_dataframe()
-
-
-def test_saved_live_bt_file_not_found(mocker, freqai_conf):
-    freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf)
-    with pytest.raises(
-            OperationalException,
-            match=r'.*live backtesting dataframe file not found.*'
-            ):
-        freqai.dk.get_backtesting_live_dataframe()
--- a/tests/freqai/test_freqai_interface.py
+++ b/tests/freqai/test_freqai_interface.py
@@ -300,37 +300,6 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
    shutil.rmtree(Path(freqai.dk.full_path))


-def test_start_backtesting_from_saved_live_dataframe(mocker, freqai_conf, caplog):
-    freqai_conf.update({"save_live_data_backtest": True})
-    freqai_conf.update({"freqai_backtest_live_models": True})
-
-    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
-    exchange = get_patched_exchange(mocker, freqai_conf)
-    strategy.dp = DataProvider(freqai_conf, exchange)
-    strategy.freqai_info = freqai_conf.get("freqai", {})
-    freqai = strategy.freqai
-    freqai.live = False
-    freqai.dk = FreqaiDataKitchen(freqai_conf)
-    timerange = TimeRange.parse_timerange("20180110-20180130")
-    freqai.dd.load_all_pair_histories(timerange, freqai.dk)
-    sub_timerange = TimeRange.parse_timerange("20180110-20180130")
-    corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk)
-    df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
-    metadata = {"pair": "ADA/BTC"}
-
-    # create a dummy live dataframe file with 10 rows
-    dataframe_predictions = df.tail(10).copy()
-    dataframe_predictions["&s_close"] = dataframe_predictions["close"] * 1.1
-    freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC")
-    freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe_predictions)
-
-    freqai.start_backtesting_from_live_saved_files(df, metadata, freqai.dk)
-    assert len(freqai.dk.return_dataframe) == len(df)
-    assert len(freqai.dk.return_dataframe[freqai.dk.return_dataframe["&s_close"] > 0]) == (
-        len(dataframe_predictions))
-    shutil.rmtree(Path(freqai.dk.full_path))
-
-
 def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog):
    freqai_conf.get("freqai", {}).update({"fit_live_predictions_candles": 10})
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)