Update freqai_backtest_live_models to backtest only from historic predictions

2022-11-22 13:09:09 -03:00
parent fdc82af883
commit c01f25ddc9
9 changed files with 36 additions and 227 deletions
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -15,7 +15,6 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> **Datatype:** Positive integer. <br> Default: `0` (models never expire).
 | `purge_old_models` | Delete obsolete models. <br> **Datatype:** Boolean. <br> Default: `False` (all historic models remain on disk).
 | `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`. <br> **Datatype:** Boolean. <br> Default: `False` (no models are saved).
-| `backtest_using_historic_predictions` | Reuse `historic_predictions` in backtesting with [Backtest live models](freqai-running.md#backtest_live_models)) option. <br> Default: `True`
 | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer.
 | `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models. <br> **Datatype:** Boolean. <br> Default: `False`.
 | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`.
--- a/docs/freqai-running.md
+++ b/docs/freqai-running.md
@@ -81,17 +81,9 @@ To save the models generated during a particular backtest so that you can start

 ### Backtest live models

-FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study. For that, you have 2 options:
+FreqAI allows you to reuse live historic predictions through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/live runs for comparison or further study.

-1. Set `"backtest_using_historic_predictions"` to `True` in the config. With this option, FreqAI will reuse `historic_predictions` in backtesting. This option requires less disk space and backtesting will run faster.
-2. Set `"purge_old_models"` to `False` and `"backtest_using_historic_predictions"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time.
-
-The `--timerange` parameter must not be informed, as it will be automatically calculated through the training end dates of the models.
-
-Each model has an identifier derived from the training end date. If you have only 1 model trained, FreqAI will backtest from the training end date until the current date. If you have more than 1 model, each model will perform the backtesting according to the training end date until the training end date of the next model and so on. For the last model, the period of the previous model will be used for the execution.
-
-!!! Note
-    Currently, there is no checking for expired models, even if the `expired_hours` parameter is set.
+The `--timerange` parameter must not be specified, as it will be calculated automatically from the data in the historic predictions file.


 ### Downloading data to cover the full backtest period
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -715,7 +715,7 @@ class FreqaiDataDrawer:

        return corr_dataframes, base_dataframes

-    def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange:
+    def get_timerange_from_live_historic_predictions(self) -> TimeRange:
        """
        Returns timerange information based on historic predictions file
        :return: timerange calculated from saved live data
@@ -724,7 +724,6 @@ class FreqaiDataDrawer:
            raise OperationalException(
                'Historic predictions not found. Historic predictions data is required '
                'to run backtest with the freqai-backtest-live-models option '
-                'and backtest_using_historic_predictions config option as true'
            )

        self.load_historic_predictions_from_disk()
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -1,7 +1,7 @@
 import copy
 import logging
 import shutil
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timezone
 from math import cos, sin
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
@@ -86,14 +86,7 @@ class FreqaiDataKitchen:
        if not self.live:
            self.full_path = self.get_full_models_path(self.config)

-            if self.backtest_live_models:
-                if self.pair and not (
-                    self.freqai_config.get("backtest_using_historic_predictions", True)
-                ):
-                    self.set_timerange_from_ready_models()
-                    (self.training_timeranges,
-                     self.backtesting_timeranges) = self.split_timerange_live_models()
-            else:
+            if not self.backtest_live_models:
                self.full_timerange = self.create_fulltimerange(
                    self.config["timerange"], self.freqai_config.get("train_period_days", 0)
                )
@@ -458,28 +451,28 @@ class FreqaiDataKitchen:
        # print(tr_training_list, tr_backtesting_list)
        return tr_training_list_timerange, tr_backtesting_list_timerange

-    def split_timerange_live_models(
-        self
-    ) -> Tuple[list, list]:
+    # def split_timerange_live_models(
+    #     self
+    # ) -> Tuple[list, list]:

-        tr_backtesting_list_timerange = []
-        asset = self.pair.split("/")[0]
-        if asset not in self.backtest_live_models_data["assets_end_dates"]:
-            raise OperationalException(
-                f"Model not available for pair {self.pair}. "
-                "Please, try again after removing this pair from the configuration file."
-            )
-        asset_data = self.backtest_live_models_data["assets_end_dates"][asset]
-        backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"]
-        model_end_dates = [x for x in asset_data]
-        model_end_dates.append(backtesting_timerange.stopts)
-        model_end_dates.sort()
-        for index, item in enumerate(model_end_dates):
-            if len(model_end_dates) > (index + 1):
-                tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1])
-                tr_backtesting_list_timerange.append(tr_to_add)
+    #     tr_backtesting_list_timerange = []
+    #     asset = self.pair.split("/")[0]
+    #     if asset not in self.backtest_live_models_data["assets_end_dates"]:
+    #         raise OperationalException(
+    #             f"Model not available for pair {self.pair}. "
+    #             "Please, try again after removing this pair from the configuration file."
+    #         )
+    #     asset_data = self.backtest_live_models_data["assets_end_dates"][asset]
+    #     backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"]
+    #     model_end_dates = [x for x in asset_data]
+    #     model_end_dates.append(backtesting_timerange.stopts)
+    #     model_end_dates.sort()
+    #     for index, item in enumerate(model_end_dates):
+    #         if len(model_end_dates) > (index + 1):
+    #             tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1])
+    #             tr_backtesting_list_timerange.append(tr_to_add)

-        return tr_backtesting_list_timerange, tr_backtesting_list_timerange
+    #     return tr_backtesting_list_timerange, tr_backtesting_list_timerange

    def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame:
        """
@@ -1371,17 +1364,6 @@ class FreqaiDataKitchen:
            )
            return False

-    def set_timerange_from_ready_models(self):
-        backtesting_timerange, \
-            assets_end_dates = (
-                self.get_timerange_and_assets_end_dates_from_ready_models(self.full_path))
-
-        self.backtest_live_models_data = {
-            "backtesting_timerange": backtesting_timerange,
-            "assets_end_dates": assets_end_dates
-            }
-        return
-
    def get_full_models_path(self, config: Config) -> Path:
        """
        Returns default FreqAI model path
@@ -1392,88 +1374,6 @@ class FreqaiDataKitchen:
            config["user_data_dir"] / "models" / str(freqai_config.get("identifier"))
        )

-    def get_timerange_and_assets_end_dates_from_ready_models(
-            self, models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]:
-        """
-        Returns timerange information based on a FreqAI model directory
-        :param models_path: FreqAI model path
-
-        :return: a Tuple with (Timerange calculated from directory and
-        a Dict with pair and model end training dates info)
-        """
-        all_models_end_dates = []
-        assets_end_dates: Dict[str, Any] = self.get_assets_timestamps_training_from_ready_models(
-                                           models_path)
-        for key in assets_end_dates:
-            for model_end_date in assets_end_dates[key]:
-                if model_end_date not in all_models_end_dates:
-                    all_models_end_dates.append(model_end_date)
-
-        if len(all_models_end_dates) == 0:
-            raise OperationalException(
-                'At least 1 saved model is required to '
-                'run backtest with the freqai-backtest-live-models option'
-            )
-
-        if len(all_models_end_dates) == 1:
-            logger.warning(
-                "Only 1 model was found. Backtesting will run with the "
-                "timerange from the end of the training date to the current date"
-            )
-
-        finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp())
-        if len(all_models_end_dates) > 1:
-            # After last model end date, use the same period from previous model
-            # to finish the backtest
-            all_models_end_dates.sort(reverse=True)
-            finish_timestamp = all_models_end_dates[0] + \
-                (all_models_end_dates[0] - all_models_end_dates[1])
-
-        all_models_end_dates.append(finish_timestamp)
-        all_models_end_dates.sort()
-        start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates),
-                      timezone.utc).timetuple()[:3], tzinfo=timezone.utc))
-        end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates),
-                    timezone.utc).timetuple()[:3], tzinfo=timezone.utc))
-
-        # add 1 day to string timerange to ensure BT module will load all dataframe data
-        end_date = end_date + timedelta(days=1)
-        backtesting_timerange = TimeRange(
-            'date', 'date', int(start_date.timestamp()), int(end_date.timestamp())
-        )
-        return backtesting_timerange, assets_end_dates
-
-    def get_assets_timestamps_training_from_ready_models(
-            self, models_path: Path) -> Dict[str, Any]:
-        """
-        Scan the models path and returns all assets end training dates (timestamp)
-        :param models_path: FreqAI model path
-
-        :return: a Dict with asset and model end training dates info
-        """
-        assets_end_dates: Dict[str, Any] = {}
-        if not models_path.is_dir():
-            raise OperationalException(
-                'Model folders not found. Saved models are required '
-                'to run backtest with the freqai-backtest-live-models option'
-            )
-        for model_dir in models_path.iterdir():
-            if str(model_dir.name).startswith("sub-train"):
-                model_end_date = int(model_dir.name.split("_")[1])
-                asset = model_dir.name.split("_")[0].replace("sub-train-", "")
-                model_file_name = (
-                    f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}"
-                    "_model.joblib"
-                )
-
-                model_path_file = Path(model_dir / model_file_name)
-                if model_path_file.is_file():
-                    if asset not in assets_end_dates:
-                        assets_end_dates[asset] = []
-                    assets_end_dates[asset].append(model_end_date)
-
-        return assets_end_dates
-
    def remove_special_chars_from_feature_names(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """
        Remove all special characters from feature strings (:)
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -68,10 +68,6 @@ class IFreqaiModel(ABC):
        self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
        if self.save_backtest_models:
            logger.info('Backtesting module configured to save all models.')
-        self.backtest_using_historic_predictions: bool = self.freqai_info.get(
-            "backtest_using_historic_predictions", True)
-        if self.backtest_using_historic_predictions:
-            logger.info('Backtesting live models configured to use historic predictions.')

        self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
        # set current candle to arbitrary historical date
@@ -148,23 +144,18 @@ class IFreqaiModel(ABC):
        elif not self.follow_mode:
            self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
            if self.dk.backtest_live_models:
-                if self.backtest_using_historic_predictions:
                logger.info(
                    "Backtesting using historic predictions (live models)")
-                else:
-                    logger.info(
-                        f"Backtesting {len(self.dk.backtesting_timeranges)} "
-                        "timeranges (live models)")
            else:
                logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
            dataframe = self.dk.use_strategy_to_populate_indicators(
                strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
            )
-            if not self.backtest_using_historic_predictions:
+            if not self.config.get("freqai_backtest_live_models", False):
                dk = self.start_backtesting(dataframe, metadata, self.dk)
                dataframe = dk.remove_features_from_df(dk.return_dataframe)
            else:
-                dk = self.start_backtesting_from_live_saved_files(
+                dk = self.start_backtesting_from_historic_predictions(
                    dataframe, metadata, self.dk)
                dataframe = dk.return_dataframe

@@ -330,7 +321,7 @@ class IFreqaiModel(ABC):

        return dk

-    def start_backtesting_from_live_saved_files(
+    def start_backtesting_from_historic_predictions(
        self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
    ) -> FreqaiDataKitchen:
        """
--- a/freqtrade/freqai/utils.py
+++ b/freqtrade/freqai/utils.py
@@ -230,11 +230,6 @@ def get_timerange_backtest_live_models(config: Config) -> str:
    """
    dk = FreqaiDataKitchen(config)
    models_path = dk.get_full_models_path(config)
-    timerange: TimeRange = TimeRange()
-    if not config.get("freqai", {}).get("backtest_using_historic_predictions", True):
-        timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path)
-    else:
    dd = FreqaiDataDrawer(models_path, config)
-        timerange = dd.get_timerange_from_backtesting_live_dataframe()
-
+    timerange = dd.get_timerange_from_live_historic_predictions()
    return timerange.timerange_str
--- a/tests/freqai/test_freqai_backtesting.py
+++ b/tests/freqai/test_freqai_backtesting.py
@@ -81,7 +81,7 @@ def test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testda
    bt_config = setup_optimize_configuration(args, RunMode.BACKTEST)

    with pytest.raises(OperationalException,
-                       match=r".* Saved models are required to run backtest .*"):
+                       match=r".* Historic predictions data is required to run backtest .*"):
        Backtesting(bt_config)

    Backtesting.cleanup()
--- a/tests/freqai/test_freqai_datadrawer.py
+++ b/tests/freqai/test_freqai_datadrawer.py
@@ -98,7 +98,7 @@ def test_use_strategy_to_populate_indicators(mocker, freqai_conf):
    shutil.rmtree(Path(freqai.dk.full_path))


-def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf):
+def test_get_timerange_from_live_historic_predictions(mocker, freqai_conf):
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
@@ -115,7 +115,7 @@ def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf):
    freqai.dd.save_historic_predictions_to_disk()
    freqai.dd.save_global_metadata_to_disk({"start_dry_live_date": 1516406400})

-    timerange = freqai.dd.get_timerange_from_backtesting_live_dataframe()
+    timerange = freqai.dd.get_timerange_from_live_historic_predictions()
    assert timerange.startts == 1516406400
    assert timerange.stopts == 1517356500

@@ -129,4 +129,4 @@ def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_co
            OperationalException,
            match=r'Historic predictions not found.*'
            ):
-        freqai.dd.get_timerange_from_backtesting_live_dataframe()
+        freqai.dd.get_timerange_from_live_historic_predictions()
--- a/tests/freqai/test_freqai_datakitchen.py
+++ b/tests/freqai/test_freqai_datakitchen.py
@@ -9,7 +9,6 @@ from freqtrade.configuration import TimeRange
 from freqtrade.data.dataprovider import DataProvider
 from freqtrade.exceptions import OperationalException
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from freqtrade.freqai.utils import get_timerange_backtest_live_models
 from tests.conftest import get_patched_exchange, log_has_re
 from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy,
                                   make_data_dictionary, make_unfiltered_dataframe)
@@ -166,72 +165,6 @@ def test_make_train_test_datasets(mocker, freqai_conf):
    assert len(data_dictionary['train_features'].index) == 1916


-def test_get_pairs_timestamp_validation(mocker, freqai_conf):
-    exchange = get_patched_exchange(mocker, freqai_conf)
-    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
-    strategy.dp = DataProvider(freqai_conf, exchange)
-    strategy.freqai_info = freqai_conf.get("freqai", {})
-    freqai = strategy.freqai
-    freqai.live = True
-    freqai.dk = FreqaiDataKitchen(freqai_conf)
-    freqai_conf['freqai'].update({"identifier": "invalid_id"})
-    model_path = freqai.dk.get_full_models_path(freqai_conf)
-    with pytest.raises(
-            OperationalException,
-            match=r'.*required to run backtest with the freqai-backtest-live-models.*'
-            ):
-        freqai.dk.get_assets_timestamps_training_from_ready_models(model_path)
-
-
-@pytest.mark.parametrize('model', [
-    'LightGBMRegressor'
-    ])
-def test_get_timerange_from_ready_models(mocker, freqai_conf, model):
-    freqai_conf.update({"freqaimodel": model})
-    freqai_conf.update({"timerange": "20180110-20180130"})
-    freqai_conf.update({"strategy": "freqai_test_strat"})
-    freqai_conf.get("freqai", {}).update({"backtest_using_historic_predictions": False})
-
-    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
-    exchange = get_patched_exchange(mocker, freqai_conf)
-    strategy.dp = DataProvider(freqai_conf, exchange)
-    strategy.freqai_info = freqai_conf.get("freqai", {})
-    freqai = strategy.freqai
-    freqai.live = True
-    freqai.dk = FreqaiDataKitchen(freqai_conf)
-    timerange = TimeRange.parse_timerange("20180101-20180130")
-    freqai.dd.load_all_pair_histories(timerange, freqai.dk)
-
-    freqai.dd.pair_dict = MagicMock()
-
-    data_load_timerange = TimeRange.parse_timerange("20180101-20180130")
-
-    # 1516233600 (2018-01-18 00:00) - Start Training 1
-    # 1516406400 (2018-01-20 00:00) - End Training 1 (Backtest slice 1)
-    # 1516579200 (2018-01-22 00:00) - End Training 2 (Backtest slice 2)
-    # 1516838400 (2018-01-25 00:00) - End Timerange
-
-    new_timerange = TimeRange("date", "date", 1516233600, 1516406400)
-    freqai.extract_data_and_train_model(
-        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
-
-    new_timerange = TimeRange("date", "date", 1516406400, 1516579200)
-    freqai.extract_data_and_train_model(
-        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
-
-    model_path = freqai.dk.get_full_models_path(freqai_conf)
-    (backtesting_timerange,
-     pairs_end_dates) = freqai.dk.get_timerange_and_assets_end_dates_from_ready_models(
-                        models_path=model_path)
-
-    assert len(pairs_end_dates["ADA"]) == 2
-    assert backtesting_timerange.startts == 1516406400
-    assert backtesting_timerange.stopts == 1516838400
-
-    backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf)
-    assert backtesting_string_timerange == '20180120-20180125'
-
-
@pytest.mark.parametrize('model', [
    'LightGBMRegressor'
    ])