From 3903b04d3f42f4465d85165913e265c75ffc9f76 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 17 Nov 2022 15:20:07 -0300 Subject: [PATCH] save_live_data_backtest - added docs and tests --- docs/freqai-parameter-table.md | 1 + docs/freqai-running.md | 5 ++- freqtrade/freqai/data_kitchen.py | 10 +++-- freqtrade/freqai/freqai_interface.py | 10 ++--- tests/freqai/test_freqai_datakitchen.py | 44 ++++++++++++++++++++ tests/freqai/test_freqai_interface.py | 55 +++++++++++++++++++++++++ 6 files changed, 114 insertions(+), 11 deletions(-) diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md index c027a12b1..2961b1b8d 100644 --- a/docs/freqai-parameter-table.md +++ b/docs/freqai-parameter-table.md @@ -15,6 +15,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old.
**Datatype:** Positive integer.
Default: `0` (models never expire). | `purge_old_models` | Delete obsolete models.
**Datatype:** Boolean.
Default: `False` (all historic models remain on disk). | `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`.
**Datatype:** Boolean.
Default: `False` (no models are saved). +| `save_live_data_backtest` | Save the live dataframe during dry/live runs so that it can be reused in backtesting with the [Backtest live models](freqai-running.md#backtest-live-models) option. | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)).
**Datatype:** Positive integer. | `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models.
**Datatype:** Boolean.
Default: `False`. | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)).
**Datatype:** Boolean.
Default: `False`. diff --git a/docs/freqai-running.md b/docs/freqai-running.md index d2f9595be..4c90a4885 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -81,7 +81,10 @@ To save the models generated during a particular backtest so that you can start ### Backtest live models -FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse models generated in dry/run for comparison or other study. For that, you must set `"purge_old_models"` to `False` in the config. +FreqAI allows you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/live runs for comparison or other studies. For that, you have 2 options: + +1. Set `"save_live_data_backtest"` to `True` in the config. With this option, FreqAI will save the live dataframe for reuse in backtesting. This option requires less disk space and backtesting will run faster. +2. Set `"purge_old_models"` to `False` and `"save_live_data_backtest"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time. The `--timerange` parameter must not be informed, as it will be automatically calculated through the training end dates of the models. 
diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index ed78cfee5..d93060568 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1541,14 +1541,16 @@ class FreqaiDataKitchen: if self.backtesting_live_model_path.is_file(): saved_dataframe = self.get_backtesting_live_dataframe() concat_dataframe = pd.concat([saved_dataframe, last_row_df]) - concat_dataframe.reset_index(drop=True).to_feather( - self.backtesting_live_model_path, compression_level=9, compression='lz4') + self.save_backtesting_live_dataframe_to_feather(concat_dataframe) else: - last_row_df.reset_index(drop=True).to_feather( - self.backtesting_live_model_path, compression_level=9, compression='lz4') + self.save_backtesting_live_dataframe_to_feather(last_row_df) shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path) + def save_backtesting_live_dataframe_to_feather(self, dataframe: DataFrame): + dataframe.reset_index(drop=True).to_feather( + self.backtesting_live_model_path, compression_level=9, compression='lz4') + def get_backtesting_live_dataframe( self ) -> DataFrame: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 8d84d70c5..a0dac5725 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -694,7 +694,8 @@ class IFreqaiModel(ABC): for label in full_labels: if self.dd.historic_predictions[dk.pair][label].dtype == object: continue - f = spy.stats.norm.fit(self.dd.historic_predictions[dk.pair][label].tail(num_candles)) + f = spy.stats.norm.fit( + self.dd.historic_predictions[dk.pair][label].fillna(0).tail(num_candles)) dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1] return @@ -882,11 +883,7 @@ class IFreqaiModel(ABC): if index >= fit_live_predictions_candles: self.dd.historic_predictions[self.dk.pair] = ( dk.full_df.iloc[index - fit_live_predictions_candles:index]) - else: - 
self.dd.historic_predictions[self.dk.pair] = dk.full_df.iloc[:index] - - self.fit_live_predictions(self.dk, self.dk.pair) - if index >= fit_live_predictions_candles: + self.fit_live_predictions(self.dk, self.dk.pair) for label in label_columns: if dk.full_df[label].dtype == object: continue @@ -899,6 +896,7 @@ class IFreqaiModel(ABC): for extra_col in self.dk.data["extra_returns_per_train"]: dk.full_df.at[index, f"{extra_col}"] = ( self.dk.data["extra_returns_per_train"][extra_col]) + return # Following methods which are overridden by user made prediction models. diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 9abe60edb..ca7c19c94 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -259,3 +259,47 @@ def test_get_full_model_path(mocker, freqai_conf, model): model_path = freqai.dk.get_full_models_path(freqai_conf) assert model_path.is_dir() is True + + +def test_save_backtesting_live_dataframe(mocker, freqai_conf): + freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf) + dataframe_without_last_candle = dataframe.copy() + dataframe_without_last_candle.drop(dataframe.tail(1).index, inplace=True) + freqai_conf.update({"save_live_data_backtest": True}) + freqai.dk.save_backtesting_live_dataframe(dataframe_without_last_candle, "ADA/BTC") + saved_dataframe = freqai.dk.get_backtesting_live_dataframe() + assert len(saved_dataframe) == 1 + assert saved_dataframe.iloc[-1, 0] == dataframe_without_last_candle.iloc[-1, 0] + freqai.dk.save_backtesting_live_dataframe(dataframe, "ADA/BTC") + saved_dataframe = freqai.dk.get_backtesting_live_dataframe() + assert len(saved_dataframe) == 2 + assert saved_dataframe.iloc[-1, 0] == dataframe.iloc[-1, 0] + assert saved_dataframe.iloc[-2, 0] == dataframe.iloc[-2, 0] + + +def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf): + freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf) + 
freqai_conf.update({"save_live_data_backtest": True}) + freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC") + freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe) + timerange = freqai.dk.get_timerange_from_backtesting_live_dataframe() + assert timerange.startts == 1516406400 + assert timerange.stopts == 1517356500 + + +def test_get_timerange_from_backtesting_live_dataframe_folder_not_found(mocker, freqai_conf): + freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf) + with pytest.raises( + OperationalException, + match=r'Saved live data not found.*' + ): + freqai.dk.get_timerange_from_backtesting_live_dataframe() + + +def test_saved_live_bt_file_not_found(mocker, freqai_conf): + freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf) + with pytest.raises( + OperationalException, + match=r'.*live backtesting dataframe file not found.*' + ): + freqai.dk.get_backtesting_live_dataframe() diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 25bc99580..ed634de55 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -300,6 +300,61 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): shutil.rmtree(Path(freqai.dk.full_path)) +def test_start_backtesting_from_saved_live_dataframe(mocker, freqai_conf, caplog): + freqai_conf.update({"save_live_data_backtest": True}) + freqai_conf.update({"freqai_backtest_live_models": True}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = False + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + sub_timerange = TimeRange.parse_timerange("20180110-20180130") + corr_df, base_df = 
freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) + df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + metadata = {"pair": "ADA/BTC"} + + # create a dummy live dataframe file with 10 rows + dataframe_predictions = df.tail(10).copy() + dataframe_predictions["&s_close"] = dataframe_predictions["close"] * 1.1 + freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC") + freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe_predictions) + + freqai.start_backtesting_from_live_saved_files(df, metadata, freqai.dk) + assert len(freqai.dk.return_dataframe) == len(df) + assert len(freqai.dk.return_dataframe[freqai.dk.return_dataframe["&s_close"] > 0]) == ( + len(dataframe_predictions)) + shutil.rmtree(Path(freqai.dk.full_path)) + + +def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog): + freqai_conf.get("freqai", {}).update({"fit_live_predictions_candles": 10}) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = False + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180128-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + sub_timerange = TimeRange.parse_timerange("20180129-20180130") + corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) + df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + freqai.dk.pair = "ADA/BTC" + freqai.dk.full_df = df + assert "&-s_close_mean" not in freqai.dk.full_df.columns + assert "&-s_close_std" not in freqai.dk.full_df.columns + freqai.backtesting_fit_live_predictions(freqai.dk) + assert "&-s_close_mean" in freqai.dk.full_df.columns + assert "&-s_close_std" in freqai.dk.full_df.columns + 
shutil.rmtree(Path(freqai.dk.full_path)) + + def test_follow_mode(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"})