save_live_data_backtest - added docs and tests

This commit is contained in:
Wagner Costa Santos 2022-11-17 15:20:07 -03:00
parent 99bff9cbfa
commit 3903b04d3f
6 changed files with 114 additions and 11 deletions

View File

@ -15,6 +15,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
| `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> **Datatype:** Positive integer. <br> Default: `0` (models never expire). | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> **Datatype:** Positive integer. <br> Default: `0` (models never expire).
| `purge_old_models` | Delete obsolete models. <br> **Datatype:** Boolean. <br> Default: `False` (all historic models remain on disk). | `purge_old_models` | Delete obsolete models. <br> **Datatype:** Boolean. <br> Default: `False` (all historic models remain on disk).
| `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`. <br> **Datatype:** Boolean. <br> Default: `False` (no models are saved). | `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`. <br> **Datatype:** Boolean. <br> Default: `False` (no models are saved).
| `save_live_data_backtest` | Save the live dataframe during dry/live runs so that it can be reused in backtesting with the [Backtest live models](freqai-running.md#backtest-live-models) option. <br> **Datatype:** Boolean.
| `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer. | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer.
| `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models. <br> **Datatype:** Boolean. <br> Default: `False`. | `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models. <br> **Datatype:** Boolean. <br> Default: `False`.
| `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`. | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`.

View File

@ -81,7 +81,10 @@ To save the models generated during a particular backtest so that you can start
### Backtest live models ### Backtest live models
FreqAI allows you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse models generated in dry/live runs for comparison or other study. For that, you must set `"purge_old_models"` to `False` in the config. FreqAI allows you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/live runs for comparison or other study. For that, you have 2 options:
1. Set `"save_live_data_backtest"` to `True` in the config. With this option, FreqAI will save the live dataframe for reuse in backtesting. This option requires less disk space and backtesting will run faster.
2. Set `"purge_old_models"` to `False` and `"save_live_data_backtest"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time.
The `--timerange` parameter must not be provided, as it will be calculated automatically from the training end dates of the models. The `--timerange` parameter must not be provided, as it will be calculated automatically from the training end dates of the models.

View File

@ -1541,14 +1541,16 @@ class FreqaiDataKitchen:
if self.backtesting_live_model_path.is_file(): if self.backtesting_live_model_path.is_file():
saved_dataframe = self.get_backtesting_live_dataframe() saved_dataframe = self.get_backtesting_live_dataframe()
concat_dataframe = pd.concat([saved_dataframe, last_row_df]) concat_dataframe = pd.concat([saved_dataframe, last_row_df])
concat_dataframe.reset_index(drop=True).to_feather( self.save_backtesting_live_dataframe_to_feather(concat_dataframe)
self.backtesting_live_model_path, compression_level=9, compression='lz4')
else: else:
last_row_df.reset_index(drop=True).to_feather( self.save_backtesting_live_dataframe_to_feather(last_row_df)
self.backtesting_live_model_path, compression_level=9, compression='lz4')
shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path) shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path)
def save_backtesting_live_dataframe_to_feather(self, dataframe: DataFrame):
    """
    Write the given dataframe to the feather file located at
    `self.backtesting_live_model_path`, using lz4 compression.
    :param dataframe: dataframe to persist; its index is dropped
        (reset) before writing.
    """
    reindexed = dataframe.reset_index(drop=True)
    reindexed.to_feather(
        self.backtesting_live_model_path,
        compression='lz4',
        compression_level=9,
    )
def get_backtesting_live_dataframe( def get_backtesting_live_dataframe(
self self
) -> DataFrame: ) -> DataFrame:

View File

@ -694,7 +694,8 @@ class IFreqaiModel(ABC):
for label in full_labels: for label in full_labels:
if self.dd.historic_predictions[dk.pair][label].dtype == object: if self.dd.historic_predictions[dk.pair][label].dtype == object:
continue continue
f = spy.stats.norm.fit(self.dd.historic_predictions[dk.pair][label].tail(num_candles)) f = spy.stats.norm.fit(
self.dd.historic_predictions[dk.pair][label].fillna(0).tail(num_candles))
dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1] dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]
return return
@ -882,11 +883,7 @@ class IFreqaiModel(ABC):
if index >= fit_live_predictions_candles: if index >= fit_live_predictions_candles:
self.dd.historic_predictions[self.dk.pair] = ( self.dd.historic_predictions[self.dk.pair] = (
dk.full_df.iloc[index - fit_live_predictions_candles:index]) dk.full_df.iloc[index - fit_live_predictions_candles:index])
else:
self.dd.historic_predictions[self.dk.pair] = dk.full_df.iloc[:index]
self.fit_live_predictions(self.dk, self.dk.pair) self.fit_live_predictions(self.dk, self.dk.pair)
if index >= fit_live_predictions_candles:
for label in label_columns: for label in label_columns:
if dk.full_df[label].dtype == object: if dk.full_df[label].dtype == object:
continue continue
@ -899,6 +896,7 @@ class IFreqaiModel(ABC):
for extra_col in self.dk.data["extra_returns_per_train"]: for extra_col in self.dk.data["extra_returns_per_train"]:
dk.full_df.at[index, f"{extra_col}"] = ( dk.full_df.at[index, f"{extra_col}"] = (
self.dk.data["extra_returns_per_train"][extra_col]) self.dk.data["extra_returns_per_train"][extra_col])
return return
# Following methods which are overridden by user made prediction models. # Following methods which are overridden by user made prediction models.

View File

@ -259,3 +259,47 @@ def test_get_full_model_path(mocker, freqai_conf, model):
model_path = freqai.dk.get_full_models_path(freqai_conf) model_path = freqai.dk.get_full_models_path(freqai_conf)
assert model_path.is_dir() is True assert model_path.is_dir() is True
def test_save_backtesting_live_dataframe(mocker, freqai_conf):
    """
    Saving the live dataframe twice must leave two rows in the stored file,
    each matching the last row of the dataframe passed to the respective call.
    """
    freqai, full_df = make_unfiltered_dataframe(mocker, freqai_conf)
    # Same data minus its most recent row, used for the first save call.
    previous_df = full_df.drop(full_df.tail(1).index)
    freqai_conf.update({"save_live_data_backtest": True})

    # First save: the stored file is expected to hold a single row.
    freqai.dk.save_backtesting_live_dataframe(previous_df, "ADA/BTC")
    stored = freqai.dk.get_backtesting_live_dataframe()
    assert len(stored) == 1
    assert stored.iloc[-1, 0] == previous_df.iloc[-1, 0]

    # Second save with the complete dataframe: one more row is appended.
    freqai.dk.save_backtesting_live_dataframe(full_df, "ADA/BTC")
    stored = freqai.dk.get_backtesting_live_dataframe()
    assert len(stored) == 2
    assert stored.iloc[-1, 0] == full_df.iloc[-1, 0]
    assert stored.iloc[-2, 0] == full_df.iloc[-2, 0]
def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf):
    """
    The timerange derived from a stored live dataframe must match the
    expected start/stop timestamps of the test data.
    """
    freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf)
    freqai_conf.update({"save_live_data_backtest": True})

    kitchen = freqai.dk
    kitchen.set_backtesting_live_dataframe_path("ADA/BTC")
    kitchen.save_backtesting_live_dataframe_to_feather(dataframe)

    live_timerange = kitchen.get_timerange_from_backtesting_live_dataframe()
    assert live_timerange.startts == 1516406400
    assert live_timerange.stopts == 1517356500
def test_get_timerange_from_backtesting_live_dataframe_folder_not_found(mocker, freqai_conf):
    """
    Requesting the timerange without any saved live data must raise an
    OperationalException with the 'Saved live data not found' message.
    """
    freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf)
    expected_message = r'Saved live data not found.*'
    with pytest.raises(OperationalException, match=expected_message):
        freqai.dk.get_timerange_from_backtesting_live_dataframe()
def test_saved_live_bt_file_not_found(mocker, freqai_conf):
    """
    Loading the live backtesting dataframe when no file was written must
    raise an OperationalException mentioning the missing file.
    """
    freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf)
    expected_message = r'.*live backtesting dataframe file not found.*'
    with pytest.raises(OperationalException, match=expected_message):
        freqai.dk.get_backtesting_live_dataframe()

View File

@ -300,6 +300,61 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
shutil.rmtree(Path(freqai.dk.full_path)) shutil.rmtree(Path(freqai.dk.full_path))
def test_start_backtesting_from_saved_live_dataframe(mocker, freqai_conf, caplog):
    """
    Backtesting from a saved live dataframe must return a dataframe as long
    as the strategy dataframe, with a positive "&s_close" value on exactly as
    many rows as were stored in the live dataframe file.
    """
    freqai_conf.update({
        "save_live_data_backtest": True,
        "freqai_backtest_live_models": True,
    })
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
    strategy.freqai_info = freqai_conf.get("freqai", {})

    freqai = strategy.freqai
    freqai.live = False
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    history_range = TimeRange.parse_timerange("20180110-20180130")
    freqai.dd.load_all_pair_histories(history_range, freqai.dk)
    pair_range = TimeRange.parse_timerange("20180110-20180130")
    corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(pair_range, "LTC/BTC", freqai.dk)
    df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")

    # create a dummy live dataframe file with 10 rows
    live_rows = df.tail(10).copy()
    live_rows["&s_close"] = live_rows["close"] * 1.1
    freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC")
    freqai.dk.save_backtesting_live_dataframe_to_feather(live_rows)

    freqai.start_backtesting_from_live_saved_files(df, {"pair": "ADA/BTC"}, freqai.dk)

    result = freqai.dk.return_dataframe
    assert len(result) == len(df)
    # Count the rows carrying a positive prediction value.
    rows_with_prediction = result[result["&s_close"] > 0]
    assert len(rows_with_prediction) == len(live_rows)

    shutil.rmtree(Path(freqai.dk.full_path))
def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog):
    """
    `backtesting_fit_live_predictions` must add the "&-s_close_mean" and
    "&-s_close_std" columns to the data kitchen's full dataframe.
    """
    freqai_conf.get("freqai", {}).update({"fit_live_predictions_candles": 10})
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
    strategy.freqai_info = freqai_conf.get("freqai", {})

    freqai = strategy.freqai
    freqai.live = False
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    history_range = TimeRange.parse_timerange("20180128-20180130")
    freqai.dd.load_all_pair_histories(history_range, freqai.dk)
    pair_range = TimeRange.parse_timerange("20180129-20180130")
    corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(pair_range, "LTC/BTC", freqai.dk)
    df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")

    freqai.dk.pair = "ADA/BTC"
    freqai.dk.full_df = df

    # The statistics columns must be absent before the call ...
    columns_before = freqai.dk.full_df.columns
    assert "&-s_close_mean" not in columns_before
    assert "&-s_close_std" not in columns_before

    freqai.backtesting_fit_live_predictions(freqai.dk)

    # ... and present afterwards.
    columns_after = freqai.dk.full_df.columns
    assert "&-s_close_mean" in columns_after
    assert "&-s_close_std" in columns_after

    shutil.rmtree(Path(freqai.dk.full_path))
def test_follow_mode(mocker, freqai_conf): def test_follow_mode(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"timerange": "20180110-20180130"})