update code to freqai_backtest_live_models only from historic predictions

This commit is contained in:
Wagner Costa 2022-11-22 13:09:09 -03:00
parent fdc82af883
commit c01f25ddc9
9 changed files with 36 additions and 227 deletions

View File

@ -15,7 +15,6 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
| `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> **Datatype:** Positive integer. <br> Default: `0` (models never expire).
| `purge_old_models` | Delete obsolete models. <br> **Datatype:** Boolean. <br> Default: `False` (all historic models remain on disk).
| `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`. <br> **Datatype:** Boolean. <br> Default: `False` (no models are saved).
| `backtest_using_historic_predictions` | Reuse `historic_predictions` in backtesting with [Backtest live models](freqai-running.md#backtest_live_models)) option. <br> Default: `True`
| `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer.
| `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models. <br> **Datatype:** Boolean. <br> Default: `False`.
| `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`.

View File

@ -81,17 +81,9 @@ To save the models generated during a particular backtest so that you can start
### Backtest live models
FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study. For that, you have 2 options:
FreqAI allow you to reuse live historic predictions through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study.
1. Set `"backtest_using_historic_predictions"` to `True` in the config. With this option, FreqAI will reuse `historic_predictions` in backtesting. This option requires less disk space and backtesting will run faster.
2. Set `"purge_old_models"` to `False` and `"backtest_using_historic_predictions"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time.
The `--timerange` parameter must not be informed, as it will be automatically calculated through the training end dates of the models.
Each model has an identifier derived from the training end date. If you have only 1 model trained, FreqAI will backtest from the training end date until the current date. If you have more than 1 model, each model will perform the backtesting according to the training end date until the training end date of the next model and so on. For the last model, the period of the previous model will be used for the execution.
!!! Note
Currently, there is no checking for expired models, even if the `expired_hours` parameter is set.
The `--timerange` parameter must not be informed, as it will be automatically calculated through the data in historic predictions file.
### Downloading data to cover the full backtest period

View File

@ -715,7 +715,7 @@ class FreqaiDataDrawer:
return corr_dataframes, base_dataframes
def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange:
def get_timerange_from_live_historic_predictions(self) -> TimeRange:
"""
Returns timerange information based on historic predictions file
:return: timerange calculated from saved live data
@ -724,7 +724,6 @@ class FreqaiDataDrawer:
raise OperationalException(
'Historic predictions not found. Historic predictions data is required '
'to run backtest with the freqai-backtest-live-models option '
'and backtest_using_historic_predictions config option as true'
)
self.load_historic_predictions_from_disk()

View File

@ -1,7 +1,7 @@
import copy
import logging
import shutil
from datetime import datetime, timedelta, timezone
from datetime import datetime, timezone
from math import cos, sin
from pathlib import Path
from typing import Any, Dict, List, Tuple
@ -86,14 +86,7 @@ class FreqaiDataKitchen:
if not self.live:
self.full_path = self.get_full_models_path(self.config)
if self.backtest_live_models:
if self.pair and not (
self.freqai_config.get("backtest_using_historic_predictions", True)
):
self.set_timerange_from_ready_models()
(self.training_timeranges,
self.backtesting_timeranges) = self.split_timerange_live_models()
else:
if not self.backtest_live_models:
self.full_timerange = self.create_fulltimerange(
self.config["timerange"], self.freqai_config.get("train_period_days", 0)
)
@ -458,28 +451,28 @@ class FreqaiDataKitchen:
# print(tr_training_list, tr_backtesting_list)
return tr_training_list_timerange, tr_backtesting_list_timerange
def split_timerange_live_models(
self
) -> Tuple[list, list]:
# def split_timerange_live_models(
# self
# ) -> Tuple[list, list]:
tr_backtesting_list_timerange = []
asset = self.pair.split("/")[0]
if asset not in self.backtest_live_models_data["assets_end_dates"]:
raise OperationalException(
f"Model not available for pair {self.pair}. "
"Please, try again after removing this pair from the configuration file."
)
asset_data = self.backtest_live_models_data["assets_end_dates"][asset]
backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"]
model_end_dates = [x for x in asset_data]
model_end_dates.append(backtesting_timerange.stopts)
model_end_dates.sort()
for index, item in enumerate(model_end_dates):
if len(model_end_dates) > (index + 1):
tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1])
tr_backtesting_list_timerange.append(tr_to_add)
# tr_backtesting_list_timerange = []
# asset = self.pair.split("/")[0]
# if asset not in self.backtest_live_models_data["assets_end_dates"]:
# raise OperationalException(
# f"Model not available for pair {self.pair}. "
# "Please, try again after removing this pair from the configuration file."
# )
# asset_data = self.backtest_live_models_data["assets_end_dates"][asset]
# backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"]
# model_end_dates = [x for x in asset_data]
# model_end_dates.append(backtesting_timerange.stopts)
# model_end_dates.sort()
# for index, item in enumerate(model_end_dates):
# if len(model_end_dates) > (index + 1):
# tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1])
# tr_backtesting_list_timerange.append(tr_to_add)
return tr_backtesting_list_timerange, tr_backtesting_list_timerange
# return tr_backtesting_list_timerange, tr_backtesting_list_timerange
def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame:
"""
@ -1371,17 +1364,6 @@ class FreqaiDataKitchen:
)
return False
def set_timerange_from_ready_models(self):
backtesting_timerange, \
assets_end_dates = (
self.get_timerange_and_assets_end_dates_from_ready_models(self.full_path))
self.backtest_live_models_data = {
"backtesting_timerange": backtesting_timerange,
"assets_end_dates": assets_end_dates
}
return
def get_full_models_path(self, config: Config) -> Path:
"""
Returns default FreqAI model path
@ -1392,88 +1374,6 @@ class FreqaiDataKitchen:
config["user_data_dir"] / "models" / str(freqai_config.get("identifier"))
)
def get_timerange_and_assets_end_dates_from_ready_models(
self, models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]:
"""
Returns timerange information based on a FreqAI model directory
:param models_path: FreqAI model path
:return: a Tuple with (Timerange calculated from directory and
a Dict with pair and model end training dates info)
"""
all_models_end_dates = []
assets_end_dates: Dict[str, Any] = self.get_assets_timestamps_training_from_ready_models(
models_path)
for key in assets_end_dates:
for model_end_date in assets_end_dates[key]:
if model_end_date not in all_models_end_dates:
all_models_end_dates.append(model_end_date)
if len(all_models_end_dates) == 0:
raise OperationalException(
'At least 1 saved model is required to '
'run backtest with the freqai-backtest-live-models option'
)
if len(all_models_end_dates) == 1:
logger.warning(
"Only 1 model was found. Backtesting will run with the "
"timerange from the end of the training date to the current date"
)
finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp())
if len(all_models_end_dates) > 1:
# After last model end date, use the same period from previous model
# to finish the backtest
all_models_end_dates.sort(reverse=True)
finish_timestamp = all_models_end_dates[0] + \
(all_models_end_dates[0] - all_models_end_dates[1])
all_models_end_dates.append(finish_timestamp)
all_models_end_dates.sort()
start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates),
timezone.utc).timetuple()[:3], tzinfo=timezone.utc))
end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates),
timezone.utc).timetuple()[:3], tzinfo=timezone.utc))
# add 1 day to string timerange to ensure BT module will load all dataframe data
end_date = end_date + timedelta(days=1)
backtesting_timerange = TimeRange(
'date', 'date', int(start_date.timestamp()), int(end_date.timestamp())
)
return backtesting_timerange, assets_end_dates
def get_assets_timestamps_training_from_ready_models(
self, models_path: Path) -> Dict[str, Any]:
"""
Scan the models path and returns all assets end training dates (timestamp)
:param models_path: FreqAI model path
:return: a Dict with asset and model end training dates info
"""
assets_end_dates: Dict[str, Any] = {}
if not models_path.is_dir():
raise OperationalException(
'Model folders not found. Saved models are required '
'to run backtest with the freqai-backtest-live-models option'
)
for model_dir in models_path.iterdir():
if str(model_dir.name).startswith("sub-train"):
model_end_date = int(model_dir.name.split("_")[1])
asset = model_dir.name.split("_")[0].replace("sub-train-", "")
model_file_name = (
f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}"
"_model.joblib"
)
model_path_file = Path(model_dir / model_file_name)
if model_path_file.is_file():
if asset not in assets_end_dates:
assets_end_dates[asset] = []
assets_end_dates[asset].append(model_end_date)
return assets_end_dates
def remove_special_chars_from_feature_names(self, dataframe: pd.DataFrame) -> pd.DataFrame:
"""
Remove all special characters from feature strings (:)

View File

@ -68,10 +68,6 @@ class IFreqaiModel(ABC):
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
self.backtest_using_historic_predictions: bool = self.freqai_info.get(
"backtest_using_historic_predictions", True)
if self.backtest_using_historic_predictions:
logger.info('Backtesting live models configured to use historic predictions.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
# set current candle to arbitrary historical date
@ -148,23 +144,18 @@ class IFreqaiModel(ABC):
elif not self.follow_mode:
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
if self.dk.backtest_live_models:
if self.backtest_using_historic_predictions:
logger.info(
"Backtesting using historic predictions (live models)")
else:
logger.info(
f"Backtesting {len(self.dk.backtesting_timeranges)} "
"timeranges (live models)")
logger.info(
"Backtesting using historic predictions (live models)")
else:
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
if not self.backtest_using_historic_predictions:
if not self.config.get("freqai_backtest_live_models", False):
dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
else:
dk = self.start_backtesting_from_live_saved_files(
dk = self.start_backtesting_from_historic_predictions(
dataframe, metadata, self.dk)
dataframe = dk.return_dataframe
@ -330,7 +321,7 @@ class IFreqaiModel(ABC):
return dk
def start_backtesting_from_live_saved_files(
def start_backtesting_from_historic_predictions(
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
) -> FreqaiDataKitchen:
"""

View File

@ -230,11 +230,6 @@ def get_timerange_backtest_live_models(config: Config) -> str:
"""
dk = FreqaiDataKitchen(config)
models_path = dk.get_full_models_path(config)
timerange: TimeRange = TimeRange()
if not config.get("freqai", {}).get("backtest_using_historic_predictions", True):
timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path)
else:
dd = FreqaiDataDrawer(models_path, config)
timerange = dd.get_timerange_from_backtesting_live_dataframe()
dd = FreqaiDataDrawer(models_path, config)
timerange = dd.get_timerange_from_live_historic_predictions()
return timerange.timerange_str

View File

@ -81,7 +81,7 @@ def test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testda
bt_config = setup_optimize_configuration(args, RunMode.BACKTEST)
with pytest.raises(OperationalException,
match=r".* Saved models are required to run backtest .*"):
match=r".* Historic predictions data is required to run backtest .*"):
Backtesting(bt_config)
Backtesting.cleanup()

View File

@ -98,7 +98,7 @@ def test_use_strategy_to_populate_indicators(mocker, freqai_conf):
shutil.rmtree(Path(freqai.dk.full_path))
def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf):
def test_get_timerange_from_live_historic_predictions(mocker, freqai_conf):
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
@ -115,7 +115,7 @@ def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf):
freqai.dd.save_historic_predictions_to_disk()
freqai.dd.save_global_metadata_to_disk({"start_dry_live_date": 1516406400})
timerange = freqai.dd.get_timerange_from_backtesting_live_dataframe()
timerange = freqai.dd.get_timerange_from_live_historic_predictions()
assert timerange.startts == 1516406400
assert timerange.stopts == 1517356500
@ -129,4 +129,4 @@ def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_co
OperationalException,
match=r'Historic predictions not found.*'
):
freqai.dd.get_timerange_from_backtesting_live_dataframe()
freqai.dd.get_timerange_from_live_historic_predictions()

View File

@ -9,7 +9,6 @@ from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.utils import get_timerange_backtest_live_models
from tests.conftest import get_patched_exchange, log_has_re
from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy,
make_data_dictionary, make_unfiltered_dataframe)
@ -166,72 +165,6 @@ def test_make_train_test_datasets(mocker, freqai_conf):
assert len(data_dictionary['train_features'].index) == 1916
def test_get_pairs_timestamp_validation(mocker, freqai_conf):
exchange = get_patched_exchange(mocker, freqai_conf)
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf)
freqai_conf['freqai'].update({"identifier": "invalid_id"})
model_path = freqai.dk.get_full_models_path(freqai_conf)
with pytest.raises(
OperationalException,
match=r'.*required to run backtest with the freqai-backtest-live-models.*'
):
freqai.dk.get_assets_timestamps_training_from_ready_models(model_path)
@pytest.mark.parametrize('model', [
'LightGBMRegressor'
])
def test_get_timerange_from_ready_models(mocker, freqai_conf, model):
freqai_conf.update({"freqaimodel": model})
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"strategy": "freqai_test_strat"})
freqai_conf.get("freqai", {}).update({"backtest_using_historic_predictions": False})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180101-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
freqai.dd.pair_dict = MagicMock()
data_load_timerange = TimeRange.parse_timerange("20180101-20180130")
# 1516233600 (2018-01-18 00:00) - Start Training 1
# 1516406400 (2018-01-20 00:00) - End Training 1 (Backtest slice 1)
# 1516579200 (2018-01-22 00:00) - End Training 2 (Backtest slice 2)
# 1516838400 (2018-01-25 00:00) - End Timerange
new_timerange = TimeRange("date", "date", 1516233600, 1516406400)
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
new_timerange = TimeRange("date", "date", 1516406400, 1516579200)
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
model_path = freqai.dk.get_full_models_path(freqai_conf)
(backtesting_timerange,
pairs_end_dates) = freqai.dk.get_timerange_and_assets_end_dates_from_ready_models(
models_path=model_path)
assert len(pairs_end_dates["ADA"]) == 2
assert backtesting_timerange.startts == 1516406400
assert backtesting_timerange.stopts == 1516838400
backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf)
assert backtesting_string_timerange == '20180120-20180125'
@pytest.mark.parametrize('model', [
'LightGBMRegressor'
])