update code to use historic_predictions for freqai_backtest_live_models

This commit is contained in:
Wagner Costa Santos 2022-11-19 14:15:58 -03:00
parent 3d3195847c
commit 80d070e9ee
8 changed files with 86 additions and 174 deletions

View File

@ -15,7 +15,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
| `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> **Datatype:** Positive integer. <br> Default: `0` (models never expire).
| `purge_old_models` | Delete obsolete models. <br> **Datatype:** Boolean. <br> Default: `False` (all historic models remain on disk).
| `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`. <br> **Datatype:** Boolean. <br> Default: `False` (no models are saved).
| `save_live_data_backtest` | Save the live dataframe during dry/live runs to reuse it in backtesting with the [Backtest live models](freqai-running.md#backtest_live_models) option. <br> **Datatype:** Boolean. <br> Default: `False`.
| `backtest_using_historic_predictions` | Reuse `historic_predictions` in backtesting with the [Backtest live models](freqai-running.md#backtest_live_models) option. <br> **Datatype:** Boolean. <br> Default: `True`.
| `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer.
| `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models. <br> **Datatype:** Boolean. <br> Default: `False`.
| `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`.

View File

@ -83,8 +83,8 @@ To save the models generated during a particular backtest so that you can start
FreqAI allows you to reuse live models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/live runs for comparison or other study. For that, you have 2 options:
1. Set `"save_live_data_backtest"` to `True` in the config. With this option, FreqAI will save the live dataframe for reuse in backtesting. This option requires less disk space and backtesting will run faster.
2. Set `"purge_old_models"` to `False` and `"save_live_data_backtest"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time.
1. Set `"backtest_using_historic_predictions"` to `True` in the config. With this option, FreqAI will reuse `historic_predictions` in backtesting. This option requires less disk space and backtesting will run faster.
2. Set `"purge_old_models"` to `False` and `"backtest_using_historic_predictions"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time.
The `--timerange` parameter must not be specified, as it is calculated automatically from the training end dates of the models.

View File

@ -81,6 +81,7 @@ class FreqaiDataDrawer:
self.historic_predictions_bkp_path = Path(
self.full_path / "historic_predictions.backup.pkl")
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.global_metadata_path = Path(self.full_path / "global_metadata.json")
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
self.follow_mode = follow_mode
if follow_mode:
@ -125,6 +126,17 @@ class FreqaiDataDrawer:
self.update_metric_tracker('cpu_load5min', load5 / cpus, pair)
self.update_metric_tracker('cpu_load15min', load15 / cpus, pair)
def load_global_metadata_from_disk(self):
    """
    Read the global metadata json stored at `self.global_metadata_path`.
    :return: dict parsed from the global metadata file, or an empty dict when
    that file does not exist.
    """
    if not self.global_metadata_path.is_file():
        return {}

    with open(self.global_metadata_path, "r") as metadata_file:
        return rapidjson.load(metadata_file, number_mode=rapidjson.NM_NATIVE)
def load_drawer_from_disk(self):
"""
Locate and load a previously saved data drawer full of all pair model metadata in
@ -225,6 +237,15 @@ class FreqaiDataDrawer:
rapidjson.dump(self.follower_dict, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
def save_global_metadata_to_disk(self, metadata: Dict[str, Any]):
    """
    Write the global metadata dict as json to `self.global_metadata_path`.
    The write happens while `self.save_lock` is held.
    :param metadata: global metadata dict to be written
    """
    with self.save_lock, open(self.global_metadata_path, 'w') as metadata_file:
        rapidjson.dump(
            metadata,
            metadata_file,
            default=self.np_encoder,
            number_mode=rapidjson.NM_NATIVE,
        )
def create_follower_dict(self):
"""
Create or dictionary for each follower to maintain unique persistent prediction targets

View File

@ -9,7 +9,7 @@ from typing import Any, Dict, List, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame, read_feather
from pandas import DataFrame
from scipy import stats
from sklearn import linear_model
from sklearn.cluster import DBSCAN
@ -74,9 +74,6 @@ class FreqaiDataKitchen:
self.training_features_list: List = []
self.model_filename: str = ""
self.backtesting_results_path = Path()
self.backtesting_live_model_folder_path = Path()
self.backtesting_live_model_path = Path()
self.backtesting_live_model_bkp_path = Path()
self.backtest_predictions_folder: str = "backtesting_predictions"
self.live = live
self.pair = pair
@ -90,7 +87,9 @@ class FreqaiDataKitchen:
self.full_path = self.get_full_models_path(self.config)
if self.backtest_live_models:
if self.pair:
if self.pair and not (
self.freqai_config.get("backtest_using_historic_predictions", True)
):
self.set_timerange_from_ready_models()
(self.training_timeranges,
self.backtesting_timeranges) = self.split_timerange_live_models()
@ -1488,101 +1487,30 @@ class FreqaiDataKitchen:
return dataframe
def set_backtesting_live_dataframe_folder_path(
    self
) -> None:
    """
    Point `backtesting_live_model_folder_path` at the "live_data" folder located
    inside the backtest predictions folder of the current model path.
    """
    predictions_root = self.full_path / self.backtest_predictions_folder
    self.backtesting_live_model_folder_path = Path(predictions_root / "live_data")
def set_backtesting_live_dataframe_path(
    self, pair: str
) -> None:
    """
    Set the live backtesting dataframe file path and its backup path for a pair,
    creating the live data folder when it does not exist yet.
    :param pair: current pair
    """
    self.set_backtesting_live_dataframe_folder_path()
    live_folder = self.backtesting_live_model_folder_path
    # exist_ok=True already tolerates an existing folder, so no is_dir() check is needed
    live_folder.mkdir(parents=True, exist_ok=True)

    # e.g. "ADA/BTC:USDT" -> "ada_btc": drop the ":..." suffix and the "/" separator
    pair_path = pair.split(":")[0].replace("/", "_").lower()
    file_name = f"live_backtesting_{pair_path}.feather"
    # The folder path is itself built from full_path; prefixing full_path again
    # would duplicate it whenever full_path is relative and point the files at
    # a folder that was never created.
    self.backtesting_live_model_path = Path(live_folder / file_name)
    self.backtesting_live_model_bkp_path = Path(
        live_folder / file_name.replace(".feather", ".backup.feather"))
def save_backtesting_live_dataframe(
    self, dataframe: DataFrame, pair: str
) -> None:
    """
    Append the last row of the live dataframe to the pair's feather file and
    refresh the backup copy of that file.
    :param dataframe: current live dataframe
    :param pair: current pair
    """
    self.set_backtesting_live_dataframe_path(pair)
    newest_candle = dataframe.tail(1)

    to_store = newest_candle
    if self.backtesting_live_model_path.is_file():
        # a file from earlier candles exists: extend it with the newest row
        to_store = pd.concat([self.get_backtesting_live_dataframe(), newest_candle])
    self.save_backtesting_live_dataframe_to_feather(to_store)

    shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path)
def save_backtesting_live_dataframe_to_feather(self, dataframe: DataFrame):
    """
    Write the dataframe, with its index reset and dropped, to the live
    backtesting feather file using lz4 compression at level 9.
    :param dataframe: dataframe to be written
    """
    reindexed = dataframe.reset_index(drop=True)
    reindexed.to_feather(
        self.backtesting_live_model_path, compression_level=9, compression='lz4')
def get_backtesting_live_dataframe(
    self
) -> DataFrame:
    """
    Load the live backtesting dataframe stored in feather format.
    When reading the main file fails, the backup file is read instead.
    :return: saved dataframe from a previous dry/live run
    :raises OperationalException: if the main feather file does not exist
    """
    if not self.backtesting_live_model_path.is_file():
        raise OperationalException(
            "Saved live backtesting dataframe file not found."
        )

    try:
        return read_feather(self.backtesting_live_model_path)
    except Exception:
        # best-effort recovery: any read failure falls back to the backup copy
        return read_feather(self.backtesting_live_model_bkp_path)
def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange:
"""
Returns timerange information based on live backtesting dataframe file
Returns timerange information based on historic predictions file
:return: timerange calculated from saved live data
"""
all_assets_start_dates = []
all_assets_end_dates = []
self.set_backtesting_live_dataframe_folder_path()
if not self.backtesting_live_model_folder_path.is_dir():
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
dd = FreqaiDataDrawer(Path(self.full_path), self.config)
if not dd.historic_predictions_path.is_file():
raise OperationalException(
'Saved live data not found. Saved lived data is required '
'Historic predictions not found. Historic predictions data is required '
'to run backtest with the freqai-backtest-live-models option '
'and save_live_data_backtest config option as true'
'and backtest_using_historic_predictions config option as true'
)
for file_in_dir in self.backtesting_live_model_folder_path.iterdir():
if file_in_dir.is_file() and "backup" not in file_in_dir.name:
saved_dataframe = read_feather(file_in_dir)
all_assets_start_dates.append(saved_dataframe.date.min())
all_assets_end_dates.append(saved_dataframe.date.max())
start_date = min(all_assets_start_dates)
end_date = max(all_assets_end_dates)
dd.load_historic_predictions_from_disk()
all_pairs_end_dates = []
for pair in dd.historic_predictions:
pair_historic_data = dd.historic_predictions[pair]
all_pairs_end_dates.append(pair_historic_data.date_pred.max())
global_metadata = dd.load_global_metadata_from_disk()
start_date = datetime.fromtimestamp(int(global_metadata["start_dry_live_date"]))
end_date = max(all_pairs_end_dates)
# add 1 day to string timerange to ensure BT module will load all dataframe data
end_date = end_date + timedelta(days=1)
backtesting_timerange = TimeRange(

View File

@ -53,6 +53,7 @@ class IFreqaiModel(ABC):
def __init__(self, config: Config) -> None:
self.config = config
self.metadata: Dict[str, Any] = {}
self.assert_config(self.config)
self.freqai_info: Dict[str, Any] = config["freqai"]
self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get(
@ -67,10 +68,10 @@ class IFreqaiModel(ABC):
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
self.save_live_data_backtest: bool = self.freqai_info.get(
"save_live_data_backtest", False)
if self.save_live_data_backtest:
logger.info('Live configured to save data for backtest.')
self.backtest_using_historic_predictions: bool = self.freqai_info.get(
"backtest_using_historic_predictions", True)
if self.backtest_using_historic_predictions:
logger.info('Backtesting live models configured to use historic predictions.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
# set current candle to arbitrary historical date
@ -103,6 +104,7 @@ class IFreqaiModel(ABC):
self.get_corr_dataframes: bool = True
self._threads: List[threading.Thread] = []
self._stop_event = threading.Event()
self.metadata = self.dd.load_global_metadata_from_disk()
record_params(config, self.full_path)
@ -136,6 +138,7 @@ class IFreqaiModel(ABC):
self.inference_timer('start')
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
dk = self.start_live(dataframe, metadata, strategy, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
# For backtesting, each pair enters and then gets trained for each window along the
# sliding window defined by "train_period_days" (training window) and "live_retrain_hours"
@ -145,14 +148,19 @@ class IFreqaiModel(ABC):
elif not self.follow_mode:
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
if self.dk.backtest_live_models:
logger.info(
f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (live models)")
if self.backtest_using_historic_predictions:
logger.info(
"Backtesting using historic predictions (live models)")
else:
logger.info(
f"Backtesting {len(self.dk.backtesting_timeranges)} "
"timeranges (live models)")
else:
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
if not self.save_live_data_backtest:
if not self.backtest_using_historic_predictions:
dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
else:
@ -163,8 +171,7 @@ class IFreqaiModel(ABC):
self.clean_up()
if self.live:
self.inference_timer('stop', metadata["pair"])
if self.save_live_data_backtest:
dk.save_backtesting_live_dataframe(dataframe, metadata["pair"])
self.set_start_dry_live_date(dataframe)
return dataframe
@ -335,14 +342,12 @@ class IFreqaiModel(ABC):
"""
pair = metadata["pair"]
dk.return_dataframe = dataframe
self.dk.set_backtesting_live_dataframe_path(pair)
saved_dataframe = self.dk.get_backtesting_live_dataframe()
columns_to_drop = list(set(dk.return_dataframe.columns).difference(
["date", "open", "high", "low", "close", "volume"]))
saved_dataframe = saved_dataframe.drop(
columns=["open", "high", "low", "close", "volume"])
saved_dataframe = self.dd.historic_predictions[pair]
columns_to_drop = list(set(saved_dataframe.columns).intersection(
dk.return_dataframe.columns))
dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
dk.return_dataframe = pd.merge(dk.return_dataframe, saved_dataframe, how='left', on='date')
dk.return_dataframe = pd.merge(
dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred")
# dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0)
return dk
@ -886,6 +891,22 @@ class IFreqaiModel(ABC):
return
def update_metadata(self, metadata: Dict[str, Any]):
    """
    Save the given global metadata through the data drawer and keep it as the
    in-memory `self.metadata`.
    :param metadata: new global metadata dict
    """
    drawer = self.dd
    drawer.save_global_metadata_to_disk(metadata)
    self.metadata = metadata
def set_start_dry_live_date(self, live_dataframe: DataFrame):
    """
    Record the date of the last row of the live dataframe as the
    "start_dry_live_date" metadata entry, unless that entry already exists.
    :param live_dataframe: current live dataframe; its "date" column is read
    """
    key_name = "start_dry_live_date"
    if key_name in self.metadata:
        # already recorded: the first stored value is kept
        return

    last_candle_date = live_dataframe.tail(1)["date"].values[0]
    self.metadata[key_name] = int(pd.to_datetime(last_candle_date).timestamp())
    self.update_metadata(self.metadata)
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.

View File

@ -230,7 +230,7 @@ def get_timerange_backtest_live_models(config: Config) -> str:
dk = FreqaiDataKitchen(config)
models_path = dk.get_full_models_path(config)
timerange: TimeRange = TimeRange()
if not config.get("save_live_data_backtest", False):
if not config.get("freqai", {}).get("backtest_using_historic_predictions", True):
timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path)
else:
timerange = dk.get_timerange_from_backtesting_live_dataframe()

View File

@ -261,45 +261,18 @@ def test_get_full_model_path(mocker, freqai_conf, model):
assert model_path.is_dir() is True
def test_save_backtesting_live_dataframe(mocker, freqai_conf):
    """
    Every call to save_backtesting_live_dataframe must add exactly the last row
    of the dataframe it receives to the saved live dataframe.
    """
    freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf)
    shortened = dataframe.copy()
    shortened.drop(dataframe.tail(1).index, inplace=True)
    freqai_conf.update({"save_live_data_backtest": True})

    # first save: only the last row of the shortened dataframe is stored
    freqai.dk.save_backtesting_live_dataframe(shortened, "ADA/BTC")
    stored = freqai.dk.get_backtesting_live_dataframe()
    assert len(stored) == 1
    assert stored.iloc[-1, 0] == shortened.iloc[-1, 0]

    # second save: the last row of the full dataframe is appended
    freqai.dk.save_backtesting_live_dataframe(dataframe, "ADA/BTC")
    stored = freqai.dk.get_backtesting_live_dataframe()
    assert len(stored) == 2
    for offset in (-1, -2):
        assert stored.iloc[offset, 0] == dataframe.iloc[offset, 0]
def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf):
freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf)
freqai_conf.update({"save_live_data_backtest": True})
freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC")
freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe)
freqai_conf.update({"backtest_using_historic_predictions": True})
timerange = freqai.dk.get_timerange_from_backtesting_live_dataframe()
assert timerange.startts == 1516406400
assert timerange.stopts == 1517356500
def test_get_timerange_from_backtesting_live_dataframe_folder_not_found(mocker, freqai_conf):
def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_conf):
freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf)
with pytest.raises(
OperationalException,
match=r'Saved live data not found.*'
match=r'Historic predictions not found.*'
):
freqai.dk.get_timerange_from_backtesting_live_dataframe()
def test_saved_live_bt_file_not_found(mocker, freqai_conf):
    """Loading the live dataframe without a saved file must raise OperationalException."""
    freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf)
    expected_message = r'.*live backtesting dataframe file not found.*'
    with pytest.raises(OperationalException, match=expected_message):
        freqai.dk.get_backtesting_live_dataframe()

View File

@ -300,37 +300,6 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
shutil.rmtree(Path(freqai.dk.full_path))
def test_start_backtesting_from_saved_live_dataframe(mocker, freqai_conf, caplog):
    """
    Backtesting from a saved live dataframe must return as many rows as the
    input dataframe, with positive "&s_close" values on exactly as many rows
    as were saved in the live dataframe file.
    """
    freqai_conf.update({
        "save_live_data_backtest": True,
        "freqai_backtest_live_models": True,
    })
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
    strategy.freqai_info = freqai_conf.get("freqai", {})
    freqai = strategy.freqai
    freqai.live = False
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    full_timerange = TimeRange.parse_timerange("20180110-20180130")
    freqai.dd.load_all_pair_histories(full_timerange, freqai.dk)
    pair_timerange = TimeRange.parse_timerange("20180110-20180130")
    corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(
        pair_timerange, "LTC/BTC", freqai.dk)
    df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")

    # create a dummy live dataframe file with 10 rows
    live_predictions = df.tail(10).copy()
    live_predictions["&s_close"] = live_predictions["close"] * 1.1
    freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC")
    freqai.dk.save_backtesting_live_dataframe_to_feather(live_predictions)

    freqai.start_backtesting_from_live_saved_files(df, {"pair": "ADA/BTC"}, freqai.dk)

    result = freqai.dk.return_dataframe
    assert len(result) == len(df)
    assert len(result[result["&s_close"] > 0]) == (
        len(live_predictions))
    shutil.rmtree(Path(freqai.dk.full_path))
def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog):
freqai_conf.get("freqai", {}).update({"fit_live_predictions_candles": 10})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)