diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md
index 2961b1b8d..059d56a1f 100644
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -15,7 +15,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
| `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old.
**Datatype:** Positive integer.
Default: `0` (models never expire).
| `purge_old_models` | Delete obsolete models.
**Datatype:** Boolean.
Default: `False` (all historic models remain on disk).
| `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`.
**Datatype:** Boolean.
Default: `False` (no models are saved).
-| `save_live_data_backtest` | Save live dataframe during dry/live runs to reuse in backtesting with [Backtest live models](freqai-running.md#backtest_live_models)) option.
+| `backtest_using_historic_predictions` | Reuse `historic_predictions` in backtesting with the [Backtest live models](freqai-running.md#backtest_live_models) option.
Default: `True`
| `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)).
**Datatype:** Positive integer.
| `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models.
**Datatype:** Boolean.
Default: `False`.
| `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)).
**Datatype:** Boolean.
Default: `False`.
diff --git a/docs/freqai-running.md b/docs/freqai-running.md
index 4c90a4885..d777b180e 100644
--- a/docs/freqai-running.md
+++ b/docs/freqai-running.md
@@ -83,8 +83,8 @@ To save the models generated during a particular backtest so that you can start
FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study. For that, you have 2 options:
-1. Set `"save_live_data_backtest"` to `True` in the config. With this option, FreqAI will save the live dataframe for reuse in backtesting. This option requires less disk space and backtesting will run faster.
-2. Set `"purge_old_models"` to `False` and `"save_live_data_backtest"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time.
+1. Set `"backtest_using_historic_predictions"` to `True` in the config. With this option, FreqAI will reuse `historic_predictions` in backtesting. This option requires less disk space and backtesting will run faster.
+2. Set `"purge_old_models"` to `False` and `"backtest_using_historic_predictions"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time.
The `--timerange` parameter must not be informed, as it will be automatically calculated through the training end dates of the models.
diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py
index 038ddaf2e..e83b05aaa 100644
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -81,6 +81,7 @@ class FreqaiDataDrawer:
self.historic_predictions_bkp_path = Path(
self.full_path / "historic_predictions.backup.pkl")
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
+ self.global_metadata_path = Path(self.full_path / "global_metadata.json")
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
self.follow_mode = follow_mode
if follow_mode:
@@ -125,6 +126,17 @@ class FreqaiDataDrawer:
self.update_metric_tracker('cpu_load5min', load5 / cpus, pair)
self.update_metric_tracker('cpu_load15min', load15 / cpus, pair)
+    def load_global_metadata_from_disk(self):
+        """
+        Read the global metadata json stored at `self.global_metadata_path`.
+        :return: dict with the saved metadata, or an empty dict if no file exists
+        """
+        metadata_file = self.global_metadata_path
+        if not metadata_file.is_file():
+            return {}
+        with open(metadata_file, "r") as fp:
+            return rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
+
def load_drawer_from_disk(self):
"""
Locate and load a previously saved data drawer full of all pair model metadata in
@@ -225,6 +237,15 @@ class FreqaiDataDrawer:
rapidjson.dump(self.follower_dict, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
+    def save_global_metadata_to_disk(self, metadata: Dict[str, Any]):
+        """
+        Write the global metadata dict as json to `self.global_metadata_path`
+        :param metadata: global metadata dict to persist
+        """
+        with self.save_lock, open(self.global_metadata_path, 'w') as fp:
+            rapidjson.dump(metadata, fp, default=self.np_encoder,
+                           number_mode=rapidjson.NM_NATIVE)
+
def create_follower_dict(self):
"""
Create or dictionary for each follower to maintain unique persistent prediction targets
diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index be2fb68b1..641c95725 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -9,7 +9,7 @@ from typing import Any, Dict, List, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
-from pandas import DataFrame, read_feather
+from pandas import DataFrame
from scipy import stats
from sklearn import linear_model
from sklearn.cluster import DBSCAN
@@ -74,9 +74,6 @@ class FreqaiDataKitchen:
self.training_features_list: List = []
self.model_filename: str = ""
self.backtesting_results_path = Path()
- self.backtesting_live_model_folder_path = Path()
- self.backtesting_live_model_path = Path()
- self.backtesting_live_model_bkp_path = Path()
self.backtest_predictions_folder: str = "backtesting_predictions"
self.live = live
self.pair = pair
@@ -90,7 +87,9 @@ class FreqaiDataKitchen:
self.full_path = self.get_full_models_path(self.config)
if self.backtest_live_models:
- if self.pair:
+ if self.pair and not (
+ self.freqai_config.get("backtest_using_historic_predictions", True)
+ ):
self.set_timerange_from_ready_models()
(self.training_timeranges,
self.backtesting_timeranges) = self.split_timerange_live_models()
@@ -1488,101 +1487,30 @@ class FreqaiDataKitchen:
return dataframe
- def set_backtesting_live_dataframe_folder_path(
- self
- ) -> None:
- """
- Set live backtesting dataframe path
- :param pair: current pair
- """
- self.backtesting_live_model_folder_path = Path(
- self.full_path / self.backtest_predictions_folder / "live_data")
-
- def set_backtesting_live_dataframe_path(
- self, pair: str
- ) -> None:
- """
- Set live backtesting dataframe path
- :param pair: current pair
- """
- self.set_backtesting_live_dataframe_folder_path()
- if not self.backtesting_live_model_folder_path.is_dir():
- self.backtesting_live_model_folder_path.mkdir(parents=True, exist_ok=True)
-
- pair_path = pair.split(":")[0].replace("/", "_").lower()
- file_name = f"live_backtesting_{pair_path}.feather"
- self.backtesting_live_model_path = Path(
- self.full_path /
- self.backtesting_live_model_folder_path /
- file_name)
- self.backtesting_live_model_bkp_path = Path(
- self.full_path /
- self.backtesting_live_model_folder_path /
- file_name.replace(".feather", ".backup.feather"))
-
- def save_backtesting_live_dataframe(
- self, dataframe: DataFrame, pair: str
- ) -> None:
- """
- Save live backtesting dataframe to feather file format
- :param dataframe: current live dataframe
- :param pair: current pair
- """
- self.set_backtesting_live_dataframe_path(pair)
- last_row_df = dataframe.tail(1)
- if self.backtesting_live_model_path.is_file():
- saved_dataframe = self.get_backtesting_live_dataframe()
- concat_dataframe = pd.concat([saved_dataframe, last_row_df])
- self.save_backtesting_live_dataframe_to_feather(concat_dataframe)
- else:
- self.save_backtesting_live_dataframe_to_feather(last_row_df)
-
- shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path)
-
- def save_backtesting_live_dataframe_to_feather(self, dataframe: DataFrame):
- dataframe.reset_index(drop=True).to_feather(
- self.backtesting_live_model_path, compression_level=9, compression='lz4')
-
- def get_backtesting_live_dataframe(
- self
- ) -> DataFrame:
- """
- Get live backtesting dataframe from feather file format
- return: saved dataframe from previous dry/run or live
- """
- if self.backtesting_live_model_path.is_file():
- saved_dataframe = DataFrame()
- try:
- saved_dataframe = read_feather(self.backtesting_live_model_path)
- except Exception:
- saved_dataframe = read_feather(self.backtesting_live_model_bkp_path)
- return saved_dataframe
- else:
- raise OperationalException(
- "Saved live backtesting dataframe file not found."
- )
-
def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange:
"""
- Returns timerange information based on live backtesting dataframe file
+ Returns timerange information based on historic predictions file
:return: timerange calculated from saved live data
"""
- all_assets_start_dates = []
- all_assets_end_dates = []
- self.set_backtesting_live_dataframe_folder_path()
- if not self.backtesting_live_model_folder_path.is_dir():
+ from freqtrade.freqai.data_drawer import FreqaiDataDrawer
+ dd = FreqaiDataDrawer(Path(self.full_path), self.config)
+ if not dd.historic_predictions_path.is_file():
raise OperationalException(
- 'Saved live data not found. Saved lived data is required '
+ 'Historic predictions not found. Historic predictions data is required '
'to run backtest with the freqai-backtest-live-models option '
- 'and save_live_data_backtest config option as true'
+ 'and backtest_using_historic_predictions config option as true'
)
- for file_in_dir in self.backtesting_live_model_folder_path.iterdir():
- if file_in_dir.is_file() and "backup" not in file_in_dir.name:
- saved_dataframe = read_feather(file_in_dir)
- all_assets_start_dates.append(saved_dataframe.date.min())
- all_assets_end_dates.append(saved_dataframe.date.max())
- start_date = min(all_assets_start_dates)
- end_date = max(all_assets_end_dates)
+
+ dd.load_historic_predictions_from_disk()
+
+ all_pairs_end_dates = []
+ for pair in dd.historic_predictions:
+ pair_historic_data = dd.historic_predictions[pair]
+ all_pairs_end_dates.append(pair_historic_data.date_pred.max())
+
+ global_metadata = dd.load_global_metadata_from_disk()
+ start_date = datetime.fromtimestamp(int(global_metadata["start_dry_live_date"]))
+ end_date = max(all_pairs_end_dates)
# add 1 day to string timerange to ensure BT module will load all dataframe data
end_date = end_date + timedelta(days=1)
backtesting_timerange = TimeRange(
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index c48758df4..473fe939f 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -53,6 +53,7 @@ class IFreqaiModel(ABC):
def __init__(self, config: Config) -> None:
self.config = config
+ self.metadata: Dict[str, Any] = {}
self.assert_config(self.config)
self.freqai_info: Dict[str, Any] = config["freqai"]
self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get(
@@ -67,10 +68,10 @@ class IFreqaiModel(ABC):
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
- self.save_live_data_backtest: bool = self.freqai_info.get(
- "save_live_data_backtest", False)
- if self.save_live_data_backtest:
- logger.info('Live configured to save data for backtest.')
+ self.backtest_using_historic_predictions: bool = self.freqai_info.get(
+ "backtest_using_historic_predictions", True)
+ if self.backtest_using_historic_predictions:
+ logger.info('Backtesting live models configured to use historic predictions.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
# set current candle to arbitrary historical date
@@ -103,6 +104,7 @@ class IFreqaiModel(ABC):
self.get_corr_dataframes: bool = True
self._threads: List[threading.Thread] = []
self._stop_event = threading.Event()
+ self.metadata = self.dd.load_global_metadata_from_disk()
record_params(config, self.full_path)
@@ -136,6 +138,7 @@ class IFreqaiModel(ABC):
self.inference_timer('start')
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
dk = self.start_live(dataframe, metadata, strategy, self.dk)
+ dataframe = dk.remove_features_from_df(dk.return_dataframe)
# For backtesting, each pair enters and then gets trained for each window along the
# sliding window defined by "train_period_days" (training window) and "live_retrain_hours"
@@ -145,14 +148,19 @@ class IFreqaiModel(ABC):
elif not self.follow_mode:
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
if self.dk.backtest_live_models:
- logger.info(
- f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (live models)")
+ if self.backtest_using_historic_predictions:
+ logger.info(
+ "Backtesting using historic predictions (live models)")
+ else:
+ logger.info(
+ f"Backtesting {len(self.dk.backtesting_timeranges)} "
+ "timeranges (live models)")
else:
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
- if not self.save_live_data_backtest:
+ if not self.backtest_using_historic_predictions:
dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
else:
@@ -163,8 +171,7 @@ class IFreqaiModel(ABC):
self.clean_up()
if self.live:
self.inference_timer('stop', metadata["pair"])
- if self.save_live_data_backtest:
- dk.save_backtesting_live_dataframe(dataframe, metadata["pair"])
+ self.set_start_dry_live_date(dataframe)
return dataframe
@@ -335,14 +342,12 @@ class IFreqaiModel(ABC):
"""
pair = metadata["pair"]
dk.return_dataframe = dataframe
- self.dk.set_backtesting_live_dataframe_path(pair)
- saved_dataframe = self.dk.get_backtesting_live_dataframe()
- columns_to_drop = list(set(dk.return_dataframe.columns).difference(
- ["date", "open", "high", "low", "close", "volume"]))
- saved_dataframe = saved_dataframe.drop(
- columns=["open", "high", "low", "close", "volume"])
+ saved_dataframe = self.dd.historic_predictions[pair]
+ columns_to_drop = list(set(saved_dataframe.columns).intersection(
+ dk.return_dataframe.columns))
dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
- dk.return_dataframe = pd.merge(dk.return_dataframe, saved_dataframe, how='left', on='date')
+ dk.return_dataframe = pd.merge(
+ dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred")
# dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0)
return dk
@@ -886,6 +891,22 @@ class IFreqaiModel(ABC):
return
+    def update_metadata(self, metadata: Dict[str, Any]):
+        """
+        Save the given global metadata through the data drawer, then keep it in memory
+        :param metadata: new global metadata dict (stored as `self.metadata` after saving)
+        """
+        self.dd.save_global_metadata_to_disk(metadata)
+        self.metadata = metadata
+
+    def set_start_dry_live_date(self, live_dataframe: DataFrame):
+        # Record the last row's date (epoch seconds) once; no-op if the key already exists.
+        if "start_dry_live_date" in self.metadata:
+            return
+        last_date = pd.to_datetime(live_dataframe["date"].values[-1])
+        self.metadata["start_dry_live_date"] = int(last_date.timestamp())
+        self.update_metadata(self.metadata)
+
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.
diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py
index c9efe6a3c..f42a87be7 100644
--- a/freqtrade/freqai/utils.py
+++ b/freqtrade/freqai/utils.py
@@ -230,7 +230,7 @@ def get_timerange_backtest_live_models(config: Config) -> str:
dk = FreqaiDataKitchen(config)
models_path = dk.get_full_models_path(config)
timerange: TimeRange = TimeRange()
- if not config.get("save_live_data_backtest", False):
+ if not config.get("freqai", {}).get("backtest_using_historic_predictions", True):
timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path)
else:
timerange = dk.get_timerange_from_backtesting_live_dataframe()
diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py
index ca7c19c94..2dbbd7ef5 100644
--- a/tests/freqai/test_freqai_datakitchen.py
+++ b/tests/freqai/test_freqai_datakitchen.py
@@ -261,45 +261,18 @@ def test_get_full_model_path(mocker, freqai_conf, model):
assert model_path.is_dir() is True
-def test_save_backtesting_live_dataframe(mocker, freqai_conf):
- freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf)
- dataframe_without_last_candle = dataframe.copy()
- dataframe_without_last_candle.drop(dataframe.tail(1).index, inplace=True)
- freqai_conf.update({"save_live_data_backtest": True})
- freqai.dk.save_backtesting_live_dataframe(dataframe_without_last_candle, "ADA/BTC")
- saved_dataframe = freqai.dk.get_backtesting_live_dataframe()
- assert len(saved_dataframe) == 1
- assert saved_dataframe.iloc[-1, 0] == dataframe_without_last_candle.iloc[-1, 0]
- freqai.dk.save_backtesting_live_dataframe(dataframe, "ADA/BTC")
- saved_dataframe = freqai.dk.get_backtesting_live_dataframe()
- assert len(saved_dataframe) == 2
- assert saved_dataframe.iloc[-1, 0] == dataframe.iloc[-1, 0]
- assert saved_dataframe.iloc[-2, 0] == dataframe.iloc[-2, 0]
-
-
def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf):
freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf)
- freqai_conf.update({"save_live_data_backtest": True})
- freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC")
- freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe)
+ freqai_conf.update({"backtest_using_historic_predictions": True})
timerange = freqai.dk.get_timerange_from_backtesting_live_dataframe()
assert timerange.startts == 1516406400
assert timerange.stopts == 1517356500
-def test_get_timerange_from_backtesting_live_dataframe_folder_not_found(mocker, freqai_conf):
+def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_conf):
freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf)
with pytest.raises(
OperationalException,
- match=r'Saved live data not found.*'
+ match=r'Historic predictions not found.*'
):
freqai.dk.get_timerange_from_backtesting_live_dataframe()
-
-
-def test_saved_live_bt_file_not_found(mocker, freqai_conf):
- freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf)
- with pytest.raises(
- OperationalException,
- match=r'.*live backtesting dataframe file not found.*'
- ):
- freqai.dk.get_backtesting_live_dataframe()
diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py
index ed634de55..66b3bac17 100644
--- a/tests/freqai/test_freqai_interface.py
+++ b/tests/freqai/test_freqai_interface.py
@@ -300,37 +300,6 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
shutil.rmtree(Path(freqai.dk.full_path))
-def test_start_backtesting_from_saved_live_dataframe(mocker, freqai_conf, caplog):
- freqai_conf.update({"save_live_data_backtest": True})
- freqai_conf.update({"freqai_backtest_live_models": True})
-
- strategy = get_patched_freqai_strategy(mocker, freqai_conf)
- exchange = get_patched_exchange(mocker, freqai_conf)
- strategy.dp = DataProvider(freqai_conf, exchange)
- strategy.freqai_info = freqai_conf.get("freqai", {})
- freqai = strategy.freqai
- freqai.live = False
- freqai.dk = FreqaiDataKitchen(freqai_conf)
- timerange = TimeRange.parse_timerange("20180110-20180130")
- freqai.dd.load_all_pair_histories(timerange, freqai.dk)
- sub_timerange = TimeRange.parse_timerange("20180110-20180130")
- corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk)
- df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
- metadata = {"pair": "ADA/BTC"}
-
- # create a dummy live dataframe file with 10 rows
- dataframe_predictions = df.tail(10).copy()
- dataframe_predictions["&s_close"] = dataframe_predictions["close"] * 1.1
- freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC")
- freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe_predictions)
-
- freqai.start_backtesting_from_live_saved_files(df, metadata, freqai.dk)
- assert len(freqai.dk.return_dataframe) == len(df)
- assert len(freqai.dk.return_dataframe[freqai.dk.return_dataframe["&s_close"] > 0]) == (
- len(dataframe_predictions))
- shutil.rmtree(Path(freqai.dk.full_path))
-
-
def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog):
freqai_conf.get("freqai", {}).update({"fit_live_predictions_candles": 10})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)