diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md index 2961b1b8d..059d56a1f 100644 --- a/docs/freqai-parameter-table.md +++ b/docs/freqai-parameter-table.md @@ -15,7 +15,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old.
**Datatype:** Positive integer.
Default: `0` (models never expire). | `purge_old_models` | Delete obsolete models.
**Datatype:** Boolean.
Default: `False` (all historic models remain on disk). | `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`.
**Datatype:** Boolean.
Default: `False` (no models are saved). -| `save_live_data_backtest` | Save live dataframe during dry/live runs to reuse in backtesting with [Backtest live models](freqai-running.md#backtest_live_models)) option. +| `backtest_using_historic_predictions` | Reuse `historic_predictions` in backtesting with the [Backtest live models](freqai-running.md#backtest_live_models) option.
**Datatype:** Boolean.
Default: `True`. | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)).
**Datatype:** Positive integer. | `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models.
**Datatype:** Boolean.
Default: `False`. | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)).
**Datatype:** Boolean.
Default: `False`. diff --git a/docs/freqai-running.md b/docs/freqai-running.md index 4c90a4885..d777b180e 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -83,8 +83,8 @@ To save the models generated during a particular backtest so that you can start FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study. For that, you have 2 options: -1. Set `"save_live_data_backtest"` to `True` in the config. With this option, FreqAI will save the live dataframe for reuse in backtesting. This option requires less disk space and backtesting will run faster. -2. Set `"purge_old_models"` to `False` and `"save_live_data_backtest"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time. +1. Set `"backtest_using_historic_predictions"` to `True` in the config. With this option, FreqAI will reuse `historic_predictions` in backtesting. This option requires less disk space and backtesting will run faster. +2. Set `"purge_old_models"` to `False` and `"backtest_using_historic_predictions"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time. The `--timerange` parameter must not be informed, as it will be automatically calculated through the training end dates of the models. 
diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 038ddaf2e..e83b05aaa 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -81,6 +81,7 @@ class FreqaiDataDrawer: self.historic_predictions_bkp_path = Path( self.full_path / "historic_predictions.backup.pkl") self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json") + self.global_metadata_path = Path(self.full_path / "global_metadata.json") self.metric_tracker_path = Path(self.full_path / "metric_tracker.json") self.follow_mode = follow_mode if follow_mode: @@ -125,6 +126,17 @@ class FreqaiDataDrawer: self.update_metric_tracker('cpu_load5min', load5 / cpus, pair) self.update_metric_tracker('cpu_load15min', load15 / cpus, pair) + def load_global_metadata_from_disk(self): + """ + Locate and load a previously saved global metadata in present model folder. + """ + exists = self.global_metadata_path.is_file() + if exists: + with open(self.global_metadata_path, "r") as fp: + metatada_dict = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE) + return metatada_dict + return {} + def load_drawer_from_disk(self): """ Locate and load a previously saved data drawer full of all pair model metadata in @@ -225,6 +237,15 @@ class FreqaiDataDrawer: rapidjson.dump(self.follower_dict, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE) + def save_global_metadata_to_disk(self, metadata: Dict[str, Any]): + """ + Save global metadata json to disk + """ + with self.save_lock: + with open(self.global_metadata_path, 'w') as fp: + rapidjson.dump(metadata, fp, default=self.np_encoder, + number_mode=rapidjson.NM_NATIVE) + def create_follower_dict(self): """ Create or dictionary for each follower to maintain unique persistent prediction targets diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index be2fb68b1..641c95725 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -9,7 +9,7 @@ 
from typing import Any, Dict, List, Tuple import numpy as np import numpy.typing as npt import pandas as pd -from pandas import DataFrame, read_feather +from pandas import DataFrame from scipy import stats from sklearn import linear_model from sklearn.cluster import DBSCAN @@ -74,9 +74,6 @@ class FreqaiDataKitchen: self.training_features_list: List = [] self.model_filename: str = "" self.backtesting_results_path = Path() - self.backtesting_live_model_folder_path = Path() - self.backtesting_live_model_path = Path() - self.backtesting_live_model_bkp_path = Path() self.backtest_predictions_folder: str = "backtesting_predictions" self.live = live self.pair = pair @@ -90,7 +87,9 @@ class FreqaiDataKitchen: self.full_path = self.get_full_models_path(self.config) if self.backtest_live_models: - if self.pair: + if self.pair and not ( + self.freqai_config.get("backtest_using_historic_predictions", True) + ): self.set_timerange_from_ready_models() (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange_live_models() @@ -1488,101 +1487,30 @@ class FreqaiDataKitchen: return dataframe - def set_backtesting_live_dataframe_folder_path( - self - ) -> None: - """ - Set live backtesting dataframe path - :param pair: current pair - """ - self.backtesting_live_model_folder_path = Path( - self.full_path / self.backtest_predictions_folder / "live_data") - - def set_backtesting_live_dataframe_path( - self, pair: str - ) -> None: - """ - Set live backtesting dataframe path - :param pair: current pair - """ - self.set_backtesting_live_dataframe_folder_path() - if not self.backtesting_live_model_folder_path.is_dir(): - self.backtesting_live_model_folder_path.mkdir(parents=True, exist_ok=True) - - pair_path = pair.split(":")[0].replace("/", "_").lower() - file_name = f"live_backtesting_{pair_path}.feather" - self.backtesting_live_model_path = Path( - self.full_path / - self.backtesting_live_model_folder_path / - file_name) - self.backtesting_live_model_bkp_path = Path( 
- self.full_path / - self.backtesting_live_model_folder_path / - file_name.replace(".feather", ".backup.feather")) - - def save_backtesting_live_dataframe( - self, dataframe: DataFrame, pair: str - ) -> None: - """ - Save live backtesting dataframe to feather file format - :param dataframe: current live dataframe - :param pair: current pair - """ - self.set_backtesting_live_dataframe_path(pair) - last_row_df = dataframe.tail(1) - if self.backtesting_live_model_path.is_file(): - saved_dataframe = self.get_backtesting_live_dataframe() - concat_dataframe = pd.concat([saved_dataframe, last_row_df]) - self.save_backtesting_live_dataframe_to_feather(concat_dataframe) - else: - self.save_backtesting_live_dataframe_to_feather(last_row_df) - - shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path) - - def save_backtesting_live_dataframe_to_feather(self, dataframe: DataFrame): - dataframe.reset_index(drop=True).to_feather( - self.backtesting_live_model_path, compression_level=9, compression='lz4') - - def get_backtesting_live_dataframe( - self - ) -> DataFrame: - """ - Get live backtesting dataframe from feather file format - return: saved dataframe from previous dry/run or live - """ - if self.backtesting_live_model_path.is_file(): - saved_dataframe = DataFrame() - try: - saved_dataframe = read_feather(self.backtesting_live_model_path) - except Exception: - saved_dataframe = read_feather(self.backtesting_live_model_bkp_path) - return saved_dataframe - else: - raise OperationalException( - "Saved live backtesting dataframe file not found." 
- ) - def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange: """ - Returns timerange information based on live backtesting dataframe file + Returns timerange information based on historic predictions file :return: timerange calculated from saved live data """ - all_assets_start_dates = [] - all_assets_end_dates = [] - self.set_backtesting_live_dataframe_folder_path() - if not self.backtesting_live_model_folder_path.is_dir(): + from freqtrade.freqai.data_drawer import FreqaiDataDrawer + dd = FreqaiDataDrawer(Path(self.full_path), self.config) + if not dd.historic_predictions_path.is_file(): raise OperationalException( - 'Saved live data not found. Saved lived data is required ' + 'Historic predictions not found. Historic predictions data is required ' 'to run backtest with the freqai-backtest-live-models option ' - 'and save_live_data_backtest config option as true' + 'and backtest_using_historic_predictions config option as true' ) - for file_in_dir in self.backtesting_live_model_folder_path.iterdir(): - if file_in_dir.is_file() and "backup" not in file_in_dir.name: - saved_dataframe = read_feather(file_in_dir) - all_assets_start_dates.append(saved_dataframe.date.min()) - all_assets_end_dates.append(saved_dataframe.date.max()) - start_date = min(all_assets_start_dates) - end_date = max(all_assets_end_dates) + + dd.load_historic_predictions_from_disk() + + all_pairs_end_dates = [] + for pair in dd.historic_predictions: + pair_historic_data = dd.historic_predictions[pair] + all_pairs_end_dates.append(pair_historic_data.date_pred.max()) + + global_metadata = dd.load_global_metadata_from_disk() + start_date = datetime.fromtimestamp(int(global_metadata["start_dry_live_date"])) + end_date = max(all_pairs_end_dates) # add 1 day to string timerange to ensure BT module will load all dataframe data end_date = end_date + timedelta(days=1) backtesting_timerange = TimeRange( diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py 
index c48758df4..473fe939f 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -53,6 +53,7 @@ class IFreqaiModel(ABC): def __init__(self, config: Config) -> None: self.config = config + self.metadata: Dict[str, Any] = {} self.assert_config(self.config) self.freqai_info: Dict[str, Any] = config["freqai"] self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get( @@ -67,10 +68,10 @@ class IFreqaiModel(ABC): self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True) if self.save_backtest_models: logger.info('Backtesting module configured to save all models.') - self.save_live_data_backtest: bool = self.freqai_info.get( - "save_live_data_backtest", False) - if self.save_live_data_backtest: - logger.info('Live configured to save data for backtest.') + self.backtest_using_historic_predictions: bool = self.freqai_info.get( + "backtest_using_historic_predictions", True) + if self.backtest_using_historic_predictions: + logger.info('Backtesting live models configured to use historic predictions.') self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode) # set current candle to arbitrary historical date @@ -103,6 +104,7 @@ class IFreqaiModel(ABC): self.get_corr_dataframes: bool = True self._threads: List[threading.Thread] = [] self._stop_event = threading.Event() + self.metadata = self.dd.load_global_metadata_from_disk() record_params(config, self.full_path) @@ -136,6 +138,7 @@ class IFreqaiModel(ABC): self.inference_timer('start') self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) dk = self.start_live(dataframe, metadata, strategy, self.dk) + dataframe = dk.remove_features_from_df(dk.return_dataframe) # For backtesting, each pair enters and then gets trained for each window along the # sliding window defined by "train_period_days" (training window) and "live_retrain_hours" @@ -145,14 +148,19 @@ class IFreqaiModel(ABC): elif not self.follow_mode: 
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) if self.dk.backtest_live_models: - logger.info( - f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (live models)") + if self.backtest_using_historic_predictions: + logger.info( + "Backtesting using historic predictions (live models)") + else: + logger.info( + f"Backtesting {len(self.dk.backtesting_timeranges)} " + "timeranges (live models)") else: logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") dataframe = self.dk.use_strategy_to_populate_indicators( strategy, prediction_dataframe=dataframe, pair=metadata["pair"] ) - if not self.save_live_data_backtest: + if not self.backtest_using_historic_predictions: dk = self.start_backtesting(dataframe, metadata, self.dk) dataframe = dk.remove_features_from_df(dk.return_dataframe) else: @@ -163,8 +171,7 @@ class IFreqaiModel(ABC): self.clean_up() if self.live: self.inference_timer('stop', metadata["pair"]) - if self.save_live_data_backtest: - dk.save_backtesting_live_dataframe(dataframe, metadata["pair"]) + self.set_start_dry_live_date(dataframe) return dataframe @@ -335,14 +342,12 @@ class IFreqaiModel(ABC): """ pair = metadata["pair"] dk.return_dataframe = dataframe - self.dk.set_backtesting_live_dataframe_path(pair) - saved_dataframe = self.dk.get_backtesting_live_dataframe() - columns_to_drop = list(set(dk.return_dataframe.columns).difference( - ["date", "open", "high", "low", "close", "volume"])) - saved_dataframe = saved_dataframe.drop( - columns=["open", "high", "low", "close", "volume"]) + saved_dataframe = self.dd.historic_predictions[pair] + columns_to_drop = list(set(saved_dataframe.columns).intersection( + dk.return_dataframe.columns)) dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop)) - dk.return_dataframe = pd.merge(dk.return_dataframe, saved_dataframe, how='left', on='date') + dk.return_dataframe = pd.merge( + dk.return_dataframe, saved_dataframe, how='left', left_on='date', 
right_on="date_pred") # dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0) return dk @@ -886,6 +891,22 @@ class IFreqaiModel(ABC): return + def update_metadata(self, metadata: Dict[str, Any]): + """ + Update global metadata and save the updated json file + :param metadata: new global metadata dict + """ + self.dd.save_global_metadata_to_disk(metadata) + self.metadata = metadata + + def set_start_dry_live_date(self, live_dataframe: DataFrame): + key_name = "start_dry_live_date" + if key_name not in self.metadata: + metadata = self.metadata + metadata[key_name] = int( + pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp()) + self.update_metadata(metadata) + # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModel.py for an example. diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index c9efe6a3c..f42a87be7 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -230,7 +230,7 @@ def get_timerange_backtest_live_models(config: Config) -> str: dk = FreqaiDataKitchen(config) models_path = dk.get_full_models_path(config) timerange: TimeRange = TimeRange() - if not config.get("save_live_data_backtest", False): + if not config.get("freqai", {}).get("backtest_using_historic_predictions", True): timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path) else: timerange = dk.get_timerange_from_backtesting_live_dataframe() diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index ca7c19c94..2dbbd7ef5 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -261,45 +261,18 @@ def test_get_full_model_path(mocker, freqai_conf, model): assert model_path.is_dir() is True -def test_save_backtesting_live_dataframe(mocker, freqai_conf): - freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf) - 
dataframe_without_last_candle = dataframe.copy() - dataframe_without_last_candle.drop(dataframe.tail(1).index, inplace=True) - freqai_conf.update({"save_live_data_backtest": True}) - freqai.dk.save_backtesting_live_dataframe(dataframe_without_last_candle, "ADA/BTC") - saved_dataframe = freqai.dk.get_backtesting_live_dataframe() - assert len(saved_dataframe) == 1 - assert saved_dataframe.iloc[-1, 0] == dataframe_without_last_candle.iloc[-1, 0] - freqai.dk.save_backtesting_live_dataframe(dataframe, "ADA/BTC") - saved_dataframe = freqai.dk.get_backtesting_live_dataframe() - assert len(saved_dataframe) == 2 - assert saved_dataframe.iloc[-1, 0] == dataframe.iloc[-1, 0] - assert saved_dataframe.iloc[-2, 0] == dataframe.iloc[-2, 0] - - def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf): freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf) - freqai_conf.update({"save_live_data_backtest": True}) - freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC") - freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe) + freqai_conf.update({"backtest_using_historic_predictions": True}) timerange = freqai.dk.get_timerange_from_backtesting_live_dataframe() assert timerange.startts == 1516406400 assert timerange.stopts == 1517356500 -def test_get_timerange_from_backtesting_live_dataframe_folder_not_found(mocker, freqai_conf): +def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_conf): freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf) with pytest.raises( OperationalException, - match=r'Saved live data not found.*' + match=r'Historic predictions not found.*' ): freqai.dk.get_timerange_from_backtesting_live_dataframe() - - -def test_saved_live_bt_file_not_found(mocker, freqai_conf): - freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf) - with pytest.raises( - OperationalException, - match=r'.*live backtesting dataframe file not found.*' - ): - freqai.dk.get_backtesting_live_dataframe() diff --git 
a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index ed634de55..66b3bac17 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -300,37 +300,6 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): shutil.rmtree(Path(freqai.dk.full_path)) -def test_start_backtesting_from_saved_live_dataframe(mocker, freqai_conf, caplog): - freqai_conf.update({"save_live_data_backtest": True}) - freqai_conf.update({"freqai_backtest_live_models": True}) - - strategy = get_patched_freqai_strategy(mocker, freqai_conf) - exchange = get_patched_exchange(mocker, freqai_conf) - strategy.dp = DataProvider(freqai_conf, exchange) - strategy.freqai_info = freqai_conf.get("freqai", {}) - freqai = strategy.freqai - freqai.live = False - freqai.dk = FreqaiDataKitchen(freqai_conf) - timerange = TimeRange.parse_timerange("20180110-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) - sub_timerange = TimeRange.parse_timerange("20180110-20180130") - corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) - df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") - metadata = {"pair": "ADA/BTC"} - - # create a dummy live dataframe file with 10 rows - dataframe_predictions = df.tail(10).copy() - dataframe_predictions["&s_close"] = dataframe_predictions["close"] * 1.1 - freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC") - freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe_predictions) - - freqai.start_backtesting_from_live_saved_files(df, metadata, freqai.dk) - assert len(freqai.dk.return_dataframe) == len(df) - assert len(freqai.dk.return_dataframe[freqai.dk.return_dataframe["&s_close"] > 0]) == ( - len(dataframe_predictions)) - shutil.rmtree(Path(freqai.dk.full_path)) - - def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog): freqai_conf.get("freqai", 
{}).update({"fit_live_predictions_candles": 10}) strategy = get_patched_freqai_strategy(mocker, freqai_conf)