From 9c5ba0732a3440c95487cb13263bd394d0670647 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Tue, 8 Nov 2022 10:32:18 -0300 Subject: [PATCH 01/18] save predictions with date and merge by date --- freqtrade/freqai/data_kitchen.py | 23 +++++++++++++++-------- freqtrade/freqai/freqai_interface.py | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 12a3cd519..8fd42ee85 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -979,7 +979,8 @@ class FreqaiDataKitchen: return weights def get_predictions_to_append(self, predictions: DataFrame, - do_predict: npt.ArrayLike) -> DataFrame: + do_predict: npt.ArrayLike, + dataframe_backtest: DataFrame) -> DataFrame: """ Get backtest prediction from current backtest period """ @@ -1001,7 +1002,9 @@ class FreqaiDataKitchen: if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0: append_df["DI_values"] = self.DI_values - return append_df + dataframe_backtest.reset_index(drop=True, inplace=True) + merged_df = pd.concat([dataframe_backtest["date"], append_df], axis=1) + return merged_df def append_predictions(self, append_df: DataFrame) -> None: """ @@ -1019,15 +1022,19 @@ class FreqaiDataKitchen: when it goes back to the strategy. These rows are not included in the backtest. 
""" - len_filler = len(dataframe) - len(self.full_df.index) # startup_candle_count - filler_df = pd.DataFrame( - np.zeros((len_filler, len(self.full_df.columns))), columns=self.full_df.columns - ) + # len_filler = len(dataframe) - len(self.full_df.index) # startup_candle_count + # filler_df = pd.DataFrame( + # np.zeros((len_filler, len(self.full_df.columns))), columns=self.full_df.columns + # ) - self.full_df = pd.concat([filler_df, self.full_df], axis=0, ignore_index=True) + # self.full_df = pd.concat([filler_df, self.full_df], axis=0, ignore_index=True) to_keep = [col for col in dataframe.columns if not col.startswith("&")] - self.return_dataframe = pd.concat([dataframe[to_keep], self.full_df], axis=1) + # self.return_dataframe = pd.concat([dataframe[to_keep], self.full_df], axis=1) + # self.full_df = DataFrame() + + self.return_dataframe = pd.merge(dataframe[to_keep], + self.full_df, how='left', on='date') self.full_df = DataFrame() return diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index ae123f852..59a82958b 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -301,7 +301,7 @@ class IFreqaiModel(ABC): self.model = self.dd.load_data(pair, dk) pred_df, do_preds = self.predict(dataframe_backtest, dk) - append_df = dk.get_predictions_to_append(pred_df, do_preds) + append_df = dk.get_predictions_to_append(pred_df, do_preds, dataframe_backtest) dk.append_predictions(append_df) dk.save_backtesting_prediction(append_df) From 8d9988a942aba46f4e7eb851d51f30497983e1b7 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Tue, 8 Nov 2022 11:06:23 -0300 Subject: [PATCH 02/18] enforce date column in backtesting freqai predictions files --- freqtrade/freqai/data_kitchen.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 8fd42ee85..b99447ac9 100644 --- 
a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1021,20 +1021,11 @@ class FreqaiDataKitchen: Back fill values to before the backtesting range so that the dataframe matches size when it goes back to the strategy. These rows are not included in the backtest. """ - - # len_filler = len(dataframe) - len(self.full_df.index) # startup_candle_count - # filler_df = pd.DataFrame( - # np.zeros((len_filler, len(self.full_df.columns))), columns=self.full_df.columns - # ) - - # self.full_df = pd.concat([filler_df, self.full_df], axis=0, ignore_index=True) - to_keep = [col for col in dataframe.columns if not col.startswith("&")] - # self.return_dataframe = pd.concat([dataframe[to_keep], self.full_df], axis=1) - # self.full_df = DataFrame() - self.return_dataframe = pd.merge(dataframe[to_keep], self.full_df, how='left', on='date') + self.return_dataframe[self.full_df.columns] = ( + self.return_dataframe[self.full_df.columns].fillna(value=0)) self.full_df = DataFrame() return @@ -1368,12 +1359,13 @@ class FreqaiDataKitchen: if file_exists: append_df = self.get_backtesting_prediction() - if len(append_df) == len_backtest_df: + if len(append_df) == len_backtest_df and 'date' in append_df: logger.info(f"Found backtesting prediction file at {path_to_predictionfile}") return True else: logger.info("A new backtesting prediction file is required. 
" - "(Number of predictions is different from dataframe length).") + "(Number of predictions is different from dataframe length or " + "old prediction file version).") return False else: logger.info( From 3e57c18ac60e4d5310f3c4044b5d0ba59a0cb822 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Tue, 8 Nov 2022 18:20:39 -0300 Subject: [PATCH 03/18] add fix_live_predictions function to backtesting --- freqtrade/freqai/data_kitchen.py | 2 +- freqtrade/freqai/freqai_interface.py | 52 ++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index b99447ac9..53de00426 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1014,7 +1014,7 @@ class FreqaiDataKitchen: if self.full_df.empty: self.full_df = append_df else: - self.full_df = pd.concat([self.full_df, append_df], axis=0) + self.full_df = pd.concat([self.full_df, append_df], axis=0, ignore_index=True) def fill_predictions(self, dataframe): """ diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 59a82958b..ab0fb388a 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -305,6 +305,7 @@ class IFreqaiModel(ABC): dk.append_predictions(append_df) dk.save_backtesting_prediction(append_df) + self.backtesting_fit_live_predictions(dk) dk.fill_predictions(dataframe) return dk @@ -824,6 +825,57 @@ class IFreqaiModel(ABC): f"to {tr_train_stopts_str}, {train_it}/{total_trains} " "trains" ) + + def backtesting_fit_live_predictions(self, dk: FreqaiDataKitchen): + start_time = time.perf_counter() + fit_live_predictions_candles = self.freqai_info.get("fit_live_predictions_candles", 0) + if fit_live_predictions_candles: + predictions_columns = [col for col in dk.full_df.columns if ( + col.startswith("&") and + '_mean' not in col and + '_std' not in col and + col not in self.dk.data["extra_returns_per_train"]) 
+ ] + self.dd.historic_predictions[self.dk.pair] = pd.DataFrame( + columns=dk.full_df.columns).astype(dk.full_df.dtypes) + + # for index, row in dk.full_df.iterrows(): + for index in range(len(dk.full_df)): + if index > fit_live_predictions_candles: + self.dd.historic_predictions[self.dk.pair] = ( + dk.full_df.iloc[index - fit_live_predictions_candles + 1:index + 1]) + else: + self.dd.historic_predictions[self.dk.pair] = dk.full_df.iloc[:index + 1] + # self.dd.historic_predictions[self.dk.pair].loc[index] = row.values.tolist() + # pd.concat(self.dd.historic_predictions[self.dk.pair], row.values) + self.fit_live_predictions(self.dk, self.dk.pair) + if index > fit_live_predictions_candles: + print(index) + + if index <= fit_live_predictions_candles: + dk.full_df.at[index, "warmed_up"] = 0 + else: + dk.full_df.at[index, "warmed_up"] = 1 + + for label in predictions_columns: + if dk.full_df[label].dtype == object: + continue + if "labels_mean" in self.dk.data: + dk.full_df.at[index, f"{label}_mean"] = ( + self.dk.data["labels_mean"][label]) + if "labels_std" in self.dk.data: + dk.full_df.at[index, f"{label}_std"] = self.dk.data["labels_std"][label] + + for extra_col in self.dk.data["extra_returns_per_train"]: + dk.full_df.at[index, f"{extra_col}"] = ( + self.dk.data["extra_returns_per_train"][extra_col]) + + end_time = time.perf_counter() + logger.info(f"Downloaded the tutorial in {start_time - end_time:0.4f} seconds") + + # print(f"Downloaded the tutorial in {start_time - end_time:0.4f} seconds") + + return # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModel.py for an example. 
From 8ee95db9270376eb459a172391f800986baec3c5 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Wed, 9 Nov 2022 09:51:42 -0300 Subject: [PATCH 04/18] refactoring backtesting_fit_live_predictions function --- freqtrade/freqai/freqai_interface.py | 39 ++++++++++------------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index ab0fb388a..1dc326079 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -827,37 +827,31 @@ class IFreqaiModel(ABC): ) def backtesting_fit_live_predictions(self, dk: FreqaiDataKitchen): - start_time = time.perf_counter() + """ + Apply fit_live_predictions function in backtesting with a dummy historic_predictions + :param dk: datakitchen object + """ fit_live_predictions_candles = self.freqai_info.get("fit_live_predictions_candles", 0) if fit_live_predictions_candles: - predictions_columns = [col for col in dk.full_df.columns if ( + label_columns = [col for col in dk.full_df.columns if ( col.startswith("&") and - '_mean' not in col and - '_std' not in col and + not (col.startswith("&") and col.endswith("_mean")) and + not (col.startswith("&") and col.endswith("_std")) and col not in self.dk.data["extra_returns_per_train"]) ] self.dd.historic_predictions[self.dk.pair] = pd.DataFrame( columns=dk.full_df.columns).astype(dk.full_df.dtypes) - # for index, row in dk.full_df.iterrows(): for index in range(len(dk.full_df)): - if index > fit_live_predictions_candles: + if index >= fit_live_predictions_candles: self.dd.historic_predictions[self.dk.pair] = ( - dk.full_df.iloc[index - fit_live_predictions_candles + 1:index + 1]) + dk.full_df.iloc[index - fit_live_predictions_candles:index]) else: - self.dd.historic_predictions[self.dk.pair] = dk.full_df.iloc[:index + 1] - # self.dd.historic_predictions[self.dk.pair].loc[index] = row.values.tolist() - # pd.concat(self.dd.historic_predictions[self.dk.pair], 
row.values) + self.dd.historic_predictions[self.dk.pair] = dk.full_df.iloc[:index] + self.fit_live_predictions(self.dk, self.dk.pair) - if index > fit_live_predictions_candles: - print(index) - - if index <= fit_live_predictions_candles: - dk.full_df.at[index, "warmed_up"] = 0 - else: - dk.full_df.at[index, "warmed_up"] = 1 - - for label in predictions_columns: + if index >= fit_live_predictions_candles: + for label in label_columns: if dk.full_df[label].dtype == object: continue if "labels_mean" in self.dk.data: @@ -869,13 +863,8 @@ class IFreqaiModel(ABC): for extra_col in self.dk.data["extra_returns_per_train"]: dk.full_df.at[index, f"{extra_col}"] = ( self.dk.data["extra_returns_per_train"][extra_col]) - - end_time = time.perf_counter() - logger.info(f"Downloaded the tutorial in {start_time - end_time:0.4f} seconds") - - # print(f"Downloaded the tutorial in {start_time - end_time:0.4f} seconds") - return + # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModel.py for an example. From 4f0f3e5b64cf38a96dbf8aa7fa1cb21177373d7a Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Wed, 9 Nov 2022 10:07:24 -0300 Subject: [PATCH 05/18] removed unnecessary code --- freqtrade/freqai/freqai_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 1dc326079..09e965b82 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -829,6 +829,8 @@ class IFreqaiModel(ABC): def backtesting_fit_live_predictions(self, dk: FreqaiDataKitchen): """ Apply fit_live_predictions function in backtesting with a dummy historic_predictions + The loop is required to simulate dry/live operation, as it is not possible to predict + the type of logic implemented by the user. 
:param dk: datakitchen object """ fit_live_predictions_candles = self.freqai_info.get("fit_live_predictions_candles", 0) @@ -839,8 +841,6 @@ class IFreqaiModel(ABC): not (col.startswith("&") and col.endswith("_std")) and col not in self.dk.data["extra_returns_per_train"]) ] - self.dd.historic_predictions[self.dk.pair] = pd.DataFrame( - columns=dk.full_df.columns).astype(dk.full_df.dtypes) for index in range(len(dk.full_df)): if index >= fit_live_predictions_candles: From 27fa9f1f4e8542f90a0361f207b0a131011eac93 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Sat, 12 Nov 2022 14:37:23 -0300 Subject: [PATCH 06/18] backtest saved dataframe from live --- freqtrade/freqai/data_kitchen.py | 109 ++++++++++++++++++++++++++- freqtrade/freqai/freqai_interface.py | 42 ++++++++++- freqtrade/freqai/utils.py | 7 +- 3 files changed, 154 insertions(+), 4 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 53de00426..8e30b0aec 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -9,7 +9,7 @@ from typing import Any, Dict, List, Tuple import numpy as np import numpy.typing as npt import pandas as pd -from pandas import DataFrame +from pandas import DataFrame, read_feather from scipy import stats from sklearn import linear_model from sklearn.cluster import DBSCAN @@ -73,6 +73,9 @@ class FreqaiDataKitchen: self.training_features_list: List = [] self.model_filename: str = "" self.backtesting_results_path = Path() + self.backtesting_live_model_folder_path = Path() + self.backtesting_live_model_path = Path() + self.backtesting_live_model_bkp_path = Path() self.backtest_predictions_folder: str = "backtesting_predictions" self.live = live self.pair = pair @@ -1488,3 +1491,107 @@ class FreqaiDataKitchen: dataframe.columns = dataframe.columns.str.replace(c, "") return dataframe + + def set_backtesting_live_dataframe_folder_path( + self + ) -> None: + """ + Set live backtesting dataframe path + :param 
pair: current pair + """ + self.backtesting_live_model_folder_path = Path( + self.full_path / self.backtest_predictions_folder / "live_data") + + def set_backtesting_live_dataframe_path( + self, pair: str + ) -> None: + """ + Set live backtesting dataframe path + :param pair: current pair + """ + self.set_backtesting_live_dataframe_folder_path() + if not self.backtesting_live_model_folder_path.is_dir(): + self.backtesting_live_model_folder_path.mkdir(parents=True, exist_ok=True) + + pair_path = pair.split(":")[0].replace("/", "_").lower() + file_name = f"live_backtesting_{pair_path}.feather" + path_to_live_backtesting_file = Path(self.full_path / + self.backtesting_live_model_folder_path / + file_name) + path_to_live_backtesting_bkp_file = Path(self.full_path / + self.backtesting_live_model_folder_path / + file_name.replace(".feather", ".backup.feather")) + + self.backtesting_live_model_path = path_to_live_backtesting_file + self.backtesting_live_model_bkp_path = path_to_live_backtesting_bkp_file + + def save_backtesting_live_dataframe( + self, dataframe: DataFrame, pair: str + ) -> None: + """ + Save live backtesting dataframe to feather file format + :param dataframe: current live dataframe + :param pair: current pair + """ + self.set_backtesting_live_dataframe_path(pair) + last_row_df = dataframe.tail(1) + if self.backtesting_live_model_path.is_file(): + saved_dataframe = self.get_backtesting_live_dataframe() + concat_dataframe = pd.concat([saved_dataframe, last_row_df]) + concat_dataframe.reset_index(drop=True).to_feather( + self.backtesting_live_model_path, compression_level=9, compression='lz4') + else: + last_row_df.reset_index(drop=True).to_feather( + self.backtesting_live_model_path, compression_level=9, compression='lz4') + + shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path) + + def get_backtesting_live_dataframe( + self + ) -> DataFrame: + """ + Get live backtesting dataframe from feather file format + return: saved 
dataframe from previous dry/run or live + """ + if self.backtesting_live_model_path.is_file(): + saved_dataframe = DataFrame() + try: + saved_dataframe = read_feather(self.backtesting_live_model_path) + except Exception: + saved_dataframe = read_feather(self.backtesting_live_model_bkp_path) + return saved_dataframe + else: + raise OperationalException( + "Saved pair file not found" + ) + + def get_timerange_from_backtesting_live_dataframe( + self) -> TimeRange: + """ + Returns timerange information based on a FreqAI model directory + :param models_path: FreqAI model path + + :return: timerange calculated from saved live data + """ + all_assets_start_dates = [] + all_assets_end_dates = [] + self.set_backtesting_live_dataframe_folder_path() + if not self.backtesting_live_model_folder_path.is_dir(): + raise OperationalException( + 'Saved live data not found. Saved lived data is required ' + 'to run backtest with the freqai-backtest-live-models option ' + 'and save_live_data_backtest config option as true' + ) + for file_in_dir in self.backtesting_live_model_folder_path.iterdir(): + if file_in_dir.is_file() and "backup" not in file_in_dir.name: + saved_dataframe = read_feather(file_in_dir) + all_assets_start_dates.append(saved_dataframe.date.min()) + all_assets_end_dates.append(saved_dataframe.date.max()) + start_date = min(all_assets_start_dates) + end_date = min(all_assets_end_dates) + # add 1 day to string timerange to ensure BT module will load all dataframe data + end_date = end_date + timedelta(days=1) + backtesting_timerange = TimeRange( + 'date', 'date', int(start_date.timestamp()), int(end_date.timestamp()) + ) + return backtesting_timerange diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 09e965b82..47d75dfaa 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -67,6 +67,11 @@ class IFreqaiModel(ABC): self.save_backtest_models: bool = 
self.freqai_info.get("save_backtest_models", True) if self.save_backtest_models: logger.info('Backtesting module configured to save all models.') + self.save_live_data_backtest: bool = self.freqai_info.get( + "save_live_data_backtest", True) + if self.save_live_data_backtest: + logger.info('Live configured to save data for backtest.') + self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode) # set current candle to arbitrary historical date self.current_candle: datetime = datetime.fromtimestamp(637887600, tz=timezone.utc) @@ -147,12 +152,20 @@ class IFreqaiModel(ABC): dataframe = self.dk.use_strategy_to_populate_indicators( strategy, prediction_dataframe=dataframe, pair=metadata["pair"] ) - dk = self.start_backtesting(dataframe, metadata, self.dk) + if not self.save_live_data_backtest: + dk = self.start_backtesting(dataframe, metadata, self.dk) + dataframe = dk.remove_features_from_df(dk.return_dataframe) + else: + dk = self.start_backtesting_from_live_saved_files( + dataframe, metadata, self.dk) + dataframe = dk.return_dataframe - dataframe = dk.remove_features_from_df(dk.return_dataframe) self.clean_up() if self.live: self.inference_timer('stop', metadata["pair"]) + if self.save_live_data_backtest: + dk.save_backtesting_live_dataframe(dataframe, metadata["pair"]) + return dataframe def clean_up(self): @@ -310,6 +323,31 @@ class IFreqaiModel(ABC): return dk + def start_backtesting_from_live_saved_files( + self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen + ) -> FreqaiDataKitchen: + """ + :param dataframe: DataFrame = strategy passed dataframe + :param metadata: Dict = pair metadata + :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only + :return: + FreqaiDataKitchen = Data management/analysis tool associated to present pair only + """ + pair = metadata["pair"] + dk.return_dataframe = dataframe + + dk.return_dataframe = dataframe + self.dk.set_backtesting_live_dataframe_path(pair) + 
saved_dataframe = self.dk.get_backtesting_live_dataframe() + columns_to_drop = list(set(dk.return_dataframe.columns).difference( + ["date", "open", "high", "low", "close", "volume"])) + saved_dataframe = saved_dataframe.drop( + columns=["open", "high", "low", "close", "volume"]) + dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop)) + dk.return_dataframe = pd.merge(dk.return_dataframe, saved_dataframe, how='left', on='date') + # dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0) + return dk + def start_live( self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen ) -> FreqaiDataKitchen: diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index e854bcf0b..ad38a339b 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -229,7 +229,12 @@ def get_timerange_backtest_live_models(config: Config) -> str: """ dk = FreqaiDataKitchen(config) models_path = dk.get_full_models_path(config) - timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path) + timerange: TimeRange = TimeRange() + if not config.get("save_live_data_backtest", True): + timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path) + else: + timerange = dk.get_timerange_from_backtesting_live_dataframe() + start_date = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) end_date = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) tr = f"{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}" From b01e4e3dbfcfebc72990e03399a7bcb93f231d5f Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 17 Nov 2022 10:14:30 -0300 Subject: [PATCH 07/18] change default value - save_live_data_backtest as false --- freqtrade/freqai/freqai_interface.py | 2 +- freqtrade/freqai/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 
47d75dfaa..cc6cd3c9b 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -68,7 +68,7 @@ class IFreqaiModel(ABC): if self.save_backtest_models: logger.info('Backtesting module configured to save all models.') self.save_live_data_backtest: bool = self.freqai_info.get( - "save_live_data_backtest", True) + "save_live_data_backtest", False) if self.save_live_data_backtest: logger.info('Live configured to save data for backtest.') diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index ad38a339b..a4e7a9a55 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -230,7 +230,7 @@ def get_timerange_backtest_live_models(config: Config) -> str: dk = FreqaiDataKitchen(config) models_path = dk.get_full_models_path(config) timerange: TimeRange = TimeRange() - if not config.get("save_live_data_backtest", True): + if not config.get("save_live_data_backtest", False): timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path) else: timerange = dk.get_timerange_from_backtesting_live_dataframe() From 913749c81bad3c85c882391bf0b6341967b0e89a Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 17 Nov 2022 10:30:16 -0300 Subject: [PATCH 08/18] backtesting_from_live_saved_files - code refactoring --- docs/freqai-running.md | 2 +- freqtrade/freqai/data_kitchen.py | 27 +++++++++++---------------- freqtrade/freqai/freqai_interface.py | 2 -- 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/docs/freqai-running.md b/docs/freqai-running.md index f97ed0ab4..d2f9595be 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -81,7 +81,7 @@ To save the models generated during a particular backtest so that you can start ### Backtest live models -FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse models generated in dry/run for comparison or other study. 
For that, you must set `"purge_old_models"` to `True` in the config. +FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse models generated in dry/run for comparison or other study. For that, you must set `"purge_old_models"` to `False` in the config. The `--timerange` parameter must not be informed, as it will be automatically calculated through the training end dates of the models. diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index c7fae7770..d5427c4a5 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1519,15 +1519,13 @@ class FreqaiDataKitchen: pair_path = pair.split(":")[0].replace("/", "_").lower() file_name = f"live_backtesting_{pair_path}.feather" - path_to_live_backtesting_file = Path(self.full_path / - self.backtesting_live_model_folder_path / - file_name) - path_to_live_backtesting_bkp_file = Path(self.full_path / - self.backtesting_live_model_folder_path / - file_name.replace(".feather", ".backup.feather")) - - self.backtesting_live_model_path = path_to_live_backtesting_file - self.backtesting_live_model_bkp_path = path_to_live_backtesting_bkp_file + self.backtesting_live_model_path = Path(self.full_path / + self.backtesting_live_model_folder_path / + file_name) + self.backtesting_live_model_bkp_path = Path( + self.full_path / + self.backtesting_live_model_folder_path / + file_name.replace(".feather", ".backup.feather")) def save_backtesting_live_dataframe( self, dataframe: DataFrame, pair: str @@ -1566,15 +1564,12 @@ class FreqaiDataKitchen: return saved_dataframe else: raise OperationalException( - "Saved pair file not found" + "Saved live backtesting dataframe file not found." 
) - def get_timerange_from_backtesting_live_dataframe( - self) -> TimeRange: + def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange: """ - Returns timerange information based on a FreqAI model directory - :param models_path: FreqAI model path - + Returns timerange information based on live backtesting dataframe file :return: timerange calculated from saved live data """ all_assets_start_dates = [] @@ -1592,7 +1587,7 @@ class FreqaiDataKitchen: all_assets_start_dates.append(saved_dataframe.date.min()) all_assets_end_dates.append(saved_dataframe.date.max()) start_date = min(all_assets_start_dates) - end_date = min(all_assets_end_dates) + end_date = max(all_assets_end_dates) # add 1 day to string timerange to ensure BT module will load all dataframe data end_date = end_date + timedelta(days=1) backtesting_timerange = TimeRange( diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index cc6cd3c9b..8d84d70c5 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -334,8 +334,6 @@ class IFreqaiModel(ABC): FreqaiDataKitchen = Data management/analysis tool associated to present pair only """ pair = metadata["pair"] - dk.return_dataframe = dataframe - dk.return_dataframe = dataframe self.dk.set_backtesting_live_dataframe_path(pair) saved_dataframe = self.dk.get_backtesting_live_dataframe() From 99bff9cbfa149b0c28b91c2736a472aad47c8633 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 17 Nov 2022 10:30:51 -0300 Subject: [PATCH 09/18] backtesting_from_live_saved_files - code refactoring --- freqtrade/freqai/data_kitchen.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index d5427c4a5..ed78cfee5 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1519,9 +1519,10 @@ class FreqaiDataKitchen: pair_path = pair.split(":")[0].replace("/", "_").lower() 
file_name = f"live_backtesting_{pair_path}.feather" - self.backtesting_live_model_path = Path(self.full_path / - self.backtesting_live_model_folder_path / - file_name) + self.backtesting_live_model_path = Path( + self.full_path / + self.backtesting_live_model_folder_path / + file_name) self.backtesting_live_model_bkp_path = Path( self.full_path / self.backtesting_live_model_folder_path / From 3903b04d3f42f4465d85165913e265c75ffc9f76 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 17 Nov 2022 15:20:07 -0300 Subject: [PATCH 10/18] save_live_data_backtest - added docs and tests --- docs/freqai-parameter-table.md | 1 + docs/freqai-running.md | 5 ++- freqtrade/freqai/data_kitchen.py | 10 +++-- freqtrade/freqai/freqai_interface.py | 10 ++--- tests/freqai/test_freqai_datakitchen.py | 44 ++++++++++++++++++++ tests/freqai/test_freqai_interface.py | 55 +++++++++++++++++++++++++ 6 files changed, 114 insertions(+), 11 deletions(-) diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md index c027a12b1..2961b1b8d 100644 --- a/docs/freqai-parameter-table.md +++ b/docs/freqai-parameter-table.md @@ -15,6 +15,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old.
**Datatype:** Positive integer.
Default: `0` (models never expire). | `purge_old_models` | Delete obsolete models.
**Datatype:** Boolean.
Default: `False` (all historic models remain on disk). | `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`.
**Datatype:** Boolean.
Default: `False` (no models are saved). +| `save_live_data_backtest` | Save live dataframe during dry/live runs to reuse in backtesting with [Backtest live models](freqai-running.md#backtest_live_models)) option. | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)).
**Datatype:** Positive integer. | `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models.
**Datatype:** Boolean.
Default: `False`. | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)).
**Datatype:** Boolean.
Default: `False`. diff --git a/docs/freqai-running.md b/docs/freqai-running.md index d2f9595be..4c90a4885 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -81,7 +81,10 @@ To save the models generated during a particular backtest so that you can start ### Backtest live models -FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse models generated in dry/run for comparison or other study. For that, you must set `"purge_old_models"` to `False` in the config. +FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study. For that, you have 2 options: + +1. Set `"save_live_data_backtest"` to `True` in the config. With this option, FreqAI will save the live dataframe for reuse in backtesting. This option requires less disk space and backtesting will run faster. +2. Set `"purge_old_models"` to `False` and `"save_live_data_backtest"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time. The `--timerange` parameter must not be informed, as it will be automatically calculated through the training end dates of the models. 
diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index ed78cfee5..d93060568 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1541,14 +1541,16 @@ class FreqaiDataKitchen: if self.backtesting_live_model_path.is_file(): saved_dataframe = self.get_backtesting_live_dataframe() concat_dataframe = pd.concat([saved_dataframe, last_row_df]) - concat_dataframe.reset_index(drop=True).to_feather( - self.backtesting_live_model_path, compression_level=9, compression='lz4') + self.save_backtesting_live_dataframe_to_feather(concat_dataframe) else: - last_row_df.reset_index(drop=True).to_feather( - self.backtesting_live_model_path, compression_level=9, compression='lz4') + self.save_backtesting_live_dataframe_to_feather(last_row_df) shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path) + def save_backtesting_live_dataframe_to_feather(self, dataframe: DataFrame): + dataframe.reset_index(drop=True).to_feather( + self.backtesting_live_model_path, compression_level=9, compression='lz4') + def get_backtesting_live_dataframe( self ) -> DataFrame: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 8d84d70c5..a0dac5725 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -694,7 +694,8 @@ class IFreqaiModel(ABC): for label in full_labels: if self.dd.historic_predictions[dk.pair][label].dtype == object: continue - f = spy.stats.norm.fit(self.dd.historic_predictions[dk.pair][label].tail(num_candles)) + f = spy.stats.norm.fit( + self.dd.historic_predictions[dk.pair][label].fillna(0).tail(num_candles)) dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1] return @@ -882,11 +883,7 @@ class IFreqaiModel(ABC): if index >= fit_live_predictions_candles: self.dd.historic_predictions[self.dk.pair] = ( dk.full_df.iloc[index - fit_live_predictions_candles:index]) - else: - 
self.dd.historic_predictions[self.dk.pair] = dk.full_df.iloc[:index] - - self.fit_live_predictions(self.dk, self.dk.pair) - if index >= fit_live_predictions_candles: + self.fit_live_predictions(self.dk, self.dk.pair) for label in label_columns: if dk.full_df[label].dtype == object: continue @@ -899,6 +896,7 @@ class IFreqaiModel(ABC): for extra_col in self.dk.data["extra_returns_per_train"]: dk.full_df.at[index, f"{extra_col}"] = ( self.dk.data["extra_returns_per_train"][extra_col]) + return # Following methods which are overridden by user made prediction models. diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 9abe60edb..ca7c19c94 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -259,3 +259,47 @@ def test_get_full_model_path(mocker, freqai_conf, model): model_path = freqai.dk.get_full_models_path(freqai_conf) assert model_path.is_dir() is True + + +def test_save_backtesting_live_dataframe(mocker, freqai_conf): + freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf) + dataframe_without_last_candle = dataframe.copy() + dataframe_without_last_candle.drop(dataframe.tail(1).index, inplace=True) + freqai_conf.update({"save_live_data_backtest": True}) + freqai.dk.save_backtesting_live_dataframe(dataframe_without_last_candle, "ADA/BTC") + saved_dataframe = freqai.dk.get_backtesting_live_dataframe() + assert len(saved_dataframe) == 1 + assert saved_dataframe.iloc[-1, 0] == dataframe_without_last_candle.iloc[-1, 0] + freqai.dk.save_backtesting_live_dataframe(dataframe, "ADA/BTC") + saved_dataframe = freqai.dk.get_backtesting_live_dataframe() + assert len(saved_dataframe) == 2 + assert saved_dataframe.iloc[-1, 0] == dataframe.iloc[-1, 0] + assert saved_dataframe.iloc[-2, 0] == dataframe.iloc[-2, 0] + + +def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf): + freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf) + 
freqai_conf.update({"save_live_data_backtest": True}) + freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC") + freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe) + timerange = freqai.dk.get_timerange_from_backtesting_live_dataframe() + assert timerange.startts == 1516406400 + assert timerange.stopts == 1517356500 + + +def test_get_timerange_from_backtesting_live_dataframe_folder_not_found(mocker, freqai_conf): + freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf) + with pytest.raises( + OperationalException, + match=r'Saved live data not found.*' + ): + freqai.dk.get_timerange_from_backtesting_live_dataframe() + + +def test_saved_live_bt_file_not_found(mocker, freqai_conf): + freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf) + with pytest.raises( + OperationalException, + match=r'.*live backtesting dataframe file not found.*' + ): + freqai.dk.get_backtesting_live_dataframe() diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 25bc99580..ed634de55 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -300,6 +300,61 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): shutil.rmtree(Path(freqai.dk.full_path)) +def test_start_backtesting_from_saved_live_dataframe(mocker, freqai_conf, caplog): + freqai_conf.update({"save_live_data_backtest": True}) + freqai_conf.update({"freqai_backtest_live_models": True}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = False + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + sub_timerange = TimeRange.parse_timerange("20180110-20180130") + corr_df, base_df = 
freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) + df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + metadata = {"pair": "ADA/BTC"} + + # create a dummy live dataframe file with 10 rows + dataframe_predictions = df.tail(10).copy() + dataframe_predictions["&s_close"] = dataframe_predictions["close"] * 1.1 + freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC") + freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe_predictions) + + freqai.start_backtesting_from_live_saved_files(df, metadata, freqai.dk) + assert len(freqai.dk.return_dataframe) == len(df) + assert len(freqai.dk.return_dataframe[freqai.dk.return_dataframe["&s_close"] > 0]) == ( + len(dataframe_predictions)) + shutil.rmtree(Path(freqai.dk.full_path)) + + +def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog): + freqai_conf.get("freqai", {}).update({"fit_live_predictions_candles": 10}) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = False + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180128-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + sub_timerange = TimeRange.parse_timerange("20180129-20180130") + corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) + df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + freqai.dk.pair = "ADA/BTC" + freqai.dk.full_df = df + assert "&-s_close_mean" not in freqai.dk.full_df.columns + assert "&-s_close_std" not in freqai.dk.full_df.columns + freqai.backtesting_fit_live_predictions(freqai.dk) + assert "&-s_close_mean" in freqai.dk.full_df.columns + assert "&-s_close_std" in freqai.dk.full_df.columns + 
shutil.rmtree(Path(freqai.dk.full_path)) + + def test_follow_mode(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) From 80d070e9eed2a05980818af817594c6cae0b0f9a Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Sat, 19 Nov 2022 14:15:58 -0300 Subject: [PATCH 11/18] update code to use historic_predictions for freqai_backtest_live_models --- docs/freqai-parameter-table.md | 2 +- docs/freqai-running.md | 4 +- freqtrade/freqai/data_drawer.py | 21 +++++ freqtrade/freqai/data_kitchen.py | 114 +++++------------------- freqtrade/freqai/freqai_interface.py | 53 +++++++---- freqtrade/freqai/utils.py | 2 +- tests/freqai/test_freqai_datakitchen.py | 33 +------ tests/freqai/test_freqai_interface.py | 31 ------- 8 files changed, 86 insertions(+), 174 deletions(-) diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md index 2961b1b8d..059d56a1f 100644 --- a/docs/freqai-parameter-table.md +++ b/docs/freqai-parameter-table.md @@ -15,7 +15,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old.
**Datatype:** Positive integer.
Default: `0` (models never expire). | `purge_old_models` | Delete obsolete models.
**Datatype:** Boolean.
Default: `False` (all historic models remain on disk). | `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`.
**Datatype:** Boolean.
Default: `False` (no models are saved). -| `save_live_data_backtest` | Save live dataframe during dry/live runs to reuse in backtesting with [Backtest live models](freqai-running.md#backtest_live_models)) option. +| `backtest_using_historic_predictions` | Reuse `historic_predictions` in backtesting with the [Backtest live models](freqai-running.md#backtest-live-models) option.
Default: `True` | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)).
**Datatype:** Positive integer. | `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models.
**Datatype:** Boolean.
Default: `False`. | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)).
**Datatype:** Boolean.
Default: `False`. diff --git a/docs/freqai-running.md b/docs/freqai-running.md index 4c90a4885..d777b180e 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -83,8 +83,8 @@ To save the models generated during a particular backtest so that you can start FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study. For that, you have 2 options: -1. Set `"save_live_data_backtest"` to `True` in the config. With this option, FreqAI will save the live dataframe for reuse in backtesting. This option requires less disk space and backtesting will run faster. -2. Set `"purge_old_models"` to `False` and `"save_live_data_backtest"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time. +1. Set `"backtest_using_historic_predictions"` to `True` in the config. With this option, FreqAI will reuse `historic_predictions` in backtesting. This option requires less disk space and backtesting will run faster. +2. Set `"purge_old_models"` to `False` and `"backtest_using_historic_predictions"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time. The `--timerange` parameter must not be informed, as it will be automatically calculated through the training end dates of the models. 
diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 038ddaf2e..e83b05aaa 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -81,6 +81,7 @@ class FreqaiDataDrawer: self.historic_predictions_bkp_path = Path( self.full_path / "historic_predictions.backup.pkl") self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json") + self.global_metadata_path = Path(self.full_path / "global_metadata.json") self.metric_tracker_path = Path(self.full_path / "metric_tracker.json") self.follow_mode = follow_mode if follow_mode: @@ -125,6 +126,17 @@ class FreqaiDataDrawer: self.update_metric_tracker('cpu_load5min', load5 / cpus, pair) self.update_metric_tracker('cpu_load15min', load15 / cpus, pair) + def load_global_metadata_from_disk(self): + """ + Locate and load a previously saved global metadata in present model folder. + """ + exists = self.global_metadata_path.is_file() + if exists: + with open(self.global_metadata_path, "r") as fp: + metatada_dict = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE) + return metatada_dict + return {} + def load_drawer_from_disk(self): """ Locate and load a previously saved data drawer full of all pair model metadata in @@ -225,6 +237,15 @@ class FreqaiDataDrawer: rapidjson.dump(self.follower_dict, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE) + def save_global_metadata_to_disk(self, metadata: Dict[str, Any]): + """ + Save global metadata json to disk + """ + with self.save_lock: + with open(self.global_metadata_path, 'w') as fp: + rapidjson.dump(metadata, fp, default=self.np_encoder, + number_mode=rapidjson.NM_NATIVE) + def create_follower_dict(self): """ Create or dictionary for each follower to maintain unique persistent prediction targets diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index be2fb68b1..641c95725 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -9,7 +9,7 @@ 
from typing import Any, Dict, List, Tuple import numpy as np import numpy.typing as npt import pandas as pd -from pandas import DataFrame, read_feather +from pandas import DataFrame from scipy import stats from sklearn import linear_model from sklearn.cluster import DBSCAN @@ -74,9 +74,6 @@ class FreqaiDataKitchen: self.training_features_list: List = [] self.model_filename: str = "" self.backtesting_results_path = Path() - self.backtesting_live_model_folder_path = Path() - self.backtesting_live_model_path = Path() - self.backtesting_live_model_bkp_path = Path() self.backtest_predictions_folder: str = "backtesting_predictions" self.live = live self.pair = pair @@ -90,7 +87,9 @@ class FreqaiDataKitchen: self.full_path = self.get_full_models_path(self.config) if self.backtest_live_models: - if self.pair: + if self.pair and not ( + self.freqai_config.get("backtest_using_historic_predictions", True) + ): self.set_timerange_from_ready_models() (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange_live_models() @@ -1488,101 +1487,30 @@ class FreqaiDataKitchen: return dataframe - def set_backtesting_live_dataframe_folder_path( - self - ) -> None: - """ - Set live backtesting dataframe path - :param pair: current pair - """ - self.backtesting_live_model_folder_path = Path( - self.full_path / self.backtest_predictions_folder / "live_data") - - def set_backtesting_live_dataframe_path( - self, pair: str - ) -> None: - """ - Set live backtesting dataframe path - :param pair: current pair - """ - self.set_backtesting_live_dataframe_folder_path() - if not self.backtesting_live_model_folder_path.is_dir(): - self.backtesting_live_model_folder_path.mkdir(parents=True, exist_ok=True) - - pair_path = pair.split(":")[0].replace("/", "_").lower() - file_name = f"live_backtesting_{pair_path}.feather" - self.backtesting_live_model_path = Path( - self.full_path / - self.backtesting_live_model_folder_path / - file_name) - self.backtesting_live_model_bkp_path = Path( 
- self.full_path / - self.backtesting_live_model_folder_path / - file_name.replace(".feather", ".backup.feather")) - - def save_backtesting_live_dataframe( - self, dataframe: DataFrame, pair: str - ) -> None: - """ - Save live backtesting dataframe to feather file format - :param dataframe: current live dataframe - :param pair: current pair - """ - self.set_backtesting_live_dataframe_path(pair) - last_row_df = dataframe.tail(1) - if self.backtesting_live_model_path.is_file(): - saved_dataframe = self.get_backtesting_live_dataframe() - concat_dataframe = pd.concat([saved_dataframe, last_row_df]) - self.save_backtesting_live_dataframe_to_feather(concat_dataframe) - else: - self.save_backtesting_live_dataframe_to_feather(last_row_df) - - shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path) - - def save_backtesting_live_dataframe_to_feather(self, dataframe: DataFrame): - dataframe.reset_index(drop=True).to_feather( - self.backtesting_live_model_path, compression_level=9, compression='lz4') - - def get_backtesting_live_dataframe( - self - ) -> DataFrame: - """ - Get live backtesting dataframe from feather file format - return: saved dataframe from previous dry/run or live - """ - if self.backtesting_live_model_path.is_file(): - saved_dataframe = DataFrame() - try: - saved_dataframe = read_feather(self.backtesting_live_model_path) - except Exception: - saved_dataframe = read_feather(self.backtesting_live_model_bkp_path) - return saved_dataframe - else: - raise OperationalException( - "Saved live backtesting dataframe file not found." 
- ) - def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange: """ - Returns timerange information based on live backtesting dataframe file + Returns timerange information based on historic predictions file :return: timerange calculated from saved live data """ - all_assets_start_dates = [] - all_assets_end_dates = [] - self.set_backtesting_live_dataframe_folder_path() - if not self.backtesting_live_model_folder_path.is_dir(): + from freqtrade.freqai.data_drawer import FreqaiDataDrawer + dd = FreqaiDataDrawer(Path(self.full_path), self.config) + if not dd.historic_predictions_path.is_file(): raise OperationalException( - 'Saved live data not found. Saved lived data is required ' + 'Historic predictions not found. Historic predictions data is required ' 'to run backtest with the freqai-backtest-live-models option ' - 'and save_live_data_backtest config option as true' + 'and backtest_using_historic_predictions config option as true' ) - for file_in_dir in self.backtesting_live_model_folder_path.iterdir(): - if file_in_dir.is_file() and "backup" not in file_in_dir.name: - saved_dataframe = read_feather(file_in_dir) - all_assets_start_dates.append(saved_dataframe.date.min()) - all_assets_end_dates.append(saved_dataframe.date.max()) - start_date = min(all_assets_start_dates) - end_date = max(all_assets_end_dates) + + dd.load_historic_predictions_from_disk() + + all_pairs_end_dates = [] + for pair in dd.historic_predictions: + pair_historic_data = dd.historic_predictions[pair] + all_pairs_end_dates.append(pair_historic_data.date_pred.max()) + + global_metadata = dd.load_global_metadata_from_disk() + start_date = datetime.fromtimestamp(int(global_metadata["start_dry_live_date"])) + end_date = max(all_pairs_end_dates) # add 1 day to string timerange to ensure BT module will load all dataframe data end_date = end_date + timedelta(days=1) backtesting_timerange = TimeRange( diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py 
index c48758df4..473fe939f 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -53,6 +53,7 @@ class IFreqaiModel(ABC): def __init__(self, config: Config) -> None: self.config = config + self.metadata: Dict[str, Any] = {} self.assert_config(self.config) self.freqai_info: Dict[str, Any] = config["freqai"] self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get( @@ -67,10 +68,10 @@ class IFreqaiModel(ABC): self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True) if self.save_backtest_models: logger.info('Backtesting module configured to save all models.') - self.save_live_data_backtest: bool = self.freqai_info.get( - "save_live_data_backtest", False) - if self.save_live_data_backtest: - logger.info('Live configured to save data for backtest.') + self.backtest_using_historic_predictions: bool = self.freqai_info.get( + "backtest_using_historic_predictions", True) + if self.backtest_using_historic_predictions: + logger.info('Backtesting live models configured to use historic predictions.') self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode) # set current candle to arbitrary historical date @@ -103,6 +104,7 @@ class IFreqaiModel(ABC): self.get_corr_dataframes: bool = True self._threads: List[threading.Thread] = [] self._stop_event = threading.Event() + self.metadata = self.dd.load_global_metadata_from_disk() record_params(config, self.full_path) @@ -136,6 +138,7 @@ class IFreqaiModel(ABC): self.inference_timer('start') self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) dk = self.start_live(dataframe, metadata, strategy, self.dk) + dataframe = dk.remove_features_from_df(dk.return_dataframe) # For backtesting, each pair enters and then gets trained for each window along the # sliding window defined by "train_period_days" (training window) and "live_retrain_hours" @@ -145,14 +148,19 @@ class IFreqaiModel(ABC): elif not self.follow_mode: 
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) if self.dk.backtest_live_models: - logger.info( - f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (live models)") + if self.backtest_using_historic_predictions: + logger.info( + "Backtesting using historic predictions (live models)") + else: + logger.info( + f"Backtesting {len(self.dk.backtesting_timeranges)} " + "timeranges (live models)") else: logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") dataframe = self.dk.use_strategy_to_populate_indicators( strategy, prediction_dataframe=dataframe, pair=metadata["pair"] ) - if not self.save_live_data_backtest: + if not self.backtest_using_historic_predictions: dk = self.start_backtesting(dataframe, metadata, self.dk) dataframe = dk.remove_features_from_df(dk.return_dataframe) else: @@ -163,8 +171,7 @@ class IFreqaiModel(ABC): self.clean_up() if self.live: self.inference_timer('stop', metadata["pair"]) - if self.save_live_data_backtest: - dk.save_backtesting_live_dataframe(dataframe, metadata["pair"]) + self.set_start_dry_live_date(dataframe) return dataframe @@ -335,14 +342,12 @@ class IFreqaiModel(ABC): """ pair = metadata["pair"] dk.return_dataframe = dataframe - self.dk.set_backtesting_live_dataframe_path(pair) - saved_dataframe = self.dk.get_backtesting_live_dataframe() - columns_to_drop = list(set(dk.return_dataframe.columns).difference( - ["date", "open", "high", "low", "close", "volume"])) - saved_dataframe = saved_dataframe.drop( - columns=["open", "high", "low", "close", "volume"]) + saved_dataframe = self.dd.historic_predictions[pair] + columns_to_drop = list(set(saved_dataframe.columns).intersection( + dk.return_dataframe.columns)) dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop)) - dk.return_dataframe = pd.merge(dk.return_dataframe, saved_dataframe, how='left', on='date') + dk.return_dataframe = pd.merge( + dk.return_dataframe, saved_dataframe, how='left', left_on='date', 
right_on="date_pred") # dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0) return dk @@ -886,6 +891,22 @@ class IFreqaiModel(ABC): return + def update_metadata(self, metadata: Dict[str, Any]): + """ + Update global metadata and save the updated json file + :param metadata: new global metadata dict + """ + self.dd.save_global_metadata_to_disk(metadata) + self.metadata = metadata + + def set_start_dry_live_date(self, live_dataframe: DataFrame): + key_name = "start_dry_live_date" + if key_name not in self.metadata: + metadata = self.metadata + metadata[key_name] = int( + pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp()) + self.update_metadata(metadata) + # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModel.py for an example. diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index c9efe6a3c..f42a87be7 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -230,7 +230,7 @@ def get_timerange_backtest_live_models(config: Config) -> str: dk = FreqaiDataKitchen(config) models_path = dk.get_full_models_path(config) timerange: TimeRange = TimeRange() - if not config.get("save_live_data_backtest", False): + if not config.get("freqai", {}).get("backtest_using_historic_predictions", True): timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path) else: timerange = dk.get_timerange_from_backtesting_live_dataframe() diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index ca7c19c94..2dbbd7ef5 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -261,45 +261,18 @@ def test_get_full_model_path(mocker, freqai_conf, model): assert model_path.is_dir() is True -def test_save_backtesting_live_dataframe(mocker, freqai_conf): - freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf) - 
dataframe_without_last_candle = dataframe.copy() - dataframe_without_last_candle.drop(dataframe.tail(1).index, inplace=True) - freqai_conf.update({"save_live_data_backtest": True}) - freqai.dk.save_backtesting_live_dataframe(dataframe_without_last_candle, "ADA/BTC") - saved_dataframe = freqai.dk.get_backtesting_live_dataframe() - assert len(saved_dataframe) == 1 - assert saved_dataframe.iloc[-1, 0] == dataframe_without_last_candle.iloc[-1, 0] - freqai.dk.save_backtesting_live_dataframe(dataframe, "ADA/BTC") - saved_dataframe = freqai.dk.get_backtesting_live_dataframe() - assert len(saved_dataframe) == 2 - assert saved_dataframe.iloc[-1, 0] == dataframe.iloc[-1, 0] - assert saved_dataframe.iloc[-2, 0] == dataframe.iloc[-2, 0] - - def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf): freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf) - freqai_conf.update({"save_live_data_backtest": True}) - freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC") - freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe) + freqai_conf.update({"backtest_using_historic_predictions": True}) timerange = freqai.dk.get_timerange_from_backtesting_live_dataframe() assert timerange.startts == 1516406400 assert timerange.stopts == 1517356500 -def test_get_timerange_from_backtesting_live_dataframe_folder_not_found(mocker, freqai_conf): +def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_conf): freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf) with pytest.raises( OperationalException, - match=r'Saved live data not found.*' + match=r'Historic predictions not found.*' ): freqai.dk.get_timerange_from_backtesting_live_dataframe() - - -def test_saved_live_bt_file_not_found(mocker, freqai_conf): - freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf) - with pytest.raises( - OperationalException, - match=r'.*live backtesting dataframe file not found.*' - ): - freqai.dk.get_backtesting_live_dataframe() diff --git 
a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index ed634de55..66b3bac17 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -300,37 +300,6 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): shutil.rmtree(Path(freqai.dk.full_path)) -def test_start_backtesting_from_saved_live_dataframe(mocker, freqai_conf, caplog): - freqai_conf.update({"save_live_data_backtest": True}) - freqai_conf.update({"freqai_backtest_live_models": True}) - - strategy = get_patched_freqai_strategy(mocker, freqai_conf) - exchange = get_patched_exchange(mocker, freqai_conf) - strategy.dp = DataProvider(freqai_conf, exchange) - strategy.freqai_info = freqai_conf.get("freqai", {}) - freqai = strategy.freqai - freqai.live = False - freqai.dk = FreqaiDataKitchen(freqai_conf) - timerange = TimeRange.parse_timerange("20180110-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) - sub_timerange = TimeRange.parse_timerange("20180110-20180130") - corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) - df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") - metadata = {"pair": "ADA/BTC"} - - # create a dummy live dataframe file with 10 rows - dataframe_predictions = df.tail(10).copy() - dataframe_predictions["&s_close"] = dataframe_predictions["close"] * 1.1 - freqai.dk.set_backtesting_live_dataframe_path("ADA/BTC") - freqai.dk.save_backtesting_live_dataframe_to_feather(dataframe_predictions) - - freqai.start_backtesting_from_live_saved_files(df, metadata, freqai.dk) - assert len(freqai.dk.return_dataframe) == len(df) - assert len(freqai.dk.return_dataframe[freqai.dk.return_dataframe["&s_close"] > 0]) == ( - len(dataframe_predictions)) - shutil.rmtree(Path(freqai.dk.full_path)) - - def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog): freqai_conf.get("freqai", 
{}).update({"fit_live_predictions_candles": 10}) strategy = get_patched_freqai_strategy(mocker, freqai_conf) From fdc82af883d4e6601ab7468e73a748ecc2d11fd0 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Sat, 19 Nov 2022 22:27:58 -0300 Subject: [PATCH 12/18] fix tests - update code to backtest with historic_predictions --- freqtrade/freqai/data_drawer.py | 31 ++++++++++++++++++++- freqtrade/freqai/data_kitchen.py | 31 --------------------- freqtrade/freqai/utils.py | 4 ++- tests/freqai/test_freqai_backtesting.py | 2 ++ tests/freqai/test_freqai_datadrawer.py | 37 +++++++++++++++++++++++++ tests/freqai/test_freqai_datakitchen.py | 18 +----------- 6 files changed, 73 insertions(+), 50 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index e83b05aaa..59b8e2684 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -3,7 +3,7 @@ import logging import re import shutil import threading -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Any, Dict, Tuple, TypedDict @@ -714,3 +714,32 @@ class FreqaiDataDrawer: ).reset_index(drop=True) return corr_dataframes, base_dataframes + + def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange: + """ + Returns timerange information based on historic predictions file + :return: timerange calculated from saved live data + """ + if not self.historic_predictions_path.is_file(): + raise OperationalException( + 'Historic predictions not found. 
Historic predictions data is required ' + 'to run backtest with the freqai-backtest-live-models option ' + 'and backtest_using_historic_predictions config option as true' + ) + + self.load_historic_predictions_from_disk() + + all_pairs_end_dates = [] + for pair in self.historic_predictions: + pair_historic_data = self.historic_predictions[pair] + all_pairs_end_dates.append(pair_historic_data.date_pred.max()) + + global_metadata = self.load_global_metadata_from_disk() + start_date = datetime.fromtimestamp(int(global_metadata["start_dry_live_date"])) + end_date = max(all_pairs_end_dates) + # add 1 day to string timerange to ensure BT module will load all dataframe data + end_date = end_date + timedelta(days=1) + backtesting_timerange = TimeRange( + 'date', 'date', int(start_date.timestamp()), int(end_date.timestamp()) + ) + return backtesting_timerange diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 641c95725..b364f4e7e 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1486,34 +1486,3 @@ class FreqaiDataKitchen: dataframe.columns = dataframe.columns.str.replace(c, "") return dataframe - - def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange: - """ - Returns timerange information based on historic predictions file - :return: timerange calculated from saved live data - """ - from freqtrade.freqai.data_drawer import FreqaiDataDrawer - dd = FreqaiDataDrawer(Path(self.full_path), self.config) - if not dd.historic_predictions_path.is_file(): - raise OperationalException( - 'Historic predictions not found. 
Historic predictions data is required ' - 'to run backtest with the freqai-backtest-live-models option ' - 'and backtest_using_historic_predictions config option as true' - ) - - dd.load_historic_predictions_from_disk() - - all_pairs_end_dates = [] - for pair in dd.historic_predictions: - pair_historic_data = dd.historic_predictions[pair] - all_pairs_end_dates.append(pair_historic_data.date_pred.max()) - - global_metadata = dd.load_global_metadata_from_disk() - start_date = datetime.fromtimestamp(int(global_metadata["start_dry_live_date"])) - end_date = max(all_pairs_end_dates) - # add 1 day to string timerange to ensure BT module will load all dataframe data - end_date = end_date + timedelta(days=1) - backtesting_timerange = TimeRange( - 'date', 'date', int(start_date.timestamp()), int(end_date.timestamp()) - ) - return backtesting_timerange diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index f42a87be7..fd5d448bd 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -14,6 +14,7 @@ from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds from freqtrade.exchange.exchange import market_is_active +from freqtrade.freqai.data_drawer import FreqaiDataDrawer from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist @@ -233,6 +234,7 @@ def get_timerange_backtest_live_models(config: Config) -> str: if not config.get("freqai", {}).get("backtest_using_historic_predictions", True): timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path) else: - timerange = dk.get_timerange_from_backtesting_live_dataframe() + dd = FreqaiDataDrawer(models_path, config) + timerange = dd.get_timerange_from_backtesting_live_dataframe() return timerange.timerange_str diff --git a/tests/freqai/test_freqai_backtesting.py 
b/tests/freqai/test_freqai_backtesting.py index b9e2d650a..49b27f724 100644 --- a/tests/freqai/test_freqai_backtesting.py +++ b/tests/freqai/test_freqai_backtesting.py @@ -65,6 +65,8 @@ def test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testda mocker.patch('freqtrade.optimize.backtesting.history.load_data') mocker.patch('freqtrade.optimize.backtesting.history.get_timerange', return_value=(now, now)) freqai_conf["timerange"] = "" + freqai_conf.get("freqai", {}).update({"backtest_using_historic_predictions": False}) + patched_configuration_load_config_file(mocker, freqai_conf) args = [ diff --git a/tests/freqai/test_freqai_datadrawer.py b/tests/freqai/test_freqai_datadrawer.py index 7ab963507..3abf84586 100644 --- a/tests/freqai/test_freqai_datadrawer.py +++ b/tests/freqai/test_freqai_datadrawer.py @@ -2,8 +2,11 @@ import shutil from pathlib import Path +import pytest + from freqtrade.configuration import TimeRange from freqtrade.data.dataprovider import DataProvider +from freqtrade.exceptions import OperationalException from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from tests.conftest import get_patched_exchange from tests.freqai.conftest import get_patched_freqai_strategy @@ -93,3 +96,37 @@ def test_use_strategy_to_populate_indicators(mocker, freqai_conf): assert len(df.columns) == 33 shutil.rmtree(Path(freqai.dk.full_path)) + + +def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf): + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180126-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + sub_timerange = TimeRange.parse_timerange("20180128-20180130") + _, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "ADA/BTC", 
freqai.dk) + base_df["5m"]["date_pred"] = base_df["5m"]["date"] + freqai.dd.historic_predictions = {} + freqai.dd.historic_predictions["ADA/USDT"] = base_df["5m"] + freqai.dd.save_historic_predictions_to_disk() + freqai.dd.save_global_metadata_to_disk({"start_dry_live_date": 1516406400}) + + timerange = freqai.dd.get_timerange_from_backtesting_live_dataframe() + assert timerange.startts == 1516406400 + assert timerange.stopts == 1517356500 + + +def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_conf): + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + freqai = strategy.freqai + with pytest.raises( + OperationalException, + match=r'Historic predictions not found.*' + ): + freqai.dd.get_timerange_from_backtesting_live_dataframe() diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 2dbbd7ef5..4dfc75d38 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -190,6 +190,7 @@ def test_get_timerange_from_ready_models(mocker, freqai_conf, model): freqai_conf.update({"freqaimodel": model}) freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"strategy": "freqai_test_strat"}) + freqai_conf.get("freqai", {}).update({"backtest_using_historic_predictions": False}) strategy = get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf) @@ -259,20 +260,3 @@ def test_get_full_model_path(mocker, freqai_conf, model): model_path = freqai.dk.get_full_models_path(freqai_conf) assert model_path.is_dir() is True - - -def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf): - freqai, dataframe = make_unfiltered_dataframe(mocker, freqai_conf) - freqai_conf.update({"backtest_using_historic_predictions": True}) - timerange = 
freqai.dk.get_timerange_from_backtesting_live_dataframe() - assert timerange.startts == 1516406400 - assert timerange.stopts == 1517356500 - - -def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_conf): - freqai, _ = make_unfiltered_dataframe(mocker, freqai_conf) - with pytest.raises( - OperationalException, - match=r'Historic predictions not found.*' - ): - freqai.dk.get_timerange_from_backtesting_live_dataframe() From c01f25ddc95f6dbdf91b3dddd52cda4bcbf57428 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Tue, 22 Nov 2022 13:09:09 -0300 Subject: [PATCH 13/18] update code to freqai_backtest_live_models only from historic predictions --- docs/freqai-parameter-table.md | 1 - docs/freqai-running.md | 12 +- freqtrade/freqai/data_drawer.py | 3 +- freqtrade/freqai/data_kitchen.py | 144 ++++-------------------- freqtrade/freqai/freqai_interface.py | 19 +--- freqtrade/freqai/utils.py | 9 +- tests/freqai/test_freqai_backtesting.py | 2 +- tests/freqai/test_freqai_datadrawer.py | 6 +- tests/freqai/test_freqai_datakitchen.py | 67 ----------- 9 files changed, 36 insertions(+), 227 deletions(-) diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md index 059d56a1f..c027a12b1 100644 --- a/docs/freqai-parameter-table.md +++ b/docs/freqai-parameter-table.md @@ -15,7 +15,6 @@ Mandatory parameters are marked as **Required** and have to be set in one of the | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old.
**Datatype:** Positive integer.
Default: `0` (models never expire). | `purge_old_models` | Delete obsolete models.
**Datatype:** Boolean.
Default: `False` (all historic models remain on disk). | `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`.
**Datatype:** Boolean.
Default: `False` (no models are saved). -| `backtest_using_historic_predictions` | Reuse `historic_predictions` in backtesting with [Backtest live models](freqai-running.md#backtest_live_models)) option.
Default: `True` | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)).
**Datatype:** Positive integer. | `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models.
**Datatype:** Boolean.
Default: `False`. | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)).
**Datatype:** Boolean.
Default: `False`. diff --git a/docs/freqai-running.md b/docs/freqai-running.md index d777b180e..23873547f 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -81,17 +81,9 @@ To save the models generated during a particular backtest so that you can start ### Backtest live models -FreqAI allow you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study. For that, you have 2 options: +FreqAI allow you to reuse live historic predictions through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study. -1. Set `"backtest_using_historic_predictions"` to `True` in the config. With this option, FreqAI will reuse `historic_predictions` in backtesting. This option requires less disk space and backtesting will run faster. -2. Set `"purge_old_models"` to `False` and `"backtest_using_historic_predictions"` to `False` in the config. In this case, FreqAI will use the saved models to make the predictions in backtesting. This option requires more disk space and the backtest will have a longer execution time. - -The `--timerange` parameter must not be informed, as it will be automatically calculated through the training end dates of the models. - -Each model has an identifier derived from the training end date. If you have only 1 model trained, FreqAI will backtest from the training end date until the current date. If you have more than 1 model, each model will perform the backtesting according to the training end date until the training end date of the next model and so on. For the last model, the period of the previous model will be used for the execution. - -!!! Note - Currently, there is no checking for expired models, even if the `expired_hours` parameter is set. 
+The `--timerange` parameter must not be informed, as it will be automatically calculated through the data in historic predictions file. ### Downloading data to cover the full backtest period diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 59b8e2684..9f1e27796 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -715,7 +715,7 @@ class FreqaiDataDrawer: return corr_dataframes, base_dataframes - def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange: + def get_timerange_from_live_historic_predictions(self) -> TimeRange: """ Returns timerange information based on historic predictions file :return: timerange calculated from saved live data @@ -724,7 +724,6 @@ class FreqaiDataDrawer: raise OperationalException( 'Historic predictions not found. Historic predictions data is required ' 'to run backtest with the freqai-backtest-live-models option ' - 'and backtest_using_historic_predictions config option as true' ) self.load_historic_predictions_from_disk() diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index b364f4e7e..f75fd3dd8 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1,7 +1,7 @@ import copy import logging import shutil -from datetime import datetime, timedelta, timezone +from datetime import datetime, timezone from math import cos, sin from pathlib import Path from typing import Any, Dict, List, Tuple @@ -86,14 +86,7 @@ class FreqaiDataKitchen: if not self.live: self.full_path = self.get_full_models_path(self.config) - if self.backtest_live_models: - if self.pair and not ( - self.freqai_config.get("backtest_using_historic_predictions", True) - ): - self.set_timerange_from_ready_models() - (self.training_timeranges, - self.backtesting_timeranges) = self.split_timerange_live_models() - else: + if not self.backtest_live_models: self.full_timerange = self.create_fulltimerange( self.config["timerange"], 
self.freqai_config.get("train_period_days", 0) ) @@ -458,28 +451,28 @@ class FreqaiDataKitchen: # print(tr_training_list, tr_backtesting_list) return tr_training_list_timerange, tr_backtesting_list_timerange - def split_timerange_live_models( - self - ) -> Tuple[list, list]: + # def split_timerange_live_models( + # self + # ) -> Tuple[list, list]: - tr_backtesting_list_timerange = [] - asset = self.pair.split("/")[0] - if asset not in self.backtest_live_models_data["assets_end_dates"]: - raise OperationalException( - f"Model not available for pair {self.pair}. " - "Please, try again after removing this pair from the configuration file." - ) - asset_data = self.backtest_live_models_data["assets_end_dates"][asset] - backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"] - model_end_dates = [x for x in asset_data] - model_end_dates.append(backtesting_timerange.stopts) - model_end_dates.sort() - for index, item in enumerate(model_end_dates): - if len(model_end_dates) > (index + 1): - tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1]) - tr_backtesting_list_timerange.append(tr_to_add) + # tr_backtesting_list_timerange = [] + # asset = self.pair.split("/")[0] + # if asset not in self.backtest_live_models_data["assets_end_dates"]: + # raise OperationalException( + # f"Model not available for pair {self.pair}. " + # "Please, try again after removing this pair from the configuration file." 
+ # ) + # asset_data = self.backtest_live_models_data["assets_end_dates"][asset] + # backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"] + # model_end_dates = [x for x in asset_data] + # model_end_dates.append(backtesting_timerange.stopts) + # model_end_dates.sort() + # for index, item in enumerate(model_end_dates): + # if len(model_end_dates) > (index + 1): + # tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1]) + # tr_backtesting_list_timerange.append(tr_to_add) - return tr_backtesting_list_timerange, tr_backtesting_list_timerange + # return tr_backtesting_list_timerange, tr_backtesting_list_timerange def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame: """ @@ -1371,17 +1364,6 @@ class FreqaiDataKitchen: ) return False - def set_timerange_from_ready_models(self): - backtesting_timerange, \ - assets_end_dates = ( - self.get_timerange_and_assets_end_dates_from_ready_models(self.full_path)) - - self.backtest_live_models_data = { - "backtesting_timerange": backtesting_timerange, - "assets_end_dates": assets_end_dates - } - return - def get_full_models_path(self, config: Config) -> Path: """ Returns default FreqAI model path @@ -1392,88 +1374,6 @@ class FreqaiDataKitchen: config["user_data_dir"] / "models" / str(freqai_config.get("identifier")) ) - def get_timerange_and_assets_end_dates_from_ready_models( - self, models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]: - """ - Returns timerange information based on a FreqAI model directory - :param models_path: FreqAI model path - - :return: a Tuple with (Timerange calculated from directory and - a Dict with pair and model end training dates info) - """ - all_models_end_dates = [] - assets_end_dates: Dict[str, Any] = self.get_assets_timestamps_training_from_ready_models( - models_path) - for key in assets_end_dates: - for model_end_date in assets_end_dates[key]: - if model_end_date not in all_models_end_dates: - 
all_models_end_dates.append(model_end_date) - - if len(all_models_end_dates) == 0: - raise OperationalException( - 'At least 1 saved model is required to ' - 'run backtest with the freqai-backtest-live-models option' - ) - - if len(all_models_end_dates) == 1: - logger.warning( - "Only 1 model was found. Backtesting will run with the " - "timerange from the end of the training date to the current date" - ) - - finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp()) - if len(all_models_end_dates) > 1: - # After last model end date, use the same period from previous model - # to finish the backtest - all_models_end_dates.sort(reverse=True) - finish_timestamp = all_models_end_dates[0] + \ - (all_models_end_dates[0] - all_models_end_dates[1]) - - all_models_end_dates.append(finish_timestamp) - all_models_end_dates.sort() - start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates), - timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) - end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates), - timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) - - # add 1 day to string timerange to ensure BT module will load all dataframe data - end_date = end_date + timedelta(days=1) - backtesting_timerange = TimeRange( - 'date', 'date', int(start_date.timestamp()), int(end_date.timestamp()) - ) - return backtesting_timerange, assets_end_dates - - def get_assets_timestamps_training_from_ready_models( - self, models_path: Path) -> Dict[str, Any]: - """ - Scan the models path and returns all assets end training dates (timestamp) - :param models_path: FreqAI model path - - :return: a Dict with asset and model end training dates info - """ - assets_end_dates: Dict[str, Any] = {} - if not models_path.is_dir(): - raise OperationalException( - 'Model folders not found. 
Saved models are required ' - 'to run backtest with the freqai-backtest-live-models option' - ) - for model_dir in models_path.iterdir(): - if str(model_dir.name).startswith("sub-train"): - model_end_date = int(model_dir.name.split("_")[1]) - asset = model_dir.name.split("_")[0].replace("sub-train-", "") - model_file_name = ( - f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" - "_model.joblib" - ) - - model_path_file = Path(model_dir / model_file_name) - if model_path_file.is_file(): - if asset not in assets_end_dates: - assets_end_dates[asset] = [] - assets_end_dates[asset].append(model_end_date) - - return assets_end_dates - def remove_special_chars_from_feature_names(self, dataframe: pd.DataFrame) -> pd.DataFrame: """ Remove all special characters from feature strings (:) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 473fe939f..80348fda8 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -68,10 +68,6 @@ class IFreqaiModel(ABC): self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True) if self.save_backtest_models: logger.info('Backtesting module configured to save all models.') - self.backtest_using_historic_predictions: bool = self.freqai_info.get( - "backtest_using_historic_predictions", True) - if self.backtest_using_historic_predictions: - logger.info('Backtesting live models configured to use historic predictions.') self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode) # set current candle to arbitrary historical date @@ -148,23 +144,18 @@ class IFreqaiModel(ABC): elif not self.follow_mode: self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) if self.dk.backtest_live_models: - if self.backtest_using_historic_predictions: - logger.info( - "Backtesting using historic predictions (live models)") - else: - logger.info( - f"Backtesting {len(self.dk.backtesting_timeranges)} " - "timeranges 
(live models)") + logger.info( + "Backtesting using historic predictions (live models)") else: logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") dataframe = self.dk.use_strategy_to_populate_indicators( strategy, prediction_dataframe=dataframe, pair=metadata["pair"] ) - if not self.backtest_using_historic_predictions: + if not self.config.get("freqai_backtest_live_models", False): dk = self.start_backtesting(dataframe, metadata, self.dk) dataframe = dk.remove_features_from_df(dk.return_dataframe) else: - dk = self.start_backtesting_from_live_saved_files( + dk = self.start_backtesting_from_historic_predictions( dataframe, metadata, self.dk) dataframe = dk.return_dataframe @@ -330,7 +321,7 @@ class IFreqaiModel(ABC): return dk - def start_backtesting_from_live_saved_files( + def start_backtesting_from_historic_predictions( self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen ) -> FreqaiDataKitchen: """ diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index fd5d448bd..806e3ca15 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -230,11 +230,6 @@ def get_timerange_backtest_live_models(config: Config) -> str: """ dk = FreqaiDataKitchen(config) models_path = dk.get_full_models_path(config) - timerange: TimeRange = TimeRange() - if not config.get("freqai", {}).get("backtest_using_historic_predictions", True): - timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path) - else: - dd = FreqaiDataDrawer(models_path, config) - timerange = dd.get_timerange_from_backtesting_live_dataframe() - + dd = FreqaiDataDrawer(models_path, config) + timerange = dd.get_timerange_from_live_historic_predictions() return timerange.timerange_str diff --git a/tests/freqai/test_freqai_backtesting.py b/tests/freqai/test_freqai_backtesting.py index 49b27f724..60963e762 100644 --- a/tests/freqai/test_freqai_backtesting.py +++ b/tests/freqai/test_freqai_backtesting.py @@ -81,7 +81,7 @@ def 
test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testda bt_config = setup_optimize_configuration(args, RunMode.BACKTEST) with pytest.raises(OperationalException, - match=r".* Saved models are required to run backtest .*"): + match=r".* Historic predictions data is required to run backtest .*"): Backtesting(bt_config) Backtesting.cleanup() diff --git a/tests/freqai/test_freqai_datadrawer.py b/tests/freqai/test_freqai_datadrawer.py index 3abf84586..da3b8f9c1 100644 --- a/tests/freqai/test_freqai_datadrawer.py +++ b/tests/freqai/test_freqai_datadrawer.py @@ -98,7 +98,7 @@ def test_use_strategy_to_populate_indicators(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) -def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf): +def test_get_timerange_from_live_historic_predictions(mocker, freqai_conf): strategy = get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf) strategy.dp = DataProvider(freqai_conf, exchange) @@ -115,7 +115,7 @@ def test_get_timerange_from_backtesting_live_dataframe(mocker, freqai_conf): freqai.dd.save_historic_predictions_to_disk() freqai.dd.save_global_metadata_to_disk({"start_dry_live_date": 1516406400}) - timerange = freqai.dd.get_timerange_from_backtesting_live_dataframe() + timerange = freqai.dd.get_timerange_from_live_historic_predictions() assert timerange.startts == 1516406400 assert timerange.stopts == 1517356500 @@ -129,4 +129,4 @@ def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_co OperationalException, match=r'Historic predictions not found.*' ): - freqai.dd.get_timerange_from_backtesting_live_dataframe() + freqai.dd.get_timerange_from_live_historic_predictions() diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 4dfc75d38..0dc897916 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -9,7 +9,6 @@ from 
freqtrade.configuration import TimeRange from freqtrade.data.dataprovider import DataProvider from freqtrade.exceptions import OperationalException from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.utils import get_timerange_backtest_live_models from tests.conftest import get_patched_exchange, log_has_re from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy, make_data_dictionary, make_unfiltered_dataframe) @@ -166,72 +165,6 @@ def test_make_train_test_datasets(mocker, freqai_conf): assert len(data_dictionary['train_features'].index) == 1916 -def test_get_pairs_timestamp_validation(mocker, freqai_conf): - exchange = get_patched_exchange(mocker, freqai_conf) - strategy = get_patched_freqai_strategy(mocker, freqai_conf) - strategy.dp = DataProvider(freqai_conf, exchange) - strategy.freqai_info = freqai_conf.get("freqai", {}) - freqai = strategy.freqai - freqai.live = True - freqai.dk = FreqaiDataKitchen(freqai_conf) - freqai_conf['freqai'].update({"identifier": "invalid_id"}) - model_path = freqai.dk.get_full_models_path(freqai_conf) - with pytest.raises( - OperationalException, - match=r'.*required to run backtest with the freqai-backtest-live-models.*' - ): - freqai.dk.get_assets_timestamps_training_from_ready_models(model_path) - - -@pytest.mark.parametrize('model', [ - 'LightGBMRegressor' - ]) -def test_get_timerange_from_ready_models(mocker, freqai_conf, model): - freqai_conf.update({"freqaimodel": model}) - freqai_conf.update({"timerange": "20180110-20180130"}) - freqai_conf.update({"strategy": "freqai_test_strat"}) - freqai_conf.get("freqai", {}).update({"backtest_using_historic_predictions": False}) - - strategy = get_patched_freqai_strategy(mocker, freqai_conf) - exchange = get_patched_exchange(mocker, freqai_conf) - strategy.dp = DataProvider(freqai_conf, exchange) - strategy.freqai_info = freqai_conf.get("freqai", {}) - freqai = strategy.freqai - freqai.live = True - freqai.dk = 
FreqaiDataKitchen(freqai_conf) - timerange = TimeRange.parse_timerange("20180101-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) - - freqai.dd.pair_dict = MagicMock() - - data_load_timerange = TimeRange.parse_timerange("20180101-20180130") - - # 1516233600 (2018-01-18 00:00) - Start Training 1 - # 1516406400 (2018-01-20 00:00) - End Training 1 (Backtest slice 1) - # 1516579200 (2018-01-22 00:00) - End Training 2 (Backtest slice 2) - # 1516838400 (2018-01-25 00:00) - End Timerange - - new_timerange = TimeRange("date", "date", 1516233600, 1516406400) - freqai.extract_data_and_train_model( - new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - - new_timerange = TimeRange("date", "date", 1516406400, 1516579200) - freqai.extract_data_and_train_model( - new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - - model_path = freqai.dk.get_full_models_path(freqai_conf) - (backtesting_timerange, - pairs_end_dates) = freqai.dk.get_timerange_and_assets_end_dates_from_ready_models( - models_path=model_path) - - assert len(pairs_end_dates["ADA"]) == 2 - assert backtesting_timerange.startts == 1516406400 - assert backtesting_timerange.stopts == 1516838400 - - backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf) - assert backtesting_string_timerange == '20180120-20180125' - - @pytest.mark.parametrize('model', [ 'LightGBMRegressor' ]) From d09157efb89a947e24451babd5b1ff11f3fa58e0 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Tue, 22 Nov 2022 15:15:42 -0300 Subject: [PATCH 14/18] update code to use one prediction file / pair --- freqtrade/freqai/data_kitchen.py | 39 ++++++++++++++++++--------- freqtrade/freqai/freqai_interface.py | 1 + tests/freqai/test_freqai_interface.py | 14 ++++++++-- 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index f75fd3dd8..65f3483af 100644 --- a/freqtrade/freqai/data_kitchen.py +++ 
b/freqtrade/freqai/data_kitchen.py @@ -9,7 +9,7 @@ from typing import Any, Dict, List, Tuple import numpy as np import numpy.typing as npt import pandas as pd -from pandas import DataFrame +from pandas import DataFrame, HDFStore from scipy import stats from sklearn import linear_model from sklearn.cluster import DBSCAN @@ -74,6 +74,7 @@ class FreqaiDataKitchen: self.training_features_list: List = [] self.model_filename: str = "" self.backtesting_results_path = Path() + self.backtesting_h5_data: HDFStore = {} self.backtest_predictions_folder: str = "backtesting_predictions" self.live = live self.pair = pair @@ -1319,7 +1320,7 @@ class FreqaiDataKitchen: if not full_predictions_folder.is_dir(): full_predictions_folder.mkdir(parents=True, exist_ok=True) - append_df.to_hdf(self.backtesting_results_path, key='append_df', mode='w') + append_df.to_hdf(self.backtesting_results_path, key=self.model_filename) def get_backtesting_prediction( self @@ -1327,9 +1328,26 @@ class FreqaiDataKitchen: """ Get prediction dataframe from h5 file format """ - append_df = pd.read_hdf(self.backtesting_results_path) + append_df = self.backtesting_h5_data[self.model_filename] return append_df + def load_prediction_pair_file( + self + ) -> None: + """ + Load prediction file if it exists + """ + pair_file_name = self.pair.split(':')[0].replace('/', '_').lower() + path_to_predictionfile = Path(self.full_path / + self.backtest_predictions_folder / + f"{pair_file_name}_prediction.h5") + self.backtesting_results_path = path_to_predictionfile + file_exists = path_to_predictionfile.is_file() + if file_exists: + self.backtesting_h5_data = pd.HDFStore(path_to_predictionfile) + else: + self.backtesting_h5_data = {} + def check_if_backtest_prediction_is_valid( self, len_backtest_df: int @@ -1341,17 +1359,11 @@ class FreqaiDataKitchen: :return: :boolean: whether the prediction file is valid. 
""" - path_to_predictionfile = Path(self.full_path / - self.backtest_predictions_folder / - f"{self.model_filename}_prediction.h5") - self.backtesting_results_path = path_to_predictionfile - - file_exists = path_to_predictionfile.is_file() - - if file_exists: + if self.model_filename in self.backtesting_h5_data: append_df = self.get_backtesting_prediction() if len(append_df) == len_backtest_df and 'date' in append_df: - logger.info(f"Found backtesting prediction file at {path_to_predictionfile}") + logger.info("Found backtesting prediction file " + f"at {self.backtesting_results_path.name}") return True else: logger.info("A new backtesting prediction file is required. " @@ -1360,7 +1372,8 @@ class FreqaiDataKitchen: return False else: logger.info( - f"Could not find backtesting prediction file at {path_to_predictionfile}" + "Could not find backtesting prediction file " + f"at {self.backtesting_results_path.name}" ) return False diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 80348fda8..21851b3b6 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -260,6 +260,7 @@ class IFreqaiModel(ABC): self.pair_it += 1 train_it = 0 + dk.load_prediction_pair_file() # Loop enforcing the sliding window training/backtesting paradigm # tr_train is the training time range e.g. 1 historical month # tr_backtest is the backtesting time range e.g. 
the week directly diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 66b3bac17..6e2e774fe 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -263,7 +263,9 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") - metadata = {"pair": "ADA/BTC"} + pair = "ADA/BTC" + metadata = {"pair": pair} + freqai.dk.pair = pair freqai.start_backtesting(df, metadata, freqai.dk) model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()] @@ -286,6 +288,9 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") + pair = "ADA/BTC" + metadata = {"pair": pair} + freqai.dk.pair = pair freqai.start_backtesting(df, metadata, freqai.dk) assert log_has_re( @@ -293,9 +298,14 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): caplog, ) + pair = "ETH/BTC" + metadata = {"pair": pair} + freqai.dk.pair = pair + freqai.start_backtesting(df, metadata, freqai.dk) + path = (freqai.dd.full_path / freqai.dk.backtest_predictions_folder) prediction_files = [x for x in path.iterdir() if x.is_file()] - assert len(prediction_files) == 5 + assert len(prediction_files) == 2 shutil.rmtree(Path(freqai.dk.full_path)) From 8ea58ab35243cd238a989faabc429160b180cb52 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Tue, 29 Nov 2022 10:38:35 -0300 Subject: [PATCH 15/18] change BT prediction files to feather format --- freqtrade/freqai/data_kitchen.py | 40 ++++++++++------------------ freqtrade/freqai/freqai_interface.py | 1 - 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index d438aaede..1c4177381 100644 --- a/freqtrade/freqai/data_kitchen.py +++ 
b/freqtrade/freqai/data_kitchen.py @@ -1317,41 +1317,24 @@ class FreqaiDataKitchen: self, append_df: DataFrame ) -> None: """ - Save prediction dataframe from backtesting to h5 file format + Save prediction dataframe from backtesting to feather file format :param append_df: dataframe for backtesting period """ full_predictions_folder = Path(self.full_path / self.backtest_predictions_folder) if not full_predictions_folder.is_dir(): full_predictions_folder.mkdir(parents=True, exist_ok=True) - append_df.to_hdf(self.backtesting_results_path, key=self.model_filename) + append_df.to_feather(self.backtesting_results_path) def get_backtesting_prediction( self ) -> DataFrame: """ - Get prediction dataframe from h5 file format + Get prediction dataframe from feather file format """ - append_df = self.backtesting_h5_data[self.model_filename] + append_df = pd.read_feather(self.backtesting_results_path) return append_df - def load_prediction_pair_file( - self - ) -> None: - """ - Load prediction file if it exists - """ - pair_file_name = self.pair.split(':')[0].replace('/', '_').lower() - path_to_predictionfile = Path(self.full_path / - self.backtest_predictions_folder / - f"{pair_file_name}_prediction.h5") - self.backtesting_results_path = path_to_predictionfile - file_exists = path_to_predictionfile.is_file() - if file_exists: - self.backtesting_h5_data = pd.HDFStore(path_to_predictionfile) - else: - self.backtesting_h5_data = {} - def check_if_backtest_prediction_is_valid( self, len_backtest_df: int @@ -1363,11 +1346,17 @@ class FreqaiDataKitchen: :return: :boolean: whether the prediction file is valid. 
""" - if self.model_filename in self.backtesting_h5_data: + path_to_predictionfile = Path(self.full_path / + self.backtest_predictions_folder / + f"{self.model_filename}_prediction.feather") + self.backtesting_results_path = path_to_predictionfile + + file_exists = path_to_predictionfile.is_file() + + if file_exists: append_df = self.get_backtesting_prediction() if len(append_df) == len_backtest_df and 'date' in append_df: - logger.info("Found backtesting prediction file " - f"at {self.backtesting_results_path.name}") + logger.info(f"Found backtesting prediction file at {path_to_predictionfile}") return True else: logger.info("A new backtesting prediction file is required. " @@ -1376,8 +1365,7 @@ class FreqaiDataKitchen: return False else: logger.info( - "Could not find backtesting prediction file " - f"at {self.backtesting_results_path.name}" + f"Could not find backtesting prediction file at {path_to_predictionfile}" ) return False diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index b2f931760..129571d4a 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -275,7 +275,6 @@ class IFreqaiModel(ABC): self.pair_it += 1 train_it = 0 - dk.load_prediction_pair_file() # Loop enforcing the sliding window training/backtesting paradigm # tr_train is the training time range e.g. 1 historical month # tr_backtest is the backtesting time range e.g. 
the week directly From 4571aedb33bac90dcb7f669bfd4c707f1c760173 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Wed, 30 Nov 2022 00:53:35 +0100 Subject: [PATCH 16/18] consolidate and clean code --- docs/freqai-running.md | 4 +- freqtrade/freqai/data_kitchen.py | 23 ----------- freqtrade/freqai/freqai_interface.py | 59 +++++++++++++--------------- 3 files changed, 29 insertions(+), 57 deletions(-) diff --git a/docs/freqai-running.md b/docs/freqai-running.md index 23873547f..b046e7bb8 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -79,11 +79,11 @@ To change your **features**, you **must** set a new `identifier` in the config t To save the models generated during a particular backtest so that you can start a live deployment from one of them instead of training a new model, you must set `save_backtest_models` to `True` in the config. -### Backtest live models +### Backtest live collected predictions FreqAI allow you to reuse live historic predictions through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in dry/run for comparison or other study. -The `--timerange` parameter must not be informed, as it will be automatically calculated through the data in historic predictions file. +The `--timerange` parameter must not be provided, as it will be automatically calculated from the data in the historic predictions file.
### Downloading data to cover the full backtest period diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 1c4177381..3201fc451 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -75,7 +75,6 @@ class FreqaiDataKitchen: self.training_features_list: List = [] self.model_filename: str = "" self.backtesting_results_path = Path() - self.backtesting_h5_data: HDFStore = {} self.backtest_predictions_folder: str = "backtesting_predictions" self.live = live self.pair = pair @@ -456,28 +455,6 @@ class FreqaiDataKitchen: # print(tr_training_list, tr_backtesting_list) return tr_training_list_timerange, tr_backtesting_list_timerange - # def split_timerange_live_models( - # self - # ) -> Tuple[list, list]: - - # tr_backtesting_list_timerange = [] - # asset = self.pair.split("/")[0] - # if asset not in self.backtest_live_models_data["assets_end_dates"]: - # raise OperationalException( - # f"Model not available for pair {self.pair}. " - # "Please, try again after removing this pair from the configuration file." 
- # ) - # asset_data = self.backtest_live_models_data["assets_end_dates"][asset] - # backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"] - # model_end_dates = [x for x in asset_data] - # model_end_dates.append(backtesting_timerange.stopts) - # model_end_dates.sort() - # for index, item in enumerate(model_end_dates): - # if len(model_end_dates) > (index + 1): - # tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1]) - # tr_backtesting_list_timerange.append(tr_to_add) - - # return tr_backtesting_list_timerange, tr_backtesting_list_timerange def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame: """ diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 129571d4a..cf7c4151b 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -55,7 +55,6 @@ class IFreqaiModel(ABC): def __init__(self, config: Config) -> None: self.config = config - self.metadata: Dict[str, Any] = {} self.assert_config(self.config) self.freqai_info: Dict[str, Any] = config["freqai"] self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get( @@ -102,7 +101,7 @@ class IFreqaiModel(ABC): self.get_corr_dataframes: bool = True self._threads: List[threading.Thread] = [] self._stop_event = threading.Event() - self.metadata = self.dd.load_global_metadata_from_disk() + self.metadata: Dict[str, Any] = self.dd.load_global_metadata_from_disk() self.data_provider: Optional[DataProvider] = None self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1) @@ -148,18 +147,13 @@ class IFreqaiModel(ABC): # the concatenated results for the full backtesting period back to the strategy. 
elif not self.follow_mode: self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) - if self.dk.backtest_live_models: - logger.info( - "Backtesting using historic predictions (live models)") - else: - logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") - dataframe = self.dk.use_strategy_to_populate_indicators( - strategy, prediction_dataframe=dataframe, pair=metadata["pair"] - ) if not self.config.get("freqai_backtest_live_models", False): + logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") dk = self.start_backtesting(dataframe, metadata, self.dk) dataframe = dk.remove_features_from_df(dk.return_dataframe) else: + logger.info( + "Backtesting using historic predictions (live models)") dk = self.start_backtesting_from_historic_predictions( dataframe, metadata, self.dk) dataframe = dk.return_dataframe @@ -167,7 +161,6 @@ class IFreqaiModel(ABC): self.clean_up() if self.live: self.inference_timer('stop', metadata["pair"]) - self.set_start_dry_live_date(dataframe) return dataframe @@ -336,27 +329,6 @@ class IFreqaiModel(ABC): return dk - def start_backtesting_from_historic_predictions( - self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen - ) -> FreqaiDataKitchen: - """ - :param dataframe: DataFrame = strategy passed dataframe - :param metadata: Dict = pair metadata - :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only - :return: - FreqaiDataKitchen = Data management/analysis tool associated to present pair only - """ - pair = metadata["pair"] - dk.return_dataframe = dataframe - saved_dataframe = self.dd.historic_predictions[pair] - columns_to_drop = list(set(saved_dataframe.columns).intersection( - dk.return_dataframe.columns)) - dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop)) - dk.return_dataframe = pd.merge( - dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred") - # dk.return_dataframe = 
dk.return_dataframe[saved_dataframe.columns].fillna(0) - return dk - def start_live( self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen ) -> FreqaiDataKitchen: @@ -665,6 +637,8 @@ class IFreqaiModel(ABC): self.dd.historic_predictions[pair] = pred_df hist_preds_df = self.dd.historic_predictions[pair] + self.set_start_dry_live_date(pred_df) + for label in hist_preds_df.columns: if hist_preds_df[label].dtype == object: continue @@ -913,6 +887,27 @@ class IFreqaiModel(ABC): pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp()) self.update_metadata(metadata) + def start_backtesting_from_historic_predictions( + self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen + ) -> FreqaiDataKitchen: + """ + :param dataframe: DataFrame = strategy passed dataframe + :param metadata: Dict = pair metadata + :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only + :return: + FreqaiDataKitchen = Data management/analysis tool associated to present pair only + """ + pair = metadata["pair"] + dk.return_dataframe = dataframe + saved_dataframe = self.dd.historic_predictions[pair] + columns_to_drop = list(set(saved_dataframe.columns).intersection( + dk.return_dataframe.columns)) + dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop)) + dk.return_dataframe = pd.merge( + dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred") + # dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0) + return dk + # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModel.py for an example. 
From 17cf3c7e837123620988908a085d190d9afa9b54 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Wed, 30 Nov 2022 08:28:45 -0300 Subject: [PATCH 17/18] bug fixes and removed fillna from fit_live_predictions --- freqtrade/freqai/freqai_interface.py | 8 ++++++-- tests/freqai/test_freqai_interface.py | 3 ++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index cf7c4151b..3386d2881 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -147,6 +147,9 @@ class IFreqaiModel(ABC): # the concatenated results for the full backtesting period back to the strategy. elif not self.follow_mode: self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) + dataframe = self.dk.use_strategy_to_populate_indicators( + strategy, prediction_dataframe=dataframe, pair=metadata["pair"] + ) if not self.config.get("freqai_backtest_live_models", False): logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") dk = self.start_backtesting(dataframe, metadata, self.dk) @@ -637,7 +640,7 @@ class IFreqaiModel(ABC): self.dd.historic_predictions[pair] = pred_df hist_preds_df = self.dd.historic_predictions[pair] - self.set_start_dry_live_date(pred_df) + self.set_start_dry_live_date(strat_df) for label in hist_preds_df.columns: if hist_preds_df[label].dtype == object: @@ -680,7 +683,7 @@ class IFreqaiModel(ABC): if self.dd.historic_predictions[dk.pair][label].dtype == object: continue f = spy.stats.norm.fit( - self.dd.historic_predictions[dk.pair][label].fillna(0).tail(num_candles)) + self.dd.historic_predictions[dk.pair][label].tail(num_candles)) dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1] return @@ -844,6 +847,7 @@ class IFreqaiModel(ABC): """ fit_live_predictions_candles = self.freqai_info.get("fit_live_predictions_candles", 0) if fit_live_predictions_candles: + logger.info("Applying fit_live_predictions in 
backtesting") label_columns = [col for col in dk.full_df.columns if ( col.startswith("&") and not (col.startswith("&") and col.endswith("_mean")) and diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 605485e12..c53137093 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -363,7 +363,8 @@ def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog): corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk) df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") freqai.dk.pair = "ADA/BTC" - freqai.dk.full_df = df + freqai.dk.full_df = df.fillna(0) + freqai.dk.full_df assert "&-s_close_mean" not in freqai.dk.full_df.columns assert "&-s_close_std" not in freqai.dk.full_df.columns freqai.backtesting_fit_live_predictions(freqai.dk) From 79821ebb33bf6eea901e51ae4e24ba8e16837ac4 Mon Sep 17 00:00:00 2001 From: Wagner Costa Date: Wed, 30 Nov 2022 08:41:44 -0300 Subject: [PATCH 18/18] fix flake8 errors --- freqtrade/freqai/data_kitchen.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 3201fc451..c6f22e468 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -10,7 +10,7 @@ import numpy as np import numpy.typing as npt import pandas as pd import psutil -from pandas import DataFrame, HDFStore +from pandas import DataFrame from scipy import stats from sklearn import linear_model from sklearn.cluster import DBSCAN @@ -455,7 +455,6 @@ class FreqaiDataKitchen: # print(tr_training_list, tr_backtesting_list) return tr_training_list_timerange, tr_backtesting_list_timerange - def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame: """ Given a full dataframe, extract the user desired window