From 3ee7eb63f799e6475afe27df23424a58f0a10a86 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Sat, 24 Sep 2022 12:28:52 -0300 Subject: [PATCH 01/22] starting backtest live models --- freqtrade/freqai/data_kitchen.py | 33 ++++++++++++++++++++++++++++ freqtrade/freqai/freqai_interface.py | 1 + 2 files changed, 34 insertions(+) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 005005368..fc3aeca72 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1283,3 +1283,36 @@ class FreqaiDataKitchen: f"Could not find backtesting prediction file at {path_to_predictionfile}" ) return file_exists + + def get_timerange_from_ready_models(self): + return self.gen_get_timerange_from_ready_models(self.full_path) + + def gen_get_timerange_from_ready_models(self, models_path: Path): + all_models_end_dates = [] + pairs_end_dates: Dict[str, Any] = {} + for model_dir in models_path.iterdir(): + if str(model_dir.name).startswith("sub-train"): + model_end_date = model_dir.name.split("_")[1] + pair = model_dir.name.split("_")[0].replace("sub-train-", "") + model_file_name = f"cb\ + _{str(model_dir.name).replace('sub-train-', '').lower()}_model.joblib" + model_path_file = Path(model_dir / model_file_name) + if model_path_file.is_file(): + if pair not in pairs_end_dates: + pairs_end_dates[pair] = [] + + pairs_end_dates[pair].append({ + "model_end_date": int(model_end_date), + "model_path_file": model_path_file, + "model_dir": model_dir + }) + + if model_end_date not in all_models_end_dates: + all_models_end_dates.append(int(model_end_date)) + + start = datetime.fromtimestamp(min(all_models_end_dates), tz=timezone.utc) + stop = datetime.fromtimestamp(max(all_models_end_dates), tz=timezone.utc) + backtesting_string_timerange = f"{start.strftime('%Y%m%d')}-{stop.strftime('%Y%m%d')}" + backtesting_timerange = TimeRange('date', 'date', min(all_models_end_dates), + max(all_models_end_dates)) + return 
backtesting_timerange, backtesting_string_timerange, pairs_end_dates diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index e0a45fb38..2315d4017 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -134,6 +134,7 @@ class IFreqaiModel(ABC): # the concatenated results for the full backtesting period back to the strategy. elif not self.follow_mode: self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) + self.dk.get_timerange_from_ready_models() logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") dataframe = self.dk.use_strategy_to_populate_indicators( strategy, prediction_dataframe=dataframe, pair=metadata["pair"] From 7f116db95e187cbadebcc5fca2274bc64a277bb7 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Sat, 24 Sep 2022 13:01:53 -0300 Subject: [PATCH 02/22] added generic function to get timerange from existent models --- freqtrade/freqai/data_kitchen.py | 16 ++++- freqtrade/freqai/freqai_interface.py | 91 +++++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 4 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 58279c7e3..b9fc3f1b2 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -99,6 +99,7 @@ class FreqaiDataKitchen: self.train_dates: DataFrame = pd.DataFrame() self.unique_classes: Dict[str, list] = {} self.unique_class_list: list = [] + self.backtest_live_models_data: Dict[str, Any] = {} def set_paths( self, @@ -1262,7 +1263,15 @@ class FreqaiDataKitchen: return file_exists def get_timerange_from_ready_models(self): - return self.gen_get_timerange_from_ready_models(self.full_path) + backtesting_timerange, \ + backtesting_string_timerange, \ + pairs_end_dates = self.gen_get_timerange_from_ready_models(self.full_path) + self.backtest_live_models_data = { + "backtesting_timerange": backtesting_timerange, + "backtesting_string_timerange": 
backtesting_string_timerange, + "pairs_end_dates": pairs_end_dates + } + return def gen_get_timerange_from_ready_models(self, models_path: Path): all_models_end_dates = [] @@ -1271,8 +1280,9 @@ class FreqaiDataKitchen: if str(model_dir.name).startswith("sub-train"): model_end_date = model_dir.name.split("_")[1] pair = model_dir.name.split("_")[0].replace("sub-train-", "") - model_file_name = f"cb\ - _{str(model_dir.name).replace('sub-train-', '').lower()}_model.joblib" + model_file_name = (f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}") + model_file_name = f"{model_file_name}_model.joblib" + model_path_file = Path(model_dir / model_file_name) if model_path_file.is_file(): if pair not in pairs_end_dates: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 2315d4017..dda9b8bcc 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -139,7 +139,12 @@ class IFreqaiModel(ABC): dataframe = self.dk.use_strategy_to_populate_indicators( strategy, prediction_dataframe=dataframe, pair=metadata["pair"] ) - dk = self.start_backtesting(dataframe, metadata, self.dk) + + backtest_live_models = True # temp + if not backtest_live_models: + dk = self.start_backtesting(dataframe, metadata, self.dk) + else: + dk = self.start_backtesting_live_models(dataframe, metadata, self.dk) dataframe = dk.remove_features_from_df(dk.return_dataframe) self.clean_up() @@ -304,6 +309,90 @@ class IFreqaiModel(ABC): return dk + def start_backtesting_live_models( + self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen + ) -> FreqaiDataKitchen: + """ + The main broad execution for backtesting. For backtesting, each pair enters and then gets + trained for each window along the sliding window defined by "train_period_days" + (training window) and "backtest_period_days" (backtest window, i.e. window immediately + following the training window). 
FreqAI slides the window and sequentially builds + the backtesting results before returning the concatenated results for the full + backtesting period back to the strategy. + :param dataframe: DataFrame = strategy passed dataframe + :param metadata: Dict = pair metadata + :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only + :return: + FreqaiDataKitchen = Data management/analysis tool associated to present pair only + """ + + self.pair_it += 1 + train_it = 0 + # Loop enforcing the sliding window training/backtesting paradigm + # tr_train is the training time range e.g. 1 historical month + # tr_backtest is the backtesting time range e.g. the week directly + # following tr_train. Both of these windows slide through the + # entire backtest + for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges): + pair = metadata["pair"] + (_, _, _) = self.dd.get_pair_dict_info(pair) + train_it += 1 + total_trains = len(dk.backtesting_timeranges) + self.training_timerange = tr_train + dataframe_train = dk.slice_dataframe(tr_train, dataframe) + dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) + + trained_timestamp = tr_train + tr_train_startts_str = datetime.fromtimestamp( + tr_train.startts, + tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + tr_train_stopts_str = datetime.fromtimestamp( + tr_train.stopts, + tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + logger.info( + f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs" + f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} " + "trains" + ) + + trained_timestamp_int = int(trained_timestamp.stopts) + dk.data_path = Path( + dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}" + ) + + dk.set_new_model_names(pair, trained_timestamp) + + if dk.check_if_backtest_prediction_exists(): + self.dd.load_metadata(dk) + self.check_if_feature_list_matches_strategy(dataframe_train, dk) + append_df = 
dk.get_backtesting_prediction() + dk.append_predictions(append_df) + else: + if not self.model_exists( + pair, dk, trained_timestamp=trained_timestamp_int + ): + dk.find_features(dataframe_train) + self.model = self.train(dataframe_train, pair, dk) + self.dd.pair_dict[pair]["trained_timestamp"] = int( + trained_timestamp.stopts) + + if self.save_backtest_models: + logger.info('Saving backtest model to disk.') + self.dd.save_data(self.model, pair, dk) + else: + self.model = self.dd.load_data(pair, dk) + + self.check_if_feature_list_matches_strategy(dataframe_train, dk) + + pred_df, do_preds = self.predict(dataframe_backtest, dk) + append_df = dk.get_predictions_to_append(pred_df, do_preds) + dk.append_predictions(append_df) + dk.save_backtesting_prediction(append_df) + + dk.fill_predictions(dataframe) + + return dk + def start_live( self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen ) -> FreqaiDataKitchen: From 0ed7b2bfc37402dbe2884a872674e1d66a1e88c3 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Sun, 25 Sep 2022 10:35:55 -0300 Subject: [PATCH 03/22] change start_backtesting to handle backtest_live_models --- freqtrade/freqai/data_kitchen.py | 58 +++++++++-- freqtrade/freqai/freqai_interface.py | 147 +++++++-------------------- 2 files changed, 82 insertions(+), 123 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index b9fc3f1b2..fdbbf3d51 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -62,6 +62,7 @@ class FreqaiDataKitchen: live: bool = False, pair: str = "", ): + self.backtest_live_models = False # temp self.data: Dict[str, Any] = {} self.data_dictionary: Dict[str, DataFrame] = {} self.config = config @@ -88,11 +89,16 @@ class FreqaiDataKitchen: self.config["timerange"], self.freqai_config.get("train_period_days", 0) ) - (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange( - self.full_timerange, - 
config["freqai"]["train_period_days"], - config["freqai"]["backtest_period_days"], - ) + if self.backtest_live_models: + self.get_timerange_from_ready_models() + (self.training_timeranges, + self.backtesting_timeranges) = self.split_timerange_live_models() + else: + (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange( + self.full_timerange, + config["freqai"]["train_period_days"], + config["freqai"]["backtest_period_days"], + ) self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {}) self.thread_count = self.freqai_config.get("data_kitchen_thread_count", -1) @@ -451,6 +457,26 @@ class FreqaiDataKitchen: # print(tr_training_list, tr_backtesting_list) return tr_training_list_timerange, tr_backtesting_list_timerange + def split_timerange_live_models( + self + ) -> Tuple[list, list]: + + tr_backtesting_list_timerange = [] + pair = self.pair.split("/")[0].split(":")[0] + pair_data = self.backtest_live_models_data["pairs_end_dates"][pair] + model_end_dates = [] + backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"] + for data in pair_data: + model_end_dates.append(data["model_end_date"]) + model_end_dates.append(backtesting_timerange.stopts) + model_end_dates.sort() + for index, item in enumerate(model_end_dates): + if len(model_end_dates) > (index + 1): + tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1]) + tr_backtesting_list_timerange.append(tr_to_add) + + return tr_backtesting_list_timerange, tr_backtesting_list_timerange + def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame: """ Given a full dataframe, extract the user desired window @@ -1093,15 +1119,15 @@ class FreqaiDataKitchen: return retrain, trained_timerange, data_load_timerange - def set_new_model_names(self, pair: str, trained_timerange: TimeRange): + def set_new_model_names(self, pair: str, timestamp_id: int): coin, _ = pair.split("/") self.data_path = Path( self.full_path 
- / f"sub-train-{pair.split('/')[0]}_{int(trained_timerange.stopts)}" + / f"sub-train-{pair.split('/')[0]}_{timestamp_id}" ) - self.model_filename = f"cb_{coin.lower()}_{int(trained_timerange.stopts)}" + self.model_filename = f"cb_{coin.lower()}_{timestamp_id}" def set_all_pairs(self) -> None: @@ -1278,7 +1304,7 @@ class FreqaiDataKitchen: pairs_end_dates: Dict[str, Any] = {} for model_dir in models_path.iterdir(): if str(model_dir.name).startswith("sub-train"): - model_end_date = model_dir.name.split("_")[1] + model_end_date = int(model_dir.name.split("_")[1]) pair = model_dir.name.split("_")[0].replace("sub-train-", "") model_file_name = (f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}") model_file_name = f"{model_file_name}_model.joblib" @@ -1289,14 +1315,24 @@ class FreqaiDataKitchen: pairs_end_dates[pair] = [] pairs_end_dates[pair].append({ - "model_end_date": int(model_end_date), + "model_end_date": model_end_date, "model_path_file": model_path_file, "model_dir": model_dir }) if model_end_date not in all_models_end_dates: - all_models_end_dates.append(int(model_end_date)) + all_models_end_dates.append(model_end_date) + finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp()) + if len(all_models_end_dates) > 1: + # After last model end date, use the same period from previous model + # to finish the backtest + all_models_end_dates.sort(reverse=True) + finish_timestamp = all_models_end_dates[0] + \ + (all_models_end_dates[0] - all_models_end_dates[1]) + + all_models_end_dates.append(finish_timestamp) + all_models_end_dates.sort() start = datetime.fromtimestamp(min(all_models_end_dates), tz=timezone.utc) stop = datetime.fromtimestamp(max(all_models_end_dates), tz=timezone.utc) backtesting_string_timerange = f"{start.strftime('%Y%m%d')}-{stop.strftime('%Y%m%d')}" diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index dda9b8bcc..052de7948 100644 --- a/freqtrade/freqai/freqai_interface.py +++ 
b/freqtrade/freqai/freqai_interface.py @@ -134,17 +134,17 @@ class IFreqaiModel(ABC): # the concatenated results for the full backtesting period back to the strategy. elif not self.follow_mode: self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) - self.dk.get_timerange_from_ready_models() - logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") + if(self.dk.backtest_live_models): + logger.info( + f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (Live Models)") + else: + logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") dataframe = self.dk.use_strategy_to_populate_indicators( strategy, prediction_dataframe=dataframe, pair=metadata["pair"] ) - - backtest_live_models = True # temp - if not backtest_live_models: - dk = self.start_backtesting(dataframe, metadata, self.dk) - else: - dk = self.start_backtesting_live_models(dataframe, metadata, self.dk) + dk = self.start_backtesting(dataframe, metadata, self.dk) + # else: + # dk = self.start_backtesting_live_models(dataframe, metadata, self.dk) dataframe = dk.remove_features_from_df(dk.return_dataframe) self.clean_up() @@ -265,28 +265,39 @@ class IFreqaiModel(ABC): tr_train_stopts_str = datetime.fromtimestamp( tr_train.stopts, tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) - logger.info( - f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs" - f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} " - "trains" - ) - - trained_timestamp_int = int(trained_timestamp.stopts) - dk.data_path = Path( - dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}" + if not dk.backtest_live_models: + logger.info( + f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs" + f" from {tr_train_startts_str}" + f" to {tr_train_stopts_str}, {train_it}/{total_trains} " + "trains" ) - dk.set_new_model_names(pair, trained_timestamp) + timestamp_model_id = int(trained_timestamp.stopts) + if dk.backtest_live_models: + 
timestamp_model_id = int(tr_backtest.startts) + + dk.data_path = Path( + dk.full_path / f"sub-train-{pair.split('/')[0]}_{timestamp_model_id}" + ) + + dk.set_new_model_names(pair, timestamp_model_id) if dk.check_if_backtest_prediction_exists(): self.dd.load_metadata(dk) - self.check_if_feature_list_matches_strategy(dataframe_train, dk) + if not dk.backtest_live_models: + self.check_if_feature_list_matches_strategy(dataframe_train, dk) + append_df = dk.get_backtesting_prediction() dk.append_predictions(append_df) else: - if not self.model_exists( - pair, dk, trained_timestamp=trained_timestamp_int - ): + if not self.model_exists(dk): + if dk.backtest_live_models: + raise OperationalException( + "Training models is not allowed " + "in backtest_live_models backtesting " + "mode" + ) dk.find_features(dataframe_train) self.model = self.train(dataframe_train, pair, dk) self.dd.pair_dict[pair]["trained_timestamp"] = int( @@ -306,91 +317,6 @@ class IFreqaiModel(ABC): dk.save_backtesting_prediction(append_df) dk.fill_predictions(dataframe) - - return dk - - def start_backtesting_live_models( - self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen - ) -> FreqaiDataKitchen: - """ - The main broad execution for backtesting. For backtesting, each pair enters and then gets - trained for each window along the sliding window defined by "train_period_days" - (training window) and "backtest_period_days" (backtest window, i.e. window immediately - following the training window). FreqAI slides the window and sequentially builds - the backtesting results before returning the concatenated results for the full - backtesting period back to the strategy. 
- :param dataframe: DataFrame = strategy passed dataframe - :param metadata: Dict = pair metadata - :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only - :return: - FreqaiDataKitchen = Data management/analysis tool associated to present pair only - """ - - self.pair_it += 1 - train_it = 0 - # Loop enforcing the sliding window training/backtesting paradigm - # tr_train is the training time range e.g. 1 historical month - # tr_backtest is the backtesting time range e.g. the week directly - # following tr_train. Both of these windows slide through the - # entire backtest - for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges): - pair = metadata["pair"] - (_, _, _) = self.dd.get_pair_dict_info(pair) - train_it += 1 - total_trains = len(dk.backtesting_timeranges) - self.training_timerange = tr_train - dataframe_train = dk.slice_dataframe(tr_train, dataframe) - dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) - - trained_timestamp = tr_train - tr_train_startts_str = datetime.fromtimestamp( - tr_train.startts, - tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) - tr_train_stopts_str = datetime.fromtimestamp( - tr_train.stopts, - tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) - logger.info( - f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs" - f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} " - "trains" - ) - - trained_timestamp_int = int(trained_timestamp.stopts) - dk.data_path = Path( - dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}" - ) - - dk.set_new_model_names(pair, trained_timestamp) - - if dk.check_if_backtest_prediction_exists(): - self.dd.load_metadata(dk) - self.check_if_feature_list_matches_strategy(dataframe_train, dk) - append_df = dk.get_backtesting_prediction() - dk.append_predictions(append_df) - else: - if not self.model_exists( - pair, dk, trained_timestamp=trained_timestamp_int - ): - 
dk.find_features(dataframe_train) - self.model = self.train(dataframe_train, pair, dk) - self.dd.pair_dict[pair]["trained_timestamp"] = int( - trained_timestamp.stopts) - - if self.save_backtest_models: - logger.info('Saving backtest model to disk.') - self.dd.save_data(self.model, pair, dk) - else: - self.model = self.dd.load_data(pair, dk) - - self.check_if_feature_list_matches_strategy(dataframe_train, dk) - - pred_df, do_preds = self.predict(dataframe_backtest, dk) - append_df = dk.get_predictions_to_append(pred_df, do_preds) - dk.append_predictions(append_df) - dk.save_backtesting_prediction(append_df) - - dk.fill_predictions(dataframe) - return dk def start_live( @@ -595,10 +521,7 @@ class IFreqaiModel(ABC): def model_exists( self, - pair: str, dk: FreqaiDataKitchen, - trained_timestamp: int = None, - model_filename: str = "", scanning: bool = False, ) -> bool: """ @@ -608,7 +531,7 @@ class IFreqaiModel(ABC): :return: :boolean: whether the model file exists or not. """ - path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib") + path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model.joblib") file_exists = path_to_modelfile.is_file() if file_exists and not scanning: logger.info("Found model at %s", dk.data_path / dk.model_filename) @@ -663,7 +586,7 @@ class IFreqaiModel(ABC): model = self.train(unfiltered_dataframe, pair, dk) self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts - dk.set_new_model_names(pair, new_trained_timerange) + dk.set_new_model_names(pair, int(new_trained_timerange.stopts)) self.dd.save_data(model, pair, dk) if self.freqai_info["feature_parameters"].get("plot_feature_importance", False): From 5880f7a6381cc45f36cd3297ed90b62bd29fce5b Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Sun, 25 Sep 2022 23:14:00 -0300 Subject: [PATCH 04/22] backtest_live_models - params validation and get timerange from live models in BT --- freqtrade/commands/arguments.py | 3 +- 
freqtrade/commands/cli_options.py | 6 + freqtrade/configuration/config_validation.py | 16 + freqtrade/configuration/configuration.py | 3 + freqtrade/freqai/data_kitchen.py | 105 +-- freqtrade/freqai/freqai_interface copy.py | 783 ------------------- freqtrade/freqai/freqai_util.py | 75 ++ freqtrade/optimize/backtesting.py | 7 + 8 files changed, 165 insertions(+), 833 deletions(-) delete mode 100644 freqtrade/freqai/freqai_interface copy.py create mode 100644 freqtrade/freqai/freqai_util.py diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py index 97d8cc130..6944b4a6c 100644 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -25,7 +25,8 @@ ARGS_COMMON_OPTIMIZE = ["timeframe", "timerange", "dataformat_ohlcv", ARGS_BACKTEST = ARGS_COMMON_OPTIMIZE + ["position_stacking", "use_max_market_positions", "enable_protections", "dry_run_wallet", "timeframe_detail", "strategy_list", "export", "exportfilename", - "backtest_breakdown", "backtest_cache"] + "backtest_breakdown", "backtest_cache", + "freqai_backtest_live_models"] ARGS_HYPEROPT = ARGS_COMMON_OPTIMIZE + ["hyperopt", "hyperopt_path", "position_stacking", "use_max_market_positions", diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py index e50fb86d8..a1558d399 100644 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -668,4 +668,10 @@ AVAILABLE_CLI_OPTIONS = { help='Specify additional lookup path for freqaimodels.', metavar='PATH', ), + "freqai_backtest_live_models": Arg( + '--freqai-backtest-live-models', + help='Run backtest with ready models.', + action='store_true', + default=False, + ), } diff --git a/freqtrade/configuration/config_validation.py b/freqtrade/configuration/config_validation.py index 7055d9551..6e27fc748 100644 --- a/freqtrade/configuration/config_validation.py +++ b/freqtrade/configuration/config_validation.py @@ -86,6 +86,7 @@ def validate_config_consistency(conf: Dict[str, Any], 
preliminary: bool = False) _validate_unlimited_amount(conf) _validate_ask_orderbook(conf) _validate_freqai_hyperopt(conf) + _validate_freqai_backtest(conf) _validate_consumers(conf) validate_migrated_strategy_settings(conf) @@ -334,6 +335,21 @@ def _validate_freqai_hyperopt(conf: Dict[str, Any]) -> None: 'Using analyze-per-epoch parameter is not supported with a FreqAI strategy.') +def _validate_freqai_backtest(conf: Dict[str, Any]) -> None: + freqai_enabled = conf.get('freqai', {}).get('enabled', False) + timerange = conf.get('timerange') + freqai_backtest_live_models = conf.get('freqai_backtest_live_models', False) + if freqai_backtest_live_models and freqai_enabled and timerange: + raise OperationalException( + 'Using timerange parameter is not supported with ' + '--freqai-backtest-live-models parameter.') + + if freqai_backtest_live_models and not freqai_enabled: + raise OperationalException( + 'Using --freqai-backtest-live-models parameter is only ' + 'supported with a FreqAI strategy.') + + def _validate_consumers(conf: Dict[str, Any]) -> None: emc_conf = conf.get('external_message_consumer', {}) if emc_conf.get('enabled', False): diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py index 76105cc4d..22b6fc05b 100644 --- a/freqtrade/configuration/configuration.py +++ b/freqtrade/configuration/configuration.py @@ -277,6 +277,9 @@ class Configuration: self._args_to_config(config, argname='disableparamexport', logstring='Parameter --disableparamexport detected: {} ...') + self._args_to_config(config, argname='freqai_backtest_live_models', + logstring='Parameter --freqai-backtest-live-models detected ...') + # Edge section: if 'stoploss_range' in self.args and self.args["stoploss_range"]: txt_range = eval(self.args["stoploss_range"]) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 05cd4de4b..861a3c366 100644 --- a/freqtrade/freqai/data_kitchen.py +++ 
b/freqtrade/freqai/data_kitchen.py @@ -21,6 +21,7 @@ from freqtrade.configuration import TimeRange from freqtrade.constants import Config from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds +from freqtrade.freqai import freqai_util from freqtrade.strategy.interface import IStrategy @@ -62,7 +63,6 @@ class FreqaiDataKitchen: live: bool = False, pair: str = "", ): - self.backtest_live_models = False # temp self.data: Dict[str, Any] = {} self.data_dictionary: Dict[str, DataFrame] = {} self.config = config @@ -81,16 +81,21 @@ class FreqaiDataKitchen: self.svm_model: linear_model.SGDOneClassSVM = None self.keras: bool = self.freqai_config.get("keras", False) self.set_all_pairs() + self.backtest_live_models = config.get("freqai_backtest_live_models", False) + if not self.live: - if not self.config["timerange"]: + if (not self.config.get("timerange") and + not self.backtest_live_models): raise OperationalException( 'Please pass --timerange if you intend to use FreqAI for backtesting.') + + self.full_path = freqai_util.get_full_model_path(self.config) self.full_timerange = self.create_fulltimerange( self.config["timerange"], self.freqai_config.get("train_period_days", 0) ) if self.backtest_live_models: - self.get_timerange_from_ready_models() + self.set_timerange_from_ready_models() (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange_live_models() else: @@ -118,10 +123,7 @@ class FreqaiDataKitchen: metadata: dict = strategy furnished pair metadata trained_timestamp: int = timestamp of most recent training """ - self.full_path = Path( - self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier")) - ) - + self.full_path = freqai_util.get_full_model_path(self.config) self.data_path = Path( self.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp}" @@ -1035,11 +1037,6 @@ class FreqaiDataKitchen: start = datetime.fromtimestamp(backtest_timerange.startts, 
tz=timezone.utc) stop = datetime.fromtimestamp(backtest_timerange.stopts, tz=timezone.utc) full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") - - self.full_path = Path( - self.config["user_data_dir"] / "models" / f"{self.freqai_config['identifier']}" - ) - config_path = Path(self.config["config_files"][0]) if not self.full_path.is_dir(): @@ -1292,10 +1289,10 @@ class FreqaiDataKitchen: ) return file_exists - def get_timerange_from_ready_models(self): + def set_timerange_from_ready_models(self): backtesting_timerange, \ backtesting_string_timerange, \ - pairs_end_dates = self.gen_get_timerange_from_ready_models(self.full_path) + pairs_end_dates = freqai_util.get_timerange_from_ready_models(self.full_path) self.backtest_live_models_data = { "backtesting_timerange": backtesting_timerange, "backtesting_string_timerange": backtesting_string_timerange, @@ -1303,43 +1300,53 @@ class FreqaiDataKitchen: } return - def gen_get_timerange_from_ready_models(self, models_path: Path): - all_models_end_dates = [] - pairs_end_dates: Dict[str, Any] = {} - for model_dir in models_path.iterdir(): - if str(model_dir.name).startswith("sub-train"): - model_end_date = int(model_dir.name.split("_")[1]) - pair = model_dir.name.split("_")[0].replace("sub-train-", "") - model_file_name = (f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}") - model_file_name = f"{model_file_name}_model.joblib" + # def get_timerange_from_ready_models(self, models_path: Path): + # all_models_end_dates = [] + # pairs_end_dates: Dict[str, Any] = {} + # for model_dir in models_path.iterdir(): + # if str(model_dir.name).startswith("sub-train"): + # model_end_date = int(model_dir.name.split("_")[1]) + # pair = model_dir.name.split("_")[0].replace("sub-train-", "") + # model_file_name = (f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" + # "_model.joblib") - model_path_file = Path(model_dir / model_file_name) - if model_path_file.is_file(): - if pair not in 
pairs_end_dates: - pairs_end_dates[pair] = [] + # model_path_file = Path(model_dir / model_file_name) + # if model_path_file.is_file(): + # if pair not in pairs_end_dates: + # pairs_end_dates[pair] = [] - pairs_end_dates[pair].append({ - "model_end_date": model_end_date, - "model_path_file": model_path_file, - "model_dir": model_dir - }) + # pairs_end_dates[pair].append({ + # "model_end_date": model_end_date, + # "model_path_file": model_path_file, + # "model_dir": model_dir + # }) - if model_end_date not in all_models_end_dates: - all_models_end_dates.append(model_end_date) + # if model_end_date not in all_models_end_dates: + # all_models_end_dates.append(model_end_date) - finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp()) - if len(all_models_end_dates) > 1: - # After last model end date, use the same period from previous model - # to finish the backtest - all_models_end_dates.sort(reverse=True) - finish_timestamp = all_models_end_dates[0] + \ - (all_models_end_dates[0] - all_models_end_dates[1]) + # if len(all_models_end_dates) == 0: + # raise OperationalException( + # 'At least 1 saved model is required to ' + # 'run backtesting with the backtest_live_models option' + # ) - all_models_end_dates.append(finish_timestamp) - all_models_end_dates.sort() - start = datetime.fromtimestamp(min(all_models_end_dates), tz=timezone.utc) - stop = datetime.fromtimestamp(max(all_models_end_dates), tz=timezone.utc) - backtesting_string_timerange = f"{start.strftime('%Y%m%d')}-{stop.strftime('%Y%m%d')}" - backtesting_timerange = TimeRange('date', 'date', min(all_models_end_dates), - max(all_models_end_dates)) - return backtesting_timerange, backtesting_string_timerange, pairs_end_dates + # if len(all_models_end_dates) == 1: + # logger.warning(f"Only 1 model was found. 
Backtesting will run with the " + # "timerange from the end of the training date to the current date") + + # finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp()) + # if len(all_models_end_dates) > 1: + # # After last model end date, use the same period from previous model + # # to finish the backtest + # all_models_end_dates.sort(reverse=True) + # finish_timestamp = all_models_end_dates[0] + \ + # (all_models_end_dates[0] - all_models_end_dates[1]) + + # all_models_end_dates.append(finish_timestamp) + # all_models_end_dates.sort() + # start = datetime.fromtimestamp(min(all_models_end_dates), tz=timezone.utc) + # stop = datetime.fromtimestamp(max(all_models_end_dates), tz=timezone.utc) + # backtesting_string_timerange = f"{start.strftime('%Y%m%d')}-{stop.strftime('%Y%m%d')}" + # backtesting_timerange = TimeRange('date', 'date', min(all_models_end_dates), + # max(all_models_end_dates)) + # return backtesting_timerange, backtesting_string_timerange, pairs_end_dates diff --git a/freqtrade/freqai/freqai_interface copy.py b/freqtrade/freqai/freqai_interface copy.py deleted file mode 100644 index 052de7948..000000000 --- a/freqtrade/freqai/freqai_interface copy.py +++ /dev/null @@ -1,783 +0,0 @@ -import logging -import shutil -import threading -import time -from abc import ABC, abstractmethod -from collections import deque -from datetime import datetime, timezone -from pathlib import Path -from threading import Lock -from typing import Any, Dict, List, Tuple - -import numpy as np -import pandas as pd -from numpy.typing import NDArray -from pandas import DataFrame - -from freqtrade.configuration import TimeRange -from freqtrade.constants import DATETIME_PRINT_FORMAT, Config -from freqtrade.enums import RunMode -from freqtrade.exceptions import OperationalException -from freqtrade.exchange import timeframe_to_seconds -from freqtrade.freqai.data_drawer import FreqaiDataDrawer -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from 
freqtrade.freqai.utils import plot_feature_importance -from freqtrade.strategy.interface import IStrategy - - -pd.options.mode.chained_assignment = None -logger = logging.getLogger(__name__) - - -class IFreqaiModel(ABC): - """ - Class containing all tools for training and prediction in the strategy. - Base*PredictionModels inherit from this class. - - Record of contribution: - FreqAI was developed by a group of individuals who all contributed specific skillsets to the - project. - - Conception and software development: - Robert Caulk @robcaulk - - Theoretical brainstorming: - Elin Törnquist @th0rntwig - - Code review, software architecture brainstorming: - @xmatthias - - Beta testing and bug reporting: - @bloodhunter4rc, Salah Lamkadem @ikonx, @ken11o2, @longyu, @paranoidandy, @smidelis, @smarm - Juha Nykänen @suikula, Wagner Costa @wagnercosta, Johan Vlugt @Jooopieeert - """ - - def __init__(self, config: Config) -> None: - - self.config = config - self.assert_config(self.config) - self.freqai_info: Dict[str, Any] = config["freqai"] - self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get( - "data_split_parameters", {}) - self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get( - "model_training_parameters", {}) - self.retrain = False - self.first = True - self.set_full_path() - self.follow_mode: bool = self.freqai_info.get("follow_mode", False) - self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True) - if self.save_backtest_models: - logger.info('Backtesting module configured to save all models.') - self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode) - self.identifier: str = self.freqai_info.get("identifier", "no_id_provided") - self.scanning = False - self.ft_params = self.freqai_info["feature_parameters"] - self.keras: bool = self.freqai_info.get("keras", False) - if self.keras and self.ft_params.get("DI_threshold", 0): - self.ft_params["DI_threshold"] = 0 - 
logger.warning("DI threshold is not configured for Keras models yet. Deactivating.") - self.CONV_WIDTH = self.freqai_info.get("conv_width", 2) - if self.ft_params.get("inlier_metric_window", 0): - self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2 - self.pair_it = 0 - self.pair_it_train = 0 - self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist")) - self.train_queue = self._set_train_queue() - self.last_trade_database_summary: DataFrame = {} - self.current_trade_database_summary: DataFrame = {} - self.analysis_lock = Lock() - self.inference_time: float = 0 - self.train_time: float = 0 - self.begin_time: float = 0 - self.begin_time_train: float = 0 - self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe']) - self.continual_learning = self.freqai_info.get('continual_learning', False) - - self._threads: List[threading.Thread] = [] - self._stop_event = threading.Event() - - def __getstate__(self): - """ - Return an empty state to be pickled in hyperopt - """ - return ({}) - - def assert_config(self, config: Config) -> None: - - if not config.get("freqai", {}): - raise OperationalException("No freqai parameters found in configuration file.") - - def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame: - """ - Entry point to the FreqaiModel from a specific pair, it will train a new model if - necessary before making the prediction. - - :param dataframe: Full dataframe coming from strategy - it contains entire - backtesting timerange + additional historical data necessary to train - the model. - :param metadata: pair metadata coming from strategy. 
- :param strategy: Strategy to train on - """ - - self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE) - self.dd.set_pair_dict_info(metadata) - - if self.live: - self.inference_timer('start') - self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) - dk = self.start_live(dataframe, metadata, strategy, self.dk) - - # For backtesting, each pair enters and then gets trained for each window along the - # sliding window defined by "train_period_days" (training window) and "live_retrain_hours" - # (backtest window, i.e. window immediately following the training window). - # FreqAI slides the window and sequentially builds the backtesting results before returning - # the concatenated results for the full backtesting period back to the strategy. - elif not self.follow_mode: - self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) - if(self.dk.backtest_live_models): - logger.info( - f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (Live Models)") - else: - logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") - dataframe = self.dk.use_strategy_to_populate_indicators( - strategy, prediction_dataframe=dataframe, pair=metadata["pair"] - ) - dk = self.start_backtesting(dataframe, metadata, self.dk) - # else: - # dk = self.start_backtesting_live_models(dataframe, metadata, self.dk) - - dataframe = dk.remove_features_from_df(dk.return_dataframe) - self.clean_up() - if self.live: - self.inference_timer('stop') - return dataframe - - def clean_up(self): - """ - Objects that should be handled by GC already between coins, but - are explicitly shown here to help demonstrate the non-persistence of these - objects. - """ - self.model = None - self.dk = None - - def shutdown(self): - """ - Cleans up threads on Shutdown, set stop event. Join threads to wait - for current training iteration. 
- """ - logger.info("Stopping FreqAI") - self._stop_event.set() - - logger.info("Waiting on Training iteration") - for _thread in self._threads: - _thread.join() - - def start_scanning(self, *args, **kwargs) -> None: - """ - Start `self._start_scanning` in a separate thread - """ - _thread = threading.Thread(target=self._start_scanning, args=args, kwargs=kwargs) - self._threads.append(_thread) - _thread.start() - - def _start_scanning(self, strategy: IStrategy) -> None: - """ - Function designed to constantly scan pairs for retraining on a separate thread (intracandle) - to improve model youth. This function is agnostic to data preparation/collection/storage, - it simply trains on what ever data is available in the self.dd. - :param strategy: IStrategy = The user defined strategy class - """ - while not self._stop_event.is_set(): - time.sleep(1) - pair = self.train_queue[0] - - # ensure pair is avaialble in dp - if pair not in strategy.dp.current_whitelist(): - self.train_queue.popleft() - logger.warning(f'{pair} not in current whitelist, removing from train queue.') - continue - - (_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair) - - dk = FreqaiDataKitchen(self.config, self.live, pair) - dk.set_paths(pair, trained_timestamp) - ( - retrain, - new_trained_timerange, - data_load_timerange, - ) = dk.check_if_new_training_required(trained_timestamp) - dk.set_paths(pair, new_trained_timerange.stopts) - - if retrain: - self.train_timer('start') - try: - self.extract_data_and_train_model( - new_trained_timerange, pair, strategy, dk, data_load_timerange - ) - except Exception as msg: - logger.warning(f'Training {pair} raised exception {msg}, skipping.') - - self.train_timer('stop') - - # only rotate the queue after the first has been trained. 
- self.train_queue.rotate(-1) - - self.dd.save_historic_predictions_to_disk() - - def start_backtesting( - self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen - ) -> FreqaiDataKitchen: - """ - The main broad execution for backtesting. For backtesting, each pair enters and then gets - trained for each window along the sliding window defined by "train_period_days" - (training window) and "backtest_period_days" (backtest window, i.e. window immediately - following the training window). FreqAI slides the window and sequentially builds - the backtesting results before returning the concatenated results for the full - backtesting period back to the strategy. - :param dataframe: DataFrame = strategy passed dataframe - :param metadata: Dict = pair metadata - :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only - :return: - FreqaiDataKitchen = Data management/analysis tool associated to present pair only - """ - - self.pair_it += 1 - train_it = 0 - # Loop enforcing the sliding window training/backtesting paradigm - # tr_train is the training time range e.g. 1 historical month - # tr_backtest is the backtesting time range e.g. the week directly - # following tr_train. 
Both of these windows slide through the - # entire backtest - for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges): - pair = metadata["pair"] - (_, _, _) = self.dd.get_pair_dict_info(pair) - train_it += 1 - total_trains = len(dk.backtesting_timeranges) - self.training_timerange = tr_train - dataframe_train = dk.slice_dataframe(tr_train, dataframe) - dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) - - trained_timestamp = tr_train - tr_train_startts_str = datetime.fromtimestamp( - tr_train.startts, - tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) - tr_train_stopts_str = datetime.fromtimestamp( - tr_train.stopts, - tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) - if not dk.backtest_live_models: - logger.info( - f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs" - f" from {tr_train_startts_str}" - f" to {tr_train_stopts_str}, {train_it}/{total_trains} " - "trains" - ) - - timestamp_model_id = int(trained_timestamp.stopts) - if dk.backtest_live_models: - timestamp_model_id = int(tr_backtest.startts) - - dk.data_path = Path( - dk.full_path / f"sub-train-{pair.split('/')[0]}_{timestamp_model_id}" - ) - - dk.set_new_model_names(pair, timestamp_model_id) - - if dk.check_if_backtest_prediction_exists(): - self.dd.load_metadata(dk) - if not dk.backtest_live_models: - self.check_if_feature_list_matches_strategy(dataframe_train, dk) - - append_df = dk.get_backtesting_prediction() - dk.append_predictions(append_df) - else: - if not self.model_exists(dk): - if dk.backtest_live_models: - raise OperationalException( - "Training models is not allowed " - "in backtest_live_models backtesting " - "mode" - ) - dk.find_features(dataframe_train) - self.model = self.train(dataframe_train, pair, dk) - self.dd.pair_dict[pair]["trained_timestamp"] = int( - trained_timestamp.stopts) - - if self.save_backtest_models: - logger.info('Saving backtest model to disk.') - self.dd.save_data(self.model, pair, dk) - else: - self.model = 
self.dd.load_data(pair, dk) - - self.check_if_feature_list_matches_strategy(dataframe_train, dk) - - pred_df, do_preds = self.predict(dataframe_backtest, dk) - append_df = dk.get_predictions_to_append(pred_df, do_preds) - dk.append_predictions(append_df) - dk.save_backtesting_prediction(append_df) - - dk.fill_predictions(dataframe) - return dk - - def start_live( - self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen - ) -> FreqaiDataKitchen: - """ - The main broad execution for dry/live. This function will check if a retraining should be - performed, and if so, retrain and reset the model. - :param dataframe: DataFrame = strategy passed dataframe - :param metadata: Dict = pair metadata - :param strategy: IStrategy = currently employed strategy - dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only - :returns: - dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only - """ - - # update follower - if self.follow_mode: - self.dd.update_follower_metadata() - - # get the model metadata associated with the current pair - (_, trained_timestamp, return_null_array) = self.dd.get_pair_dict_info(metadata["pair"]) - - # if the metadata doesn't exist, the follower returns null arrays to strategy - if self.follow_mode and return_null_array: - logger.info("Returning null array from follower to strategy") - self.dd.return_null_values_to_strategy(dataframe, dk) - return dk - - # append the historic data once per round - if self.dd.historic_data: - self.dd.update_historic_data(strategy, dk) - logger.debug(f'Updating historic data on pair {metadata["pair"]}') - - if not self.follow_mode: - - (_, new_trained_timerange, data_load_timerange) = dk.check_if_new_training_required( - trained_timestamp - ) - dk.set_paths(metadata["pair"], new_trained_timerange.stopts) - - # load candle history into memory if it is not yet. 
- if not self.dd.historic_data: - self.dd.load_all_pair_histories(data_load_timerange, dk) - - if not self.scanning: - self.scanning = True - self.start_scanning(strategy) - - elif self.follow_mode: - dk.set_paths(metadata["pair"], trained_timestamp) - logger.info( - "FreqAI instance set to follow_mode, finding existing pair " - f"using { self.identifier }" - ) - - # load the model and associated data into the data kitchen - self.model = self.dd.load_data(metadata["pair"], dk) - - with self.analysis_lock: - dataframe = self.dk.use_strategy_to_populate_indicators( - strategy, prediction_dataframe=dataframe, pair=metadata["pair"] - ) - - if not self.model: - logger.warning( - f"No model ready for {metadata['pair']}, returning null values to strategy." - ) - self.dd.return_null_values_to_strategy(dataframe, dk) - return dk - - # ensure user is feeding the correct indicators to the model - self.check_if_feature_list_matches_strategy(dataframe, dk) - - self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp) - - return dk - - def build_strategy_return_arrays( - self, dataframe: DataFrame, dk: FreqaiDataKitchen, pair: str, trained_timestamp: int - ) -> None: - - # hold the historical predictions in memory so we are sending back - # correct array to strategy - - if pair not in self.dd.model_return_values: - # first predictions are made on entire historical candle set coming from strategy. This - # allows FreqUI to show full return values. 
- pred_df, do_preds = self.predict(dataframe, dk) - if pair not in self.dd.historic_predictions: - self.set_initial_historic_predictions(pred_df, dk, pair) - self.dd.set_initial_return_values(pair, pred_df) - - dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe) - return - elif self.dk.check_if_model_expired(trained_timestamp): - pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list) - do_preds = np.ones(2, dtype=np.int_) * 2 - dk.DI_values = np.zeros(2) - logger.warning( - f"Model expired for {pair}, returning null values to strategy. Strategy " - "construction should take care to consider this event with " - "prediction == 0 and do_predict == 2" - ) - else: - # remaining predictions are made only on the most recent candles for performance and - # historical accuracy reasons. - pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH:], dk, first=False) - - if self.freqai_info.get('fit_live_predictions_candles', 0) and self.live: - self.fit_live_predictions(dk, pair) - self.dd.append_model_predictions(pair, pred_df, do_preds, dk, len(dataframe)) - dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe) - - return - - def check_if_feature_list_matches_strategy( - self, dataframe: DataFrame, dk: FreqaiDataKitchen - ) -> None: - """ - Ensure user is passing the proper feature set if they are reusing an `identifier` pointing - to a folder holding existing models. 
- :param dataframe: DataFrame = strategy provided dataframe - :param dk: FreqaiDataKitchen = non-persistent data container/analyzer for - current coin/bot loop - """ - dk.find_features(dataframe) - if "training_features_list_raw" in dk.data: - feature_list = dk.data["training_features_list_raw"] - else: - feature_list = dk.data['training_features_list'] - if dk.training_features_list != feature_list: - raise OperationalException( - "Trying to access pretrained model with `identifier` " - "but found different features furnished by current strategy." - "Change `identifier` to train from scratch, or ensure the" - "strategy is furnishing the same features as the pretrained" - "model. In case of --strategy-list, please be aware that FreqAI " - "requires all strategies to maintain identical " - "populate_any_indicator() functions" - ) - - def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None: - """ - Base data cleaning method for train. - Functions here improve/modify the input data by identifying outliers, - computing additional metrics, adding noise, reducing dimensionality etc. 
- """ - - ft_params = self.freqai_info["feature_parameters"] - - if ft_params.get('inlier_metric_window', 0): - dk.compute_inlier_metric(set_='train') - if self.freqai_info["data_split_parameters"]["test_size"] > 0: - dk.compute_inlier_metric(set_='test') - - if ft_params.get( - "principal_component_analysis", False - ): - dk.principal_component_analysis() - - if ft_params.get("use_SVM_to_remove_outliers", False): - dk.use_SVM_to_remove_outliers(predict=False) - - if ft_params.get("DI_threshold", 0): - dk.data["avg_mean_dist"] = dk.compute_distances() - - if ft_params.get("use_DBSCAN_to_remove_outliers", False): - if dk.pair in self.dd.old_DBSCAN_eps: - eps = self.dd.old_DBSCAN_eps[dk.pair] - else: - eps = None - dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps) - self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps'] - - if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0): - dk.add_noise_to_training_features() - - def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None: - """ - Base data cleaning method for predict. - Functions here are complementary to the functions of data_cleaning_train. - """ - ft_params = self.freqai_info["feature_parameters"] - - if ft_params.get('inlier_metric_window', 0): - dk.compute_inlier_metric(set_='predict') - - if ft_params.get( - "principal_component_analysis", False - ): - dk.pca_transform(self.dk.data_dictionary['prediction_features']) - - if ft_params.get("use_SVM_to_remove_outliers", False): - dk.use_SVM_to_remove_outliers(predict=True) - - if ft_params.get("DI_threshold", 0): - dk.check_if_pred_in_training_spaces() - - if ft_params.get("use_DBSCAN_to_remove_outliers", False): - dk.use_DBSCAN_to_remove_outliers(predict=True) - - def model_exists( - self, - dk: FreqaiDataKitchen, - scanning: bool = False, - ) -> bool: - """ - Given a pair and path, check if a model already exists - :param pair: pair e.g. 
BTC/USD - :param path: path to model - :return: - :boolean: whether the model file exists or not. - """ - path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model.joblib") - file_exists = path_to_modelfile.is_file() - if file_exists and not scanning: - logger.info("Found model at %s", dk.data_path / dk.model_filename) - elif not scanning: - logger.info("Could not find model at %s", dk.data_path / dk.model_filename) - return file_exists - - def set_full_path(self) -> None: - self.full_path = Path( - self.config["user_data_dir"] / "models" / f"{self.freqai_info['identifier']}" - ) - self.full_path.mkdir(parents=True, exist_ok=True) - shutil.copy( - self.config["config_files"][0], - Path(self.full_path, Path(self.config["config_files"][0]).name), - ) - - def extract_data_and_train_model( - self, - new_trained_timerange: TimeRange, - pair: str, - strategy: IStrategy, - dk: FreqaiDataKitchen, - data_load_timerange: TimeRange, - ): - """ - Retrieve data and train model. - :param new_trained_timerange: TimeRange = the timerange to train the model on - :param metadata: dict = strategy provided metadata - :param strategy: IStrategy = user defined strategy object - :param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop - :param data_load_timerange: TimeRange = the amount of data to be loaded - for populate_any_indicators - (larger than new_trained_timerange so that - new_trained_timerange does not contain any NaNs) - """ - - corr_dataframes, base_dataframes = self.dd.get_base_and_corr_dataframes( - data_load_timerange, pair, dk - ) - - with self.analysis_lock: - unfiltered_dataframe = dk.use_strategy_to_populate_indicators( - strategy, corr_dataframes, base_dataframes, pair - ) - - unfiltered_dataframe = dk.slice_dataframe(new_trained_timerange, unfiltered_dataframe) - - # find the features indicated by strategy and store in datakitchen - dk.find_features(unfiltered_dataframe) - - model = self.train(unfiltered_dataframe, pair, dk) - - 
self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts - dk.set_new_model_names(pair, int(new_trained_timerange.stopts)) - self.dd.save_data(model, pair, dk) - - if self.freqai_info["feature_parameters"].get("plot_feature_importance", False): - plot_feature_importance(model, pair, dk) - - if self.freqai_info.get("purge_old_models", False): - self.dd.purge_old_models() - - def set_initial_historic_predictions( - self, pred_df: DataFrame, dk: FreqaiDataKitchen, pair: str - ) -> None: - """ - This function is called only if the datadrawer failed to load an - existing set of historic predictions. In this case, it builds - the structure and sets fake predictions off the first training - data. After that, FreqAI will append new real predictions to the - set of historic predictions. - - These values are used to generate live statistics which can be used - in the strategy for adaptive values. E.g. &*_mean/std are quantities - that can computed based on live predictions from the set of historical - predictions. Those values can be used in the user strategy to better - assess prediction rarity, and thus wait for probabilistically favorable - entries relative to the live historical predictions. - - If the user reuses an identifier on a subsequent instance, - this function will not be called. In that case, "real" predictions - will be appended to the loaded set of historic predictions. 
- :param: df: DataFrame = the dataframe containing the training feature data - :param: model: Any = A model which was `fit` using a common library such as - catboost or lightgbm - :param: dk: FreqaiDataKitchen = object containing methods for data analysis - :param: pair: str = current pair - """ - - self.dd.historic_predictions[pair] = pred_df - hist_preds_df = self.dd.historic_predictions[pair] - - for label in hist_preds_df.columns: - if hist_preds_df[label].dtype == object: - continue - hist_preds_df[f'{label}_mean'] = 0 - hist_preds_df[f'{label}_std'] = 0 - - hist_preds_df['do_predict'] = 0 - - if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0: - hist_preds_df['DI_values'] = 0 - - for return_str in dk.data['extra_returns_per_train']: - hist_preds_df[return_str] = 0 - - # # for keras type models, the conv_window needs to be prepended so - # # viewing is correct in frequi - if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0): - n_lost_points = self.freqai_info.get('conv_width', 2) - zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))), - columns=hist_preds_df.columns) - self.dd.historic_predictions[pair] = pd.concat( - [zeros_df, hist_preds_df], axis=0, ignore_index=True) - - def fit_live_predictions(self, dk: FreqaiDataKitchen, pair: str) -> None: - """ - Fit the labels with a gaussian distribution - """ - import scipy as spy - - # add classes from classifier label types if used - full_labels = dk.label_list + dk.unique_class_list - - num_candles = self.freqai_info.get("fit_live_predictions_candles", 100) - dk.data["labels_mean"], dk.data["labels_std"] = {}, {} - for label in full_labels: - if self.dd.historic_predictions[dk.pair][label].dtype == object: - continue - f = spy.stats.norm.fit(self.dd.historic_predictions[dk.pair][label].tail(num_candles)) - dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1] - - return - - def inference_timer(self, do='start'): - """ 
- Timer designed to track the cumulative time spent in FreqAI for one pass through - the whitelist. This will check if the time spent is more than 1/4 the time - of a single candle, and if so, it will warn the user of degraded performance - """ - if do == 'start': - self.pair_it += 1 - self.begin_time = time.time() - elif do == 'stop': - end = time.time() - self.inference_time += (end - self.begin_time) - if self.pair_it == self.total_pairs: - logger.info( - f'Total time spent inferencing pairlist {self.inference_time:.2f} seconds') - if self.inference_time > 0.25 * self.base_tf_seconds: - logger.warning("Inference took over 25% of the candle time. Reduce pairlist to" - " avoid blinding open trades and degrading performance.") - self.pair_it = 0 - self.inference_time = 0 - return - - def train_timer(self, do='start'): - """ - Timer designed to track the cumulative time spent training the full pairlist in - FreqAI. - """ - if do == 'start': - self.pair_it_train += 1 - self.begin_time_train = time.time() - elif do == 'stop': - end = time.time() - self.train_time += (end - self.begin_time_train) - if self.pair_it_train == self.total_pairs: - logger.info( - f'Total time spent training pairlist {self.train_time:.2f} seconds') - self.pair_it_train = 0 - self.train_time = 0 - return - - def get_init_model(self, pair: str) -> Any: - if pair not in self.dd.model_dictionary or not self.continual_learning: - init_model = None - else: - init_model = self.dd.model_dictionary[pair] - - return init_model - - def _set_train_queue(self): - """ - Sets train queue from existing train timestamps if they exist - otherwise it sets the train queue based on the provided whitelist. - """ - current_pairlist = self.config.get("exchange", {}).get("pair_whitelist") - if not self.dd.pair_dict: - logger.info('Set fresh train queue from whitelist. 
' - f'Queue: {current_pairlist}') - return deque(current_pairlist) - - best_queue = deque() - - pair_dict_sorted = sorted(self.dd.pair_dict.items(), - key=lambda k: k[1]['trained_timestamp']) - for pair in pair_dict_sorted: - if pair[0] in current_pairlist: - best_queue.append(pair[0]) - for pair in current_pairlist: - if pair not in best_queue: - best_queue.appendleft(pair) - - logger.info('Set existing queue from trained timestamps. ' - f'Best approximation queue: {best_queue}') - return best_queue - - # Following methods which are overridden by user made prediction models. - # See freqai/prediction_models/CatboostPredictionModel.py for an example. - - @abstractmethod - def train(self, unfiltered_df: DataFrame, pair: str, - dk: FreqaiDataKitchen, **kwargs) -> Any: - """ - Filter the training data and train a model to it. Train makes heavy use of the datahandler - for storing, saving, loading, and analyzing the data. - :param unfiltered_df: Full dataframe for the current training period - :param metadata: pair metadata from strategy. - :return: Trained model which can be used to inference (self.predict) - """ - - @abstractmethod - def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs) -> Any: - """ - Most regressors use the same function names and arguments e.g. user - can drop in LGBMRegressor in place of CatBoostRegressor and all data - management will be properly handled by Freqai. - :param data_dictionary: Dict = the dictionary constructed by DataHandler to hold - all the training and test data/labels. - """ - - return - - @abstractmethod - def predict( - self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs - ) -> Tuple[DataFrame, NDArray[np.int_]]: - """ - Filter the prediction features data and predict with it. - :param unfiltered_df: Full dataframe for the current backtest period. 
- :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only - :param first: boolean = whether this is the first prediction or not. - :return: - :predictions: np.array of predictions - :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove - data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index) - """ diff --git a/freqtrade/freqai/freqai_util.py b/freqtrade/freqai/freqai_util.py new file mode 100644 index 000000000..06b4936c1 --- /dev/null +++ b/freqtrade/freqai/freqai_util.py @@ -0,0 +1,75 @@ +import logging +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict + +from freqtrade.configuration import TimeRange +from freqtrade.constants import Config +from freqtrade.exceptions import OperationalException + + +logger = logging.getLogger(__name__) + + +def get_full_model_path(config: Config) -> Path: + freqai_config: Dict[str, Any] = config["freqai"] + return Path( + config["user_data_dir"] / "models" / str(freqai_config.get("identifier")) + ) + + +def get_timerange_from_ready_models(models_path: Path): + all_models_end_dates = [] + pairs_end_dates: Dict[str, Any] = {} + for model_dir in models_path.iterdir(): + if str(model_dir.name).startswith("sub-train"): + model_end_date = int(model_dir.name.split("_")[1]) + pair = model_dir.name.split("_")[0].replace("sub-train-", "") + model_file_name = ( + f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" + "_model.joblib" + ) + + model_path_file = Path(model_dir / model_file_name) + if model_path_file.is_file(): + if pair not in pairs_end_dates: + pairs_end_dates[pair] = [] + + pairs_end_dates[pair].append({ + "model_end_date": model_end_date, + "model_path_file": model_path_file, + "model_dir": model_dir + }) + + if model_end_date not in all_models_end_dates: + all_models_end_dates.append(model_end_date) + + if len(all_models_end_dates) == 0: + raise OperationalException( + 'At least 1 saved 
model is required to ' + 'run backtesting with the backtest_live_models option' + ) + + if len(all_models_end_dates) == 1: + logger.warning( + "Only 1 model was found. Backtesting will run with the " + "timerange from the end of the training date to the current date" + ) + + finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp()) + if len(all_models_end_dates) > 1: + # After last model end date, use the same period from previous model + # to finish the backtest + all_models_end_dates.sort(reverse=True) + finish_timestamp = all_models_end_dates[0] + \ + (all_models_end_dates[0] - all_models_end_dates[1]) + + all_models_end_dates.append(finish_timestamp) + all_models_end_dates.sort() + start = datetime.fromtimestamp(min(all_models_end_dates), tz=timezone.utc) + stop = datetime.fromtimestamp(max(all_models_end_dates), tz=timezone.utc) + backtesting_string_timerange = f"{start.strftime('%Y%m%d')}-{stop.strftime('%Y%m%d')}" + backtesting_timerange = TimeRange( + 'date', 'date', min(all_models_end_dates), max(all_models_end_dates) + ) + return backtesting_timerange, backtesting_string_timerange, pairs_end_dates diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index 2a1c44f7f..d8d2e808b 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -25,6 +25,7 @@ from freqtrade.enums import (BacktestState, CandleType, ExitCheckTuple, ExitType from freqtrade.exceptions import DependencyException, OperationalException from freqtrade.exchange import (amount_to_contract_precision, price_to_precision, timeframe_to_minutes, timeframe_to_seconds) +from freqtrade.freqai import freqai_util from freqtrade.mixins import LoggingMixin from freqtrade.optimize.backtest_caching import get_strategy_run_id from freqtrade.optimize.bt_progress import BTProgress @@ -134,6 +135,12 @@ class Backtesting: self.fee = self.exchange.get_fee(symbol=self.pairlists.whitelist[0]) self.precision_mode = self.exchange.precisionMode + if 
self.config.get('freqai_backtest_live_models', False): + freqai_model_path = freqai_util.get_full_model_path(self.config) + _, live_models_timerange, _ = freqai_util.get_timerange_from_ready_models( + freqai_model_path) + self.config['timerange'] = live_models_timerange + self.timerange = TimeRange.parse_timerange( None if self.config.get('timerange') is None else str(self.config.get('timerange'))) From ec947ad65cd73ae3c5a6b4d6669abd6cedb513a6 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Sun, 25 Sep 2022 23:47:27 -0300 Subject: [PATCH 05/22] remove commented code - backtest_live_models --- freqtrade/freqai/data_kitchen.py | 51 -------------------------------- 1 file changed, 51 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 861a3c366..29fda266d 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1299,54 +1299,3 @@ class FreqaiDataKitchen: "pairs_end_dates": pairs_end_dates } return - - # def get_timerange_from_ready_models(self, models_path: Path): - # all_models_end_dates = [] - # pairs_end_dates: Dict[str, Any] = {} - # for model_dir in models_path.iterdir(): - # if str(model_dir.name).startswith("sub-train"): - # model_end_date = int(model_dir.name.split("_")[1]) - # pair = model_dir.name.split("_")[0].replace("sub-train-", "") - # model_file_name = (f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" - # "_model.joblib") - - # model_path_file = Path(model_dir / model_file_name) - # if model_path_file.is_file(): - # if pair not in pairs_end_dates: - # pairs_end_dates[pair] = [] - - # pairs_end_dates[pair].append({ - # "model_end_date": model_end_date, - # "model_path_file": model_path_file, - # "model_dir": model_dir - # }) - - # if model_end_date not in all_models_end_dates: - # all_models_end_dates.append(model_end_date) - - # if len(all_models_end_dates) == 0: - # raise OperationalException( - # 'At least 1 saved model is required to ' - # 'run 
backtesting with the backtest_live_models option' - # ) - - # if len(all_models_end_dates) == 1: - # logger.warning(f"Only 1 model was found. Backtesting will run with the " - # "timerange from the end of the training date to the current date") - - # finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp()) - # if len(all_models_end_dates) > 1: - # # After last model end date, use the same period from previous model - # # to finish the backtest - # all_models_end_dates.sort(reverse=True) - # finish_timestamp = all_models_end_dates[0] + \ - # (all_models_end_dates[0] - all_models_end_dates[1]) - - # all_models_end_dates.append(finish_timestamp) - # all_models_end_dates.sort() - # start = datetime.fromtimestamp(min(all_models_end_dates), tz=timezone.utc) - # stop = datetime.fromtimestamp(max(all_models_end_dates), tz=timezone.utc) - # backtesting_string_timerange = f"{start.strftime('%Y%m%d')}-{stop.strftime('%Y%m%d')}" - # backtesting_timerange = TimeRange('date', 'date', min(all_models_end_dates), - # max(all_models_end_dates)) - # return backtesting_timerange, backtesting_string_timerange, pairs_end_dates From 22bef71d5d5158ad7598957dafad199cac4251bf Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Mon, 26 Sep 2022 19:01:24 -0300 Subject: [PATCH 06/22] backtest_live_models - add function comments and tests --- freqtrade/freqai/freqai_util.py | 25 ++++++++++- tests/freqai/test_freqai_backtesting.py | 60 +++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/freqai_util.py b/freqtrade/freqai/freqai_util.py index 06b4936c1..da5b5615f 100644 --- a/freqtrade/freqai/freqai_util.py +++ b/freqtrade/freqai/freqai_util.py @@ -1,3 +1,6 @@ +""" +FreqAI generic functions +""" import logging from datetime import datetime, timezone from pathlib import Path @@ -12,15 +15,33 @@ logger = logging.getLogger(__name__) def get_full_model_path(config: Config) -> Path: + """ + Returns default FreqAI model path + :param 
config: Configuration dictionary + """ freqai_config: Dict[str, Any] = config["freqai"] return Path( config["user_data_dir"] / "models" / str(freqai_config.get("identifier")) ) -def get_timerange_from_ready_models(models_path: Path): +def get_timerange_from_ready_models(models_path: Path) -> tuple[TimeRange, str, dict[str, Any]]: + """ + Returns timerange information based on a FreqAI model directory + :param models_path: FreqAI model path + + :returns: a Tuple with (backtesting_timerange: Timerange calculated from directory, + backtesting_string_timerange: str timerange calculated from + directory (format example '20020822-20220830'), \ + pairs_end_dates: Dict with pair and model end training dates info) + """ all_models_end_dates = [] pairs_end_dates: Dict[str, Any] = {} + if not models_path.is_dir(): + raise OperationalException( + 'Model folders not found. Saved models are required ' + 'to run backtest with the freqai-backtest-live-models option' + ) for model_dir in models_path.iterdir(): if str(model_dir.name).startswith("sub-train"): model_end_date = int(model_dir.name.split("_")[1]) @@ -47,7 +68,7 @@ def get_timerange_from_ready_models(models_path: Path): if len(all_models_end_dates) == 0: raise OperationalException( 'At least 1 saved model is required to ' - 'run backtesting with the backtest_live_models option' + 'run backtest with the freqai-backtest-live-models option' ) if len(all_models_end_dates) == 1: diff --git a/tests/freqai/test_freqai_backtesting.py b/tests/freqai/test_freqai_backtesting.py index b1881b2f5..d30383283 100644 --- a/tests/freqai/test_freqai_backtesting.py +++ b/tests/freqai/test_freqai_backtesting.py @@ -3,8 +3,11 @@ from datetime import datetime, timezone from pathlib import Path from unittest.mock import PropertyMock +import pytest + from freqtrade.commands.optimize_commands import setup_optimize_configuration from freqtrade.enums import RunMode +from freqtrade.exceptions import OperationalException from 
freqtrade.optimize.backtesting import Backtesting from tests.conftest import (CURRENT_TEST_STRATEGY, get_args, log_has_re, patch_exchange, patched_configuration_load_config_file) @@ -51,3 +54,60 @@ def test_freqai_backtest_load_data(freqai_conf, mocker, caplog): assert log_has_re('Increasing startup_candle_count for freqai to.*', caplog) Backtesting.cleanup() + + +def test_freqai_backtest_live_models_validations(freqai_conf, mocker, testdatadir, caplog): + patch_exchange(mocker) + + now = datetime.now(timezone.utc) + mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist', + PropertyMock(return_value=['HULUMULU/USDT', 'XRP/USDT'])) + mocker.patch('freqtrade.optimize.backtesting.history.load_data') + mocker.patch('freqtrade.optimize.backtesting.history.get_timerange', return_value=(now, now)) + + patched_configuration_load_config_file(mocker, freqai_conf) + + args = [ + 'backtesting', + '--config', 'config.json', + '--datadir', str(testdatadir), + '--strategy-path', str(Path(__file__).parents[1] / 'strategy/strats'), + '--timeframe', '1h', + '--timerange', '20220108-20220115', + '--freqai-backtest-live-models' + ] + args = get_args(args) + with pytest.raises(OperationalException, + match=r".* timerange parameter is not supported .*"): + setup_optimize_configuration(args, RunMode.BACKTEST) + + Backtesting.cleanup() + + +def test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testdatadir, caplog): + patch_exchange(mocker) + + now = datetime.now(timezone.utc) + mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist', + PropertyMock(return_value=['HULUMULU/USDT', 'XRP/USDT'])) + mocker.patch('freqtrade.optimize.backtesting.history.load_data') + mocker.patch('freqtrade.optimize.backtesting.history.get_timerange', return_value=(now, now)) + freqai_conf["timerange"] = "" + patched_configuration_load_config_file(mocker, freqai_conf) + + args = [ + 'backtesting', + '--config', 'config.json', + '--datadir', 
str(testdatadir), + '--strategy-path', str(Path(__file__).parents[1] / 'strategy/strats'), + '--timeframe', '1h', + '--freqai-backtest-live-models' + ] + args = get_args(args) + bt_config = setup_optimize_configuration(args, RunMode.BACKTEST) + + with pytest.raises(OperationalException, + match=r".* Saved models are required to run backtest .*"): + Backtesting(bt_config) + + Backtesting.cleanup() From 0318ca9f127a7e9b1004068a11ff8836cc1e4d7e Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Mon, 26 Sep 2022 19:08:25 -0300 Subject: [PATCH 07/22] backtest_live_models - fix typo --- freqtrade/freqai/freqai_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/freqai_util.py b/freqtrade/freqai/freqai_util.py index da5b5615f..629bed9ea 100644 --- a/freqtrade/freqai/freqai_util.py +++ b/freqtrade/freqai/freqai_util.py @@ -4,7 +4,7 @@ FreqAI generic functions import logging from datetime import datetime, timezone from pathlib import Path -from typing import Any, Dict +from typing import Any, Dict, Tuple from freqtrade.configuration import TimeRange from freqtrade.constants import Config @@ -25,7 +25,7 @@ def get_full_model_path(config: Config) -> Path: ) -def get_timerange_from_ready_models(models_path: Path) -> tuple[TimeRange, str, dict[str, Any]]: +def get_timerange_from_ready_models(models_path: Path) -> Tuple[TimeRange, str, dict[str, Any]]: """ Returns timerange information based on a FreqAI model directory :param models_path: FreqAI model path From 290afd9699b9a9be93199d746738a3e4b102c619 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Mon, 26 Sep 2022 19:21:53 -0300 Subject: [PATCH 08/22] backtest_live_models - fix typo --- freqtrade/freqai/freqai_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/freqai/freqai_util.py b/freqtrade/freqai/freqai_util.py index 629bed9ea..3136ffd2c 100644 --- a/freqtrade/freqai/freqai_util.py +++ b/freqtrade/freqai/freqai_util.py @@ -25,7 +25,7 
@@ def get_full_model_path(config: Config) -> Path: ) -def get_timerange_from_ready_models(models_path: Path) -> Tuple[TimeRange, str, dict[str, Any]]: +def get_timerange_from_ready_models(models_path: Path) -> Tuple[TimeRange, str, Dict[str, Any]]: """ Returns timerange information based on a FreqAI model directory :param models_path: FreqAI model path From 14b96aaa3848bb261e1a8d1fcedc678a372fb523 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Mon, 26 Sep 2022 19:52:59 -0300 Subject: [PATCH 09/22] backtesting live models - fix ci issues --- freqtrade/freqai/freqai_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 392d4d2d5..2344ab812 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -135,9 +135,9 @@ class IFreqaiModel(ABC): # the concatenated results for the full backtesting period back to the strategy. elif not self.follow_mode: self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) - if(self.dk.backtest_live_models): + if self.dk.backtest_live_models: logger.info( - f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (Live Models)") + f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (live models)") else: logger.info(f"Training {len(self.dk.training_timeranges)} timeranges") dataframe = self.dk.use_strategy_to_populate_indicators( From 72aa47fc51d783617a52c5aa3138b590ab946f15 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Tue, 27 Sep 2022 00:14:12 -0300 Subject: [PATCH 10/22] backtest_live_models - fix issue with timerange BT and 2 trainings within same candle (no data) --- freqtrade/freqai/data_kitchen.py | 5 +++++ freqtrade/freqai/freqai_interface.py | 16 ++++++++++++++++ freqtrade/freqai/freqai_util.py | 14 ++++++++++++-- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py 
b/freqtrade/freqai/data_kitchen.py index 29fda266d..9a4101dce 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -465,6 +465,11 @@ class FreqaiDataKitchen: tr_backtesting_list_timerange = [] pair = self.pair.split("/")[0].split(":")[0] + if pair not in self.backtest_live_models_data["pairs_end_dates"]: + raise OperationalException( + f"Model not available for pair {self.pair}. " + "Please, try again after removing this pair from the configuration file." + ) pair_data = self.backtest_live_models_data["pairs_end_dates"][pair] model_end_dates = [] backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"] diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 2344ab812..b150e1c97 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -257,6 +257,21 @@ class IFreqaiModel(ABC): dataframe_train = dk.slice_dataframe(tr_train, dataframe) dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) + if dk.backtest_live_models and len(dataframe_backtest) == 0: + tr_backtest_startts_str = datetime.fromtimestamp( + tr_backtest.startts, + tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + tr_backtest_stopts_str = datetime.fromtimestamp( + tr_backtest.stopts, + tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + logger.info( + f"No data found for pair {pair} " + f" from {tr_backtest_startts_str} " + f"to {tr_backtest_stopts_str}. " + "Probably more than one training within the same candle period." 
+ ) + continue + trained_timestamp = tr_train tr_train_startts_str = datetime.fromtimestamp( tr_train.startts, @@ -264,6 +279,7 @@ class IFreqaiModel(ABC): tr_train_stopts_str = datetime.fromtimestamp( tr_train.stopts, tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + if not dk.backtest_live_models: logger.info( f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs" diff --git a/freqtrade/freqai/freqai_util.py b/freqtrade/freqai/freqai_util.py index 3136ffd2c..0d3056b8d 100644 --- a/freqtrade/freqai/freqai_util.py +++ b/freqtrade/freqai/freqai_util.py @@ -2,7 +2,7 @@ FreqAI generic functions """ import logging -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Any, Dict, Tuple @@ -89,7 +89,17 @@ def get_timerange_from_ready_models(models_path: Path) -> Tuple[TimeRange, str, all_models_end_dates.sort() start = datetime.fromtimestamp(min(all_models_end_dates), tz=timezone.utc) stop = datetime.fromtimestamp(max(all_models_end_dates), tz=timezone.utc) - backtesting_string_timerange = f"{start.strftime('%Y%m%d')}-{stop.strftime('%Y%m%d')}" + end_date_string_timerange = stop + if ( + finish_timestamp < int(datetime.now(tz=timezone.utc).timestamp()) and + datetime.now(tz=timezone.utc).strftime('%Y%m%d') != stop.strftime('%Y%m%d') + ): + # add 1 day to string timerange to ensure BT module will load all dataframe data + end_date_string_timerange = stop + timedelta(days=1) + + backtesting_string_timerange = ( + f"{start.strftime('%Y%m%d')}-{end_date_string_timerange.strftime('%Y%m%d')}" + ) backtesting_timerange = TimeRange( 'date', 'date', min(all_models_end_dates), max(all_models_end_dates) ) From 0be115de9c7d9fb5a0568fef7a8894c2458eaf20 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Tue, 27 Sep 2022 10:26:57 -0300 Subject: [PATCH 11/22] backtest_live_models - added new tests and refactoring --- freqtrade/freqai/data_kitchen.py | 4 +- freqtrade/freqai/freqai_util.py | 65 
++++++++++--------- tests/freqai/test_freqai_util.py | 105 +++++++++++++++++++++++++++++++ tests/test_configuration.py | 72 +++++++++++++++++++++ 4 files changed, 216 insertions(+), 30 deletions(-) create mode 100644 tests/freqai/test_freqai_util.py diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 9a4101dce..7f32c942d 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -473,8 +473,8 @@ class FreqaiDataKitchen: pair_data = self.backtest_live_models_data["pairs_end_dates"][pair] model_end_dates = [] backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"] - for data in pair_data: - model_end_dates.append(data["model_end_date"]) + for end_date in pair_data: + model_end_dates.append(end_date) model_end_dates.append(backtesting_timerange.stopts) model_end_dates.sort() for index, item in enumerate(model_end_dates): diff --git a/freqtrade/freqai/freqai_util.py b/freqtrade/freqai/freqai_util.py index 0d3056b8d..665310230 100644 --- a/freqtrade/freqai/freqai_util.py +++ b/freqtrade/freqai/freqai_util.py @@ -36,34 +36,11 @@ def get_timerange_from_ready_models(models_path: Path) -> Tuple[TimeRange, str, pairs_end_dates: Dict with pair and model end training dates info) """ all_models_end_dates = [] - pairs_end_dates: Dict[str, Any] = {} - if not models_path.is_dir(): - raise OperationalException( - 'Model folders not found. 
Saved models are required ' - 'to run backtest with the freqai-backtest-live-models option' - ) - for model_dir in models_path.iterdir(): - if str(model_dir.name).startswith("sub-train"): - model_end_date = int(model_dir.name.split("_")[1]) - pair = model_dir.name.split("_")[0].replace("sub-train-", "") - model_file_name = ( - f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" - "_model.joblib" - ) - - model_path_file = Path(model_dir / model_file_name) - if model_path_file.is_file(): - if pair not in pairs_end_dates: - pairs_end_dates[pair] = [] - - pairs_end_dates[pair].append({ - "model_end_date": model_end_date, - "model_path_file": model_path_file, - "model_dir": model_dir - }) - - if model_end_date not in all_models_end_dates: - all_models_end_dates.append(model_end_date) + pairs_end_dates: Dict[str, Any] = get_pairs_timestamps_training_from_ready_models(models_path) + for key in pairs_end_dates: + for model_end_date in pairs_end_dates[key]: + if model_end_date not in all_models_end_dates: + all_models_end_dates.append(model_end_date) if len(all_models_end_dates) == 0: raise OperationalException( @@ -104,3 +81,35 @@ def get_timerange_from_ready_models(models_path: Path) -> Tuple[TimeRange, str, 'date', 'date', min(all_models_end_dates), max(all_models_end_dates) ) return backtesting_timerange, backtesting_string_timerange, pairs_end_dates + + +def get_pairs_timestamps_training_from_ready_models(models_path: Path) -> Dict[str, Any]: + """ + Scan the models path and returns all pairs end training dates (timestamp) + :param models_path: FreqAI model path + + :returns: + :pairs_end_dates: Dict with pair and model end training dates info + """ + pairs_end_dates: Dict[str, Any] = {} + if not models_path.is_dir(): + raise OperationalException( + 'Model folders not found. 
Saved models are required ' + 'to run backtest with the freqai-backtest-live-models option' + ) + for model_dir in models_path.iterdir(): + if str(model_dir.name).startswith("sub-train"): + model_end_date = int(model_dir.name.split("_")[1]) + pair = model_dir.name.split("_")[0].replace("sub-train-", "") + model_file_name = ( + f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" + "_model.joblib" + ) + + model_path_file = Path(model_dir / model_file_name) + if model_path_file.is_file(): + if pair not in pairs_end_dates: + pairs_end_dates[pair] = [] + + pairs_end_dates[pair].append(model_end_date) + return pairs_end_dates diff --git a/tests/freqai/test_freqai_util.py b/tests/freqai/test_freqai_util.py new file mode 100644 index 000000000..2c7c8c68a --- /dev/null +++ b/tests/freqai/test_freqai_util.py @@ -0,0 +1,105 @@ +import platform +from unittest.mock import MagicMock + +import pytest + +from freqtrade.configuration import TimeRange +from freqtrade.data.dataprovider import DataProvider +from freqtrade.exceptions import OperationalException +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.freqai_util import (get_full_model_path, + get_pairs_timestamps_training_from_ready_models, + get_timerange_from_ready_models) +from tests.conftest import get_patched_exchange +from tests.freqai.conftest import get_patched_freqai_strategy + + +def is_arm() -> bool: + machine = platform.machine() + return "arm" in machine or "aarch64" in machine + + +@pytest.mark.parametrize('model', [ + 'LightGBMRegressor' + ]) +def test_get_full_model_path(mocker, freqai_conf, model): + if is_arm() and model == 'CatboostRegressor': + pytest.skip("CatBoost is not supported on ARM") + + freqai_conf.update({"freqaimodel": model}) + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"strategy": "freqai_test_strat"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, 
freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + model_path = get_full_model_path(freqai_conf) + assert model_path.is_dir() is True + + +def test_get_pairs_timestamp_validation(mocker, freqai_conf): + model_path = get_full_model_path(freqai_conf) + with pytest.raises( + OperationalException, + match=r'.*required to run backtest with the freqai-backtest-live-models.*' + ): + get_pairs_timestamps_training_from_ready_models(model_path) + + +@pytest.mark.parametrize('model', [ + 'LightGBMRegressor' + ]) +def test_get_timerange_from_ready_models(mocker, freqai_conf, model): + if is_arm() and model == 'CatboostRegressor': + pytest.skip("CatBoost is not supported on ARM") + + freqai_conf.update({"freqaimodel": model}) + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"strategy": "freqai_test_strat"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180101-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180101-20180130") + + new_timerange = 
TimeRange.parse_timerange("20180120-20180122") + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + new_timerange = TimeRange.parse_timerange("20180122-20180124") + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + model_path = get_full_model_path(freqai_conf) + (backtesting_timerange, + backtesting_string_timerange, + pairs_end_dates) = get_timerange_from_ready_models(models_path=model_path) + + assert len(pairs_end_dates["ADA"]) == 2 + assert backtesting_string_timerange == '20180122-20180127' + assert backtesting_timerange.startts == 1516579200 + assert backtesting_timerange.stopts == 1516924800 diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 99edf0233..94d4b4c78 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -1574,3 +1574,75 @@ def test_flat_vars_to_nested_dict(caplog): assert log_has("Loading variable 'FREQTRADE__EXCHANGE__SOME_SETTING'", caplog) assert not log_has("Loading variable 'NOT_RELEVANT'", caplog) + + +def test_setup_hyperopt_freqai(mocker, default_conf, caplog) -> None: + patched_configuration_load_config_file(mocker, default_conf) + mocker.patch( + 'freqtrade.configuration.configuration.create_datadir', + lambda c, x: x + ) + mocker.patch( + 'freqtrade.configuration.configuration.create_userdata_dir', + lambda x, *args, **kwargs: Path(x) + ) + arglist = [ + 'hyperopt', + '--config', 'config.json', + '--strategy', CURRENT_TEST_STRATEGY, + '--timerange', '20220801-20220805', + "--freqaimodel", + "LightGBMRegressorMultiTarget", + "--analyze-per-epoch" + ] + + args = Arguments(arglist).get_parsed_arg() + + configuration = Configuration(args) + config = configuration.get_config() + config['freqai'] = { + "enabled": True + } + with pytest.raises( + OperationalException, match=r".*analyze-per-epoch parameter is not supported.*" + ): + validate_config_consistency(config) + + 
+def test_setup_freqai_backtest_live_models(mocker, default_conf, caplog) -> None: + patched_configuration_load_config_file(mocker, default_conf) + mocker.patch( + 'freqtrade.configuration.configuration.create_datadir', + lambda c, x: x + ) + mocker.patch( + 'freqtrade.configuration.configuration.create_userdata_dir', + lambda x, *args, **kwargs: Path(x) + ) + arglist = [ + 'backtesting', + '--config', 'config.json', + '--strategy', CURRENT_TEST_STRATEGY, + '--timerange', '20220801-20220805', + "--freqaimodel", + "LightGBMRegressorMultiTarget", + "--freqai-backtest-live-models" + ] + + args = Arguments(arglist).get_parsed_arg() + + configuration = Configuration(args) + config = configuration.get_config() + with pytest.raises( + OperationalException, match=r".*--freqai-backtest-live-models parameter is only.*" + ): + validate_config_consistency(config) + + conf = deepcopy(config) + conf['freqai'] = { + "enabled": True + } + with pytest.raises( + OperationalException, match=r".* timerange parameter is not supported with .*" + ): + validate_config_consistency(conf) From 55ebbeec1820dfef17eb7cecca4a5b2fd412a3ae Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Wed, 28 Sep 2022 08:48:32 -0300 Subject: [PATCH 12/22] backtest_live models tests refactoring --- freqtrade/configuration/config_validation.py | 27 +++++++++++-------- freqtrade/freqai/data_kitchen.py | 5 ---- tests/freqai/test_freqai_backtesting.py | 28 -------------------- tests/test_configuration.py | 12 ++++++++- 4 files changed, 27 insertions(+), 45 deletions(-) diff --git a/freqtrade/configuration/config_validation.py b/freqtrade/configuration/config_validation.py index 6e27fc748..eafba2a28 100644 --- a/freqtrade/configuration/config_validation.py +++ b/freqtrade/configuration/config_validation.py @@ -336,18 +336,23 @@ def _validate_freqai_hyperopt(conf: Dict[str, Any]) -> None: def _validate_freqai_backtest(conf: Dict[str, Any]) -> None: - freqai_enabled = conf.get('freqai', {}).get('enabled', 
False) - timerange = conf.get('timerange') - freqai_backtest_live_models = conf.get('freqai_backtest_live_models', False) - if freqai_backtest_live_models and freqai_enabled and timerange: - raise OperationalException( - 'Using timerange parameter is not supported with ' - '--freqai-backtest-live-models parameter.') + if conf.get('runmode', RunMode.OTHER) == RunMode.BACKTEST: + freqai_enabled = conf.get('freqai', {}).get('enabled', False) + timerange = conf.get('timerange') + freqai_backtest_live_models = conf.get('freqai_backtest_live_models', False) + if freqai_backtest_live_models and freqai_enabled and timerange: + raise OperationalException( + 'Using timerange parameter is not supported with ' + '--freqai-backtest-live-models parameter.') - if freqai_backtest_live_models and not freqai_enabled: - raise OperationalException( - 'Using --freqai-backtest-live-models parameter is only ' - 'supported with a FreqAI strategy.') + if freqai_backtest_live_models and not freqai_enabled: + raise OperationalException( + 'Using --freqai-backtest-live-models parameter is only ' + 'supported with a FreqAI strategy.') + + if freqai_enabled and not freqai_backtest_live_models and not timerange: + raise OperationalException( + 'Please pass --timerange if you intend to use FreqAI for backtesting.') def _validate_consumers(conf: Dict[str, Any]) -> None: diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 7f32c942d..667250c4c 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -84,11 +84,6 @@ class FreqaiDataKitchen: self.backtest_live_models = config.get("freqai_backtest_live_models", False) if not self.live: - if (not self.config.get("timerange") and - not self.backtest_live_models): - raise OperationalException( - 'Please pass --timerange if you intend to use FreqAI for backtesting.') - self.full_path = freqai_util.get_full_model_path(self.config) self.full_timerange = self.create_fulltimerange( 
self.config["timerange"], self.freqai_config.get("train_period_days", 0) diff --git a/tests/freqai/test_freqai_backtesting.py b/tests/freqai/test_freqai_backtesting.py index d30383283..446e8295a 100644 --- a/tests/freqai/test_freqai_backtesting.py +++ b/tests/freqai/test_freqai_backtesting.py @@ -56,34 +56,6 @@ def test_freqai_backtest_load_data(freqai_conf, mocker, caplog): Backtesting.cleanup() -def test_freqai_backtest_live_models_validations(freqai_conf, mocker, testdatadir, caplog): - patch_exchange(mocker) - - now = datetime.now(timezone.utc) - mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist', - PropertyMock(return_value=['HULUMULU/USDT', 'XRP/USDT'])) - mocker.patch('freqtrade.optimize.backtesting.history.load_data') - mocker.patch('freqtrade.optimize.backtesting.history.get_timerange', return_value=(now, now)) - - patched_configuration_load_config_file(mocker, freqai_conf) - - args = [ - 'backtesting', - '--config', 'config.json', - '--datadir', str(testdatadir), - '--strategy-path', str(Path(__file__).parents[1] / 'strategy/strats'), - '--timeframe', '1h', - '--timerange', '20220108-20220115', - '--freqai-backtest-live-models' - ] - args = get_args(args) - with pytest.raises(OperationalException, - match=r".* timerange parameter is not supported .*"): - setup_optimize_configuration(args, RunMode.BACKTEST) - - Backtesting.cleanup() - - def test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testdatadir, caplog): patch_exchange(mocker) diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 94d4b4c78..9905be88c 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -1609,7 +1609,7 @@ def test_setup_hyperopt_freqai(mocker, default_conf, caplog) -> None: validate_config_consistency(config) -def test_setup_freqai_backtest_live_models(mocker, default_conf, caplog) -> None: +def test_setup_freqai_backtesting(mocker, default_conf, caplog) -> None: 
patched_configuration_load_config_file(mocker, default_conf) mocker.patch( 'freqtrade.configuration.configuration.create_datadir', @@ -1633,6 +1633,8 @@ def test_setup_freqai_backtest_live_models(mocker, default_conf, caplog) -> None configuration = Configuration(args) config = configuration.get_config() + config['runmode'] = RunMode.BACKTEST + with pytest.raises( OperationalException, match=r".*--freqai-backtest-live-models parameter is only.*" ): @@ -1646,3 +1648,11 @@ def test_setup_freqai_backtest_live_models(mocker, default_conf, caplog) -> None OperationalException, match=r".* timerange parameter is not supported with .*" ): validate_config_consistency(conf) + + conf['timerange'] = None + conf['freqai_backtest_live_models'] = False + + with pytest.raises( + OperationalException, match=r".* pass --timerange if you intend to use FreqAI .*" + ): + validate_config_consistency(conf) From 6845a5c6ea535a3b0cee9f27b33c6f3069e70220 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 29 Sep 2022 01:48:38 -0300 Subject: [PATCH 13/22] backtest_live_models - refactoring after PR review --- freqtrade/commands/cli_options.py | 3 +- freqtrade/freqai/data_kitchen.py | 22 ++++----- freqtrade/freqai/freqai_interface.py | 17 ++----- freqtrade/freqai/freqai_util.py | 73 +++++++++++++++------------- freqtrade/optimize/backtesting.py | 7 +-- tests/freqai/test_freqai_util.py | 24 ++++----- 6 files changed, 70 insertions(+), 76 deletions(-) diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py index a1558d399..53a6f478d 100644 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -671,7 +671,6 @@ AVAILABLE_CLI_OPTIONS = { "freqai_backtest_live_models": Arg( '--freqai-backtest-live-models', help='Run backtest with ready models.', - action='store_true', - default=False, + action='store_true' ), } diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 667250c4c..b8030f547 100644 --- 
a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -84,7 +84,7 @@ class FreqaiDataKitchen: self.backtest_live_models = config.get("freqai_backtest_live_models", False) if not self.live: - self.full_path = freqai_util.get_full_model_path(self.config) + self.full_path = freqai_util.get_full_models_path(self.config) self.full_timerange = self.create_fulltimerange( self.config["timerange"], self.freqai_config.get("train_period_days", 0) ) @@ -118,7 +118,7 @@ class FreqaiDataKitchen: metadata: dict = strategy furnished pair metadata trained_timestamp: int = timestamp of most recent training """ - self.full_path = freqai_util.get_full_model_path(self.config) + self.full_path = freqai_util.get_full_models_path(self.config) self.data_path = Path( self.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp}" @@ -459,17 +459,15 @@ class FreqaiDataKitchen: ) -> Tuple[list, list]: tr_backtesting_list_timerange = [] - pair = self.pair.split("/")[0].split(":")[0] - if pair not in self.backtest_live_models_data["pairs_end_dates"]: + asset = self.pair.split("/")[0] + if asset not in self.backtest_live_models_data["assets_end_dates"]: raise OperationalException( f"Model not available for pair {self.pair}. " "Please, try again after removing this pair from the configuration file." 
) - pair_data = self.backtest_live_models_data["pairs_end_dates"][pair] - model_end_dates = [] + asset_data = self.backtest_live_models_data["assets_end_dates"][asset] backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"] - for end_date in pair_data: - model_end_dates.append(end_date) + model_end_dates = [x for x in asset_data] model_end_dates.append(backtesting_timerange.stopts) model_end_dates.sort() for index, item in enumerate(model_end_dates): @@ -1291,11 +1289,11 @@ class FreqaiDataKitchen: def set_timerange_from_ready_models(self): backtesting_timerange, \ - backtesting_string_timerange, \ - pairs_end_dates = freqai_util.get_timerange_from_ready_models(self.full_path) + assets_end_dates = ( + freqai_util.get_timerange_and_assets_end_dates_from_ready_models(self.full_path)) + self.backtest_live_models_data = { "backtesting_timerange": backtesting_timerange, - "backtesting_string_timerange": backtesting_string_timerange, - "pairs_end_dates": pairs_end_dates + "assets_end_dates": assets_end_dates } return diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index b150e1c97..8106d034a 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -264,12 +264,9 @@ class IFreqaiModel(ABC): tr_backtest_stopts_str = datetime.fromtimestamp( tr_backtest.stopts, tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) - logger.info( - f"No data found for pair {pair} " - f" from {tr_backtest_startts_str} " - f"to {tr_backtest_stopts_str}. " - "Probably more than one training within the same candle period." - ) + logger.info(f"No data found for pair {pair} from {tr_backtest_startts_str} " + f" from {tr_backtest_startts_str} to {tr_backtest_stopts_str}. 
" + "Probably more than one training within the same candle period.") continue trained_timestamp = tr_train @@ -305,12 +302,6 @@ class IFreqaiModel(ABC): dk.append_predictions(append_df) else: if not self.model_exists(dk): - if dk.backtest_live_models: - raise OperationalException( - "Training models is not allowed " - "in backtest_live_models backtesting " - "mode" - ) dk.find_features(dataframe_train) dk.find_labels(dataframe_train) self.model = self.train(dataframe_train, pair, dk) @@ -603,7 +594,7 @@ class IFreqaiModel(ABC): model = self.train(unfiltered_dataframe, pair, dk) self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts - dk.set_new_model_names(pair, int(new_trained_timerange.stopts)) + dk.set_new_model_names(pair, new_trained_timerange.stopts) self.dd.save_data(model, pair, dk) if self.plot_features: diff --git a/freqtrade/freqai/freqai_util.py b/freqtrade/freqai/freqai_util.py index 665310230..d3864a0d0 100644 --- a/freqtrade/freqai/freqai_util.py +++ b/freqtrade/freqai/freqai_util.py @@ -14,7 +14,7 @@ from freqtrade.exceptions import OperationalException logger = logging.getLogger(__name__) -def get_full_model_path(config: Config) -> Path: +def get_full_models_path(config: Config) -> Path: """ Returns default FreqAI model path :param config: Configuration dictionary @@ -25,20 +25,19 @@ def get_full_model_path(config: Config) -> Path: ) -def get_timerange_from_ready_models(models_path: Path) -> Tuple[TimeRange, str, Dict[str, Any]]: +def get_timerange_and_assets_end_dates_from_ready_models( + models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]: """ Returns timerange information based on a FreqAI model directory :param models_path: FreqAI model path - :returns: a Tuple with (backtesting_timerange: Timerange calculated from directory, - backtesting_string_timerange: str timerange calculated from - directory (format example '20020822-20220830'), \ - pairs_end_dates: Dict with pair and model end training dates info) + :return: a 
Tuple with (Timerange calculated from directory and + a Dict with pair and model end training dates info) """ all_models_end_dates = [] - pairs_end_dates: Dict[str, Any] = get_pairs_timestamps_training_from_ready_models(models_path) - for key in pairs_end_dates: - for model_end_date in pairs_end_dates[key]: + assets_end_dates: Dict[str, Any] = get_assets_timestamps_training_from_ready_models(models_path) + for key in assets_end_dates: + for model_end_date in assets_end_dates[key]: if model_end_date not in all_models_end_dates: all_models_end_dates.append(model_end_date) @@ -64,34 +63,27 @@ def get_timerange_from_ready_models(models_path: Path) -> Tuple[TimeRange, str, all_models_end_dates.append(finish_timestamp) all_models_end_dates.sort() - start = datetime.fromtimestamp(min(all_models_end_dates), tz=timezone.utc) - stop = datetime.fromtimestamp(max(all_models_end_dates), tz=timezone.utc) - end_date_string_timerange = stop - if ( - finish_timestamp < int(datetime.now(tz=timezone.utc).timestamp()) and - datetime.now(tz=timezone.utc).strftime('%Y%m%d') != stop.strftime('%Y%m%d') - ): - # add 1 day to string timerange to ensure BT module will load all dataframe data - end_date_string_timerange = stop + timedelta(days=1) + start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates)).timetuple()[:3], + tzinfo=timezone.utc)) + end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates)).timetuple()[:3], + tzinfo=timezone.utc)) - backtesting_string_timerange = ( - f"{start.strftime('%Y%m%d')}-{end_date_string_timerange.strftime('%Y%m%d')}" - ) + # add 1 day to string timerange to ensure BT module will load all dataframe data + end_date = end_date + timedelta(days=1) backtesting_timerange = TimeRange( - 'date', 'date', min(all_models_end_dates), max(all_models_end_dates) + 'date', 'date', int(start_date.timestamp()), int(end_date.timestamp()) ) - return backtesting_timerange, backtesting_string_timerange, pairs_end_dates + return 
backtesting_timerange, assets_end_dates -def get_pairs_timestamps_training_from_ready_models(models_path: Path) -> Dict[str, Any]: +def get_assets_timestamps_training_from_ready_models(models_path: Path) -> Dict[str, Any]: """ - Scan the models path and returns all pairs end training dates (timestamp) + Scan the models path and returns all assets end training dates (timestamp) :param models_path: FreqAI model path - :returns: - :pairs_end_dates: Dict with pair and model end training dates info + :return: a Dict with asset and model end training dates info """ - pairs_end_dates: Dict[str, Any] = {} + assets_end_dates: Dict[str, Any] = {} if not models_path.is_dir(): raise OperationalException( 'Model folders not found. Saved models are required ' @@ -100,7 +92,7 @@ def get_pairs_timestamps_training_from_ready_models(models_path: Path) -> Dict[s for model_dir in models_path.iterdir(): if str(model_dir.name).startswith("sub-train"): model_end_date = int(model_dir.name.split("_")[1]) - pair = model_dir.name.split("_")[0].replace("sub-train-", "") + asset = model_dir.name.split("_")[0].replace("sub-train-", "") model_file_name = ( f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" "_model.joblib" @@ -108,8 +100,23 @@ def get_pairs_timestamps_training_from_ready_models(models_path: Path) -> Dict[s model_path_file = Path(model_dir / model_file_name) if model_path_file.is_file(): - if pair not in pairs_end_dates: - pairs_end_dates[pair] = [] + if asset not in assets_end_dates: + assets_end_dates[asset] = [] + assets_end_dates[asset].append(model_end_date) - pairs_end_dates[pair].append(model_end_date) - return pairs_end_dates + return assets_end_dates + + +def get_timerange_backtest_live_models(config: Config): + """ + Returns a formated timerange for backtest live/ready models + :param config: Configuration dictionary + + :return: a string timerange (format example: '20220801-20220822') + """ + models_path = get_full_models_path(config) + timerange, _ = 
get_timerange_and_assets_end_dates_from_ready_models(models_path) + start_date = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) + end_date = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) + tr = f"{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}" + return tr diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index a535253e1..626051700 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -25,7 +25,6 @@ from freqtrade.enums import (BacktestState, CandleType, ExitCheckTuple, ExitType from freqtrade.exceptions import DependencyException, OperationalException from freqtrade.exchange import (amount_to_contract_precision, price_to_precision, timeframe_to_minutes, timeframe_to_seconds) -from freqtrade.freqai import freqai_util from freqtrade.mixins import LoggingMixin from freqtrade.optimize.backtest_caching import get_strategy_run_id from freqtrade.optimize.bt_progress import BTProgress @@ -136,10 +135,8 @@ class Backtesting: self.precision_mode = self.exchange.precisionMode if self.config.get('freqai_backtest_live_models', False): - freqai_model_path = freqai_util.get_full_model_path(self.config) - _, live_models_timerange, _ = freqai_util.get_timerange_from_ready_models( - freqai_model_path) - self.config['timerange'] = live_models_timerange + from freqtrade.freqai import freqai_util + self.config['timerange'] = freqai_util.get_timerange_backtest_live_models(self.config) self.timerange = TimeRange.parse_timerange( None if self.config.get('timerange') is None else str(self.config.get('timerange'))) diff --git a/tests/freqai/test_freqai_util.py b/tests/freqai/test_freqai_util.py index 2c7c8c68a..9e6fdfc87 100644 --- a/tests/freqai/test_freqai_util.py +++ b/tests/freqai/test_freqai_util.py @@ -7,9 +7,10 @@ from freqtrade.configuration import TimeRange from freqtrade.data.dataprovider import DataProvider from freqtrade.exceptions import OperationalException from 
freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.freqai_util import (get_full_model_path, - get_pairs_timestamps_training_from_ready_models, - get_timerange_from_ready_models) +from freqtrade.freqai.freqai_util import (get_assets_timestamps_training_from_ready_models, + get_full_models_path, + get_timerange_and_assets_end_dates_from_ready_models, + get_timerange_backtest_live_models) from tests.conftest import get_patched_exchange from tests.freqai.conftest import get_patched_freqai_strategy @@ -48,17 +49,17 @@ def test_get_full_model_path(mocker, freqai_conf, model): freqai.extract_data_and_train_model( new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - model_path = get_full_model_path(freqai_conf) + model_path = get_full_models_path(freqai_conf) assert model_path.is_dir() is True def test_get_pairs_timestamp_validation(mocker, freqai_conf): - model_path = get_full_model_path(freqai_conf) + model_path = get_full_models_path(freqai_conf) with pytest.raises( OperationalException, match=r'.*required to run backtest with the freqai-backtest-live-models.*' ): - get_pairs_timestamps_training_from_ready_models(model_path) + get_assets_timestamps_training_from_ready_models(model_path) @pytest.mark.parametrize('model', [ @@ -94,12 +95,13 @@ def test_get_timerange_from_ready_models(mocker, freqai_conf, model): freqai.extract_data_and_train_model( new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - model_path = get_full_model_path(freqai_conf) + model_path = get_full_models_path(freqai_conf) (backtesting_timerange, - backtesting_string_timerange, - pairs_end_dates) = get_timerange_from_ready_models(models_path=model_path) + pairs_end_dates) = get_timerange_and_assets_end_dates_from_ready_models(models_path=model_path) assert len(pairs_end_dates["ADA"]) == 2 - assert backtesting_string_timerange == '20180122-20180127' - assert backtesting_timerange.startts == 1516579200 + assert backtesting_timerange.startts == 
1516492800 assert backtesting_timerange.stopts == 1516924800 + + backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf) + assert backtesting_string_timerange == '20180121-20180126' From 01e3507e4c65a7eb3b60ebb725406b3edf276cd9 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Mon, 10 Oct 2022 15:15:43 -0300 Subject: [PATCH 14/22] fix freqai backtest live models --- tests/freqai/test_freqai_backtesting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/freqai/test_freqai_backtesting.py b/tests/freqai/test_freqai_backtesting.py index 9d44cc656..b9e2d650a 100644 --- a/tests/freqai/test_freqai_backtesting.py +++ b/tests/freqai/test_freqai_backtesting.py @@ -72,7 +72,7 @@ def test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testda '--config', 'config.json', '--datadir', str(testdatadir), '--strategy-path', str(Path(__file__).parents[1] / 'strategy/strats'), - '--timeframe', '1h', + '--timeframe', '5m', '--freqai-backtest-live-models' ] args = get_args(args) From 6919f3aa75708d4137ba553066dad372d062b6fd Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 13 Oct 2022 15:03:27 -0300 Subject: [PATCH 15/22] Backtest live models - fix utc date convert issue --- freqtrade/freqai/freqai_util.py | 8 ++++---- tests/freqai/test_freqai_util.py | 15 ++++++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/freqtrade/freqai/freqai_util.py b/freqtrade/freqai/freqai_util.py index d3864a0d0..23df61335 100644 --- a/freqtrade/freqai/freqai_util.py +++ b/freqtrade/freqai/freqai_util.py @@ -63,10 +63,10 @@ def get_timerange_and_assets_end_dates_from_ready_models( all_models_end_dates.append(finish_timestamp) all_models_end_dates.sort() - start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates)).timetuple()[:3], - tzinfo=timezone.utc)) - end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates)).timetuple()[:3], - tzinfo=timezone.utc)) + start_date = 
(datetime(*datetime.fromtimestamp(min(all_models_end_dates), + timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) + end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates), + timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) # add 1 day to string timerange to ensure BT module will load all dataframe data end_date = end_date + timedelta(days=1) diff --git a/tests/freqai/test_freqai_util.py b/tests/freqai/test_freqai_util.py index 9e6fdfc87..c548fd3f6 100644 --- a/tests/freqai/test_freqai_util.py +++ b/tests/freqai/test_freqai_util.py @@ -87,11 +87,16 @@ def test_get_timerange_from_ready_models(mocker, freqai_conf, model): data_load_timerange = TimeRange.parse_timerange("20180101-20180130") - new_timerange = TimeRange.parse_timerange("20180120-20180122") + # 1516233600 (2018-01-18 00:00) - Start Training 1 + # 1516406400 (2018-01-20 00:00) - End Training 1 (Backtest slice 1) + # 1516579200 (2018-01-22 00:00) - End Training 2 (Backtest slice 2) + # 1516838400 (2018-01-25 00:00) - End Timerange + + new_timerange = TimeRange("date", "date", 1516233600, 1516406400) freqai.extract_data_and_train_model( new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - new_timerange = TimeRange.parse_timerange("20180122-20180124") + new_timerange = TimeRange("date", "date", 1516406400, 1516579200) freqai.extract_data_and_train_model( new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) @@ -100,8 +105,8 @@ def test_get_timerange_from_ready_models(mocker, freqai_conf, model): pairs_end_dates) = get_timerange_and_assets_end_dates_from_ready_models(models_path=model_path) assert len(pairs_end_dates["ADA"]) == 2 - assert backtesting_timerange.startts == 1516492800 - assert backtesting_timerange.stopts == 1516924800 + assert backtesting_timerange.startts == 1516406400 + assert backtesting_timerange.stopts == 1516838400 backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf) - assert backtesting_string_timerange == 
'20180121-20180126' + assert backtesting_string_timerange == '20180120-20180125' From 4e1bf79239935092d4a78db80b7ec70300b3e5d1 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 13 Oct 2022 15:47:31 -0300 Subject: [PATCH 16/22] backtest live models - documentation --- docs/freqai-running.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/freqai-running.md b/docs/freqai-running.md index f6aa7b2e1..b7d0b9843 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -73,12 +73,24 @@ Backtesting mode requires [downloading the necessary data](#downloading-data-to- To allow for tweaking your strategy (**not** the features!), FreqAI will automatically save the predictions during backtesting so that they can be reused for future backtests and live runs using the same `identifier` model. This provides a performance enhancement geared towards enabling **high-level hyperopting** of entry/exit criteria. -An additional directory called `predictions`, which contains all the predictions stored in `hdf` format, will be created in the `unique-id` folder. +An additional directory called `backtesting_predictions`, which contains all the predictions stored in `hdf` format, will be created in the `unique-id` folder. To change your **features**, you **must** set a new `identifier` in the config to signal to FreqAI to train new models. To save the models generated during a particular backtest so that you can start a live deployment from one of them instead of training a new model, you must set `save_backtest_models` to `True` in the config. +### Backtest live models + +FreqAI allows you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse models generated in dry/live runs for comparison or other studies. For that, you must set `"purge_old_models"` to `True` in the config. 
+ +The `--timerange` parameter must not be specified, as it will be calculated automatically from the training end dates of the models. + +Each model has an identifier derived from its training end date. If you have only 1 trained model, FreqAI will backtest from the training end date until the current date. If you have more than 1 model, each model will backtest the period from its own training end date until the training end date of the next model, and so on. For the last model, a period of the same length as the previous model's period will be used. + +!!! Note + Currently, expired models are not checked for, even if the `expiration_hours` parameter is set. + + ### Downloading data to cover the full backtest period For live/dry deployments, FreqAI will download the necessary data automatically. However, to use backtesting functionality, you need to download the necessary data using `download-data` (details [here](data-download.md#data-downloading)). You need to pay careful attention to understanding how much *additional* data needs to be downloaded to ensure that there is a sufficient amount of training data *before* the start of the backtesting time range. The amount of additional data can be roughly estimated by moving the start date of the time range backwards by `train_period_days` and the `startup_candle_count` (see the [parameter table](freqai-parameter-table.md) for detailed descriptions of these parameters) from the beginning of the desired backtesting time range. 
From 6606a0113f11570a15f61a3597a2619b5169d7c5 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 20 Oct 2022 14:53:25 -0300 Subject: [PATCH 17/22] refactoring - remove unnecessary config file --- freqtrade/freqai/data_kitchen.py | 114 +++++++++++++++++++--- freqtrade/freqai/freqai_util.py | 122 ------------------------ freqtrade/freqai/utils.py | 16 ++++ freqtrade/optimize/backtesting.py | 4 +- tests/freqai/test_freqai_datakitchen.py | 106 +++++++++++++++++++- tests/freqai/test_freqai_util.py | 112 ---------------------- 6 files changed, 224 insertions(+), 250 deletions(-) delete mode 100644 freqtrade/freqai/freqai_util.py delete mode 100644 tests/freqai/test_freqai_util.py diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index d2dc1fc63..ec0503f0b 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1,7 +1,7 @@ import copy import logging import shutil -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone from math import cos, sin from pathlib import Path from typing import Any, Dict, List, Tuple @@ -21,7 +21,6 @@ from freqtrade.configuration import TimeRange from freqtrade.constants import Config from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds -from freqtrade.freqai import freqai_util from freqtrade.strategy.interface import IStrategy @@ -84,16 +83,17 @@ class FreqaiDataKitchen: self.backtest_live_models = config.get("freqai_backtest_live_models", False) if not self.live: - self.full_path = freqai_util.get_full_models_path(self.config) - self.full_timerange = self.create_fulltimerange( - self.config["timerange"], self.freqai_config.get("train_period_days", 0) - ) + self.full_path = self.get_full_models_path(self.config) if self.backtest_live_models: - self.set_timerange_from_ready_models() - (self.training_timeranges, - self.backtesting_timeranges) = self.split_timerange_live_models() + if 
self.pair: + self.set_timerange_from_ready_models() + (self.training_timeranges, + self.backtesting_timeranges) = self.split_timerange_live_models() else: + self.full_timerange = self.create_fulltimerange( + self.config["timerange"], self.freqai_config.get("train_period_days", 0) + ) (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange( self.full_timerange, config["freqai"]["train_period_days"], @@ -117,7 +117,7 @@ class FreqaiDataKitchen: :param metadata: dict = strategy furnished pair metadata :param trained_timestamp: int = timestamp of most recent training """ - self.full_path = freqai_util.get_full_models_path(self.config) + self.full_path = self.get_full_models_path(self.config) self.data_path = Path( self.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp}" @@ -1300,10 +1300,102 @@ class FreqaiDataKitchen: def set_timerange_from_ready_models(self): backtesting_timerange, \ assets_end_dates = ( - freqai_util.get_timerange_and_assets_end_dates_from_ready_models(self.full_path)) + self.get_timerange_and_assets_end_dates_from_ready_models(self.full_path)) self.backtest_live_models_data = { "backtesting_timerange": backtesting_timerange, "assets_end_dates": assets_end_dates } return + + def get_full_models_path(self, config: Config) -> Path: + """ + Returns default FreqAI model path + :param config: Configuration dictionary + """ + freqai_config: Dict[str, Any] = config["freqai"] + return Path( + config["user_data_dir"] / "models" / str(freqai_config.get("identifier")) + ) + + def get_timerange_and_assets_end_dates_from_ready_models( + self, models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]: + """ + Returns timerange information based on a FreqAI model directory + :param models_path: FreqAI model path + + :return: a Tuple with (Timerange calculated from directory and + a Dict with pair and model end training dates info) + """ + all_models_end_dates = [] + assets_end_dates: Dict[str, Any] = 
self.get_assets_timestamps_training_from_ready_models( + models_path) + for key in assets_end_dates: + for model_end_date in assets_end_dates[key]: + if model_end_date not in all_models_end_dates: + all_models_end_dates.append(model_end_date) + + if len(all_models_end_dates) == 0: + raise OperationalException( + 'At least 1 saved model is required to ' + 'run backtest with the freqai-backtest-live-models option' + ) + + if len(all_models_end_dates) == 1: + logger.warning( + "Only 1 model was found. Backtesting will run with the " + "timerange from the end of the training date to the current date" + ) + + finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp()) + if len(all_models_end_dates) > 1: + # After last model end date, use the same period from previous model + # to finish the backtest + all_models_end_dates.sort(reverse=True) + finish_timestamp = all_models_end_dates[0] + \ + (all_models_end_dates[0] - all_models_end_dates[1]) + + all_models_end_dates.append(finish_timestamp) + all_models_end_dates.sort() + start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates), + timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) + end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates), + timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) + + # add 1 day to string timerange to ensure BT module will load all dataframe data + end_date = end_date + timedelta(days=1) + backtesting_timerange = TimeRange( + 'date', 'date', int(start_date.timestamp()), int(end_date.timestamp()) + ) + return backtesting_timerange, assets_end_dates + + def get_assets_timestamps_training_from_ready_models( + self, models_path: Path) -> Dict[str, Any]: + """ + Scan the models path and returns all assets end training dates (timestamp) + :param models_path: FreqAI model path + + :return: a Dict with asset and model end training dates info + """ + assets_end_dates: Dict[str, Any] = {} + if not models_path.is_dir(): + raise OperationalException( + 'Model 
folders not found. Saved models are required ' + 'to run backtest with the freqai-backtest-live-models option' + ) + for model_dir in models_path.iterdir(): + if str(model_dir.name).startswith("sub-train"): + model_end_date = int(model_dir.name.split("_")[1]) + asset = model_dir.name.split("_")[0].replace("sub-train-", "") + model_file_name = ( + f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" + "_model.joblib" + ) + + model_path_file = Path(model_dir / model_file_name) + if model_path_file.is_file(): + if asset not in assets_end_dates: + assets_end_dates[asset] = [] + assets_end_dates[asset].append(model_end_date) + + return assets_end_dates diff --git a/freqtrade/freqai/freqai_util.py b/freqtrade/freqai/freqai_util.py deleted file mode 100644 index 23df61335..000000000 --- a/freqtrade/freqai/freqai_util.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -FreqAI generic functions -""" -import logging -from datetime import datetime, timedelta, timezone -from pathlib import Path -from typing import Any, Dict, Tuple - -from freqtrade.configuration import TimeRange -from freqtrade.constants import Config -from freqtrade.exceptions import OperationalException - - -logger = logging.getLogger(__name__) - - -def get_full_models_path(config: Config) -> Path: - """ - Returns default FreqAI model path - :param config: Configuration dictionary - """ - freqai_config: Dict[str, Any] = config["freqai"] - return Path( - config["user_data_dir"] / "models" / str(freqai_config.get("identifier")) - ) - - -def get_timerange_and_assets_end_dates_from_ready_models( - models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]: - """ - Returns timerange information based on a FreqAI model directory - :param models_path: FreqAI model path - - :return: a Tuple with (Timerange calculated from directory and - a Dict with pair and model end training dates info) - """ - all_models_end_dates = [] - assets_end_dates: Dict[str, Any] = get_assets_timestamps_training_from_ready_models(models_path) - 
for key in assets_end_dates: - for model_end_date in assets_end_dates[key]: - if model_end_date not in all_models_end_dates: - all_models_end_dates.append(model_end_date) - - if len(all_models_end_dates) == 0: - raise OperationalException( - 'At least 1 saved model is required to ' - 'run backtest with the freqai-backtest-live-models option' - ) - - if len(all_models_end_dates) == 1: - logger.warning( - "Only 1 model was found. Backtesting will run with the " - "timerange from the end of the training date to the current date" - ) - - finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp()) - if len(all_models_end_dates) > 1: - # After last model end date, use the same period from previous model - # to finish the backtest - all_models_end_dates.sort(reverse=True) - finish_timestamp = all_models_end_dates[0] + \ - (all_models_end_dates[0] - all_models_end_dates[1]) - - all_models_end_dates.append(finish_timestamp) - all_models_end_dates.sort() - start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates), - timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) - end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates), - timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) - - # add 1 day to string timerange to ensure BT module will load all dataframe data - end_date = end_date + timedelta(days=1) - backtesting_timerange = TimeRange( - 'date', 'date', int(start_date.timestamp()), int(end_date.timestamp()) - ) - return backtesting_timerange, assets_end_dates - - -def get_assets_timestamps_training_from_ready_models(models_path: Path) -> Dict[str, Any]: - """ - Scan the models path and returns all assets end training dates (timestamp) - :param models_path: FreqAI model path - - :return: a Dict with asset and model end training dates info - """ - assets_end_dates: Dict[str, Any] = {} - if not models_path.is_dir(): - raise OperationalException( - 'Model folders not found. 
Saved models are required ' - 'to run backtest with the freqai-backtest-live-models option' - ) - for model_dir in models_path.iterdir(): - if str(model_dir.name).startswith("sub-train"): - model_end_date = int(model_dir.name.split("_")[1]) - asset = model_dir.name.split("_")[0].replace("sub-train-", "") - model_file_name = ( - f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" - "_model.joblib" - ) - - model_path_file = Path(model_dir / model_file_name) - if model_path_file.is_file(): - if asset not in assets_end_dates: - assets_end_dates[asset] = [] - assets_end_dates[asset].append(model_end_date) - - return assets_end_dates - - -def get_timerange_backtest_live_models(config: Config): - """ - Returns a formated timerange for backtest live/ready models - :param config: Configuration dictionary - - :return: a string timerange (format example: '20220801-20220822') - """ - models_path = get_full_models_path(config) - timerange, _ = get_timerange_and_assets_end_dates_from_ready_models(models_path) - start_date = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) - end_date = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) - tr = f"{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}" - return tr diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index 22bc1e06e..d3ba09592 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -191,3 +191,19 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen, fig.update_layout(title_text=f"Best and worst features by importance {pair}") label = label.replace('&', '').replace('%', '') # escape two FreqAI specific characters store_plot_file(fig, f"{dk.model_filename}-{label}.html", dk.data_path) + + +def get_timerange_backtest_live_models(config: Config): + """ + Returns a formated timerange for backtest live/ready models + :param config: Configuration dictionary + + :return: a string timerange (format example: '20220801-20220822') + """ + dk 
= FreqaiDataKitchen(config) + models_path = dk.get_full_models_path(config) + timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path) + start_date = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) + end_date = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) + tr = f"{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}" + return tr diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index e789ece79..91d60847e 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -135,8 +135,8 @@ class Backtesting: self.precision_mode = self.exchange.precisionMode if self.config.get('freqai_backtest_live_models', False): - from freqtrade.freqai import freqai_util - self.config['timerange'] = freqai_util.get_timerange_backtest_live_models(self.config) + from freqtrade.freqai import utils + self.config['timerange'] = utils.get_timerange_backtest_live_models(self.config) self.timerange = TimeRange.parse_timerange( None if self.config.get('timerange') is None else str(self.config.get('timerange'))) diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index f60b29bf1..43acae7c3 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -1,13 +1,18 @@ import shutil from datetime import datetime, timedelta, timezone from pathlib import Path +from unittest.mock import MagicMock import pytest +from freqtrade.configuration import TimeRange +from freqtrade.data.dataprovider import DataProvider from freqtrade.exceptions import OperationalException -from tests.conftest import log_has_re -from tests.freqai.conftest import (get_patched_data_kitchen, make_data_dictionary, - make_unfiltered_dataframe) +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.utils import get_timerange_backtest_live_models +from tests.conftest import get_patched_exchange, log_has_re 
+from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy, + make_data_dictionary, make_unfiltered_dataframe) @pytest.mark.parametrize( @@ -158,3 +163,98 @@ def test_make_train_test_datasets(mocker, freqai_conf): assert data_dictionary assert len(data_dictionary) == 7 assert len(data_dictionary['train_features'].index) == 1916 + + +def test_get_pairs_timestamp_validation(mocker, freqai_conf): + exchange = get_patched_exchange(mocker, freqai_conf) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + freqai_conf['freqai'].update({"identifier": "invalid_id"}) + model_path = freqai.dk.get_full_models_path(freqai_conf) + with pytest.raises( + OperationalException, + match=r'.*required to run backtest with the freqai-backtest-live-models.*' + ): + freqai.dk.get_assets_timestamps_training_from_ready_models(model_path) + + +@pytest.mark.parametrize('model', [ + 'LightGBMRegressor' + ]) +def test_get_timerange_from_ready_models(mocker, freqai_conf, model): + freqai_conf.update({"freqaimodel": model}) + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"strategy": "freqai_test_strat"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180101-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180101-20180130") + + # 1516233600 (2018-01-18 00:00) - Start Training 1 + # 1516406400 (2018-01-20 
00:00) - End Training 1 (Backtest slice 1) + # 1516579200 (2018-01-22 00:00) - End Training 2 (Backtest slice 2) + # 1516838400 (2018-01-25 00:00) - End Timerange + + new_timerange = TimeRange("date", "date", 1516233600, 1516406400) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + new_timerange = TimeRange("date", "date", 1516406400, 1516579200) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + model_path = freqai.dk.get_full_models_path(freqai_conf) + (backtesting_timerange, + pairs_end_dates) = freqai.dk.get_timerange_and_assets_end_dates_from_ready_models( + models_path=model_path) + + assert len(pairs_end_dates["ADA"]) == 2 + assert backtesting_timerange.startts == 1516406400 + assert backtesting_timerange.stopts == 1516838400 + + backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf) + assert backtesting_string_timerange == '20180120-20180125' + + +@pytest.mark.parametrize('model', [ + 'LightGBMRegressor' + ]) +def test_get_full_model_path(mocker, freqai_conf, model): + freqai_conf.update({"freqaimodel": model}) + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"strategy": "freqai_test_strat"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, 
freqai.dk, data_load_timerange) + + model_path = freqai.dk.get_full_models_path(freqai_conf) + assert model_path.is_dir() is True diff --git a/tests/freqai/test_freqai_util.py b/tests/freqai/test_freqai_util.py deleted file mode 100644 index c548fd3f6..000000000 --- a/tests/freqai/test_freqai_util.py +++ /dev/null @@ -1,112 +0,0 @@ -import platform -from unittest.mock import MagicMock - -import pytest - -from freqtrade.configuration import TimeRange -from freqtrade.data.dataprovider import DataProvider -from freqtrade.exceptions import OperationalException -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.freqai_util import (get_assets_timestamps_training_from_ready_models, - get_full_models_path, - get_timerange_and_assets_end_dates_from_ready_models, - get_timerange_backtest_live_models) -from tests.conftest import get_patched_exchange -from tests.freqai.conftest import get_patched_freqai_strategy - - -def is_arm() -> bool: - machine = platform.machine() - return "arm" in machine or "aarch64" in machine - - -@pytest.mark.parametrize('model', [ - 'LightGBMRegressor' - ]) -def test_get_full_model_path(mocker, freqai_conf, model): - if is_arm() and model == 'CatboostRegressor': - pytest.skip("CatBoost is not supported on ARM") - - freqai_conf.update({"freqaimodel": model}) - freqai_conf.update({"timerange": "20180110-20180130"}) - freqai_conf.update({"strategy": "freqai_test_strat"}) - - strategy = get_patched_freqai_strategy(mocker, freqai_conf) - exchange = get_patched_exchange(mocker, freqai_conf) - strategy.dp = DataProvider(freqai_conf, exchange) - strategy.freqai_info = freqai_conf.get("freqai", {}) - freqai = strategy.freqai - freqai.live = True - freqai.dk = FreqaiDataKitchen(freqai_conf) - timerange = TimeRange.parse_timerange("20180110-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) - - freqai.dd.pair_dict = MagicMock() - - data_load_timerange = TimeRange.parse_timerange("20180110-20180130") - 
new_timerange = TimeRange.parse_timerange("20180120-20180130") - - freqai.extract_data_and_train_model( - new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - - model_path = get_full_models_path(freqai_conf) - assert model_path.is_dir() is True - - -def test_get_pairs_timestamp_validation(mocker, freqai_conf): - model_path = get_full_models_path(freqai_conf) - with pytest.raises( - OperationalException, - match=r'.*required to run backtest with the freqai-backtest-live-models.*' - ): - get_assets_timestamps_training_from_ready_models(model_path) - - -@pytest.mark.parametrize('model', [ - 'LightGBMRegressor' - ]) -def test_get_timerange_from_ready_models(mocker, freqai_conf, model): - if is_arm() and model == 'CatboostRegressor': - pytest.skip("CatBoost is not supported on ARM") - - freqai_conf.update({"freqaimodel": model}) - freqai_conf.update({"timerange": "20180110-20180130"}) - freqai_conf.update({"strategy": "freqai_test_strat"}) - - strategy = get_patched_freqai_strategy(mocker, freqai_conf) - exchange = get_patched_exchange(mocker, freqai_conf) - strategy.dp = DataProvider(freqai_conf, exchange) - strategy.freqai_info = freqai_conf.get("freqai", {}) - freqai = strategy.freqai - freqai.live = True - freqai.dk = FreqaiDataKitchen(freqai_conf) - timerange = TimeRange.parse_timerange("20180101-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) - - freqai.dd.pair_dict = MagicMock() - - data_load_timerange = TimeRange.parse_timerange("20180101-20180130") - - # 1516233600 (2018-01-18 00:00) - Start Training 1 - # 1516406400 (2018-01-20 00:00) - End Training 1 (Backtest slice 1) - # 1516579200 (2018-01-22 00:00) - End Training 2 (Backtest slice 2) - # 1516838400 (2018-01-25 00:00) - End Timerange - - new_timerange = TimeRange("date", "date", 1516233600, 1516406400) - freqai.extract_data_and_train_model( - new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - - new_timerange = TimeRange("date", "date", 1516406400, 
1516579200) - freqai.extract_data_and_train_model( - new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - - model_path = get_full_models_path(freqai_conf) - (backtesting_timerange, - pairs_end_dates) = get_timerange_and_assets_end_dates_from_ready_models(models_path=model_path) - - assert len(pairs_end_dates["ADA"]) == 2 - assert backtesting_timerange.startts == 1516406400 - assert backtesting_timerange.stopts == 1516838400 - - backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf) - assert backtesting_string_timerange == '20180120-20180125' From 356d79b38aabf37547b1e45d25e4f5c7ecb21d7e Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 3 Nov 2022 13:27:56 -0300 Subject: [PATCH 18/22] verify mean and std exists in model metadata --- freqtrade/freqai/data_kitchen.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index ec0503f0b..dbb25bf7d 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -981,8 +981,10 @@ class FreqaiDataKitchen: append_df[label] = predictions[label] if append_df[label].dtype == object: continue - append_df[f"{label}_mean"] = self.data["labels_mean"][label] - append_df[f"{label}_std"] = self.data["labels_std"][label] + if "labels_mean" in self.data: + append_df[f"{label}_mean"] = self.data["labels_mean"][label] + if "labels_std" in self.data: + append_df[f"{label}_std"] = self.data["labels_std"][label] append_df["do_predict"] = do_predict if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0: From a7acfb7ab77fda2603d0636c8ab6e2e45c1a10f0 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Fri, 4 Nov 2022 09:02:28 -0300 Subject: [PATCH 19/22] fix dict key error --- freqtrade/freqai/data_kitchen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 
7241bb48d..6daf41c2a 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -248,7 +248,7 @@ class FreqaiDataKitchen: self.data["filter_drop_index_training"] = drop_index else: - if len(self.data['constant_features_list']): + if 'constant_features_list' in self.data and len(self.data['constant_features_list']): filtered_df = self.check_pred_labels(filtered_df) # we are backtesting so we need to preserve row number to send back to strategy, # so now we use do_predict to avoid any prediction based on a NaN From 19d90b813a19f808a511a83fe6cea77e58294120 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 4 Nov 2022 16:10:46 +0100 Subject: [PATCH 20/22] improve readibility in start_backtesting() --- freqtrade/freqai/data_kitchen.py | 6 +-- freqtrade/freqai/freqai_interface.py | 77 ++++++++++++++++++---------- 2 files changed, 53 insertions(+), 30 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index d5b9da1aa..35148a6e2 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1338,11 +1338,11 @@ class FreqaiDataKitchen: def check_if_backtest_prediction_is_valid( self, - length_backtesting_dataframe: int + len_backtest_df: int ) -> bool: """ Check if a backtesting prediction already exists and if the predictions - to append has the same size of backtesting dataframe slice + to append have the same size as the backtesting dataframe slice :param length_backtesting_dataframe: Length of backtesting dataframe slice :return: :boolean: whether the prediction file is valid. 
@@ -1356,7 +1356,7 @@ class FreqaiDataKitchen: if file_exists: append_df = self.get_backtesting_prediction() - if len(append_df) == length_backtesting_dataframe: + if len(append_df) == len_backtest_df: logger.info(f"Found backtesting prediction file at {path_to_predictionfile}") return True else: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 8adb8b3ce..e7f573077 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -261,35 +261,12 @@ class IFreqaiModel(ABC): dataframe_train = dk.slice_dataframe(tr_train, dataframe) dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) - if dk.backtest_live_models and len(dataframe_backtest) == 0: - tr_backtest_startts_str = datetime.fromtimestamp( - tr_backtest.startts, - tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) - tr_backtest_stopts_str = datetime.fromtimestamp( - tr_backtest.stopts, - tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) - logger.info(f"No data found for pair {pair} from {tr_backtest_startts_str} " - f" from {tr_backtest_startts_str} to {tr_backtest_stopts_str}. 
" - "Probably more than one training within the same candle period.") + if not self.ensure_data_exists(dataframe_backtest, tr_backtest, pair): continue - trained_timestamp = tr_train - tr_train_startts_str = datetime.fromtimestamp( - tr_train.startts, - tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) - tr_train_stopts_str = datetime.fromtimestamp( - tr_train.stopts, - tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + self.log_backtesting_progress(tr_train, pair, train_it, total_trains) - if not dk.backtest_live_models: - logger.info( - f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs" - f" from {tr_train_startts_str} " - f"to {tr_train_stopts_str}, {train_it}/{total_trains} " - "trains" - ) - - timestamp_model_id = int(trained_timestamp.stopts) + timestamp_model_id = int(tr_train.stopts) if dk.backtest_live_models: timestamp_model_id = int(tr_backtest.startts) @@ -309,7 +286,7 @@ class IFreqaiModel(ABC): dk.find_labels(dataframe_train) self.model = self.train(dataframe_train, pair, dk) self.dd.pair_dict[pair]["trained_timestamp"] = int( - trained_timestamp.stopts) + tr_train.stopts) if self.plot_features: plot_feature_importance(self.model, pair, dk, self.plot_features) if self.save_backtest_models: @@ -788,6 +765,52 @@ class IFreqaiModel(ABC): return dataframe + def ensure_data_exists(self, dataframe_backtest: DataFrame, + tr_backtest: TimeRange, pair: str) -> bool: + """ + Check if the dataframe is empty, if not, report useful information to user. + :param dataframe_backtest: the backtesting dataframe, maybe empty. + :param tr_backtest: current backtesting timerange. 
+ :param pair: current pair + :return: if the data exists or not + """ + if self.config.get("freqai_backtest_live_models", False) and len(dataframe_backtest) == 0: + tr_backtest_startts_str = datetime.fromtimestamp( + tr_backtest.startts, + tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + tr_backtest_stopts_str = datetime.fromtimestamp( + tr_backtest.stopts, + tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + logger.info(f"No data found for pair {pair} from {tr_backtest_startts_str} " + f" from {tr_backtest_startts_str} to {tr_backtest_stopts_str}. " + "Probably more than one training within the same candle period.") + return True + return False + + def log_backtesting_progress(self, tr_train: TimeRange, pair: str, + train_it: int, total_trains: int): + """ + Log the backtesting progress so user knows how many pairs have been trained and + hoe many more pairs/trains remain. + :param tr_train: the training timerange + :param train_it: the train iteration for the current pair (the sliding window progress) + :param pair: the current pair + :param total_trains: total trains (total number of slides for the sliding window) + """ + tr_train_startts_str = datetime.fromtimestamp( + tr_train.startts, + tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + tr_train_stopts_str = datetime.fromtimestamp( + tr_train.stopts, + tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + + if not self.config.get("freqai_backtest_live_models", False): + logger.info( + f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs" + f" from {tr_train_startts_str} " + f"to {tr_train_stopts_str}, {train_it}/{total_trains} " + "trains" + ) # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModel.py for an example. 
From 8bdc99a3d654f0536eac1b5fa520a8f6b8fbdbc6 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 4 Nov 2022 16:41:38 +0100 Subject: [PATCH 21/22] fix self-imposed bug --- freqtrade/freqai/freqai_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index e7f573077..ee0fbebc3 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -784,8 +784,8 @@ class IFreqaiModel(ABC): logger.info(f"No data found for pair {pair} from {tr_backtest_startts_str} " f" from {tr_backtest_startts_str} to {tr_backtest_stopts_str}. " "Probably more than one training within the same candle period.") - return True - return False + return False + return True def log_backtesting_progress(self, tr_train: TimeRange, pair: str, train_it: int, total_trains: int): From 884014a4b96a57aec3114e77f4b4badc1dfac0dd Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 7 Nov 2022 18:35:28 +0000 Subject: [PATCH 22/22] Fix some minor typos --- docs/freqai-running.md | 2 +- freqtrade/freqai/freqai_interface.py | 2 +- freqtrade/freqai/utils.py | 2 +- freqtrade/optimize/backtesting.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/freqai-running.md b/docs/freqai-running.md index 693fe087c..f97ed0ab4 100644 --- a/docs/freqai-running.md +++ b/docs/freqai-running.md @@ -88,7 +88,7 @@ The `--timerange` parameter must not be informed, as it will be automatically ca Each model has an identifier derived from the training end date. If you have only 1 model trained, FreqAI will backtest from the training end date until the current date. If you have more than 1 model, each model will perform the backtesting according to the training end date until the training end date of the next model and so on. For the last model, the period of the previous model will be used for the execution. !!! 
Note - Currently, there is no checking for expired models, even if the expired_hours parameter is set. + Currently, there is no checking for expired models, even if the `expired_hours` parameter is set. ### Downloading data to cover the full backtest period diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index c0be5a69e..ae123f852 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -804,7 +804,7 @@ class IFreqaiModel(ABC): train_it: int, total_trains: int): """ Log the backtesting progress so user knows how many pairs have been trained and - hoe many more pairs/trains remain. + how many more pairs/trains remain. :param tr_train: the training timerange :param train_it: the train iteration for the current pair (the sliding window progress) :param pair: the current pair diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index 5a04497d0..e854bcf0b 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -220,7 +220,7 @@ def record_params(config: Dict[str, Any], full_path: Path) -> None: ) -def get_timerange_backtest_live_models(config: Config): +def get_timerange_backtest_live_models(config: Config) -> str: """ Returns a formated timerange for backtest live/ready models :param config: Configuration dictionary diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index 91d60847e..3436eac44 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -135,8 +135,8 @@ class Backtesting: self.precision_mode = self.exchange.precisionMode if self.config.get('freqai_backtest_live_models', False): - from freqtrade.freqai import utils - self.config['timerange'] = utils.get_timerange_backtest_live_models(self.config) + from freqtrade.freqai.utils import get_timerange_backtest_live_models + self.config['timerange'] = get_timerange_backtest_live_models(self.config) self.timerange = TimeRange.parse_timerange( 
None if self.config.get('timerange') is None else str(self.config.get('timerange')))