From 7f116db95e187cbadebcc5fca2274bc64a277bb7 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Sat, 24 Sep 2022 13:01:53 -0300 Subject: [PATCH] added generic function to get timerange from existent models --- freqtrade/freqai/data_kitchen.py | 16 ++++- freqtrade/freqai/freqai_interface.py | 91 +++++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 4 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 58279c7e3..b9fc3f1b2 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -99,6 +99,7 @@ class FreqaiDataKitchen: self.train_dates: DataFrame = pd.DataFrame() self.unique_classes: Dict[str, list] = {} self.unique_class_list: list = [] + self.backtest_live_models_data: Dict[str, Any] = {} def set_paths( self, @@ -1262,7 +1263,15 @@ class FreqaiDataKitchen: return file_exists def get_timerange_from_ready_models(self): - return self.gen_get_timerange_from_ready_models(self.full_path) + backtesting_timerange, \ + backtesting_string_timerange, \ + pairs_end_dates = self.gen_get_timerange_from_ready_models(self.full_path) + self.backtest_live_models_data = { + "backtesting_timerange": backtesting_timerange, + "backtesting_string_timerange": backtesting_string_timerange, + "pairs_end_dates": pairs_end_dates + } + return def gen_get_timerange_from_ready_models(self, models_path: Path): all_models_end_dates = [] @@ -1271,8 +1280,9 @@ class FreqaiDataKitchen: if str(model_dir.name).startswith("sub-train"): model_end_date = model_dir.name.split("_")[1] pair = model_dir.name.split("_")[0].replace("sub-train-", "") - model_file_name = f"cb\ - _{str(model_dir.name).replace('sub-train-', '').lower()}_model.joblib" + model_file_name = (f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}") + model_file_name = f"{model_file_name}_model.joblib" + model_path_file = Path(model_dir / model_file_name) if model_path_file.is_file(): if pair not in pairs_end_dates: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 2315d4017..dda9b8bcc 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -139,7 +139,12 @@ class IFreqaiModel(ABC): dataframe = self.dk.use_strategy_to_populate_indicators( strategy, prediction_dataframe=dataframe, pair=metadata["pair"] ) - dk = self.start_backtesting(dataframe, metadata, self.dk) + + backtest_live_models = True # temp + if not backtest_live_models: + dk = self.start_backtesting(dataframe, metadata, self.dk) + else: + dk = self.start_backtesting_live_models(dataframe, metadata, self.dk) dataframe = dk.remove_features_from_df(dk.return_dataframe) self.clean_up() @@ -304,6 +309,90 @@ class IFreqaiModel(ABC): return dk + def start_backtesting_live_models( + self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen + ) -> FreqaiDataKitchen: + """ + The main broad execution for backtesting. For backtesting, each pair enters and then gets + trained for each window along the sliding window defined by "train_period_days" + (training window) and "backtest_period_days" (backtest window, i.e. window immediately + following the training window). FreqAI slides the window and sequentially builds + the backtesting results before returning the concatenated results for the full + backtesting period back to the strategy. + :param dataframe: DataFrame = strategy passed dataframe + :param metadata: Dict = pair metadata + :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only + :return: + FreqaiDataKitchen = Data management/analysis tool associated to present pair only + """ + + self.pair_it += 1 + train_it = 0 + # Loop enforcing the sliding window training/backtesting paradigm + # tr_train is the training time range e.g. 1 historical month + # tr_backtest is the backtesting time range e.g. the week directly + # following tr_train. Both of these windows slide through the + # entire backtest + for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges): + pair = metadata["pair"] + (_, _, _) = self.dd.get_pair_dict_info(pair) + train_it += 1 + total_trains = len(dk.backtesting_timeranges) + self.training_timerange = tr_train + dataframe_train = dk.slice_dataframe(tr_train, dataframe) + dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) + + trained_timestamp = tr_train + tr_train_startts_str = datetime.fromtimestamp( + tr_train.startts, + tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + tr_train_stopts_str = datetime.fromtimestamp( + tr_train.stopts, + tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) + logger.info( + f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs" + f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} " + "trains" + ) + + trained_timestamp_int = int(trained_timestamp.stopts) + dk.data_path = Path( + dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}" + ) + + dk.set_new_model_names(pair, trained_timestamp) + + if dk.check_if_backtest_prediction_exists(): + self.dd.load_metadata(dk) + self.check_if_feature_list_matches_strategy(dataframe_train, dk) + append_df = dk.get_backtesting_prediction() + dk.append_predictions(append_df) + else: + if not self.model_exists( + pair, dk, trained_timestamp=trained_timestamp_int + ): + dk.find_features(dataframe_train) + self.model = self.train(dataframe_train, pair, dk) + self.dd.pair_dict[pair]["trained_timestamp"] = int( + trained_timestamp.stopts) + + if self.save_backtest_models: + logger.info('Saving backtest model to disk.') + self.dd.save_data(self.model, pair, dk) + else: + self.model = self.dd.load_data(pair, dk) + + self.check_if_feature_list_matches_strategy(dataframe_train, dk) + + pred_df, do_preds = self.predict(dataframe_backtest, dk) + append_df = dk.get_predictions_to_append(pred_df, do_preds) + dk.append_predictions(append_df) + dk.save_backtesting_prediction(append_df) + + dk.fill_predictions(dataframe) + + return dk + def start_live( self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen ) -> FreqaiDataKitchen: