From df51da22ee699e9a362d980747dba28e578d6c47 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Wed, 31 Aug 2022 11:23:48 -0300 Subject: [PATCH] refactoring freqai backtesting --- freqtrade/freqai/data_kitchen.py | 37 ++++++++++- freqtrade/freqai/freqai_interface.py | 98 ++++++++++++++++++++++++---- 2 files changed, 119 insertions(+), 16 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 763a07375..80b795b8e 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1,6 +1,7 @@ import copy import datetime import logging +import os import shutil from pathlib import Path from typing import Any, Dict, List, Tuple @@ -780,9 +781,10 @@ class FreqaiDataKitchen: weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1] return weights - def append_predictions(self, predictions: DataFrame, do_predict: npt.ArrayLike) -> None: + def get_predictions_to_append(self, predictions: DataFrame, + do_predict: npt.ArrayLike) -> DataFrame: """ - Append backtest prediction from current backtest period to all previous periods + Get backtest prediction from current backtest period """ append_df = DataFrame() @@ -797,12 +799,19 @@ class FreqaiDataKitchen: if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0: append_df["DI_values"] = self.DI_values + return append_df + + def append_predictions(self, append_df: DataFrame) -> None: + """ + Append backtest prediction from current backtest period to all previous periods + """ + if self.full_df.empty: self.full_df = append_df else: self.full_df = pd.concat([self.full_df, append_df], axis=0) - return + return append_df def fill_predictions(self, dataframe): """ @@ -1089,3 +1098,25 @@ class FreqaiDataKitchen: if self.unique_classes: for label in self.unique_classes: self.unique_class_list += list(self.unique_classes[label]) + + def save_backtesting_prediction( + self, file_name: str, root_folder: str, append_df: DataFrame + ) -> 
None: + + """ + Save prediction dataframe from backtesting to h5 file format + :param file_name: h5 file name + :param root_folder: folder to save h5 file + """ + os.makedirs(root_folder, exist_ok=True) + append_df.to_hdf(file_name, key='append_df', mode='w') + + def get_backtesting_prediction(self, prediction_file_name: str) -> DataFrame: + """ + Retrieve from disk the prediction dataframe + :param prediction_file_name: prediction file full path + :return: + :Dataframe: Backtesting prediction from current backtesting period + """ + append_df = pd.read_hdf(prediction_file_name) + return append_df diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 4106f24e0..d396113e8 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -224,28 +224,50 @@ class IFreqaiModel(ABC): "trains" ) + trained_timestamp_int = int(trained_timestamp.stopts) dk.data_path = Path( dk.full_path / - f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}" + f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}" ) - if not self.model_exists( - metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts) + + if self.backtest_prediction_exists( + metadata["pair"], dk, trained_timestamp=trained_timestamp_int ): - dk.find_features(dataframe_train) - self.model = self.train(dataframe_train, metadata["pair"], dk) - self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int( - trained_timestamp.stopts) - dk.set_new_model_names(metadata["pair"], trained_timestamp) - self.dd.save_data(self.model, metadata["pair"], dk) + prediction_filename, _ = self.get_backtesting_prediction_file_name( + metadata["pair"], + dk, + trained_timestamp=int(trained_timestamp.stopts)) + + append_df = dk.get_backtesting_prediction(prediction_filename) + dk.append_predictions(append_df) else: - self.model = self.dd.load_data(metadata["pair"], dk) + if not self.model_exists( + metadata["pair"], dk, 
trained_timestamp=trained_timestamp_int + ): + dk.find_features(dataframe_train) + self.model = self.train(dataframe_train, metadata["pair"], dk) + self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int( + trained_timestamp.stopts) + dk.set_new_model_names(metadata["pair"], trained_timestamp) + self.dd.save_data(self.model, metadata["pair"], dk) + else: + self.model = self.dd.load_data(metadata["pair"], dk) - self.check_if_feature_list_matches_strategy(dataframe_train, dk) + self.check_if_feature_list_matches_strategy(dataframe_train, dk) - pred_df, do_preds = self.predict(dataframe_backtest, dk) + pred_df, do_preds = self.predict(dataframe_backtest, dk) + append_df = dk.get_predictions_to_append(pred_df, do_preds) + dk.append_predictions(append_df) - dk.append_predictions(pred_df, do_preds) + prediction_file_name, root_prediction = self.get_backtesting_prediction_file_name( + metadata["pair"], + dk, + trained_timestamp_int) + + dk.save_backtesting_prediction(prediction_file_name, + root_prediction, + append_df) dk.fill_predictions(dataframe) @@ -643,6 +665,56 @@ class IFreqaiModel(ABC): self.train_time = 0 return + def backtest_prediction_exists( + self, + pair: str, + dk: FreqaiDataKitchen, + trained_timestamp: int, + scanning: bool = False, + ) -> bool: + """ + Given a pair and path, check if a backtesting prediction already exists + :param pair: pair e.g. BTC/USD + :param path: path to prediction + :return: + :boolean: whether the prediction file exists or not. 
+ """ + if not self.live: + prediction_file_name, _ = self.get_backtesting_prediction_file_name( + pair, dk, trained_timestamp + ) + path_to_predictionfile = Path(prediction_file_name) + + file_exists = path_to_predictionfile.is_file() + if file_exists and not scanning: + logger.info("Found backtesting prediction file at %s", prediction_file_name) + elif not scanning: + logger.info( + "Could not find backtesting prediction file at %s", prediction_file_name + ) + return file_exists + else: + return False + + def get_backtesting_prediction_file_name( + self, pair: str, dk: FreqaiDataKitchen, trained_timestamp: int + ): + """ + Given a pair, path and a trained timestamp, + returns the path and name of the predictions file + :param pair: pair e.g. BTC/USD + :param dk: FreqaiDataKitchen + :param trained_timestamp: current backtesting timestamp period + :return: + :str: prediction file name + :str: prediction root path + """ + coin, _ = pair.split("/") + prediction_base_filename = f"{coin.lower()}_{trained_timestamp}" + root_prediction = f'{dk.full_path}/backtesting_predictions' + prediction_file_name = f"{root_prediction}/{prediction_base_filename}_predictions.h5" + return prediction_file_name, root_prediction + # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModel.py for an example.