Merge pull request #7325 from wagnercosta/develop
Improve Freqai backtesting performance
This commit is contained in:
@@ -69,6 +69,8 @@ class FreqaiDataKitchen:
|
||||
self.label_list: List = []
|
||||
self.training_features_list: List = []
|
||||
self.model_filename: str = ""
|
||||
self.backtesting_results_path = Path()
|
||||
self.backtest_predictions_folder: str = "backtesting_predictions"
|
||||
self.live = live
|
||||
self.pair = pair
|
||||
|
||||
@@ -778,9 +780,10 @@ class FreqaiDataKitchen:
|
||||
weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
|
||||
return weights
|
||||
|
||||
def append_predictions(self, predictions: DataFrame, do_predict: npt.ArrayLike) -> None:
|
||||
def get_predictions_to_append(self, predictions: DataFrame,
|
||||
do_predict: npt.ArrayLike) -> DataFrame:
|
||||
"""
|
||||
Append backtest prediction from current backtest period to all previous periods
|
||||
Get backtest prediction from current backtest period
|
||||
"""
|
||||
|
||||
append_df = DataFrame()
|
||||
@@ -795,13 +798,18 @@ class FreqaiDataKitchen:
|
||||
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
|
||||
append_df["DI_values"] = self.DI_values
|
||||
|
||||
return append_df
|
||||
|
||||
def append_predictions(self, append_df: DataFrame) -> None:
|
||||
"""
|
||||
Append backtest prediction from current backtest period to all previous periods
|
||||
"""
|
||||
|
||||
if self.full_df.empty:
|
||||
self.full_df = append_df
|
||||
else:
|
||||
self.full_df = pd.concat([self.full_df, append_df], axis=0)
|
||||
|
||||
return
|
||||
|
||||
def fill_predictions(self, dataframe):
|
||||
"""
|
||||
Back fill values to before the backtesting range so that the dataframe matches size
|
||||
@@ -1060,3 +1068,50 @@ class FreqaiDataKitchen:
|
||||
if self.unique_classes:
|
||||
for label in self.unique_classes:
|
||||
self.unique_class_list += list(self.unique_classes[label])
|
||||
|
||||
def save_backtesting_prediction(
|
||||
self, append_df: DataFrame
|
||||
) -> None:
|
||||
|
||||
"""
|
||||
Save prediction dataframe from backtesting to h5 file format
|
||||
:param append_df: dataframe for backtesting period
|
||||
"""
|
||||
full_predictions_folder = Path(self.full_path / self.backtest_predictions_folder)
|
||||
if not full_predictions_folder.is_dir():
|
||||
full_predictions_folder.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
append_df.to_hdf(self.backtesting_results_path, key='append_df', mode='w')
|
||||
|
||||
def get_backtesting_prediction(
|
||||
self
|
||||
) -> DataFrame:
|
||||
|
||||
"""
|
||||
Get prediction dataframe from h5 file format
|
||||
"""
|
||||
append_df = pd.read_hdf(self.backtesting_results_path)
|
||||
return append_df
|
||||
|
||||
def check_if_backtest_prediction_exists(
|
||||
self
|
||||
) -> bool:
|
||||
"""
|
||||
Check if a backtesting prediction already exists
|
||||
:param dk: FreqaiDataKitchen
|
||||
:return:
|
||||
:boolean: whether the prediction file exists or not.
|
||||
"""
|
||||
path_to_predictionfile = Path(self.full_path /
|
||||
self.backtest_predictions_folder /
|
||||
f"{self.model_filename}_prediction.h5")
|
||||
self.backtesting_results_path = path_to_predictionfile
|
||||
|
||||
file_exists = path_to_predictionfile.is_file()
|
||||
if file_exists:
|
||||
logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
|
||||
else:
|
||||
logger.info(
|
||||
f"Could not find backtesting prediction file at {path_to_predictionfile}"
|
||||
)
|
||||
return file_exists
|
||||
|
@@ -71,6 +71,9 @@ class IFreqaiModel(ABC):
|
||||
self.first = True
|
||||
self.set_full_path()
|
||||
self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
|
||||
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", False)
|
||||
if self.save_backtest_models:
|
||||
logger.info('Backtesting module configured to save all models.')
|
||||
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
|
||||
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
|
||||
self.scanning = False
|
||||
@@ -124,10 +127,9 @@ class IFreqaiModel(ABC):
|
||||
elif not self.follow_mode:
|
||||
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
|
||||
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
|
||||
with self.analysis_lock:
|
||||
dataframe = self.dk.use_strategy_to_populate_indicators(
|
||||
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
|
||||
)
|
||||
dataframe = self.dk.use_strategy_to_populate_indicators(
|
||||
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
|
||||
)
|
||||
dk = self.start_backtesting(dataframe, metadata, self.dk)
|
||||
|
||||
dataframe = dk.remove_features_from_df(dk.return_dataframe)
|
||||
@@ -224,28 +226,39 @@ class IFreqaiModel(ABC):
|
||||
"trains"
|
||||
)
|
||||
|
||||
trained_timestamp_int = int(trained_timestamp.stopts)
|
||||
dk.data_path = Path(
|
||||
dk.full_path
|
||||
/
|
||||
f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}"
|
||||
f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
|
||||
)
|
||||
if not self.model_exists(
|
||||
metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts)
|
||||
):
|
||||
dk.find_features(dataframe_train)
|
||||
self.model = self.train(dataframe_train, metadata["pair"], dk)
|
||||
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
|
||||
trained_timestamp.stopts)
|
||||
dk.set_new_model_names(metadata["pair"], trained_timestamp)
|
||||
self.dd.save_data(self.model, metadata["pair"], dk)
|
||||
|
||||
dk.set_new_model_names(metadata["pair"], trained_timestamp)
|
||||
|
||||
if dk.check_if_backtest_prediction_exists():
|
||||
append_df = dk.get_backtesting_prediction()
|
||||
dk.append_predictions(append_df)
|
||||
else:
|
||||
self.model = self.dd.load_data(metadata["pair"], dk)
|
||||
if not self.model_exists(
|
||||
metadata["pair"], dk, trained_timestamp=trained_timestamp_int
|
||||
):
|
||||
dk.find_features(dataframe_train)
|
||||
self.model = self.train(dataframe_train, metadata["pair"], dk)
|
||||
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
|
||||
trained_timestamp.stopts)
|
||||
|
||||
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
|
||||
if self.save_backtest_models:
|
||||
logger.info('Saving backtest model to disk.')
|
||||
self.dd.save_data(self.model, metadata["pair"], dk)
|
||||
else:
|
||||
self.model = self.dd.load_data(metadata["pair"], dk)
|
||||
|
||||
pred_df, do_preds = self.predict(dataframe_backtest, dk)
|
||||
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
|
||||
|
||||
dk.append_predictions(pred_df, do_preds)
|
||||
pred_df, do_preds = self.predict(dataframe_backtest, dk)
|
||||
append_df = dk.get_predictions_to_append(pred_df, do_preds)
|
||||
dk.append_predictions(append_df)
|
||||
dk.save_backtesting_prediction(append_df)
|
||||
|
||||
dk.fill_predictions(dataframe)
|
||||
|
||||
@@ -456,11 +469,6 @@ class IFreqaiModel(ABC):
|
||||
:return:
|
||||
:boolean: whether the model file exists or not.
|
||||
"""
|
||||
coin, _ = pair.split("/")
|
||||
|
||||
if not self.live:
|
||||
dk.model_filename = model_filename = f"cb_{coin.lower()}_{trained_timestamp}"
|
||||
|
||||
path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib")
|
||||
file_exists = path_to_modelfile.is_file()
|
||||
if file_exists and not scanning:
|
||||
|
Reference in New Issue
Block a user