refactoring freqai backtesting

This commit is contained in:
Wagner Costa Santos 2022-08-31 11:23:48 -03:00
parent 4aec2db14d
commit df51da22ee
2 changed files with 119 additions and 16 deletions

View File

@ -1,6 +1,7 @@
import copy import copy
import datetime import datetime
import logging import logging
import os
import shutil import shutil
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Tuple from typing import Any, Dict, List, Tuple
@ -780,9 +781,10 @@ class FreqaiDataKitchen:
weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1] weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
return weights return weights
def append_predictions(self, predictions: DataFrame, do_predict: npt.ArrayLike) -> None: def get_predictions_to_append(self, predictions: DataFrame,
do_predict: npt.ArrayLike) -> DataFrame:
""" """
Append backtest prediction from current backtest period to all previous periods Get backtest prediction from current backtest period
""" """
append_df = DataFrame() append_df = DataFrame()
@ -797,12 +799,19 @@ class FreqaiDataKitchen:
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0: if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
append_df["DI_values"] = self.DI_values append_df["DI_values"] = self.DI_values
return append_df
def append_predictions(self, append_df: DataFrame) -> None:
"""
Append backtest prediction from current backtest period to all previous periods
"""
if self.full_df.empty: if self.full_df.empty:
self.full_df = append_df self.full_df = append_df
else: else:
self.full_df = pd.concat([self.full_df, append_df], axis=0) self.full_df = pd.concat([self.full_df, append_df], axis=0)
return return append_df
def fill_predictions(self, dataframe): def fill_predictions(self, dataframe):
""" """
@ -1089,3 +1098,25 @@ class FreqaiDataKitchen:
if self.unique_classes: if self.unique_classes:
for label in self.unique_classes: for label in self.unique_classes:
self.unique_class_list += list(self.unique_classes[label]) self.unique_class_list += list(self.unique_classes[label])
def save_backtesting_prediction(
    self, file_name: str, root_folder: str, append_df: DataFrame
) -> None:
    """
    Write a backtesting prediction dataframe to disk in HDF5 (h5) format
    :param file_name: path of the h5 file to write
    :param root_folder: folder that is created (if missing) before writing
    :param append_df: prediction dataframe to store under the key 'append_df'
    """
    # The target folder must exist before the h5 file is written.
    os.makedirs(name=root_folder, exist_ok=True)

    # mode "w" is passed through to DataFrame.to_hdf unchanged.
    append_df.to_hdf(file_name, mode="w", key="append_df")
def get_backtesting_prediction(self, prediction_file_name: str) -> DataFrame:
    """
    Retrieve a stored backtesting prediction dataframe from disk
    :param prediction_file_name: prediction file full path
    :return:
    :Dataframe: content of the h5 file as read by pandas.read_hdf
    """
    return pd.read_hdf(prediction_file_name)

View File

@ -224,28 +224,50 @@ class IFreqaiModel(ABC):
"trains" "trains"
) )
trained_timestamp_int = int(trained_timestamp.stopts)
dk.data_path = Path( dk.data_path = Path(
dk.full_path dk.full_path
/ /
f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}" f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
) )
if not self.model_exists(
metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts) if self.backtest_prediction_exists(
metadata["pair"], dk, trained_timestamp=trained_timestamp_int
): ):
dk.find_features(dataframe_train) prediction_filename, _ = self.get_backtesting_prediction_file_name(
self.model = self.train(dataframe_train, metadata["pair"], dk) metadata["pair"],
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int( dk,
trained_timestamp.stopts) trained_timestamp=int(trained_timestamp.stopts))
dk.set_new_model_names(metadata["pair"], trained_timestamp)
self.dd.save_data(self.model, metadata["pair"], dk) append_df = dk.get_backtesting_prediction(prediction_filename)
dk.append_predictions(append_df)
else: else:
self.model = self.dd.load_data(metadata["pair"], dk) if not self.model_exists(
metadata["pair"], dk, trained_timestamp=trained_timestamp_int
):
dk.find_features(dataframe_train)
self.model = self.train(dataframe_train, metadata["pair"], dk)
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
trained_timestamp.stopts)
dk.set_new_model_names(metadata["pair"], trained_timestamp)
self.dd.save_data(self.model, metadata["pair"], dk)
else:
self.model = self.dd.load_data(metadata["pair"], dk)
self.check_if_feature_list_matches_strategy(dataframe_train, dk) self.check_if_feature_list_matches_strategy(dataframe_train, dk)
pred_df, do_preds = self.predict(dataframe_backtest, dk) pred_df, do_preds = self.predict(dataframe_backtest, dk)
append_df = dk.get_predictions_to_append(pred_df, do_preds)
dk.append_predictions(append_df)
dk.append_predictions(pred_df, do_preds) prediction_file_name, root_prediction = self.get_backtesting_prediction_file_name(
metadata["pair"],
dk,
trained_timestamp_int)
dk.save_backtesting_prediction(prediction_file_name,
root_prediction,
append_df)
dk.fill_predictions(dataframe) dk.fill_predictions(dataframe)
@ -643,6 +665,56 @@ class IFreqaiModel(ABC):
self.train_time = 0 self.train_time = 0
return return
def backtest_prediction_exists(
    self,
    pair: str,
    dk: FreqaiDataKitchen,
    trained_timestamp: int,
    scanning: bool = False,
) -> bool:
    """
    Check whether a backtesting prediction file already exists on disk
    :param pair: pair e.g. BTC/USD
    :param dk: FreqaiDataKitchen, forwarded to get_backtesting_prediction_file_name
    :param trained_timestamp: timestamp used to build the prediction file name
    :param scanning: when True, the result is not logged
    :return:
    :boolean: whether the prediction file exists or not. Always False when
    self.live is set.
    """
    # When running live, no prediction file is looked up at all.
    if self.live:
        return False

    prediction_file_name, _ = self.get_backtesting_prediction_file_name(
        pair, dk, trained_timestamp
    )
    file_exists = Path(prediction_file_name).is_file()

    # Only report the outcome when the caller is not merely scanning.
    if not scanning:
        if file_exists:
            logger.info("Found backtesting prediction file at %s", prediction_file_name)
        else:
            logger.info(
                "Could not find backtesting prediction file at %s", prediction_file_name
            )

    return file_exists
def get_backtesting_prediction_file_name(
    self, pair: str, dk: FreqaiDataKitchen, trained_timestamp: int
):
    """
    Given a pair, a data kitchen and a trained timestamp,
    returns the name and the root path of the predictions file
    :param pair: pair e.g. BTC/USD
    :param dk: FreqaiDataKitchen, its full_path is the parent of the predictions folder
    :param trained_timestamp: current backtesting timestamp period
    :return:
    :str: prediction file name (full path, ends with "_predictions.h5")
    :str: prediction root path ("<dk.full_path>/backtesting_predictions")
    """
    # Take the first "/"-separated segment instead of unpacking exactly two
    # parts, so a pair with no "/" or more than one "/" does not raise ValueError.
    coin = pair.split("/")[0]
    prediction_base_filename = f"{coin.lower()}_{trained_timestamp}"
    root_prediction = f'{dk.full_path}/backtesting_predictions'
    prediction_file_name = f"{root_prediction}/{prediction_base_filename}_predictions.h5"
    return prediction_file_name, root_prediction
# Following methods which are overridden by user made prediction models. # Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example. # See freqai/prediction_models/CatboostPredictionModel.py for an example.