Merge pull request #7325 from wagnercosta/develop

Improve Freqai backtesting performance
This commit is contained in:
Robert Caulk
2022-09-03 19:28:04 +02:00
committed by GitHub
6 changed files with 121 additions and 47 deletions

View File

@@ -69,6 +69,8 @@ class FreqaiDataKitchen:
self.label_list: List = []
self.training_features_list: List = []
self.model_filename: str = ""
self.backtesting_results_path = Path()
self.backtest_predictions_folder: str = "backtesting_predictions"
self.live = live
self.pair = pair
@@ -778,9 +780,10 @@ class FreqaiDataKitchen:
weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
return weights
def append_predictions(self, predictions: DataFrame, do_predict: npt.ArrayLike) -> None:
def get_predictions_to_append(self, predictions: DataFrame,
do_predict: npt.ArrayLike) -> DataFrame:
"""
Append backtest prediction from current backtest period to all previous periods
Get backtest prediction from current backtest period
"""
append_df = DataFrame()
@@ -795,13 +798,18 @@ class FreqaiDataKitchen:
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
append_df["DI_values"] = self.DI_values
return append_df
def append_predictions(self, append_df: DataFrame) -> None:
    """
    Append backtest prediction from current backtest period to all previous periods
    :param append_df: prediction dataframe for the current backtest period
    """
    # The first period simply becomes the accumulator; every later period is
    # stacked row-wise underneath what has been collected so far.
    if not self.full_df.empty:
        append_df = pd.concat([self.full_df, append_df], axis=0)
    self.full_df = append_df
def fill_predictions(self, dataframe):
"""
Back fill values to before the backtesting range so that the dataframe matches size
@@ -1060,3 +1068,50 @@ class FreqaiDataKitchen:
if self.unique_classes:
for label in self.unique_classes:
self.unique_class_list += list(self.unique_classes[label])
def save_backtesting_prediction(self, append_df: DataFrame) -> None:
    """
    Save prediction dataframe from backtesting to h5 file format
    :param append_df: dataframe for backtesting period
    """
    # Make sure the predictions folder exists; exist_ok makes this a no-op
    # when the folder is already there.
    predictions_dir = Path(self.full_path / self.backtest_predictions_folder)
    predictions_dir.mkdir(parents=True, exist_ok=True)

    # The target file is self.backtesting_results_path, which is assigned by
    # check_if_backtest_prediction_exists(). mode='w' overwrites any
    # previously stored file at that path.
    append_df.to_hdf(self.backtesting_results_path, key='append_df', mode='w')
def get_backtesting_prediction(self) -> DataFrame:
    """
    Get prediction dataframe from h5 file format
    :return: the object read from the file at self.backtesting_results_path
    """
    return pd.read_hdf(self.backtesting_results_path)
def check_if_backtest_prediction_exists(self) -> bool:
    """
    Check if a backtesting prediction already exists.
    The prediction file path is built from self.full_path, the backtest
    predictions folder and self.model_filename, and is stored on
    self.backtesting_results_path whether or not the file exists.
    :return:
    :boolean: whether the prediction file exists or not.
    """
    path_to_predictionfile = Path(
        self.full_path
        / self.backtest_predictions_folder
        / f"{self.model_filename}_prediction.h5"
    )
    # Remember the path so the save/load helpers operate on the same file.
    self.backtesting_results_path = path_to_predictionfile

    if path_to_predictionfile.is_file():
        logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
        return True

    logger.info(
        f"Could not find backtesting prediction file at {path_to_predictionfile}"
    )
    return False

View File

@@ -71,6 +71,9 @@ class IFreqaiModel(ABC):
self.first = True
self.set_full_path()
self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", False)
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
self.scanning = False
@@ -124,10 +127,9 @@ class IFreqaiModel(ABC):
elif not self.follow_mode:
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
with self.analysis_lock:
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
@@ -224,28 +226,39 @@ class IFreqaiModel(ABC):
"trains"
)
trained_timestamp_int = int(trained_timestamp.stopts)
dk.data_path = Path(
dk.full_path
/
f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}"
f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
)
if not self.model_exists(
metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts)
):
dk.find_features(dataframe_train)
self.model = self.train(dataframe_train, metadata["pair"], dk)
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
trained_timestamp.stopts)
dk.set_new_model_names(metadata["pair"], trained_timestamp)
self.dd.save_data(self.model, metadata["pair"], dk)
dk.set_new_model_names(metadata["pair"], trained_timestamp)
if dk.check_if_backtest_prediction_exists():
append_df = dk.get_backtesting_prediction()
dk.append_predictions(append_df)
else:
self.model = self.dd.load_data(metadata["pair"], dk)
if not self.model_exists(
metadata["pair"], dk, trained_timestamp=trained_timestamp_int
):
dk.find_features(dataframe_train)
self.model = self.train(dataframe_train, metadata["pair"], dk)
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
trained_timestamp.stopts)
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
if self.save_backtest_models:
logger.info('Saving backtest model to disk.')
self.dd.save_data(self.model, metadata["pair"], dk)
else:
self.model = self.dd.load_data(metadata["pair"], dk)
pred_df, do_preds = self.predict(dataframe_backtest, dk)
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
dk.append_predictions(pred_df, do_preds)
pred_df, do_preds = self.predict(dataframe_backtest, dk)
append_df = dk.get_predictions_to_append(pred_df, do_preds)
dk.append_predictions(append_df)
dk.save_backtesting_prediction(append_df)
dk.fill_predictions(dataframe)
@@ -456,11 +469,6 @@ class IFreqaiModel(ABC):
:return:
:boolean: whether the model file exists or not.
"""
coin, _ = pair.split("/")
if not self.live:
dk.model_filename = model_filename = f"cb_{coin.lower()}_{trained_timestamp}"
path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib")
file_exists = path_to_modelfile.is_file()
if file_exists and not scanning: