improve readibility in start_backtesting()

This commit is contained in:
robcaulk 2022-11-04 16:10:46 +01:00
parent 8008c63319
commit 19d90b813a
2 changed files with 53 additions and 30 deletions

View File

@ -1338,11 +1338,11 @@ class FreqaiDataKitchen:
def check_if_backtest_prediction_is_valid( def check_if_backtest_prediction_is_valid(
self, self,
length_backtesting_dataframe: int len_backtest_df: int
) -> bool: ) -> bool:
""" """
Check if a backtesting prediction already exists and if the predictions Check if a backtesting prediction already exists and if the predictions
to append has the same size of backtesting dataframe slice to append have the same size as the backtesting dataframe slice
:param length_backtesting_dataframe: Length of backtesting dataframe slice :param length_backtesting_dataframe: Length of backtesting dataframe slice
:return: :return:
:boolean: whether the prediction file is valid. :boolean: whether the prediction file is valid.
@ -1356,7 +1356,7 @@ class FreqaiDataKitchen:
if file_exists: if file_exists:
append_df = self.get_backtesting_prediction() append_df = self.get_backtesting_prediction()
if len(append_df) == length_backtesting_dataframe: if len(append_df) == len_backtest_df:
logger.info(f"Found backtesting prediction file at {path_to_predictionfile}") logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
return True return True
else: else:

View File

@ -261,35 +261,12 @@ class IFreqaiModel(ABC):
dataframe_train = dk.slice_dataframe(tr_train, dataframe) dataframe_train = dk.slice_dataframe(tr_train, dataframe)
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
if dk.backtest_live_models and len(dataframe_backtest) == 0: if not self.ensure_data_exists(dataframe_backtest, tr_backtest, pair):
tr_backtest_startts_str = datetime.fromtimestamp(
tr_backtest.startts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
tr_backtest_stopts_str = datetime.fromtimestamp(
tr_backtest.stopts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
logger.info(f"No data found for pair {pair} from {tr_backtest_startts_str} "
f" from {tr_backtest_startts_str} to {tr_backtest_stopts_str}. "
"Probably more than one training within the same candle period.")
continue continue
trained_timestamp = tr_train self.log_backtesting_progress(tr_train, pair, train_it, total_trains)
tr_train_startts_str = datetime.fromtimestamp(
tr_train.startts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
tr_train_stopts_str = datetime.fromtimestamp(
tr_train.stopts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
if not dk.backtest_live_models: timestamp_model_id = int(tr_train.stopts)
logger.info(
f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
f" from {tr_train_startts_str} "
f"to {tr_train_stopts_str}, {train_it}/{total_trains} "
"trains"
)
timestamp_model_id = int(trained_timestamp.stopts)
if dk.backtest_live_models: if dk.backtest_live_models:
timestamp_model_id = int(tr_backtest.startts) timestamp_model_id = int(tr_backtest.startts)
@ -309,7 +286,7 @@ class IFreqaiModel(ABC):
dk.find_labels(dataframe_train) dk.find_labels(dataframe_train)
self.model = self.train(dataframe_train, pair, dk) self.model = self.train(dataframe_train, pair, dk)
self.dd.pair_dict[pair]["trained_timestamp"] = int( self.dd.pair_dict[pair]["trained_timestamp"] = int(
trained_timestamp.stopts) tr_train.stopts)
if self.plot_features: if self.plot_features:
plot_feature_importance(self.model, pair, dk, self.plot_features) plot_feature_importance(self.model, pair, dk, self.plot_features)
if self.save_backtest_models: if self.save_backtest_models:
@ -788,6 +765,52 @@ class IFreqaiModel(ABC):
return dataframe return dataframe
def ensure_data_exists(self, dataframe_backtest: DataFrame,
tr_backtest: TimeRange, pair: str) -> bool:
"""
Check if the dataframe is empty, if not, report useful information to user.
:param dataframe_backtest: the backtesting dataframe, maybe empty.
:param tr_backtest: current backtesting timerange.
:param pair: current pair
:return: if the data exists or not
"""
if self.config.get("freqai_backtest_live_models", False) and len(dataframe_backtest) == 0:
tr_backtest_startts_str = datetime.fromtimestamp(
tr_backtest.startts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
tr_backtest_stopts_str = datetime.fromtimestamp(
tr_backtest.stopts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
logger.info(f"No data found for pair {pair} from {tr_backtest_startts_str} "
f" from {tr_backtest_startts_str} to {tr_backtest_stopts_str}. "
"Probably more than one training within the same candle period.")
return True
return False
def log_backtesting_progress(self, tr_train: TimeRange, pair: str,
train_it: int, total_trains: int):
"""
Log the backtesting progress so user knows how many pairs have been trained and
hoe many more pairs/trains remain.
:param tr_train: the training timerange
:param train_it: the train iteration for the current pair (the sliding window progress)
:param pair: the current pair
:param total_trains: total trains (total number of slides for the sliding window)
"""
tr_train_startts_str = datetime.fromtimestamp(
tr_train.startts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
tr_train_stopts_str = datetime.fromtimestamp(
tr_train.stopts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
if not self.config.get("freqai_backtest_live_models", False):
logger.info(
f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
f" from {tr_train_startts_str} "
f"to {tr_train_stopts_str}, {train_it}/{total_trains} "
"trains"
)
# Following methods which are overridden by user made prediction models. # Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example. # See freqai/prediction_models/CatboostPredictionModel.py for an example.