change start_backtesting to handle backtest_live_models

Wagner Costa Santos 2022-09-25 10:35:55 -03:00
parent 7f116db95e
commit 0ed7b2bfc3
2 changed files with 82 additions and 123 deletions
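
This commit folds the backtest_live_models handling into start_backtesting() itself: the kitchen decides at construction time whether it is in live-models mode, builds the backtesting windows from the end dates of the models already trained during a dry/live run, and start_backtesting() refuses to train new models in that mode. A rough, standalone sketch of the windowing idea (hypothetical timestamps; the real logic lives in FreqaiDataKitchen.split_timerange_live_models() and reads backtest_live_models_data collected from the model directories):

    from freqtrade.configuration import TimeRange

    # Hypothetical, sorted end timestamps of models trained during a dry/live run,
    # with the extrapolated backtest stop appended as the final entry.
    model_end_dates = [1663200000, 1663804800, 1664409600, 1665014400]

    # Each model is backtested from its own end date up to the next model's end
    # date, mirroring what split_timerange_live_models() does per pair.
    backtesting_timeranges = [
        TimeRange("date", "date", start, stop)
        for start, stop in zip(model_end_dates, model_end_dates[1:])
    ]

    for tr in backtesting_timeranges:
        print(tr.startts, "->", tr.stopts)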


@@ -62,6 +62,7 @@ class FreqaiDataKitchen:
         live: bool = False,
         pair: str = "",
     ):
+        self.backtest_live_models = False  # temp
         self.data: Dict[str, Any] = {}
         self.data_dictionary: Dict[str, DataFrame] = {}
         self.config = config
@@ -88,6 +89,11 @@ class FreqaiDataKitchen:
             self.config["timerange"], self.freqai_config.get("train_period_days", 0)
         )
-        (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
-            self.full_timerange,
-            config["freqai"]["train_period_days"],
+        if self.backtest_live_models:
+            self.get_timerange_from_ready_models()
+            (self.training_timeranges,
+             self.backtesting_timeranges) = self.split_timerange_live_models()
+        else:
+            (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
+                self.full_timerange,
+                config["freqai"]["train_period_days"],
@@ -451,6 +457,26 @@ class FreqaiDataKitchen:
         # print(tr_training_list, tr_backtesting_list)
         return tr_training_list_timerange, tr_backtesting_list_timerange

+    def split_timerange_live_models(
+        self
+    ) -> Tuple[list, list]:
+
+        tr_backtesting_list_timerange = []
+        pair = self.pair.split("/")[0].split(":")[0]
+        pair_data = self.backtest_live_models_data["pairs_end_dates"][pair]
+        model_end_dates = []
+        backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"]
+        for data in pair_data:
+            model_end_dates.append(data["model_end_date"])
+        model_end_dates.append(backtesting_timerange.stopts)
+        model_end_dates.sort()
+        for index, item in enumerate(model_end_dates):
+            if len(model_end_dates) > (index + 1):
+                tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1])
+                tr_backtesting_list_timerange.append(tr_to_add)
+
+        return tr_backtesting_list_timerange, tr_backtesting_list_timerange
+
     def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame:
         """
         Given a full dataframe, extract the user desired window
@@ -1093,15 +1119,15 @@ class FreqaiDataKitchen:
         return retrain, trained_timerange, data_load_timerange

-    def set_new_model_names(self, pair: str, trained_timerange: TimeRange):
+    def set_new_model_names(self, pair: str, timestamp_id: int):

         coin, _ = pair.split("/")
         self.data_path = Path(
             self.full_path
-            / f"sub-train-{pair.split('/')[0]}_{int(trained_timerange.stopts)}"
+            / f"sub-train-{pair.split('/')[0]}_{timestamp_id}"
         )

-        self.model_filename = f"cb_{coin.lower()}_{int(trained_timerange.stopts)}"
+        self.model_filename = f"cb_{coin.lower()}_{timestamp_id}"

     def set_all_pairs(self) -> None:
@@ -1278,7 +1304,7 @@ class FreqaiDataKitchen:
         pairs_end_dates: Dict[str, Any] = {}

         for model_dir in models_path.iterdir():
             if str(model_dir.name).startswith("sub-train"):
-                model_end_date = model_dir.name.split("_")[1]
+                model_end_date = int(model_dir.name.split("_")[1])
                 pair = model_dir.name.split("_")[0].replace("sub-train-", "")
                 model_file_name = (f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}")
                 model_file_name = f"{model_file_name}_model.joblib"
@@ -1289,14 +1315,24 @@ class FreqaiDataKitchen:
                     pairs_end_dates[pair] = []

                 pairs_end_dates[pair].append({
-                    "model_end_date": int(model_end_date),
+                    "model_end_date": model_end_date,
                     "model_path_file": model_path_file,
                     "model_dir": model_dir
                 })

                 if model_end_date not in all_models_end_dates:
-                    all_models_end_dates.append(int(model_end_date))
+                    all_models_end_dates.append(model_end_date)

+        finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp())
+        if len(all_models_end_dates) > 1:
+            # After last model end date, use the same period from previous model
+            # to finish the backtest
+            all_models_end_dates.sort(reverse=True)
+            finish_timestamp = all_models_end_dates[0] + \
+                (all_models_end_dates[0] - all_models_end_dates[1])
+
+        all_models_end_dates.append(finish_timestamp)
+        all_models_end_dates.sort()
         start = datetime.fromtimestamp(min(all_models_end_dates), tz=timezone.utc)
         stop = datetime.fromtimestamp(max(all_models_end_dates), tz=timezone.utc)
         backtesting_string_timerange = f"{start.strftime('%Y%m%d')}-{stop.strftime('%Y%m%d')}"
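
A quick worked example of the finish-timestamp extrapolation in the hunk above (numbers are hypothetical): if the two most recent model end dates are one week apart, the overall backtesting range is extended by that same week past the newest model, so the last model also gets a full backtest window.

    # Hypothetical: the two newest model end dates, one week (604800 s) apart.
    all_models_end_dates = [1664409600, 1665014400]

    newest, previous = all_models_end_dates[-1], all_models_end_dates[-2]
    # Repeat the last gap beyond the newest model's end date.
    finish_timestamp = newest + (newest - previous)  # 1665619200, one more week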


@@ -134,17 +134,17 @@ class IFreqaiModel(ABC):
         # the concatenated results for the full backtesting period back to the strategy.
         elif not self.follow_mode:
             self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
-            self.dk.get_timerange_from_ready_models()
-            logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
+            if(self.dk.backtest_live_models):
+                logger.info(
+                    f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (Live Models)")
+            else:
+                logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
             dataframe = self.dk.use_strategy_to_populate_indicators(
                 strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
             )
-            backtest_live_models = True  # temp
-            if not backtest_live_models:
-                dk = self.start_backtesting(dataframe, metadata, self.dk)
-            else:
-                dk = self.start_backtesting_live_models(dataframe, metadata, self.dk)
+            dk = self.start_backtesting(dataframe, metadata, self.dk)
+            # else:
+            #     dk = self.start_backtesting_live_models(dataframe, metadata, self.dk)

             dataframe = dk.remove_features_from_df(dk.return_dataframe)
         self.clean_up()
@@ -265,28 +265,39 @@ class IFreqaiModel(ABC):
             tr_train_stopts_str = datetime.fromtimestamp(
                 tr_train.stopts,
                 tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
-            logger.info(
-                f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
-                f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
-                "trains"
-            )
+            if not dk.backtest_live_models:
+                logger.info(
+                    f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
+                    f" from {tr_train_startts_str}"
+                    f" to {tr_train_stopts_str}, {train_it}/{total_trains} "
+                    "trains"
+                )

-            trained_timestamp_int = int(trained_timestamp.stopts)
+            timestamp_model_id = int(trained_timestamp.stopts)
+            if dk.backtest_live_models:
+                timestamp_model_id = int(tr_backtest.startts)
+
             dk.data_path = Path(
-                dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}"
+                dk.full_path / f"sub-train-{pair.split('/')[0]}_{timestamp_model_id}"
             )

-            dk.set_new_model_names(pair, trained_timestamp)
+            dk.set_new_model_names(pair, timestamp_model_id)

             if dk.check_if_backtest_prediction_exists():
                 self.dd.load_metadata(dk)
-                self.check_if_feature_list_matches_strategy(dataframe_train, dk)
+                if not dk.backtest_live_models:
+                    self.check_if_feature_list_matches_strategy(dataframe_train, dk)
                 append_df = dk.get_backtesting_prediction()
                 dk.append_predictions(append_df)
             else:
-                if not self.model_exists(
-                    pair, dk, trained_timestamp=trained_timestamp_int
-                ):
+                if not self.model_exists(dk):
+                    if dk.backtest_live_models:
+                        raise OperationalException(
+                            "Training models is not allowed "
+                            "in backtest_live_models backtesting "
+                            "mode"
+                        )
                     dk.find_features(dataframe_train)
                     self.model = self.train(dataframe_train, pair, dk)
                     self.dd.pair_dict[pair]["trained_timestamp"] = int(
@@ -306,91 +317,6 @@ class IFreqaiModel(ABC):
                     dk.save_backtesting_prediction(append_df)

             dk.fill_predictions(dataframe)

-        return dk
-
-    def start_backtesting_live_models(
-        self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
-    ) -> FreqaiDataKitchen:
-        """
-        The main broad execution for backtesting. For backtesting, each pair enters and then gets
-        trained for each window along the sliding window defined by "train_period_days"
-        (training window) and "backtest_period_days" (backtest window, i.e. window immediately
-        following the training window). FreqAI slides the window and sequentially builds
-        the backtesting results before returning the concatenated results for the full
-        backtesting period back to the strategy.
-        :param dataframe: DataFrame = strategy passed dataframe
-        :param metadata: Dict = pair metadata
-        :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
-        :return:
-            FreqaiDataKitchen = Data management/analysis tool associated to present pair only
-        """
-
-        self.pair_it += 1
-        train_it = 0
-        # Loop enforcing the sliding window training/backtesting paradigm
-        # tr_train is the training time range e.g. 1 historical month
-        # tr_backtest is the backtesting time range e.g. the week directly
-        # following tr_train. Both of these windows slide through the
-        # entire backtest
-        for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
-            pair = metadata["pair"]
-            (_, _, _) = self.dd.get_pair_dict_info(pair)
-            train_it += 1
-            total_trains = len(dk.backtesting_timeranges)
-            self.training_timerange = tr_train
-            dataframe_train = dk.slice_dataframe(tr_train, dataframe)
-            dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
-
-            trained_timestamp = tr_train
-            tr_train_startts_str = datetime.fromtimestamp(
-                tr_train.startts,
-                tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
-            tr_train_stopts_str = datetime.fromtimestamp(
-                tr_train.stopts,
-                tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
-            logger.info(
-                f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
-                f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
-                "trains"
-            )
-
-            trained_timestamp_int = int(trained_timestamp.stopts)
-            dk.data_path = Path(
-                dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}"
-            )
-
-            dk.set_new_model_names(pair, trained_timestamp)
-
-            if dk.check_if_backtest_prediction_exists():
-                self.dd.load_metadata(dk)
-                self.check_if_feature_list_matches_strategy(dataframe_train, dk)
-                append_df = dk.get_backtesting_prediction()
-                dk.append_predictions(append_df)
-            else:
-                if not self.model_exists(
-                    pair, dk, trained_timestamp=trained_timestamp_int
-                ):
-                    dk.find_features(dataframe_train)
-                    self.model = self.train(dataframe_train, pair, dk)
-                    self.dd.pair_dict[pair]["trained_timestamp"] = int(
-                        trained_timestamp.stopts)
-
-                    if self.save_backtest_models:
-                        logger.info('Saving backtest model to disk.')
-                        self.dd.save_data(self.model, pair, dk)
-                else:
-                    self.model = self.dd.load_data(pair, dk)
-
-                self.check_if_feature_list_matches_strategy(dataframe_train, dk)
-
-                pred_df, do_preds = self.predict(dataframe_backtest, dk)
-                append_df = dk.get_predictions_to_append(pred_df, do_preds)
-                dk.append_predictions(append_df)
-                dk.save_backtesting_prediction(append_df)
-
-            dk.fill_predictions(dataframe)
-
         return dk

     def start_live(
@@ -595,10 +521,7 @@ class IFreqaiModel(ABC):
     def model_exists(
         self,
-        pair: str,
         dk: FreqaiDataKitchen,
-        trained_timestamp: int = None,
-        model_filename: str = "",
         scanning: bool = False,
     ) -> bool:
         """
@@ -608,7 +531,7 @@ class IFreqaiModel(ABC):
         :return:
         :boolean: whether the model file exists or not.
         """
-        path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib")
+        path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model.joblib")
         file_exists = path_to_modelfile.is_file()
         if file_exists and not scanning:
             logger.info("Found model at %s", dk.data_path / dk.model_filename)
@@ -663,7 +586,7 @@ class IFreqaiModel(ABC):
             model = self.train(unfiltered_dataframe, pair, dk)

         self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
-        dk.set_new_model_names(pair, new_trained_timerange)
+        dk.set_new_model_names(pair, int(new_trained_timerange.stopts))
         self.dd.save_data(model, pair, dk)

         if self.freqai_info["feature_parameters"].get("plot_feature_importance", False):