added generic function to get timerange from existent models

This commit is contained in:
Wagner Costa Santos 2022-09-24 13:01:53 -03:00
parent d9c16d4888
commit 7f116db95e
2 changed files with 103 additions and 4 deletions

View File

@ -99,6 +99,7 @@ class FreqaiDataKitchen:
self.train_dates: DataFrame = pd.DataFrame()
self.unique_classes: Dict[str, list] = {}
self.unique_class_list: list = []
self.backtest_live_models_data: Dict[str, Any] = {}
def set_paths(
self,
@ -1262,7 +1263,15 @@ class FreqaiDataKitchen:
return file_exists
def get_timerange_from_ready_models(self):
return self.gen_get_timerange_from_ready_models(self.full_path)
backtesting_timerange, \
backtesting_string_timerange, \
pairs_end_dates = self.gen_get_timerange_from_ready_models(self.full_path)
self.backtest_live_models_data = {
"backtesting_timerange": backtesting_timerange,
"backtesting_string_timerange": backtesting_string_timerange,
"pairs_end_dates": pairs_end_dates
}
return
def gen_get_timerange_from_ready_models(self, models_path: Path):
all_models_end_dates = []
@ -1271,8 +1280,9 @@ class FreqaiDataKitchen:
if str(model_dir.name).startswith("sub-train"):
model_end_date = model_dir.name.split("_")[1]
pair = model_dir.name.split("_")[0].replace("sub-train-", "")
model_file_name = f"cb\
_{str(model_dir.name).replace('sub-train-', '').lower()}_model.joblib"
model_file_name = (f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}")
model_file_name = f"{model_file_name}_model.joblib"
model_path_file = Path(model_dir / model_file_name)
if model_path_file.is_file():
if pair not in pairs_end_dates:

View File

@ -139,7 +139,12 @@ class IFreqaiModel(ABC):
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
backtest_live_models = True # temp
if not backtest_live_models:
dk = self.start_backtesting(dataframe, metadata, self.dk)
else:
dk = self.start_backtesting_live_models(dataframe, metadata, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
self.clean_up()
@ -304,6 +309,90 @@ class IFreqaiModel(ABC):
return dk
def start_backtesting_live_models(
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
) -> FreqaiDataKitchen:
"""
The main broad execution for backtesting. For backtesting, each pair enters and then gets
trained for each window along the sliding window defined by "train_period_days"
(training window) and "backtest_period_days" (backtest window, i.e. window immediately
following the training window). FreqAI slides the window and sequentially builds
the backtesting results before returning the concatenated results for the full
backtesting period back to the strategy.
:param dataframe: DataFrame = strategy passed dataframe
:param metadata: Dict = pair metadata
:param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
:return:
FreqaiDataKitchen = Data management/analysis tool associated to present pair only
"""
self.pair_it += 1
train_it = 0
# Loop enforcing the sliding window training/backtesting paradigm
# tr_train is the training time range e.g. 1 historical month
# tr_backtest is the backtesting time range e.g. the week directly
# following tr_train. Both of these windows slide through the
# entire backtest
for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
pair = metadata["pair"]
(_, _, _) = self.dd.get_pair_dict_info(pair)
train_it += 1
total_trains = len(dk.backtesting_timeranges)
self.training_timerange = tr_train
dataframe_train = dk.slice_dataframe(tr_train, dataframe)
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
trained_timestamp = tr_train
tr_train_startts_str = datetime.fromtimestamp(
tr_train.startts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
tr_train_stopts_str = datetime.fromtimestamp(
tr_train.stopts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
logger.info(
f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
"trains"
)
trained_timestamp_int = int(trained_timestamp.stopts)
dk.data_path = Path(
dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}"
)
dk.set_new_model_names(pair, trained_timestamp)
if dk.check_if_backtest_prediction_exists():
self.dd.load_metadata(dk)
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
append_df = dk.get_backtesting_prediction()
dk.append_predictions(append_df)
else:
if not self.model_exists(
pair, dk, trained_timestamp=trained_timestamp_int
):
dk.find_features(dataframe_train)
self.model = self.train(dataframe_train, pair, dk)
self.dd.pair_dict[pair]["trained_timestamp"] = int(
trained_timestamp.stopts)
if self.save_backtest_models:
logger.info('Saving backtest model to disk.')
self.dd.save_data(self.model, pair, dk)
else:
self.model = self.dd.load_data(pair, dk)
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
pred_df, do_preds = self.predict(dataframe_backtest, dk)
append_df = dk.get_predictions_to_append(pred_df, do_preds)
dk.append_predictions(append_df)
dk.save_backtesting_prediction(append_df)
dk.fill_predictions(dataframe)
return dk
def start_live(
self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen
) -> FreqaiDataKitchen: