Merge pull request #7737 from freqtrade/backtest_fitlivepredictions

FreqAI - Backtesting enhancements and bug fix
Robert Caulk 2022-11-30 16:51:04 +01:00 committed by GitHub
commit 2bcd8e4e21
9 changed files with 248 additions and 230 deletions


@@ -79,16 +79,11 @@ To change your **features**, you **must** set a new `identifier` in the config t
To save the models generated during a particular backtest so that you can start a live deployment from one of them instead of training a new model, you must set `save_backtest_models` to `True` in the config.
### Backtest live collected predictions
FreqAI allows you to reuse live historic predictions through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse predictions generated in a dry run for comparison or other studies.
The `--timerange` parameter must not be provided, as it will be automatically calculated from the data in the historic predictions file.
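For illustration, the derived timerange corresponds roughly to the following sketch (a simplified reading of the model folder, not the actual FreqAI implementation; it assumes the default `historic_predictions.pkl` and `global_metadata.json` files and a hypothetical identifier):

```python
# Simplified sketch: derive the backtest timerange from live-collected data.
# Assumes historic_predictions.pkl is a pickled dict of {pair: DataFrame} with a
# "date_pred" column, and that global_metadata.json contains "start_dry_live_date".
import json
import pickle
from datetime import datetime, timedelta, timezone
from pathlib import Path

model_dir = Path("user_data/models/my-identifier")  # hypothetical identifier

with open(model_dir / "historic_predictions.pkl", "rb") as fp:
    historic_predictions = pickle.load(fp)

with open(model_dir / "global_metadata.json") as fp:
    metadata = json.load(fp)

start = datetime.fromtimestamp(int(metadata["start_dry_live_date"]), tz=timezone.utc)
# End of the timerange: latest prediction date across all pairs, plus one day of padding.
end = max(df["date_pred"].max() for df in historic_predictions.values()) + timedelta(days=1)
print(f"Derived timerange: {start:%Y%m%d}-{end:%Y%m%d}")
```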
Each model has an identifier derived from the training end date. If you have only 1 model trained, FreqAI will backtest from the training end date until the current date. If you have more than 1 model, each model will perform the backtesting according to the training end date until the training end date of the next model and so on. For the last model, the period of the previous model will be used for the execution.
!!! Note
Currently, there is no checking for expired models, even if the `expired_hours` parameter is set.
### Downloading data to cover the full backtest period


@@ -4,7 +4,7 @@ import logging
import re
import shutil
import threading
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Tuple, TypedDict
@@ -82,6 +82,7 @@ class FreqaiDataDrawer:
self.historic_predictions_bkp_path = Path(
self.full_path / "historic_predictions.backup.pkl")
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.global_metadata_path = Path(self.full_path / "global_metadata.json")
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
self.follow_mode = follow_mode
if follow_mode:
@@ -127,6 +128,17 @@ class FreqaiDataDrawer:
self.update_metric_tracker('cpu_load5min', load5 / cpus, pair)
self.update_metric_tracker('cpu_load15min', load15 / cpus, pair)
def load_global_metadata_from_disk(self):
"""
Locate and load previously saved global metadata from the present model folder.
"""
exists = self.global_metadata_path.is_file()
if exists:
with open(self.global_metadata_path, "r") as fp:
metadata_dict = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
return metadata_dict
return {}
def load_drawer_from_disk(self):
"""
Locate and load a previously saved data drawer full of all pair model metadata in
@@ -227,6 +239,15 @@ class FreqaiDataDrawer:
rapidjson.dump(self.follower_dict, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
def save_global_metadata_to_disk(self, metadata: Dict[str, Any]):
"""
Save global metadata json to disk
"""
with self.save_lock:
with open(self.global_metadata_path, 'w') as fp:
rapidjson.dump(metadata, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
def create_follower_dict(self):
"""
Create a dictionary for each follower to maintain unique persistent prediction targets
@@ -696,3 +717,31 @@ class FreqaiDataDrawer:
).reset_index(drop=True)
return corr_dataframes, base_dataframes
def get_timerange_from_live_historic_predictions(self) -> TimeRange:
"""
Returns timerange information based on historic predictions file
:return: timerange calculated from saved live data
"""
if not self.historic_predictions_path.is_file():
raise OperationalException(
'Historic predictions not found. Historic predictions data is required '
'to run backtest with the freqai-backtest-live-models option '
)
self.load_historic_predictions_from_disk()
all_pairs_end_dates = []
for pair in self.historic_predictions:
pair_historic_data = self.historic_predictions[pair]
all_pairs_end_dates.append(pair_historic_data.date_pred.max())
global_metadata = self.load_global_metadata_from_disk()
start_date = datetime.fromtimestamp(int(global_metadata["start_dry_live_date"]))
end_date = max(all_pairs_end_dates)
# add 1 day to string timerange to ensure BT module will load all dataframe data
end_date = end_date + timedelta(days=1)
backtesting_timerange = TimeRange(
'date', 'date', int(start_date.timestamp()), int(end_date.timestamp())
)
return backtesting_timerange


@@ -1,7 +1,7 @@
import copy
import logging
import shutil
from datetime import datetime, timezone
from math import cos, sin
from pathlib import Path
from typing import Any, Dict, List, Tuple
@@ -87,12 +87,7 @@ class FreqaiDataKitchen:
if not self.live:
self.full_path = self.get_full_models_path(self.config)
if not self.backtest_live_models:
if self.pair:
self.set_timerange_from_ready_models()
(self.training_timeranges,
self.backtesting_timeranges) = self.split_timerange_live_models()
else:
self.full_timerange = self.create_fulltimerange(
self.config["timerange"], self.freqai_config.get("train_period_days", 0)
)
@@ -460,29 +455,6 @@ class FreqaiDataKitchen:
# print(tr_training_list, tr_backtesting_list)
return tr_training_list_timerange, tr_backtesting_list_timerange
def split_timerange_live_models(
self
) -> Tuple[list, list]:
tr_backtesting_list_timerange = []
asset = self.pair.split("/")[0]
if asset not in self.backtest_live_models_data["assets_end_dates"]:
raise OperationalException(
f"Model not available for pair {self.pair}. "
"Please, try again after removing this pair from the configuration file."
)
asset_data = self.backtest_live_models_data["assets_end_dates"][asset]
backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"]
model_end_dates = [x for x in asset_data]
model_end_dates.append(backtesting_timerange.stopts)
model_end_dates.sort()
for index, item in enumerate(model_end_dates):
if len(model_end_dates) > (index + 1):
tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1])
tr_backtesting_list_timerange.append(tr_to_add)
return tr_backtesting_list_timerange, tr_backtesting_list_timerange
def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame:
"""
Given a full dataframe, extract the user desired window
@@ -978,7 +950,8 @@ class FreqaiDataKitchen:
return weights
def get_predictions_to_append(self, predictions: DataFrame,
do_predict: npt.ArrayLike,
dataframe_backtest: DataFrame) -> DataFrame:
""" """
Get backtest prediction from current backtest period Get backtest prediction from current backtest period
""" """
@@ -1000,7 +973,9 @@
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
append_df["DI_values"] = self.DI_values
dataframe_backtest.reset_index(drop=True, inplace=True)
merged_df = pd.concat([dataframe_backtest["date"], append_df], axis=1)
return merged_df
def append_predictions(self, append_df: DataFrame) -> None:
"""
@@ -1010,23 +985,18 @@
if self.full_df.empty:
self.full_df = append_df
else:
self.full_df = pd.concat([self.full_df, append_df], axis=0, ignore_index=True)
def fill_predictions(self, dataframe):
"""
Back fill values to before the backtesting range so that the dataframe matches size
when it goes back to the strategy. These rows are not included in the backtest.
"""
len_filler = len(dataframe) - len(self.full_df.index) # startup_candle_count
filler_df = pd.DataFrame(
np.zeros((len_filler, len(self.full_df.columns))), columns=self.full_df.columns
)
self.full_df = pd.concat([filler_df, self.full_df], axis=0, ignore_index=True)
to_keep = [col for col in dataframe.columns if not col.startswith("&")]
self.return_dataframe = pd.merge(dataframe[to_keep],
self.full_df, how='left', on='date')
self.return_dataframe[self.full_df.columns] = (
self.return_dataframe[self.full_df.columns].fillna(value=0))
self.full_df = DataFrame()
return
@@ -1323,22 +1293,22 @@
self, append_df: DataFrame
) -> None:
"""
Save prediction dataframe from backtesting to feather file format
:param append_df: dataframe for backtesting period
"""
full_predictions_folder = Path(self.full_path / self.backtest_predictions_folder)
if not full_predictions_folder.is_dir():
full_predictions_folder.mkdir(parents=True, exist_ok=True)
append_df.to_feather(self.backtesting_results_path)
def get_backtesting_prediction(
self
) -> DataFrame:
"""
Get prediction dataframe from feather file format
"""
append_df = pd.read_feather(self.backtesting_results_path)
return append_df
def check_if_backtest_prediction_is_valid(
@@ -1354,19 +1324,20 @@
"""
path_to_predictionfile = Path(self.full_path /
self.backtest_predictions_folder /
f"{self.model_filename}_prediction.feather")
self.backtesting_results_path = path_to_predictionfile
file_exists = path_to_predictionfile.is_file()
if file_exists:
append_df = self.get_backtesting_prediction()
if len(append_df) == len_backtest_df and 'date' in append_df:
logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
return True
else:
logger.info("A new backtesting prediction file is required. "
"(Number of predictions is different from dataframe length or "
"old prediction file version).")
return False
else:
logger.info(
@@ -1374,17 +1345,6 @@
)
return False
def set_timerange_from_ready_models(self):
backtesting_timerange, \
assets_end_dates = (
self.get_timerange_and_assets_end_dates_from_ready_models(self.full_path))
self.backtest_live_models_data = {
"backtesting_timerange": backtesting_timerange,
"assets_end_dates": assets_end_dates
}
return
def get_full_models_path(self, config: Config) -> Path:
"""
Returns default FreqAI model path
@@ -1395,88 +1355,6 @@
config["user_data_dir"] / "models" / str(freqai_config.get("identifier"))
)
def get_timerange_and_assets_end_dates_from_ready_models(
self, models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]:
"""
Returns timerange information based on a FreqAI model directory
:param models_path: FreqAI model path
:return: a Tuple with (Timerange calculated from directory and
a Dict with pair and model end training dates info)
"""
all_models_end_dates = []
assets_end_dates: Dict[str, Any] = self.get_assets_timestamps_training_from_ready_models(
models_path)
for key in assets_end_dates:
for model_end_date in assets_end_dates[key]:
if model_end_date not in all_models_end_dates:
all_models_end_dates.append(model_end_date)
if len(all_models_end_dates) == 0:
raise OperationalException(
'At least 1 saved model is required to '
'run backtest with the freqai-backtest-live-models option'
)
if len(all_models_end_dates) == 1:
logger.warning(
"Only 1 model was found. Backtesting will run with the "
"timerange from the end of the training date to the current date"
)
finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp())
if len(all_models_end_dates) > 1:
# After last model end date, use the same period from previous model
# to finish the backtest
all_models_end_dates.sort(reverse=True)
finish_timestamp = all_models_end_dates[0] + \
(all_models_end_dates[0] - all_models_end_dates[1])
all_models_end_dates.append(finish_timestamp)
all_models_end_dates.sort()
start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates),
timezone.utc).timetuple()[:3], tzinfo=timezone.utc))
end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates),
timezone.utc).timetuple()[:3], tzinfo=timezone.utc))
# add 1 day to string timerange to ensure BT module will load all dataframe data
end_date = end_date + timedelta(days=1)
backtesting_timerange = TimeRange(
'date', 'date', int(start_date.timestamp()), int(end_date.timestamp())
)
return backtesting_timerange, assets_end_dates
def get_assets_timestamps_training_from_ready_models(
self, models_path: Path) -> Dict[str, Any]:
"""
Scan the models path and returns all assets end training dates (timestamp)
:param models_path: FreqAI model path
:return: a Dict with asset and model end training dates info
"""
assets_end_dates: Dict[str, Any] = {}
if not models_path.is_dir():
raise OperationalException(
'Model folders not found. Saved models are required '
'to run backtest with the freqai-backtest-live-models option'
)
for model_dir in models_path.iterdir():
if str(model_dir.name).startswith("sub-train"):
model_end_date = int(model_dir.name.split("_")[1])
asset = model_dir.name.split("_")[0].replace("sub-train-", "")
model_file_name = (
f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}"
"_model.joblib"
)
model_path_file = Path(model_dir / model_file_name)
if model_path_file.is_file():
if asset not in assets_end_dates:
assets_end_dates[asset] = []
assets_end_dates[asset].append(model_end_date)
return assets_end_dates
def remove_special_chars_from_feature_names(self, dataframe: pd.DataFrame) -> pd.DataFrame:
"""
Remove all special characters from feature strings (:)


@@ -69,6 +69,7 @@ class IFreqaiModel(ABC):
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
# set current candle to arbitrary historical date
self.current_candle: datetime = datetime.fromtimestamp(637887600, tz=timezone.utc)
@@ -100,6 +101,7 @@ class IFreqaiModel(ABC):
self.get_corr_dataframes: bool = True
self._threads: List[threading.Thread] = []
self._stop_event = threading.Event()
self.metadata: Dict[str, Any] = self.dd.load_global_metadata_from_disk()
self.data_provider: Optional[DataProvider] = None
self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1)
@@ -136,6 +138,7 @@ class IFreqaiModel(ABC):
self.inference_timer('start')
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
dk = self.start_live(dataframe, metadata, strategy, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
# For backtesting, each pair enters and then gets trained for each window along the
# sliding window defined by "train_period_days" (training window) and "live_retrain_hours"
@@ -144,20 +147,24 @@
# the concatenated results for the full backtesting period back to the strategy.
elif not self.follow_mode:
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
if self.dk.backtest_live_models:
logger.info(
f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (live models)")
else:
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
if not self.config.get("freqai_backtest_live_models", False):
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
else:
logger.info(
"Backtesting using historic predictions (live models)")
dk = self.start_backtesting_from_historic_predictions(
dataframe, metadata, self.dk)
dataframe = dk.return_dataframe
self.clean_up()
if self.live:
self.inference_timer('stop', metadata["pair"])
return dataframe
def clean_up(self):
@@ -316,10 +323,11 @@ class IFreqaiModel(ABC):
self.model = self.dd.load_data(pair, dk)
pred_df, do_preds = self.predict(dataframe_backtest, dk)
append_df = dk.get_predictions_to_append(pred_df, do_preds, dataframe_backtest)
dk.append_predictions(append_df)
dk.save_backtesting_prediction(append_df)
self.backtesting_fit_live_predictions(dk)
dk.fill_predictions(dataframe)
return dk
@@ -632,6 +640,8 @@ class IFreqaiModel(ABC):
self.dd.historic_predictions[pair] = pred_df
hist_preds_df = self.dd.historic_predictions[pair]
self.set_start_dry_live_date(strat_df)
for label in hist_preds_df.columns:
if hist_preds_df[label].dtype == object:
continue
@@ -672,7 +682,8 @@ class IFreqaiModel(ABC):
for label in full_labels:
if self.dd.historic_predictions[dk.pair][label].dtype == object:
continue
f = spy.stats.norm.fit(
self.dd.historic_predictions[dk.pair][label].tail(num_candles))
dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1] dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]
return return
@@ -826,6 +837,81 @@ class IFreqaiModel(ABC):
f"to {tr_train.stop_fmt}, {train_it}/{total_trains} "
"trains"
)
def backtesting_fit_live_predictions(self, dk: FreqaiDataKitchen):
"""
Apply fit_live_predictions function in backtesting with a dummy historic_predictions
The loop is required to simulate dry/live operation, as it is not possible to predict
the type of logic implemented by the user.
:param dk: datakitchen object
"""
fit_live_predictions_candles = self.freqai_info.get("fit_live_predictions_candles", 0)
if fit_live_predictions_candles:
logger.info("Applying fit_live_predictions in backtesting")
label_columns = [col for col in dk.full_df.columns if (
col.startswith("&") and
not (col.startswith("&") and col.endswith("_mean")) and
not (col.startswith("&") and col.endswith("_std")) and
col not in self.dk.data["extra_returns_per_train"])
]
for index in range(len(dk.full_df)):
if index >= fit_live_predictions_candles:
self.dd.historic_predictions[self.dk.pair] = (
dk.full_df.iloc[index - fit_live_predictions_candles:index])
self.fit_live_predictions(self.dk, self.dk.pair)
for label in label_columns:
if dk.full_df[label].dtype == object:
continue
if "labels_mean" in self.dk.data:
dk.full_df.at[index, f"{label}_mean"] = (
self.dk.data["labels_mean"][label])
if "labels_std" in self.dk.data:
dk.full_df.at[index, f"{label}_std"] = self.dk.data["labels_std"][label]
for extra_col in self.dk.data["extra_returns_per_train"]:
dk.full_df.at[index, f"{extra_col}"] = (
self.dk.data["extra_returns_per_train"][extra_col])
return
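A minimal, standalone sketch of the rolling logic above, assuming only pandas, numpy and scipy and reusing the test suite's "&-s_close" label column: for each row past `fit_live_predictions_candles`, a normal distribution is fitted over the previous window of label values and its mean/std are written next to that row, mirroring what the default `fit_live_predictions` does via `spy.stats.norm.fit`.

import numpy as np
import pandas as pd
import scipy.stats as spy

N = 10  # stand-in for fit_live_predictions_candles
full_df = pd.DataFrame({"&-s_close": np.random.normal(0, 1, 50)})  # dummy label column
for idx in range(N, len(full_df)):
    window = full_df["&-s_close"].iloc[idx - N:idx]
    label_mean, label_std = spy.norm.fit(window)  # same fit used by the default fit_live_predictions
    full_df.at[idx, "&-s_close_mean"] = label_mean
    full_df.at[idx, "&-s_close_std"] = label_std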
def update_metadata(self, metadata: Dict[str, Any]):
"""
Update global metadata and save the updated json file
:param metadata: new global metadata dict
"""
self.dd.save_global_metadata_to_disk(metadata)
self.metadata = metadata
def set_start_dry_live_date(self, live_dataframe: DataFrame):
key_name = "start_dry_live_date"
if key_name not in self.metadata:
metadata = self.metadata
metadata[key_name] = int(
pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp())
self.update_metadata(metadata)
def start_backtesting_from_historic_predictions(
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
) -> FreqaiDataKitchen:
"""
:param dataframe: DataFrame = strategy passed dataframe
:param metadata: Dict = pair metadata
:param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
:return:
FreqaiDataKitchen = Data management/analysis tool associated to present pair only
"""
pair = metadata["pair"]
dk.return_dataframe = dataframe
saved_dataframe = self.dd.historic_predictions[pair]
columns_to_drop = list(set(saved_dataframe.columns).intersection(
dk.return_dataframe.columns))
dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
dk.return_dataframe = pd.merge(
dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred")
# dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0)
return dk
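A small illustration of the join performed above, as a sketch under the assumption that the saved predictions carry a `date_pred` column aligned with the strategy dataframe's `date` column; candles without a stored prediction simply keep NaN values.

import pandas as pd

strategy_df = pd.DataFrame({"date": pd.date_range("2022-01-01", periods=4, freq="1h"),
                            "close": [1.0, 1.1, 1.2, 1.3]})
saved_predictions = pd.DataFrame({"date_pred": pd.date_range("2022-01-01 02:00", periods=2, freq="1h"),
                                  "&-s_close": [0.5, 0.6]})
merged = pd.merge(strategy_df, saved_predictions, how="left",
                  left_on="date", right_on="date_pred")
print(merged)  # the last two candles receive predictions, earlier ones stay NaN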
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.


@@ -14,6 +14,7 @@ from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.exchange.exchange import market_is_active
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
@@ -229,5 +230,6 @@ def get_timerange_backtest_live_models(config: Config) -> str:
"""
dk = FreqaiDataKitchen(config)
models_path = dk.get_full_models_path(config)
dd = FreqaiDataDrawer(models_path, config)
timerange = dd.get_timerange_from_live_historic_predictions()
return timerange.timerange_str


@@ -65,6 +65,8 @@ def test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testda
mocker.patch('freqtrade.optimize.backtesting.history.load_data')
mocker.patch('freqtrade.optimize.backtesting.history.get_timerange', return_value=(now, now))
freqai_conf["timerange"] = ""
freqai_conf.get("freqai", {}).update({"backtest_using_historic_predictions": False})
patched_configuration_load_config_file(mocker, freqai_conf)
args = [
@@ -79,7 +81,7 @@ def test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testda
bt_config = setup_optimize_configuration(args, RunMode.BACKTEST)
with pytest.raises(OperationalException,
match=r".* Historic predictions data is required to run backtest .*"):
Backtesting(bt_config)
Backtesting.cleanup()


@@ -2,8 +2,11 @@
import shutil
from pathlib import Path
import pytest
from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from tests.conftest import get_patched_exchange
from tests.freqai.conftest import get_patched_freqai_strategy
@@ -93,3 +96,37 @@ def test_use_strategy_to_populate_indicators(mocker, freqai_conf):
assert len(df.columns) == 33
shutil.rmtree(Path(freqai.dk.full_path))
def test_get_timerange_from_live_historic_predictions(mocker, freqai_conf):
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180126-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
sub_timerange = TimeRange.parse_timerange("20180128-20180130")
_, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "ADA/BTC", freqai.dk)
base_df["5m"]["date_pred"] = base_df["5m"]["date"]
freqai.dd.historic_predictions = {}
freqai.dd.historic_predictions["ADA/USDT"] = base_df["5m"]
freqai.dd.save_historic_predictions_to_disk()
freqai.dd.save_global_metadata_to_disk({"start_dry_live_date": 1516406400})
timerange = freqai.dd.get_timerange_from_live_historic_predictions()
assert timerange.startts == 1516406400
assert timerange.stopts == 1517356500
def test_get_timerange_from_backtesting_live_df_pred_not_found(mocker, freqai_conf):
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
freqai = strategy.freqai
with pytest.raises(
OperationalException,
match=r'Historic predictions not found.*'
):
freqai.dd.get_timerange_from_live_historic_predictions()


@@ -9,7 +9,6 @@ from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.utils import get_timerange_backtest_live_models
from tests.conftest import get_patched_exchange, log_has_re
from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy,
make_data_dictionary, make_unfiltered_dataframe)
@@ -166,71 +165,6 @@ def test_make_train_test_datasets(mocker, freqai_conf):
assert len(data_dictionary['train_features'].index) == 1916
def test_get_pairs_timestamp_validation(mocker, freqai_conf):
exchange = get_patched_exchange(mocker, freqai_conf)
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf)
freqai_conf['freqai'].update({"identifier": "invalid_id"})
model_path = freqai.dk.get_full_models_path(freqai_conf)
with pytest.raises(
OperationalException,
match=r'.*required to run backtest with the freqai-backtest-live-models.*'
):
freqai.dk.get_assets_timestamps_training_from_ready_models(model_path)
@pytest.mark.parametrize('model', [
'LightGBMRegressor'
])
def test_get_timerange_from_ready_models(mocker, freqai_conf, model):
freqai_conf.update({"freqaimodel": model})
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"strategy": "freqai_test_strat"})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180101-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
freqai.dd.pair_dict = MagicMock()
data_load_timerange = TimeRange.parse_timerange("20180101-20180130")
# 1516233600 (2018-01-18 00:00) - Start Training 1
# 1516406400 (2018-01-20 00:00) - End Training 1 (Backtest slice 1)
# 1516579200 (2018-01-22 00:00) - End Training 2 (Backtest slice 2)
# 1516838400 (2018-01-25 00:00) - End Timerange
new_timerange = TimeRange("date", "date", 1516233600, 1516406400)
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
new_timerange = TimeRange("date", "date", 1516406400, 1516579200)
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
model_path = freqai.dk.get_full_models_path(freqai_conf)
(backtesting_timerange,
pairs_end_dates) = freqai.dk.get_timerange_and_assets_end_dates_from_ready_models(
models_path=model_path)
assert len(pairs_end_dates["ADA"]) == 2
assert backtesting_timerange.startts == 1516406400
assert backtesting_timerange.stopts == 1516838400
backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf)
assert backtesting_string_timerange == '20180120-20180125'
@pytest.mark.parametrize('model', [
'LightGBMRegressor'
])


@@ -301,7 +301,9 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
pair = "ADA/BTC"
metadata = {"pair": pair}
freqai.dk.pair = pair
freqai.start_backtesting(df, metadata, freqai.dk)
model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()]
@@ -324,6 +326,9 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
pair = "ADA/BTC"
metadata = {"pair": pair}
freqai.dk.pair = pair
freqai.start_backtesting(df, metadata, freqai.dk)
assert log_has_re(
@@ -331,13 +336,43 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
caplog,
)
pair = "ETH/BTC"
metadata = {"pair": pair}
freqai.dk.pair = pair
freqai.start_backtesting(df, metadata, freqai.dk)
path = (freqai.dd.full_path / freqai.dk.backtest_predictions_folder)
prediction_files = [x for x in path.iterdir() if x.is_file()]
assert len(prediction_files) == 2
shutil.rmtree(Path(freqai.dk.full_path))
def test_backtesting_fit_live_predictions(mocker, freqai_conf, caplog):
freqai_conf.get("freqai", {}).update({"fit_live_predictions_candles": 10})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = False
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180128-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
sub_timerange = TimeRange.parse_timerange("20180129-20180130")
corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk)
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
freqai.dk.pair = "ADA/BTC"
freqai.dk.full_df = df.fillna(0)
assert "&-s_close_mean" not in freqai.dk.full_df.columns
assert "&-s_close_std" not in freqai.dk.full_df.columns
freqai.backtesting_fit_live_predictions(freqai.dk)
assert "&-s_close_mean" in freqai.dk.full_df.columns
assert "&-s_close_std" in freqai.dk.full_df.columns
shutil.rmtree(Path(freqai.dk.full_path))
def test_follow_mode(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"})