Update backtesting code to use historic_predictions for freqai_backtest_live_models

This commit is contained in:
Wagner Costa Santos
2022-11-19 14:15:58 -03:00
parent 3d3195847c
commit 80d070e9ee
8 changed files with 86 additions and 174 deletions

View File

@@ -81,6 +81,7 @@ class FreqaiDataDrawer:
self.historic_predictions_bkp_path = Path(
self.full_path / "historic_predictions.backup.pkl")
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.global_metadata_path = Path(self.full_path / "global_metadata.json")
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
self.follow_mode = follow_mode
if follow_mode:
@@ -125,6 +126,17 @@ class FreqaiDataDrawer:
self.update_metric_tracker('cpu_load5min', load5 / cpus, pair)
self.update_metric_tracker('cpu_load15min', load15 / cpus, pair)
def load_global_metadata_from_disk(self):
    """
    Load previously saved global metadata from the present model folder.
    :return: the metadata dict, or an empty dict when no file has been
        saved yet
    """
    # Guard clause: nothing persisted yet -> empty metadata.
    if not self.global_metadata_path.is_file():
        return {}
    with open(self.global_metadata_path, "r") as fp:
        return rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
def load_drawer_from_disk(self):
"""
Locate and load a previously saved data drawer full of all pair model metadata in
@@ -225,6 +237,15 @@ class FreqaiDataDrawer:
rapidjson.dump(self.follower_dict, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
def save_global_metadata_to_disk(self, metadata: Dict[str, Any]):
    """
    Persist the global metadata dict as json in the model folder.
    :param metadata: new global metadata dict to write to disk
    """
    # save_lock serializes writers so concurrent threads cannot
    # interleave partial json output.
    with self.save_lock, open(self.global_metadata_path, 'w') as fp:
        rapidjson.dump(metadata, fp, default=self.np_encoder,
                       number_mode=rapidjson.NM_NATIVE)
def create_follower_dict(self):
"""
Create or dictionary for each follower to maintain unique persistent prediction targets

View File

@@ -9,7 +9,7 @@ from typing import Any, Dict, List, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame, read_feather
from pandas import DataFrame
from scipy import stats
from sklearn import linear_model
from sklearn.cluster import DBSCAN
@@ -74,9 +74,6 @@ class FreqaiDataKitchen:
self.training_features_list: List = []
self.model_filename: str = ""
self.backtesting_results_path = Path()
self.backtesting_live_model_folder_path = Path()
self.backtesting_live_model_path = Path()
self.backtesting_live_model_bkp_path = Path()
self.backtest_predictions_folder: str = "backtesting_predictions"
self.live = live
self.pair = pair
@@ -90,7 +87,9 @@ class FreqaiDataKitchen:
self.full_path = self.get_full_models_path(self.config)
if self.backtest_live_models:
if self.pair:
if self.pair and not (
self.freqai_config.get("backtest_using_historic_predictions", True)
):
self.set_timerange_from_ready_models()
(self.training_timeranges,
self.backtesting_timeranges) = self.split_timerange_live_models()
@@ -1488,101 +1487,30 @@ class FreqaiDataKitchen:
return dataframe
def set_backtesting_live_dataframe_folder_path(self) -> None:
    """
    Point ``backtesting_live_model_folder_path`` at the "live_data"
    subfolder of the backtest predictions folder.
    """
    predictions_root = self.full_path / self.backtest_predictions_folder
    self.backtesting_live_model_folder_path = Path(predictions_root / "live_data")
def set_backtesting_live_dataframe_path(self, pair: str) -> None:
    """
    Compute the primary and backup feather file paths that hold the live
    backtesting dataframe for *pair*, creating the folder when missing.
    :param pair: current pair, e.g. "BTC/USDT:USDT"
    """
    self.set_backtesting_live_dataframe_folder_path()
    folder = self.backtesting_live_model_folder_path
    if not folder.is_dir():
        folder.mkdir(parents=True, exist_ok=True)
    # Strip the settlement-currency suffix and slashes for a safe filename.
    sanitized = pair.split(":")[0].replace("/", "_").lower()
    fname = f"live_backtesting_{sanitized}.feather"
    self.backtesting_live_model_path = Path(self.full_path / folder / fname)
    self.backtesting_live_model_bkp_path = Path(
        self.full_path / folder /
        fname.replace(".feather", ".backup.feather"))
def save_backtesting_live_dataframe(
    self, dataframe: DataFrame, pair: str
) -> None:
    """
    Append the newest candle of the live dataframe to the pair's feather
    file (creating it on the first call) and refresh the backup copy.
    :param dataframe: current live dataframe
    :param pair: current pair
    """
    self.set_backtesting_live_dataframe_path(pair)
    newest_row = dataframe.tail(1)
    if self.backtesting_live_model_path.is_file():
        merged = pd.concat([self.get_backtesting_live_dataframe(), newest_row])
        self.save_backtesting_live_dataframe_to_feather(merged)
    else:
        self.save_backtesting_live_dataframe_to_feather(newest_row)
    # Keep a backup so a crash mid-write does not lose the history.
    shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path)
def save_backtesting_live_dataframe_to_feather(self, dataframe: DataFrame):
    """Persist *dataframe* to the live backtesting feather file (lz4, level 9)."""
    flat = dataframe.reset_index(drop=True)
    flat.to_feather(self.backtesting_live_model_path,
                    compression_level=9, compression='lz4')
def get_backtesting_live_dataframe(self) -> DataFrame:
    """
    Load the dataframe saved by a previous dry/live run, falling back to
    the backup file when the primary one cannot be read.
    :return: saved dataframe from previous dry/run or live
    :raises OperationalException: when no saved file exists
    """
    if not self.backtesting_live_model_path.is_file():
        raise OperationalException(
            "Saved live backtesting dataframe file not found."
        )
    try:
        return read_feather(self.backtesting_live_model_path)
    except Exception:
        # Primary file corrupt/unreadable -> use the backup copy.
        return read_feather(self.backtesting_live_model_bkp_path)
def get_timerange_from_backtesting_live_dataframe(self) -> TimeRange:
"""
Returns timerange information based on live backtesting dataframe file
Returns timerange information based on historic predictions file
:return: timerange calculated from saved live data
"""
all_assets_start_dates = []
all_assets_end_dates = []
self.set_backtesting_live_dataframe_folder_path()
if not self.backtesting_live_model_folder_path.is_dir():
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
dd = FreqaiDataDrawer(Path(self.full_path), self.config)
if not dd.historic_predictions_path.is_file():
raise OperationalException(
'Saved live data not found. Saved lived data is required '
'Historic predictions not found. Historic predictions data is required '
'to run backtest with the freqai-backtest-live-models option '
'and save_live_data_backtest config option as true'
'and backtest_using_historic_predictions config option as true'
)
for file_in_dir in self.backtesting_live_model_folder_path.iterdir():
if file_in_dir.is_file() and "backup" not in file_in_dir.name:
saved_dataframe = read_feather(file_in_dir)
all_assets_start_dates.append(saved_dataframe.date.min())
all_assets_end_dates.append(saved_dataframe.date.max())
start_date = min(all_assets_start_dates)
end_date = max(all_assets_end_dates)
dd.load_historic_predictions_from_disk()
all_pairs_end_dates = []
for pair in dd.historic_predictions:
pair_historic_data = dd.historic_predictions[pair]
all_pairs_end_dates.append(pair_historic_data.date_pred.max())
global_metadata = dd.load_global_metadata_from_disk()
start_date = datetime.fromtimestamp(int(global_metadata["start_dry_live_date"]))
end_date = max(all_pairs_end_dates)
# add 1 day to string timerange to ensure BT module will load all dataframe data
end_date = end_date + timedelta(days=1)
backtesting_timerange = TimeRange(

View File

@@ -53,6 +53,7 @@ class IFreqaiModel(ABC):
def __init__(self, config: Config) -> None:
self.config = config
self.metadata: Dict[str, Any] = {}
self.assert_config(self.config)
self.freqai_info: Dict[str, Any] = config["freqai"]
self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get(
@@ -67,10 +68,10 @@ class IFreqaiModel(ABC):
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
self.save_live_data_backtest: bool = self.freqai_info.get(
"save_live_data_backtest", False)
if self.save_live_data_backtest:
logger.info('Live configured to save data for backtest.')
self.backtest_using_historic_predictions: bool = self.freqai_info.get(
"backtest_using_historic_predictions", True)
if self.backtest_using_historic_predictions:
logger.info('Backtesting live models configured to use historic predictions.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
# set current candle to arbitrary historical date
@@ -103,6 +104,7 @@ class IFreqaiModel(ABC):
self.get_corr_dataframes: bool = True
self._threads: List[threading.Thread] = []
self._stop_event = threading.Event()
self.metadata = self.dd.load_global_metadata_from_disk()
record_params(config, self.full_path)
@@ -136,6 +138,7 @@ class IFreqaiModel(ABC):
self.inference_timer('start')
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
dk = self.start_live(dataframe, metadata, strategy, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
# For backtesting, each pair enters and then gets trained for each window along the
# sliding window defined by "train_period_days" (training window) and "live_retrain_hours"
@@ -145,14 +148,19 @@ class IFreqaiModel(ABC):
elif not self.follow_mode:
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
if self.dk.backtest_live_models:
logger.info(
f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (live models)")
if self.backtest_using_historic_predictions:
logger.info(
"Backtesting using historic predictions (live models)")
else:
logger.info(
f"Backtesting {len(self.dk.backtesting_timeranges)} "
"timeranges (live models)")
else:
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
if not self.save_live_data_backtest:
if not self.backtest_using_historic_predictions:
dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
else:
@@ -163,8 +171,7 @@ class IFreqaiModel(ABC):
self.clean_up()
if self.live:
self.inference_timer('stop', metadata["pair"])
if self.save_live_data_backtest:
dk.save_backtesting_live_dataframe(dataframe, metadata["pair"])
self.set_start_dry_live_date(dataframe)
return dataframe
@@ -335,14 +342,12 @@ class IFreqaiModel(ABC):
"""
pair = metadata["pair"]
dk.return_dataframe = dataframe
self.dk.set_backtesting_live_dataframe_path(pair)
saved_dataframe = self.dk.get_backtesting_live_dataframe()
columns_to_drop = list(set(dk.return_dataframe.columns).difference(
["date", "open", "high", "low", "close", "volume"]))
saved_dataframe = saved_dataframe.drop(
columns=["open", "high", "low", "close", "volume"])
saved_dataframe = self.dd.historic_predictions[pair]
columns_to_drop = list(set(saved_dataframe.columns).intersection(
dk.return_dataframe.columns))
dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
dk.return_dataframe = pd.merge(dk.return_dataframe, saved_dataframe, how='left', on='date')
dk.return_dataframe = pd.merge(
dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred")
# dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0)
return dk
@@ -886,6 +891,22 @@ class IFreqaiModel(ABC):
return
def update_metadata(self, metadata: Dict[str, Any]):
    """
    Persist the new global metadata json and keep the in-memory copy in sync.
    :param metadata: new global metadata dict
    """
    self.dd.save_global_metadata_to_disk(metadata)
    self.metadata = metadata
def set_start_dry_live_date(self, live_dataframe: DataFrame):
    """
    Record the timestamp of the newest candle as the dry/live start date.
    Only written once: an existing value in the metadata is never overwritten.
    :param live_dataframe: dataframe whose last "date" value is recorded
    """
    if "start_dry_live_date" in self.metadata:
        return
    last_date = live_dataframe.tail(1)["date"].values[0]
    updated = self.metadata
    updated["start_dry_live_date"] = int(pd.to_datetime(last_date).timestamp())
    self.update_metadata(updated)
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.

View File

@@ -230,7 +230,7 @@ def get_timerange_backtest_live_models(config: Config) -> str:
dk = FreqaiDataKitchen(config)
models_path = dk.get_full_models_path(config)
timerange: TimeRange = TimeRange()
if not config.get("save_live_data_backtest", False):
if not config.get("freqai", {}).get("backtest_using_historic_predictions", True):
timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path)
else:
timerange = dk.get_timerange_from_backtesting_live_dataframe()