backtest saved dataframe from live

This commit is contained in:
Wagner Costa Santos 2022-11-12 14:37:23 -03:00
parent f9c6c538be
commit 27fa9f1f4e
3 changed files with 154 additions and 4 deletions

View File

@ -9,7 +9,7 @@ from typing import Any, Dict, List, Tuple
import numpy as np import numpy as np
import numpy.typing as npt import numpy.typing as npt
import pandas as pd import pandas as pd
from pandas import DataFrame from pandas import DataFrame, read_feather
from scipy import stats from scipy import stats
from sklearn import linear_model from sklearn import linear_model
from sklearn.cluster import DBSCAN from sklearn.cluster import DBSCAN
@ -73,6 +73,9 @@ class FreqaiDataKitchen:
self.training_features_list: List = [] self.training_features_list: List = []
self.model_filename: str = "" self.model_filename: str = ""
self.backtesting_results_path = Path() self.backtesting_results_path = Path()
self.backtesting_live_model_folder_path = Path()
self.backtesting_live_model_path = Path()
self.backtesting_live_model_bkp_path = Path()
self.backtest_predictions_folder: str = "backtesting_predictions" self.backtest_predictions_folder: str = "backtesting_predictions"
self.live = live self.live = live
self.pair = pair self.pair = pair
@ -1488,3 +1491,107 @@ class FreqaiDataKitchen:
dataframe.columns = dataframe.columns.str.replace(c, "") dataframe.columns = dataframe.columns.str.replace(c, "")
return dataframe return dataframe
def set_backtesting_live_dataframe_folder_path(
self
) -> None:
"""
Set live backtesting dataframe path
:param pair: current pair
"""
self.backtesting_live_model_folder_path = Path(
self.full_path / self.backtest_predictions_folder / "live_data")
def set_backtesting_live_dataframe_path(
self, pair: str
) -> None:
"""
Set live backtesting dataframe path
:param pair: current pair
"""
self.set_backtesting_live_dataframe_folder_path()
if not self.backtesting_live_model_folder_path.is_dir():
self.backtesting_live_model_folder_path.mkdir(parents=True, exist_ok=True)
pair_path = pair.split(":")[0].replace("/", "_").lower()
file_name = f"live_backtesting_{pair_path}.feather"
path_to_live_backtesting_file = Path(self.full_path /
self.backtesting_live_model_folder_path /
file_name)
path_to_live_backtesting_bkp_file = Path(self.full_path /
self.backtesting_live_model_folder_path /
file_name.replace(".feather", ".backup.feather"))
self.backtesting_live_model_path = path_to_live_backtesting_file
self.backtesting_live_model_bkp_path = path_to_live_backtesting_bkp_file
def save_backtesting_live_dataframe(
self, dataframe: DataFrame, pair: str
) -> None:
"""
Save live backtesting dataframe to feather file format
:param dataframe: current live dataframe
:param pair: current pair
"""
self.set_backtesting_live_dataframe_path(pair)
last_row_df = dataframe.tail(1)
if self.backtesting_live_model_path.is_file():
saved_dataframe = self.get_backtesting_live_dataframe()
concat_dataframe = pd.concat([saved_dataframe, last_row_df])
concat_dataframe.reset_index(drop=True).to_feather(
self.backtesting_live_model_path, compression_level=9, compression='lz4')
else:
last_row_df.reset_index(drop=True).to_feather(
self.backtesting_live_model_path, compression_level=9, compression='lz4')
shutil.copy(self.backtesting_live_model_path, self.backtesting_live_model_bkp_path)
def get_backtesting_live_dataframe(
self
) -> DataFrame:
"""
Get live backtesting dataframe from feather file format
return: saved dataframe from previous dry/run or live
"""
if self.backtesting_live_model_path.is_file():
saved_dataframe = DataFrame()
try:
saved_dataframe = read_feather(self.backtesting_live_model_path)
except Exception:
saved_dataframe = read_feather(self.backtesting_live_model_bkp_path)
return saved_dataframe
else:
raise OperationalException(
"Saved pair file not found"
)
def get_timerange_from_backtesting_live_dataframe(
self) -> TimeRange:
"""
Returns timerange information based on a FreqAI model directory
:param models_path: FreqAI model path
:return: timerange calculated from saved live data
"""
all_assets_start_dates = []
all_assets_end_dates = []
self.set_backtesting_live_dataframe_folder_path()
if not self.backtesting_live_model_folder_path.is_dir():
raise OperationalException(
'Saved live data not found. Saved lived data is required '
'to run backtest with the freqai-backtest-live-models option '
'and save_live_data_backtest config option as true'
)
for file_in_dir in self.backtesting_live_model_folder_path.iterdir():
if file_in_dir.is_file() and "backup" not in file_in_dir.name:
saved_dataframe = read_feather(file_in_dir)
all_assets_start_dates.append(saved_dataframe.date.min())
all_assets_end_dates.append(saved_dataframe.date.max())
start_date = min(all_assets_start_dates)
end_date = min(all_assets_end_dates)
# add 1 day to string timerange to ensure BT module will load all dataframe data
end_date = end_date + timedelta(days=1)
backtesting_timerange = TimeRange(
'date', 'date', int(start_date.timestamp()), int(end_date.timestamp())
)
return backtesting_timerange

View File

@ -67,6 +67,11 @@ class IFreqaiModel(ABC):
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True) self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
if self.save_backtest_models: if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.') logger.info('Backtesting module configured to save all models.')
self.save_live_data_backtest: bool = self.freqai_info.get(
"save_live_data_backtest", True)
if self.save_live_data_backtest:
logger.info('Live configured to save data for backtest.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode) self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
# set current candle to arbitrary historical date # set current candle to arbitrary historical date
self.current_candle: datetime = datetime.fromtimestamp(637887600, tz=timezone.utc) self.current_candle: datetime = datetime.fromtimestamp(637887600, tz=timezone.utc)
@ -147,12 +152,20 @@ class IFreqaiModel(ABC):
dataframe = self.dk.use_strategy_to_populate_indicators( dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"] strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
) )
if not self.save_live_data_backtest:
dk = self.start_backtesting(dataframe, metadata, self.dk) dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe) dataframe = dk.remove_features_from_df(dk.return_dataframe)
else:
dk = self.start_backtesting_from_live_saved_files(
dataframe, metadata, self.dk)
dataframe = dk.return_dataframe
self.clean_up() self.clean_up()
if self.live: if self.live:
self.inference_timer('stop', metadata["pair"]) self.inference_timer('stop', metadata["pair"])
if self.save_live_data_backtest:
dk.save_backtesting_live_dataframe(dataframe, metadata["pair"])
return dataframe return dataframe
def clean_up(self): def clean_up(self):
@ -310,6 +323,31 @@ class IFreqaiModel(ABC):
return dk return dk
def start_backtesting_from_live_saved_files(
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
) -> FreqaiDataKitchen:
"""
:param dataframe: DataFrame = strategy passed dataframe
:param metadata: Dict = pair metadata
:param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
:return:
FreqaiDataKitchen = Data management/analysis tool associated to present pair only
"""
pair = metadata["pair"]
dk.return_dataframe = dataframe
dk.return_dataframe = dataframe
self.dk.set_backtesting_live_dataframe_path(pair)
saved_dataframe = self.dk.get_backtesting_live_dataframe()
columns_to_drop = list(set(dk.return_dataframe.columns).difference(
["date", "open", "high", "low", "close", "volume"]))
saved_dataframe = saved_dataframe.drop(
columns=["open", "high", "low", "close", "volume"])
dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
dk.return_dataframe = pd.merge(dk.return_dataframe, saved_dataframe, how='left', on='date')
# dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0)
return dk
def start_live( def start_live(
self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen
) -> FreqaiDataKitchen: ) -> FreqaiDataKitchen:

View File

@ -229,7 +229,12 @@ def get_timerange_backtest_live_models(config: Config) -> str:
""" """
dk = FreqaiDataKitchen(config) dk = FreqaiDataKitchen(config)
models_path = dk.get_full_models_path(config) models_path = dk.get_full_models_path(config)
timerange: TimeRange = TimeRange()
if not config.get("save_live_data_backtest", True):
timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path) timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path)
else:
timerange = dk.get_timerange_from_backtesting_live_dataframe()
start_date = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) start_date = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
end_date = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) end_date = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
tr = f"{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}" tr = f"{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}"