Update code to use one prediction file per pair

This commit is contained in:
Wagner Costa 2022-11-22 15:15:42 -03:00
parent 91779bb28b
commit d09157efb8
3 changed files with 39 additions and 15 deletions

View File

@ -9,7 +9,7 @@ from typing import Any, Dict, List, Tuple
import numpy as np import numpy as np
import numpy.typing as npt import numpy.typing as npt
import pandas as pd import pandas as pd
from pandas import DataFrame from pandas import DataFrame, HDFStore
from scipy import stats from scipy import stats
from sklearn import linear_model from sklearn import linear_model
from sklearn.cluster import DBSCAN from sklearn.cluster import DBSCAN
@ -74,6 +74,7 @@ class FreqaiDataKitchen:
self.training_features_list: List = [] self.training_features_list: List = []
self.model_filename: str = "" self.model_filename: str = ""
self.backtesting_results_path = Path() self.backtesting_results_path = Path()
self.backtesting_h5_data: HDFStore = {}
self.backtest_predictions_folder: str = "backtesting_predictions" self.backtest_predictions_folder: str = "backtesting_predictions"
self.live = live self.live = live
self.pair = pair self.pair = pair
@ -1319,7 +1320,7 @@ class FreqaiDataKitchen:
if not full_predictions_folder.is_dir(): if not full_predictions_folder.is_dir():
full_predictions_folder.mkdir(parents=True, exist_ok=True) full_predictions_folder.mkdir(parents=True, exist_ok=True)
append_df.to_hdf(self.backtesting_results_path, key='append_df', mode='w') append_df.to_hdf(self.backtesting_results_path, key=self.model_filename)
def get_backtesting_prediction( def get_backtesting_prediction(
self self
@ -1327,9 +1328,26 @@ class FreqaiDataKitchen:
""" """
Get prediction dataframe from h5 file format Get prediction dataframe from h5 file format
""" """
append_df = pd.read_hdf(self.backtesting_results_path) append_df = self.backtesting_h5_data[self.model_filename]
return append_df return append_df
def load_prediction_pair_file(
    self
) -> None:
    """
    Load the per-pair backtesting prediction file if it exists.

    The file name is derived from ``self.pair``: everything from the first
    ':' onwards is dropped, '/' is replaced by '_' and the result is
    lowercased, then suffixed with ``_prediction.h5``. The resulting path
    (under ``self.full_path / self.backtest_predictions_folder``) is stored
    in ``self.backtesting_results_path``.

    ``self.backtesting_h5_data`` is set to an open ``pd.HDFStore`` on that
    file when it exists, otherwise to an empty dict.
    """
    # Close the store left open by a previous call (this method runs once per
    # pair), otherwise every processed pair leaks an open HDF5 file handle.
    # The empty-dict placeholder has no ``close`` and is simply skipped.
    previous_store = getattr(self, "backtesting_h5_data", None)
    close_previous = getattr(previous_store, "close", None)
    if callable(close_previous):
        close_previous()

    pair_file_name = self.pair.split(':')[0].replace('/', '_').lower()
    path_to_predictionfile = Path(self.full_path /
                                  self.backtest_predictions_folder /
                                  f"{pair_file_name}_prediction.h5")
    self.backtesting_results_path = path_to_predictionfile

    if path_to_predictionfile.is_file():
        # Kept open so later lookups by model key do not reopen the file.
        self.backtesting_h5_data = pd.HDFStore(path_to_predictionfile)
    else:
        self.backtesting_h5_data = {}
def check_if_backtest_prediction_is_valid( def check_if_backtest_prediction_is_valid(
self, self,
len_backtest_df: int len_backtest_df: int
@ -1341,17 +1359,11 @@ class FreqaiDataKitchen:
:return: :return:
:boolean: whether the prediction file is valid. :boolean: whether the prediction file is valid.
""" """
path_to_predictionfile = Path(self.full_path / if self.model_filename in self.backtesting_h5_data:
self.backtest_predictions_folder /
f"{self.model_filename}_prediction.h5")
self.backtesting_results_path = path_to_predictionfile
file_exists = path_to_predictionfile.is_file()
if file_exists:
append_df = self.get_backtesting_prediction() append_df = self.get_backtesting_prediction()
if len(append_df) == len_backtest_df and 'date' in append_df: if len(append_df) == len_backtest_df and 'date' in append_df:
logger.info(f"Found backtesting prediction file at {path_to_predictionfile}") logger.info("Found backtesting prediction file "
f"at {self.backtesting_results_path.name}")
return True return True
else: else:
logger.info("A new backtesting prediction file is required. " logger.info("A new backtesting prediction file is required. "
@ -1360,7 +1372,8 @@ class FreqaiDataKitchen:
return False return False
else: else:
logger.info( logger.info(
f"Could not find backtesting prediction file at {path_to_predictionfile}" "Could not find backtesting prediction file "
f"at {self.backtesting_results_path.name}"
) )
return False return False

View File

@ -260,6 +260,7 @@ class IFreqaiModel(ABC):
self.pair_it += 1 self.pair_it += 1
train_it = 0 train_it = 0
dk.load_prediction_pair_file()
# Loop enforcing the sliding window training/backtesting paradigm # Loop enforcing the sliding window training/backtesting paradigm
# tr_train is the training time range e.g. 1 historical month # tr_train is the training time range e.g. 1 historical month
# tr_backtest is the backtesting time range e.g. the week directly # tr_backtest is the backtesting time range e.g. the week directly

View File

@ -263,7 +263,9 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
metadata = {"pair": "ADA/BTC"} pair = "ADA/BTC"
metadata = {"pair": pair}
freqai.dk.pair = pair
freqai.start_backtesting(df, metadata, freqai.dk) freqai.start_backtesting(df, metadata, freqai.dk)
model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()] model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()]
@ -286,6 +288,9 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC") df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
pair = "ADA/BTC"
metadata = {"pair": pair}
freqai.dk.pair = pair
freqai.start_backtesting(df, metadata, freqai.dk) freqai.start_backtesting(df, metadata, freqai.dk)
assert log_has_re( assert log_has_re(
@ -293,9 +298,14 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
caplog, caplog,
) )
pair = "ETH/BTC"
metadata = {"pair": pair}
freqai.dk.pair = pair
freqai.start_backtesting(df, metadata, freqai.dk)
path = (freqai.dd.full_path / freqai.dk.backtest_predictions_folder) path = (freqai.dd.full_path / freqai.dk.backtest_predictions_folder)
prediction_files = [x for x in path.iterdir() if x.is_file()] prediction_files = [x for x in path.iterdir() if x.is_file()]
assert len(prediction_files) == 5 assert len(prediction_files) == 2
shutil.rmtree(Path(freqai.dk.full_path)) shutil.rmtree(Path(freqai.dk.full_path))