Update code to use one prediction file per pair

This commit is contained in:
Wagner Costa 2022-11-22 15:15:42 -03:00
parent 91779bb28b
commit d09157efb8
3 changed files with 39 additions and 15 deletions

View File

@ -9,7 +9,7 @@ from typing import Any, Dict, List, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame
from pandas import DataFrame, HDFStore
from scipy import stats
from sklearn import linear_model
from sklearn.cluster import DBSCAN
@ -74,6 +74,7 @@ class FreqaiDataKitchen:
self.training_features_list: List = []
self.model_filename: str = ""
self.backtesting_results_path = Path()
self.backtesting_h5_data: HDFStore = {}
self.backtest_predictions_folder: str = "backtesting_predictions"
self.live = live
self.pair = pair
@ -1319,7 +1320,7 @@ class FreqaiDataKitchen:
if not full_predictions_folder.is_dir():
full_predictions_folder.mkdir(parents=True, exist_ok=True)
append_df.to_hdf(self.backtesting_results_path, key='append_df', mode='w')
append_df.to_hdf(self.backtesting_results_path, key=self.model_filename)
def get_backtesting_prediction(
self
@ -1327,9 +1328,26 @@ class FreqaiDataKitchen:
"""
Get prediction dataframe from h5 file format
"""
append_df = pd.read_hdf(self.backtesting_results_path)
append_df = self.backtesting_h5_data[self.model_filename]
return append_df
def load_prediction_pair_file(
    self
) -> None:
    """
    Load the backtesting prediction file for the current pair, if it exists.

    The file name is derived from ``self.pair``: everything after the first
    ``:`` is dropped, ``/`` becomes ``_`` and the result is lower-cased.
    The file is looked up at
    ``self.full_path / self.backtest_predictions_folder / "<pair>_prediction.h5"``.

    Side effects:
        * ``self.backtesting_results_path`` is set to that path, whether or
          not the file exists.
        * ``self.backtesting_h5_data`` is set to an open ``pd.HDFStore`` on
          the file when it exists, otherwise to an empty dict.
        * Any store left in ``self.backtesting_h5_data`` by an earlier call
          is closed first, so switching pairs does not leak open HDF5 file
          handles.
    """
    # Release whatever the previous pair left open. The attribute may be a
    # plain dict (no file found last time) or missing altogether, so only
    # call ``close`` when the object actually offers it.
    previous_store = getattr(self, "backtesting_h5_data", None)
    close_previous = getattr(previous_store, "close", None)
    if callable(close_previous):
        close_previous()
    # Never leave a closed store behind if opening the new file fails below.
    self.backtesting_h5_data = {}

    pair_file_name = self.pair.split(':')[0].replace('/', '_').lower()
    path_to_predictionfile = Path(self.full_path /
                                  self.backtest_predictions_folder /
                                  f"{pair_file_name}_prediction.h5")
    self.backtesting_results_path = path_to_predictionfile

    if path_to_predictionfile.is_file():
        self.backtesting_h5_data = pd.HDFStore(path_to_predictionfile)
def check_if_backtest_prediction_is_valid(
self,
len_backtest_df: int
@ -1341,17 +1359,11 @@ class FreqaiDataKitchen:
:return:
:boolean: whether the prediction file is valid.
"""
path_to_predictionfile = Path(self.full_path /
self.backtest_predictions_folder /
f"{self.model_filename}_prediction.h5")
self.backtesting_results_path = path_to_predictionfile
file_exists = path_to_predictionfile.is_file()
if file_exists:
if self.model_filename in self.backtesting_h5_data:
append_df = self.get_backtesting_prediction()
if len(append_df) == len_backtest_df and 'date' in append_df:
logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
logger.info("Found backtesting prediction file "
f"at {self.backtesting_results_path.name}")
return True
else:
logger.info("A new backtesting prediction file is required. "
@ -1360,7 +1372,8 @@ class FreqaiDataKitchen:
return False
else:
logger.info(
f"Could not find backtesting prediction file at {path_to_predictionfile}"
"Could not find backtesting prediction file "
f"at {self.backtesting_results_path.name}"
)
return False

View File

@ -260,6 +260,7 @@ class IFreqaiModel(ABC):
self.pair_it += 1
train_it = 0
dk.load_prediction_pair_file()
# Loop enforcing the sliding window training/backtesting paradigm
# tr_train is the training time range e.g. 1 historical month
# tr_backtest is the backtesting time range e.g. the week directly

View File

@ -263,7 +263,9 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
metadata = {"pair": "ADA/BTC"}
pair = "ADA/BTC"
metadata = {"pair": pair}
freqai.dk.pair = pair
freqai.start_backtesting(df, metadata, freqai.dk)
model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()]
@ -286,6 +288,9 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
pair = "ADA/BTC"
metadata = {"pair": pair}
freqai.dk.pair = pair
freqai.start_backtesting(df, metadata, freqai.dk)
assert log_has_re(
@ -293,9 +298,14 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
caplog,
)
pair = "ETH/BTC"
metadata = {"pair": pair}
freqai.dk.pair = pair
freqai.start_backtesting(df, metadata, freqai.dk)
path = (freqai.dd.full_path / freqai.dk.backtest_predictions_folder)
prediction_files = [x for x in path.iterdir() if x.is_file()]
assert len(prediction_files) == 5
assert len(prediction_files) == 2
shutil.rmtree(Path(freqai.dk.full_path))