From d09157efb89a947e24451babd5b1ff11f3fa58e0 Mon Sep 17 00:00:00 2001
From: Wagner Costa <wagner@wagnercosta.com.br>
Date: Tue, 22 Nov 2022 15:15:42 -0300
Subject: [PATCH] Update code to use one prediction file per pair

---
 freqtrade/freqai/data_kitchen.py      | 39 ++++++++++++++++++---------
 freqtrade/freqai/freqai_interface.py  |  1 +
 tests/freqai/test_freqai_interface.py | 14 ++++++++--
 3 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index f75fd3dd8..65f3483af 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -9,7 +9,7 @@ from typing import Any, Dict, List, Tuple
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
-from pandas import DataFrame
+from pandas import DataFrame, HDFStore
 from scipy import stats
 from sklearn import linear_model
 from sklearn.cluster import DBSCAN
@@ -74,6 +74,7 @@ class FreqaiDataKitchen:
         self.training_features_list: List = []
         self.model_filename: str = ""
         self.backtesting_results_path = Path()
+        self.backtesting_h5_data: HDFStore = {}
         self.backtest_predictions_folder: str = "backtesting_predictions"
         self.live = live
         self.pair = pair
@@ -1319,7 +1320,7 @@ class FreqaiDataKitchen:
         if not full_predictions_folder.is_dir():
             full_predictions_folder.mkdir(parents=True, exist_ok=True)
 
-        append_df.to_hdf(self.backtesting_results_path, key='append_df', mode='w')
+        append_df.to_hdf(self.backtesting_results_path, key=self.model_filename)
 
     def get_backtesting_prediction(
         self
@@ -1327,9 +1328,26 @@ class FreqaiDataKitchen:
         """
         Get prediction dataframe from h5 file format
         """
-        append_df = pd.read_hdf(self.backtesting_results_path)
+        append_df = self.backtesting_h5_data[self.model_filename]
         return append_df
 
+    def load_prediction_pair_file(self) -> None:
+        """
+        Point the kitchen at the pair's prediction file and open it if it exists.
+        Any previously opened store is closed first so its file handle is not leaked.
+        """
+        if isinstance(self.backtesting_h5_data, HDFStore):
+            self.backtesting_h5_data.close()
+        pair_file_name = self.pair.split(':')[0].replace('/', '_').lower()
+        path_to_predictionfile = Path(self.full_path /
+                                      self.backtest_predictions_folder /
+                                      f"{pair_file_name}_prediction.h5")
+        self.backtesting_results_path = path_to_predictionfile
+        if path_to_predictionfile.is_file():
+            self.backtesting_h5_data = pd.HDFStore(path_to_predictionfile)
+        else:
+            self.backtesting_h5_data = {}
+
     def check_if_backtest_prediction_is_valid(
         self,
         len_backtest_df: int
@@ -1341,17 +1359,11 @@ class FreqaiDataKitchen:
         :return:
         :boolean: whether the prediction file is valid.
         """
-        path_to_predictionfile = Path(self.full_path /
-                                      self.backtest_predictions_folder /
-                                      f"{self.model_filename}_prediction.h5")
-        self.backtesting_results_path = path_to_predictionfile
-
-        file_exists = path_to_predictionfile.is_file()
-
-        if file_exists:
+        if self.model_filename in self.backtesting_h5_data:
             append_df = self.get_backtesting_prediction()
             if len(append_df) == len_backtest_df and 'date' in append_df:
-                logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
+                logger.info("Found backtesting prediction file "
+                            f"at {self.backtesting_results_path.name}")
                 return True
             else:
                 logger.info("A new backtesting prediction file is required. "
@@ -1360,7 +1372,8 @@ class FreqaiDataKitchen:
                 return False
         else:
             logger.info(
-                f"Could not find backtesting prediction file at {path_to_predictionfile}"
+                "Could not find backtesting prediction file "
+                f"at {self.backtesting_results_path.name}"
             )
             return False
 
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 80348fda8..21851b3b6 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -260,6 +260,7 @@ class IFreqaiModel(ABC):
 
         self.pair_it += 1
         train_it = 0
+        dk.load_prediction_pair_file()
         # Loop enforcing the sliding window training/backtesting paradigm
         # tr_train is the training time range e.g. 1 historical month
         # tr_backtest is the backtesting time range e.g. the week directly
diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py
index 66b3bac17..6e2e774fe 100644
--- a/tests/freqai/test_freqai_interface.py
+++ b/tests/freqai/test_freqai_interface.py
@@ -263,7 +263,9 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
 
     df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
 
-    metadata = {"pair": "ADA/BTC"}
+    pair = "ADA/BTC"
+    metadata = {"pair": pair}
+    freqai.dk.pair = pair
     freqai.start_backtesting(df, metadata, freqai.dk)
     model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()]
 
@@ -286,6 +288,9 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
 
     df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
 
+    pair = "ADA/BTC"
+    metadata = {"pair": pair}
+    freqai.dk.pair = pair
     freqai.start_backtesting(df, metadata, freqai.dk)
 
     assert log_has_re(
@@ -293,9 +298,14 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
         caplog,
     )
 
+    pair = "ETH/BTC"
+    metadata = {"pair": pair}
+    freqai.dk.pair = pair
+    freqai.start_backtesting(df, metadata, freqai.dk)
+
     path = (freqai.dd.full_path / freqai.dk.backtest_predictions_folder)
     prediction_files = [x for x in path.iterdir() if x.is_file()]
-    assert len(prediction_files) == 5
+    assert len(prediction_files) == 2
 
     shutil.rmtree(Path(freqai.dk.full_path))