From df51da22ee699e9a362d980747dba28e578d6c47 Mon Sep 17 00:00:00 2001
From: Wagner Costa Santos <wagner@wagnercosta.com.br>
Date: Wed, 31 Aug 2022 11:23:48 -0300
Subject: [PATCH 1/5] refactoring freqai backtesting

---
 freqtrade/freqai/data_kitchen.py     | 37 ++++++++++-
 freqtrade/freqai/freqai_interface.py | 98 ++++++++++++++++++++++++----
 2 files changed, 119 insertions(+), 16 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 763a07375..80b795b8e 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -1,6 +1,7 @@
 import copy
 import datetime
 import logging
+import os
 import shutil
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
@@ -780,9 +781,10 @@ class FreqaiDataKitchen:
         weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
         return weights
 
-    def append_predictions(self, predictions: DataFrame, do_predict: npt.ArrayLike) -> None:
+    def get_predictions_to_append(self, predictions: DataFrame,
+                                  do_predict: npt.ArrayLike) -> DataFrame:
         """
-        Append backtest prediction from current backtest period to all previous periods
+        Get backtest prediction from current backtest period
         """
 
         append_df = DataFrame()
@@ -797,12 +799,19 @@ class FreqaiDataKitchen:
         if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
             append_df["DI_values"] = self.DI_values
 
+        return append_df
+
+    def append_predictions(self, append_df: DataFrame) -> None:
+        """
+        Append backtest prediction from current backtest period to all previous periods
+        """
+
         if self.full_df.empty:
             self.full_df = append_df
         else:
             self.full_df = pd.concat([self.full_df, append_df], axis=0)
 
-        return
+        return append_df
 
     def fill_predictions(self, dataframe):
         """
@@ -1089,3 +1098,25 @@ class FreqaiDataKitchen:
         if self.unique_classes:
             for label in self.unique_classes:
                 self.unique_class_list += list(self.unique_classes[label])
+
+    def save_backtesting_prediction(
+        self, file_name: str, root_folder: str, append_df: DataFrame
+    ) -> None:
+
+        """
+        Save prediction dataframe from backtesting to h5 file format
+        :param file_name: h5 file name
+        :param root_folder: folder to save h5 file
+        """
+        os.makedirs(root_folder, exist_ok=True)
+        append_df.to_hdf(file_name, key='append_df', mode='w')
+
+    def get_backtesting_prediction(self, prediction_file_name: str) -> DataFrame:
+        """
+        Retrive from disk the prediction dataframe
+        :param prediction_file_name: prediction file full path
+        :return:
+        :Dataframe: Backtesting prediction from current backtesting period
+        """
+        append_df = pd.read_hdf(prediction_file_name)
+        return append_df
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 4106f24e0..d396113e8 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -224,28 +224,50 @@ class IFreqaiModel(ABC):
                 "trains"
             )
 
+            trained_timestamp_int = int(trained_timestamp.stopts)
             dk.data_path = Path(
                 dk.full_path
                 /
-                f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}"
+                f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
                 )
-            if not self.model_exists(
-                metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts)
+
+            if self.backtest_prediction_exists(
+                metadata["pair"], dk, trained_timestamp=trained_timestamp_int
             ):
-                dk.find_features(dataframe_train)
-                self.model = self.train(dataframe_train, metadata["pair"], dk)
-                self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
-                    trained_timestamp.stopts)
-                dk.set_new_model_names(metadata["pair"], trained_timestamp)
-                self.dd.save_data(self.model, metadata["pair"], dk)
+                prediction_filename, _ = self.get_backtesting_prediction_file_name(
+                    metadata["pair"],
+                    dk,
+                    trained_timestamp=int(trained_timestamp.stopts))
+
+                append_df = dk.get_backtesting_prediction(prediction_filename)
+                dk.append_predictions(append_df)
             else:
-                self.model = self.dd.load_data(metadata["pair"], dk)
+                if not self.model_exists(
+                    metadata["pair"], dk, trained_timestamp=trained_timestamp_int
+                ):
+                    dk.find_features(dataframe_train)
+                    self.model = self.train(dataframe_train, metadata["pair"], dk)
+                    self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
+                        trained_timestamp.stopts)
+                    dk.set_new_model_names(metadata["pair"], trained_timestamp)
+                    self.dd.save_data(self.model, metadata["pair"], dk)
+                else:
+                    self.model = self.dd.load_data(metadata["pair"], dk)
 
-            self.check_if_feature_list_matches_strategy(dataframe_train, dk)
+                self.check_if_feature_list_matches_strategy(dataframe_train, dk)
 
-            pred_df, do_preds = self.predict(dataframe_backtest, dk)
+                pred_df, do_preds = self.predict(dataframe_backtest, dk)
+                append_df = dk.get_predictions_to_append(pred_df, do_preds)
+                dk.append_predictions(append_df)
 
-            dk.append_predictions(pred_df, do_preds)
+                prediction_file_name, root_prediction = self.get_backtesting_prediction_file_name(
+                    metadata["pair"],
+                    dk,
+                    trained_timestamp_int)
+
+                dk.save_backtesting_prediction(prediction_file_name,
+                                               root_prediction,
+                                               append_df)
 
         dk.fill_predictions(dataframe)
 
@@ -643,6 +665,56 @@ class IFreqaiModel(ABC):
                 self.train_time = 0
         return
 
+    def backtest_prediction_exists(
+        self,
+        pair: str,
+        dk: FreqaiDataKitchen,
+        trained_timestamp: int,
+        scanning: bool = False,
+    ) -> bool:
+        """
+        Given a pair and path, check if a backtesting prediction already exists
+        :param pair: pair e.g. BTC/USD
+        :param path: path to prediction
+        :return:
+        :boolean: whether the prediction file exists or not.
+        """
+        if not self.live:
+            prediction_file_name, _ = self.get_backtesting_prediction_file_name(
+                pair, dk, trained_timestamp
+            )
+            path_to_predictionfile = Path(prediction_file_name)
+
+            file_exists = path_to_predictionfile.is_file()
+            if file_exists and not scanning:
+                logger.info("Found backtesting prediction file at %s", prediction_file_name)
+            elif not scanning:
+                logger.info(
+                    "Could not find backtesting prediction file at %s", prediction_file_name
+                )
+            return file_exists
+        else:
+            return False
+
+    def get_backtesting_prediction_file_name(
+        self, pair: str, dk: FreqaiDataKitchen, trained_timestamp: int
+    ):
+        """
+        Given a pair, path and a trained timestamp,
+        returns the path and name of the predictions file
+        :param pair: pair e.g. BTC/USD
+        :param dk: FreqaiDataKitchen
+        :trained_timestamp: current backtesting timestamp period
+        :return:
+        :str: prediction file name
+        :str: prediction root path
+        """
+        coin, _ = pair.split("/")
+        prediction_base_filename = f"{coin.lower()}_{trained_timestamp}"
+        root_prediction = f'{dk.full_path}/backtesting_predictions'
+        prediction_file_name = f"{root_prediction}/{prediction_base_filename}_predictions.h5"
+        return prediction_file_name, root_prediction
+
     # Following methods which are overridden by user made prediction models.
     # See freqai/prediction_models/CatboostPredictionModel.py for an example.
 

From 7bed0450d2c6ae90dd00d98a51b18701be6c4874 Mon Sep 17 00:00:00 2001
From: Wagner Costa Santos <wagner@wagnercosta.com.br>
Date: Wed, 31 Aug 2022 15:36:29 -0300
Subject: [PATCH 2/5] pr review - refactoring backtesting freqai

---
 freqtrade/freqai/data_kitchen.py      | 19 ++++++++++++++-----
 freqtrade/freqai/freqai_interface.py  | 12 ++++++------
 tests/freqai/test_freqai_interface.py |  8 ++++----
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 80b795b8e..8dc94e9ec 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -1,7 +1,6 @@
 import copy
 import datetime
 import logging
-import os
 import shutil
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
@@ -1108,15 +1107,25 @@ class FreqaiDataKitchen:
         :param file_name: h5 file name
         :param root_folder: folder to save h5 file
         """
-        os.makedirs(root_folder, exist_ok=True)
-        append_df.to_hdf(file_name, key='append_df', mode='w')
+        backtesting_root = Path(
+            self.full_path
+            / root_folder
+        )
+        if not backtesting_root.is_dir():
+            backtesting_root.mkdir(parents=True, exist_ok=True)
 
-    def get_backtesting_prediction(self, prediction_file_name: str) -> DataFrame:
+        full_file_path = Path(self.full_path / root_folder / file_name)
+        append_df.to_hdf(full_file_path, key='append_df', mode='w')
+
+    def get_backtesting_prediction(
+        self, root_prediction: str, prediction_file_name: str
+    ) -> DataFrame:
         """
         Retrive from disk the prediction dataframe
         :param prediction_file_name: prediction file full path
         :return:
         :Dataframe: Backtesting prediction from current backtesting period
         """
-        append_df = pd.read_hdf(prediction_file_name)
+        prediction_path = Path(self.full_path / root_prediction / prediction_file_name)
+        append_df = pd.read_hdf(prediction_path)
         return append_df
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index d396113e8..ad64588a7 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -234,12 +234,12 @@ class IFreqaiModel(ABC):
             if self.backtest_prediction_exists(
                 metadata["pair"], dk, trained_timestamp=trained_timestamp_int
             ):
-                prediction_filename, _ = self.get_backtesting_prediction_file_name(
+                prediction_filename, root_prediction = self.get_backtesting_prediction_file_name(
                     metadata["pair"],
                     dk,
                     trained_timestamp=int(trained_timestamp.stopts))
 
-                append_df = dk.get_backtesting_prediction(prediction_filename)
+                append_df = dk.get_backtesting_prediction(root_prediction, prediction_filename)
                 dk.append_predictions(append_df)
             else:
                 if not self.model_exists(
@@ -680,10 +680,10 @@ class IFreqaiModel(ABC):
         :boolean: whether the prediction file exists or not.
         """
         if not self.live:
-            prediction_file_name, _ = self.get_backtesting_prediction_file_name(
+            prediction_file_name, root_prediction = self.get_backtesting_prediction_file_name(
                 pair, dk, trained_timestamp
             )
-            path_to_predictionfile = Path(prediction_file_name)
+            path_to_predictionfile = Path(dk.full_path / root_prediction / prediction_file_name)
 
             file_exists = path_to_predictionfile.is_file()
             if file_exists and not scanning:
@@ -711,8 +711,8 @@ class IFreqaiModel(ABC):
         """
         coin, _ = pair.split("/")
         prediction_base_filename = f"{coin.lower()}_{trained_timestamp}"
-        root_prediction = f'{dk.full_path}/backtesting_predictions'
-        prediction_file_name = f"{root_prediction}/{prediction_base_filename}_predictions.h5"
+        root_prediction = 'backtesting_predictions'
+        prediction_file_name = f"{prediction_base_filename}_predictions.h5"
         return prediction_file_name, root_prediction
 
     # Following methods which are overridden by user made prediction models.
diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py
index 792ffc467..09f5d27ff 100644
--- a/tests/freqai/test_freqai_interface.py
+++ b/tests/freqai/test_freqai_interface.py
@@ -192,7 +192,7 @@ def test_start_backtesting(mocker, freqai_conf):
     freqai.start_backtesting(df, metadata, freqai.dk)
     model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()]
 
-    assert len(model_folders) == 5
+    assert len(model_folders) == 6
 
     shutil.rmtree(Path(freqai.dk.full_path))
 
@@ -217,7 +217,7 @@ def test_start_backtesting_subdaily_backtest_period(mocker, freqai_conf):
     metadata = {"pair": "LTC/BTC"}
     freqai.start_backtesting(df, metadata, freqai.dk)
     model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()]
-    assert len(model_folders) == 8
+    assert len(model_folders) == 9
 
     shutil.rmtree(Path(freqai.dk.full_path))
 
@@ -242,7 +242,7 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
     freqai.start_backtesting(df, metadata, freqai.dk)
     model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()]
 
-    assert len(model_folders) == 5
+    assert len(model_folders) == 6
 
     # without deleting the exiting folder structure, re-run
 
@@ -263,7 +263,7 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
     freqai.start_backtesting(df, metadata, freqai.dk)
 
     assert log_has_re(
-        "Found model at ",
+        "Found backtesting prediction ",
         caplog,
     )
 

From d6e115178a117e6f22f648b1d0def25b90eec471 Mon Sep 17 00:00:00 2001
From: Wagner Costa Santos <wagner@wagnercosta.com.br>
Date: Thu, 1 Sep 2022 07:09:23 -0300
Subject: [PATCH 3/5] refactoring freqai backtesting - remove duplicate code

---
 freqtrade/freqai/data_kitchen.py     | 32 ++++++--------
 freqtrade/freqai/freqai_interface.py | 62 ++++++----------------------
 2 files changed, 24 insertions(+), 70 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 361d9872d..f88e20223 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -69,6 +69,8 @@ class FreqaiDataKitchen:
         self.label_list: List = []
         self.training_features_list: List = []
         self.model_filename: str = ""
+        self.backtesting_results_path = Path()
+        self.backtesting_prediction_folder: str = "backtesting_predictions"
         self.live = live
         self.pair = pair
 
@@ -808,8 +810,6 @@ class FreqaiDataKitchen:
         else:
             self.full_df = pd.concat([self.full_df, append_df], axis=0)
 
-        return append_df
-
     def fill_predictions(self, dataframe):
         """
         Back fill values to before the backtesting range so that the dataframe matches size
@@ -1070,33 +1070,25 @@ class FreqaiDataKitchen:
                 self.unique_class_list += list(self.unique_classes[label])
 
     def save_backtesting_prediction(
-        self, file_name: str, root_folder: str, append_df: DataFrame
+        self, append_df: DataFrame
     ) -> None:
 
         """
         Save prediction dataframe from backtesting to h5 file format
-        :param file_name: h5 file name
-        :param root_folder: folder to save h5 file
+        :param append_df: dataframe for backtesting period
         """
-        backtesting_root = Path(
-            self.full_path
-            / root_folder
-        )
-        if not backtesting_root.is_dir():
-            backtesting_root.mkdir(parents=True, exist_ok=True)
+        full_predictions_folder = Path(self.full_path / self.backtesting_prediction_folder)
+        if not full_predictions_folder.is_dir():
+            full_predictions_folder.mkdir(parents=True, exist_ok=True)
 
-        full_file_path = Path(self.full_path / root_folder / file_name)
-        append_df.to_hdf(full_file_path, key='append_df', mode='w')
+        append_df.to_hdf(self.backtesting_results_path, key='append_df', mode='w')
 
     def get_backtesting_prediction(
-        self, root_prediction: str, prediction_file_name: str
+        self
     ) -> DataFrame:
+
         """
-        Retrive from disk the prediction dataframe
-        :param prediction_file_name: prediction file full path
-        :return:
-        :Dataframe: Backtesting prediction from current backtesting period
+        Get prediction dataframe from h5 file format
         """
-        prediction_path = Path(self.full_path / root_prediction / prediction_file_name)
-        append_df = pd.read_hdf(prediction_path)
+        append_df = pd.read_hdf(self.backtesting_results_path)
         return append_df
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 2297811b4..0a63e36ea 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -231,15 +231,11 @@ class IFreqaiModel(ABC):
                 f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
                 )
 
-            if self.backtest_prediction_exists(
-                metadata["pair"], dk, trained_timestamp=trained_timestamp_int
-            ):
-                prediction_filename, root_prediction = self.get_backtesting_prediction_file_name(
-                    metadata["pair"],
-                    dk,
-                    trained_timestamp=int(trained_timestamp.stopts))
+            coin, _ = metadata["pair"].split("/")
+            dk.model_filename = f"cb_{coin.lower()}_{trained_timestamp_int}"
 
-                append_df = dk.get_backtesting_prediction(root_prediction, prediction_filename)
+            if self.backtest_prediction_exists(dk):
+                append_df = dk.get_backtesting_prediction()
                 dk.append_predictions(append_df)
             else:
                 if not self.model_exists(
@@ -259,15 +255,7 @@ class IFreqaiModel(ABC):
                 pred_df, do_preds = self.predict(dataframe_backtest, dk)
                 append_df = dk.get_predictions_to_append(pred_df, do_preds)
                 dk.append_predictions(append_df)
-
-                prediction_file_name, root_prediction = self.get_backtesting_prediction_file_name(
-                    metadata["pair"],
-                    dk,
-                    trained_timestamp_int)
-
-                dk.save_backtesting_prediction(prediction_file_name,
-                                               root_prediction,
-                                               append_df)
+                dk.save_backtesting_prediction(append_df)
 
         dk.fill_predictions(dataframe)
 
@@ -478,11 +466,6 @@ class IFreqaiModel(ABC):
         :return:
         :boolean: whether the model file exists or not.
         """
-        coin, _ = pair.split("/")
-
-        if not self.live:
-            dk.model_filename = model_filename = f"cb_{coin.lower()}_{trained_timestamp}"
-
         path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib")
         file_exists = path_to_modelfile.is_file()
         if file_exists and not scanning:
@@ -661,23 +644,21 @@ class IFreqaiModel(ABC):
 
     def backtest_prediction_exists(
         self,
-        pair: str,
         dk: FreqaiDataKitchen,
-        trained_timestamp: int,
         scanning: bool = False,
     ) -> bool:
         """
-        Given a pair and path, check if a backtesting prediction already exists
-        :param pair: pair e.g. BTC/USD
-        :param path: path to prediction
+        Check if a backtesting prediction already exists
+        :param dk: FreqaiDataKitchen
         :return:
         :boolean: whether the prediction file exists or not.
         """
         if not self.live:
-            prediction_file_name, root_prediction = self.get_backtesting_prediction_file_name(
-                pair, dk, trained_timestamp
-            )
-            path_to_predictionfile = Path(dk.full_path / root_prediction / prediction_file_name)
+            prediction_file_name = dk.model_filename
+            path_to_predictionfile = Path(dk.full_path /
+                                          dk.backtesting_prediction_folder /
+                                          f"{prediction_file_name}_prediction.h5")
+            dk.backtesting_results_path = path_to_predictionfile
 
             file_exists = path_to_predictionfile.is_file()
             if file_exists and not scanning:
@@ -690,25 +671,6 @@ class IFreqaiModel(ABC):
         else:
             return False
 
-    def get_backtesting_prediction_file_name(
-        self, pair: str, dk: FreqaiDataKitchen, trained_timestamp: int
-    ):
-        """
-        Given a pair, path and a trained timestamp,
-        returns the path and name of the predictions file
-        :param pair: pair e.g. BTC/USD
-        :param dk: FreqaiDataKitchen
-        :trained_timestamp: current backtesting timestamp period
-        :return:
-        :str: prediction file name
-        :str: prediction root path
-        """
-        coin, _ = pair.split("/")
-        prediction_base_filename = f"{coin.lower()}_{trained_timestamp}"
-        root_prediction = 'backtesting_predictions'
-        prediction_file_name = f"{prediction_base_filename}_predictions.h5"
-        return prediction_file_name, root_prediction
-
     # Following methods which are overridden by user made prediction models.
     # See freqai/prediction_models/CatboostPredictionModel.py for an example.
 

From af5460cebf2b17ea440f1d7f037a7b8c88681d6a Mon Sep 17 00:00:00 2001
From: Wagner Costa Santos <wagner@wagnercosta.com.br>
Date: Fri, 2 Sep 2022 22:01:53 -0300
Subject: [PATCH 4/5] Add option to keep models only in memory for backtest

---
 config_examples/config_freqai.example.json | 3 ++-
 docs/freqai.md                             | 1 +
 freqtrade/freqai/freqai_interface.py       | 4 +++-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/config_examples/config_freqai.example.json b/config_examples/config_freqai.example.json
index 13c7a94ea..846d37a82 100644
--- a/config_examples/config_freqai.example.json
+++ b/config_examples/config_freqai.example.json
@@ -56,6 +56,7 @@
         "purge_old_models": true,
         "train_period_days": 15,
         "backtest_period_days": 7,
+        "backtest_save_model": true,
         "live_retrain_hours": 0,
         "identifier": "uniqe-id",
         "feature_parameters": {
@@ -94,4 +95,4 @@
     "internals": {
         "process_throttle_secs": 5
     }
-}
\ No newline at end of file
+}
diff --git a/docs/freqai.md b/docs/freqai.md
index 482a56d2b..6ee124b9b 100644
--- a/docs/freqai.md
+++ b/docs/freqai.md
@@ -93,6 +93,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
 | `purge_old_models` | Delete obsolete models (otherwise, all historic models will remain on disk). <br> **Datatype:** Boolean. Default: `False`.
 | `train_period_days` | **Required.** <br> Number of days to use for the training data (width of the sliding window). <br> **Datatype:** Positive integer.
 | `backtest_period_days` | **Required.** <br> Number of days to inference from the trained model before sliding the window defined above, and retraining the model. This can be fractional days, but beware that the user-provided `timerange` will be divided by this number to yield the number of trainings necessary to complete the backtest. <br> **Datatype:** Float.
+| `backtest_save_model` | Saves models to disk when running backtesting. <br> **Datatype:** Boolean. Default: `True`.
 | `identifier` | **Required.** <br> A unique name for the current model. This can be reused to reload pre-trained models/data. <br> **Datatype:** String.
 | `live_retrain_hours` | Frequency of retraining during dry/live runs. <br> Default set to 0, which means the model will retrain as often as possible. <br> **Datatype:** Float > 0.
 | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> Defaults set to 0, which means models never expire. <br> **Datatype:** Positive integer.
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 0a63e36ea..9c7ef05a7 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -71,6 +71,7 @@ class IFreqaiModel(ABC):
         self.first = True
         self.set_full_path()
         self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
+        self.backtest_save_model: bool = self.freqai_info.get("backtest_save_model", True)
         self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
         self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
         self.scanning = False
@@ -246,7 +247,8 @@ class IFreqaiModel(ABC):
                     self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
                         trained_timestamp.stopts)
                     dk.set_new_model_names(metadata["pair"], trained_timestamp)
-                    self.dd.save_data(self.model, metadata["pair"], dk)
+                    if self.backtest_save_model:
+                        self.dd.save_data(self.model, metadata["pair"], dk)
                 else:
                     self.model = self.dd.load_data(metadata["pair"], dk)
 

From 599c1c79fb8170a7ea4bf9d250a4a3db0a3234ba Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Sat, 3 Sep 2022 14:00:01 +0200
Subject: [PATCH 5/5] reorganized backtest utilities, test new functionality,
 improve/update doc

---
 config_examples/config_freqai.example.json   |  2 -
 docs/freqai.md                               | 32 +++++++------
 freqtrade/freqai/data_kitchen.py             | 27 ++++++++++-
 freqtrade/freqai/freqai_interface.py         | 50 +++++---------------
 freqtrade/templates/FreqaiExampleStrategy.py |  2 +-
 tests/freqai/test_freqai_interface.py        |  9 +++-
 6 files changed, 63 insertions(+), 59 deletions(-)

diff --git a/config_examples/config_freqai.example.json b/config_examples/config_freqai.example.json
index 846d37a82..12eb30128 100644
--- a/config_examples/config_freqai.example.json
+++ b/config_examples/config_freqai.example.json
@@ -56,7 +56,6 @@
         "purge_old_models": true,
         "train_period_days": 15,
         "backtest_period_days": 7,
-        "backtest_save_model": true,
         "live_retrain_hours": 0,
         "identifier": "uniqe-id",
         "feature_parameters": {
@@ -75,7 +74,6 @@
             "weight_factor": 0.9,
             "principal_component_analysis": false,
             "use_SVM_to_remove_outliers": true,
-            "indicator_max_period_candles": 20,
             "indicator_periods_candles": [
                 10,
                 20
diff --git a/docs/freqai.md b/docs/freqai.md
index 6ee124b9b..3646362c3 100644
--- a/docs/freqai.md
+++ b/docs/freqai.md
@@ -89,11 +89,10 @@ Mandatory parameters are marked as **Required**, which means that they are requi
 |------------|-------------|
 |  |  **General configuration parameters**
 | `freqai` | **Required.** <br> The parent dictionary containing all the parameters for controlling FreqAI. <br> **Datatype:** Dictionary.
-| `startup_candles` | Number of candles needed for *backtesting only* to ensure all indicators are non NaNs at the start of the first train period. <br> **Datatype:** Positive integer.
 | `purge_old_models` | Delete obsolete models (otherwise, all historic models will remain on disk). <br> **Datatype:** Boolean. Default: `False`.
 | `train_period_days` | **Required.** <br> Number of days to use for the training data (width of the sliding window). <br> **Datatype:** Positive integer.
 | `backtest_period_days` | **Required.** <br> Number of days to inference from the trained model before sliding the window defined above, and retraining the model. This can be fractional days, but beware that the user-provided `timerange` will be divided by this number to yield the number of trainings necessary to complete the backtest. <br> **Datatype:** Float.
-| `backtest_save_model` | Saves models to disk when running backtesting. <br> **Datatype:** Boolean. Default: `True`.
+| `save_backtest_models` | Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when users wish to tune entry/exit parameters). If a user wishes to save models to disk when running backtesting, they should activate `save_backtest_models`. A user may wish to do this if they plan to use the same model files for starting a dry/live instance with the same `identifier`. <br> **Datatype:** Boolean. Default: `False`.
 | `identifier` | **Required.** <br> A unique name for the current model. This can be reused to reload pre-trained models/data. <br> **Datatype:** String.
 | `live_retrain_hours` | Frequency of retraining during dry/live runs. <br> Default set to 0, which means the model will retrain as often as possible. <br> **Datatype:** Float > 0.
 | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> Defaults set to 0, which means models never expire. <br> **Datatype:** Positive integer.
@@ -280,6 +279,17 @@ The FreqAI strategy requires the user to include the following lines of code in
 
 Notice how the `populate_any_indicators()` is where the user adds their own features ([more information](#feature-engineering)) and labels ([more information](#setting-classifier-targets)). See a full example at `templates/FreqaiExampleStrategy.py`.
 
+### Setting the `startup_candle_count`
+Users need to take care to set the `startup_candle_count` in their strategy the same way they would for any normal Freqtrade strategy (see details [here](strategy-customization.md/#strategy-startup-period)). This value is used by Freqtrade to ensure that a sufficient amount of data is provided when calling on the `dataprovider` to avoid any NaNs at the beginning of the first training. Users can easily set this value by identifying the longest period (in candle units) that they pass to their indicator creation functions (e.g. talib functions). In the present example, the user would pass 20 as this value (since it is the maximum value in their `indicator_periods_candles`).
+
+!!! Note
+    Typically it is best for users to be safe and multiply their expected `startup_candle_count` by 2. There are instances where the talib functions actually require more data than just the passed `period`. Anecdotally, multiplying the `startup_candle_count` by 2 always leads to a fully NaN-free training dataset. Look out for this log message to confirm that your data is clean:
+
+    ```
+    2022-08-31 15:14:04 - freqtrade.freqai.data_kitchen - INFO - dropped 0 training points due to NaNs in populated dataset 4319.
+    ```
+
+
 ## Creating a dynamic target
 
 The `&*_std/mean` return values describe the statistical fit of the user defined label *during the most recent training*. This value allows the user to know the rarity of a given prediction. For example, `templates/FreqaiExampleStrategy.py`, creates a `target_roi` which is based on filtering out predictions that are below a given z-score of 1.25.
@@ -505,7 +515,7 @@ and if a full `live_retrain_hours` has elapsed since the end of the loaded model
 The FreqAI backtesting module can be executed with the following command:
 
 ```bash
-freqtrade backtesting --strategy FreqaiExampleStrategy --config config_freqai.example.json --freqaimodel LightGBMRegressor --timerange 20210501-20210701
+freqtrade backtesting --strategy FreqaiExampleStrategy --config config_examples/config_freqai.example.json --freqaimodel LightGBMRegressor --timerange 20210501-20210701
 ```
 
 Backtesting mode requires the user to have the data pre-downloaded (unlike in dry/live mode where FreqAI automatically downloads the necessary data). The user should be careful to consider that the time range of the downloaded data is more than the backtesting time range. This is because FreqAI needs data prior to the desired backtesting time range in order to train a model to be ready to make predictions on the first candle of the user-set backtesting time range. More details on how to calculate the data to download can be found [here](#deciding-the-sliding-training-window-and-backtesting-duration).
@@ -532,20 +542,14 @@ the user is asking FreqAI to use a training period of 30 days and backtest on th
 This means that if the user sets `--timerange 20210501-20210701`,
 FreqAI will train have trained 8 separate models at the end of `--timerange` (because the full range comprises 8 weeks). After the training of the model, FreqAI will backtest the subsequent 7 days. The "sliding window" then moves one week forward (emulating FreqAI retraining once per week in live mode) and the new model uses the previous 30 days (including the 7 days used for backtesting by the previous model) to train. This is repeated until the end of `--timerange`.
 
-In live mode, the required training data is automatically computed and downloaded. However, in backtesting mode,
-the user must manually enter the required number of `startup_candles` in the config. This value
-is used to increase the data to FreqAI, which should be sufficient to enable all indicators
-to be NaN free at the beginning of the first training. This is done by identifying the
-longest timeframe (`4h` in presented example config) and the longest indicator period (`20` days in presented example config)
-and adding this to the `train_period_days`. The units need to be in the base candle time frame:
-`startup_candles` = ( 4 hours * 20 max period * 60 minutes/hour + 30 day train_period_days * 1440 minutes per day ) / 5 min (base time frame) = 9360.
-
-!!! Note
-    In dry/live mode, this is all precomputed and handled automatically. Thus, `startup_candle` has no influence on dry/live mode.
-
 !!! Note
     Although fractional `backtest_period_days` is allowed, the user should be aware that the `--timerange` is divided by this value to determine the number of models that FreqAI will need to train in order to backtest the full range. For example, if the user wants to set a `--timerange` of 10 days, and asks for a `backtest_period_days` of 0.1, FreqAI will need to train 100 models per pair to complete the full backtest. Because of this, a true backtest of FreqAI adaptive training would take a *very* long time. The best way to fully test a model is to run it dry and let it constantly train. In this case, backtesting would take the exact same amount of time as a dry run.
 
+### Downloading data for backtesting
+Live/dry instances will download the data automatically for the user, but users who wish to use backtesting functionality still need to download the necessary data using `download-data` (details [here](data-download.md#data-downloading)). FreqAI users need to pay careful attention to understanding how much *additional* data needs to be downloaded to ensure that they have a sufficient amount of training data *before* the start of their backtesting timerange. The amount of additional data can be roughly estimated by subtracting `train_period_days` and the `startup_candle_count` ([details](#setting-the-startup_candle_count)) from the beginning of the desired backtesting timerange.
+
+As an example, suppose we wish to backtest the `--timerange` above of `20210501-20210701` using the example config, which sets `train_period_days` to 15. The startup candle count is 40 on a maximum `include_timeframes` of 1h. We would need 20210501 - 15 days - 40 * 1h / 24 hours = 20210414 (16.7 days earlier than the start of the desired backtesting timerange).
+
 ### Defining model expirations
 
 During dry/live mode, FreqAI trains each coin pair sequentially (on separate threads/GPU from the main Freqtrade bot). This means that there is always an age discrepancy between models. If a user is training on 50 pairs, and each pair requires 5 minutes to train, the oldest model will be over 4 hours old. This may be undesirable if the characteristic time scale (the trade duration target) for a strategy is less than 4 hours. The user can decide to only make trade entries if the model is less than
diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index f88e20223..13af1e0d2 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -70,7 +70,7 @@ class FreqaiDataKitchen:
         self.training_features_list: List = []
         self.model_filename: str = ""
         self.backtesting_results_path = Path()
-        self.backtesting_prediction_folder: str = "backtesting_predictions"
+        self.backtest_predictions_folder: str = "backtesting_predictions"
         self.live = live
         self.pair = pair
 
@@ -1077,7 +1077,7 @@ class FreqaiDataKitchen:
         Save prediction dataframe from backtesting to h5 file format
         :param append_df: dataframe for backtesting period
         """
-        full_predictions_folder = Path(self.full_path / self.backtesting_prediction_folder)
+        full_predictions_folder = Path(self.full_path / self.backtest_predictions_folder)
         if not full_predictions_folder.is_dir():
             full_predictions_folder.mkdir(parents=True, exist_ok=True)
 
@@ -1092,3 +1092,26 @@ class FreqaiDataKitchen:
         """
         append_df = pd.read_hdf(self.backtesting_results_path)
         return append_df
+
+    def check_if_backtest_prediction_exists(
+        self
+    ) -> bool:
+        """
+        Check if a backtesting prediction file already exists for the current model.
+        Also stores the checked path in `self.backtesting_results_path`.
+        :return:
+        :boolean: whether the prediction file exists or not.
+        """
+        path_to_predictionfile = Path(self.full_path /
+                                      self.backtest_predictions_folder /
+                                      f"{self.model_filename}_prediction.h5")
+        self.backtesting_results_path = path_to_predictionfile
+
+        file_exists = path_to_predictionfile.is_file()
+        if file_exists:
+            logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
+        else:
+            logger.info(
+                f"Could not find backtesting prediction file at {path_to_predictionfile}"
+            )
+        return file_exists
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 9c7ef05a7..399568c7d 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -71,7 +71,9 @@ class IFreqaiModel(ABC):
         self.first = True
         self.set_full_path()
         self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
-        self.backtest_save_model: bool = self.freqai_info.get("backtest_save_model", True)
+        self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", False)
+        if self.save_backtest_models:
+            logger.info('Backtesting module configured to save all models.')
         self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
         self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
         self.scanning = False
@@ -125,10 +127,9 @@ class IFreqaiModel(ABC):
         elif not self.follow_mode:
             self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
             logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
-            with self.analysis_lock:
-                dataframe = self.dk.use_strategy_to_populate_indicators(
-                    strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
-                )
+            dataframe = self.dk.use_strategy_to_populate_indicators(
+                strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
+            )
             dk = self.start_backtesting(dataframe, metadata, self.dk)
 
         dataframe = dk.remove_features_from_df(dk.return_dataframe)
@@ -232,10 +233,9 @@ class IFreqaiModel(ABC):
                 f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
                 )
 
-            coin, _ = metadata["pair"].split("/")
-            dk.model_filename = f"cb_{coin.lower()}_{trained_timestamp_int}"
+            dk.set_new_model_names(metadata["pair"], trained_timestamp)
 
-            if self.backtest_prediction_exists(dk):
+            if dk.check_if_backtest_prediction_exists():
                 append_df = dk.get_backtesting_prediction()
                 dk.append_predictions(append_df)
             else:
@@ -246,8 +246,9 @@ class IFreqaiModel(ABC):
                     self.model = self.train(dataframe_train, metadata["pair"], dk)
                     self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
                         trained_timestamp.stopts)
-                    dk.set_new_model_names(metadata["pair"], trained_timestamp)
-                    if self.backtest_save_model:
+
+                    if self.save_backtest_models:
+                        logger.info('Saving backtest model to disk.')
                         self.dd.save_data(self.model, metadata["pair"], dk)
                 else:
                     self.model = self.dd.load_data(metadata["pair"], dk)
@@ -644,35 +645,6 @@ class IFreqaiModel(ABC):
                 self.train_time = 0
         return
 
-    def backtest_prediction_exists(
-        self,
-        dk: FreqaiDataKitchen,
-        scanning: bool = False,
-    ) -> bool:
-        """
-        Check if a backtesting prediction already exists
-        :param dk: FreqaiDataKitchen
-        :return:
-        :boolean: whether the prediction file exists or not.
-        """
-        if not self.live:
-            prediction_file_name = dk.model_filename
-            path_to_predictionfile = Path(dk.full_path /
-                                          dk.backtesting_prediction_folder /
-                                          f"{prediction_file_name}_prediction.h5")
-            dk.backtesting_results_path = path_to_predictionfile
-
-            file_exists = path_to_predictionfile.is_file()
-            if file_exists and not scanning:
-                logger.info("Found backtesting prediction file at %s", prediction_file_name)
-            elif not scanning:
-                logger.info(
-                    "Could not find backtesting prediction file at %s", prediction_file_name
-                )
-            return file_exists
-        else:
-            return False
-
     # Following methods which are overridden by user made prediction models.
     # See freqai/prediction_models/CatboostPredictionModel.py for an example.
 
diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py
index aa584bfbc..0e822a028 100644
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -44,7 +44,7 @@ class FreqaiExampleStrategy(IStrategy):
     stoploss = -0.05
     use_exit_signal = True
     # this is the maximum period fed to talib (timeframe independent)
-    startup_candle_count: int = 20
+    startup_candle_count: int = 40
     can_short = False
 
     linear_roi_offset = DecimalParameter(
diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py
index 09f5d27ff..5441b3c24 100644
--- a/tests/freqai/test_freqai_interface.py
+++ b/tests/freqai/test_freqai_interface.py
@@ -174,6 +174,7 @@ def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf):
 
 def test_start_backtesting(mocker, freqai_conf):
     freqai_conf.update({"timerange": "20180120-20180130"})
+    freqai_conf.get("freqai", {}).update({"save_backtest_models": True})
     strategy = get_patched_freqai_strategy(mocker, freqai_conf)
     exchange = get_patched_exchange(mocker, freqai_conf)
     strategy.dp = DataProvider(freqai_conf, exchange)
@@ -200,6 +201,7 @@ def test_start_backtesting(mocker, freqai_conf):
 def test_start_backtesting_subdaily_backtest_period(mocker, freqai_conf):
     freqai_conf.update({"timerange": "20180120-20180124"})
     freqai_conf.get("freqai", {}).update({"backtest_period_days": 0.5})
+    freqai_conf.get("freqai", {}).update({"save_backtest_models": True})
     strategy = get_patched_freqai_strategy(mocker, freqai_conf)
     exchange = get_patched_exchange(mocker, freqai_conf)
     strategy.dp = DataProvider(freqai_conf, exchange)
@@ -224,6 +226,7 @@ def test_start_backtesting_subdaily_backtest_period(mocker, freqai_conf):
 
 def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
     freqai_conf.update({"timerange": "20180120-20180130"})
+    freqai_conf.get("freqai", {}).update({"save_backtest_models": True})
     strategy = get_patched_freqai_strategy(mocker, freqai_conf)
     exchange = get_patched_exchange(mocker, freqai_conf)
     strategy.dp = DataProvider(freqai_conf, exchange)
@@ -263,10 +266,14 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
     freqai.start_backtesting(df, metadata, freqai.dk)
 
     assert log_has_re(
-        "Found backtesting prediction ",
+        "Found backtesting prediction file ",
         caplog,
     )
 
+    path = (freqai.dd.full_path / freqai.dk.backtest_predictions_folder)
+    prediction_files = [x for x in path.iterdir() if x.is_file()]
+    assert len(prediction_files) == 5
+
     shutil.rmtree(Path(freqai.dk.full_path))