From 16b4a5b71ff140f5de31e5d5572f1f193457cf6b Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Fri, 3 Jun 2022 15:19:46 +0200
Subject: [PATCH 01/14] rehaul of backend data management - increasing
 performance by holding history in memory, reducing load on the ratelimit by
 only pinging exchange once per candle. Improve code readability.

---
 freqtrade/freqai/data_drawer.py               |  44 +++-
 freqtrade/freqai/data_kitchen.py              | 222 +++++++++++++++---
 freqtrade/freqai/freqai_interface.py          | 126 +++++++---
 .../CatboostPredictionModel.py                |  11 +
 freqtrade/templates/FreqaiExampleStrategy.py  |   9 +-
 5 files changed, 342 insertions(+), 70 deletions(-)

diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py
index 77b595d56..4e52ac711 100644
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -35,6 +35,8 @@ class FreqaiDataDrawer:
         self.model_dictionary: Dict[str, Any] = {}
         self.model_return_values: Dict[str, Any] = {}
         self.pair_data_dict: Dict[str, Any] = {}
+        self.historic_data: Dict[str, Any] = {}
+        # self.populated_historic_data: Dict[str, Any] = {} ?
         self.follower_dict: Dict[str, Any] = {}
         self.full_path = full_path
         self.follow_mode = follow_mode
@@ -45,6 +47,12 @@ class FreqaiDataDrawer:
         # self.create_training_queue(pair_whitelist)
 
     def load_drawer_from_disk(self):
+        """
+        Locate and load a previously saved data drawer full of all pair model metadata in
+        present model folder.
+        :returns:
+        exists: bool = whether or not the drawer was located
+        """
         exists = Path(self.full_path / str('pair_dictionary.json')).resolve().exists()
         if exists:
             with open(self.full_path / str('pair_dictionary.json'), "r") as fp:
@@ -58,16 +66,25 @@ class FreqaiDataDrawer:
         return exists
 
     def save_drawer_to_disk(self):
+        """
+        Save data drawer full of all pair model metadata in present model folder.
+        """
         with open(self.full_path / str('pair_dictionary.json'), "w") as fp:
             json.dump(self.pair_dict, fp, default=self.np_encoder)
 
-    def save_follower_dict_to_dist(self):
+    def save_follower_dict_to_disk(self):
+        """
+        Save follower dictionary to disk (used by strategy for persistent prediction targets)
+        """
         follower_name = self.config.get('bot_name', 'follower1')
         with open(self.full_path / str('follower_dictionary-' +
                                        follower_name + '.json'), "w") as fp:
             json.dump(self.follower_dict, fp, default=self.np_encoder)
 
     def create_follower_dict(self):
+        """
+        Create a dictionary for each follower to maintain unique persistent prediction targets
+        """
         follower_name = self.config.get('bot_name', 'follower1')
         whitelist_pairs = self.config.get('exchange', {}).get('pair_whitelist')
 
@@ -89,6 +106,18 @@ class FreqaiDataDrawer:
             return object.item()
 
     def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool, bool]:
+        """
+        Locate and load existing model metadata from persistent storage. If not located,
+        create a new one and append the current pair to it and prepare it for its first
+        training
+        :params:
+        metadata: dict = strategy furnished pair metadata
+        :returns:
+        model_filename: str = unique filename used for loading persistent objects from disk
+        trained_timestamp: int = the last time the coin was trained
+        coin_first: bool = If the coin is fresh without metadata
+        return_null_array: bool = Follower could not find pair metadata
+        """
         pair_in_dict = self.pair_dict.get(metadata['pair'])
         data_path_set = self.pair_dict.get(metadata['pair'], {}).get('data_path', None)
         return_null_array = False
@@ -137,6 +166,7 @@ class FreqaiDataDrawer:
         self.model_return_values[pair]['do_preds'] = dh.full_do_predict
         self.model_return_values[pair]['target_mean'] = dh.full_target_mean
         self.model_return_values[pair]['target_std'] = dh.full_target_std
+        self.model_return_values[pair]['DI_values'] = dh.full_DI_values
 
         # if not self.follow_mode:
         #     self.save_model_return_values_to_disk()
@@ -157,6 +187,8 @@ class FreqaiDataDrawer:
 
         self.model_return_values[pair]['predictions'] = np.append(
             self.model_return_values[pair]['predictions'][i:], predictions[-1])
+        self.model_return_values[pair]['DI_values'] = np.append(
+            self.model_return_values[pair]['DI_values'][i:], dh.DI_values[-1])
         self.model_return_values[pair]['do_preds'] = np.append(
             self.model_return_values[pair]['do_preds'][i:], do_preds[-1])
         self.model_return_values[pair]['target_mean'] = np.append(
@@ -168,6 +200,8 @@ class FreqaiDataDrawer:
             prepend = np.zeros(abs(length_difference) - 1)
             self.model_return_values[pair]['predictions'] = np.insert(
                 self.model_return_values[pair]['predictions'], 0, prepend)
+            self.model_return_values[pair]['DI_values'] = np.insert(
+                self.model_return_values[pair]['DI_values'], 0, prepend)
             self.model_return_values[pair]['do_preds'] = np.insert(
                 self.model_return_values[pair]['do_preds'], 0, prepend)
             self.model_return_values[pair]['target_mean'] = np.insert(
@@ -179,6 +213,7 @@ class FreqaiDataDrawer:
         dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds'])
         dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean'])
         dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std'])
+        dh.full_DI_values = copy.deepcopy(self.model_return_values[pair]['DI_values'])
 
         # if not self.follow_mode:
         #     self.save_model_return_values_to_disk()
@@ -190,6 +225,7 @@ class FreqaiDataDrawer:
         dh.full_do_predict = np.zeros(len_df)
         dh.full_target_mean = np.zeros(len_df)
         dh.full_target_std = np.zeros(len_df)
+        dh.full_DI_values = np.zeros(len_df)
 
     def purge_old_models(self) -> None:
 
@@ -227,6 +263,12 @@ class FreqaiDataDrawer:
                     shutil.rmtree(v)
                     deleted += 1
 
+    def update_follower_metadata(self):
+        # follower needs to load from disk to get any changes made by leader to pair_dict
+        self.load_drawer_from_disk()
+        if self.config.get('freqai', {}).get('purge_old_models', False):
+            self.purge_old_models()
+
     # to be used if we want to send predictions directly to the follower instead of forcing
     # follower to load models and inference
     # def save_model_return_values_to_disk(self) -> None:
diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index dceb721c5..4e2fb6cc9 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -25,9 +25,6 @@ from freqtrade.resolvers import ExchangeResolver
 from freqtrade.strategy.interface import IStrategy
 
 
-# import scipy as spy  # used for auto distribution assignment
-
-
 SECONDS_IN_DAY = 86400
 
 logger = logging.getLogger(__name__)
@@ -52,6 +49,7 @@ class FreqaiDataKitchen:
         self.target_std: npt.ArrayLike = np.array([])
         self.full_predictions: npt.ArrayLike = np.array([])
         self.full_do_predict: npt.ArrayLike = np.array([])
+        self.full_DI_values: npt.ArrayLike = np.array([])
         self.full_target_mean: npt.ArrayLike = np.array([])
         self.full_target_std: npt.ArrayLike = np.array([])
         self.data_path = Path()
@@ -59,6 +57,7 @@ class FreqaiDataKitchen:
         self.live = live
         self.pair = pair
         self.svm_model: linear_model.SGDOneClassSVM = None
+        self.set_all_pairs()
         if not self.live:
             self.full_timerange = self.create_fulltimerange(self.config["timerange"],
                                                             self.freqai_config.get("train_period")
@@ -73,6 +72,12 @@ class FreqaiDataKitchen:
         self.data_drawer = data_drawer
 
     def set_paths(self, metadata: dict, trained_timestamp: int = None,) -> None:
+        """
+        Set the paths to the data for the present coin/botloop
+        :params:
+        metadata: dict = strategy furnished pair metadata
+        trained_timestamp: int = timestamp of most recent training
+        """
         self.full_path = Path(self.config['user_data_dir'] /
                               "models" /
                               str(self.freqai_config.get('identifier')))
@@ -514,6 +519,11 @@ class FreqaiDataKitchen:
         return None
 
     def pca_transform(self, filtered_dataframe: DataFrame) -> None:
+        """
+        Use an existing pca transform to transform data into components
+        :params:
+        filtered_dataframe: DataFrame = the cleaned dataframe
+        """
         pca_components = self.pca.transform(filtered_dataframe)
         self.data_dictionary["prediction_features"] = pd.DataFrame(
             data=pca_components,
@@ -522,6 +532,11 @@ class FreqaiDataKitchen:
         )
 
     def compute_distances(self) -> float:
+        """
+        Compute distances between each training point and every other training
+        point. This metric defines the neighborhood of trained data and is used
+        for prediction confidence in the Dissimilarity Index
+        """
         logger.info("computing average mean distance for all training points")
         pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1)
         avg_mean_dist = pairwise.mean(axis=1).mean()
@@ -530,6 +545,12 @@ class FreqaiDataKitchen:
         return avg_mean_dist
 
     def use_SVM_to_remove_outliers(self, predict: bool) -> None:
+        """
+        Build/inference a Support Vector Machine to detect outliers
+        in training data and prediction
+        :params:
+        predict: bool = If true, inference an existing SVM model, else construct one
+        """
 
         if predict:
             assert self.svm_model, "No svm model available for outlier removal"
@@ -580,6 +601,13 @@ class FreqaiDataKitchen:
         return
 
     def find_features(self, dataframe: DataFrame) -> list:
+        """
+        Find features in the strategy provided dataframe
+        :params:
+        dataframe: DataFrame = strategy provided dataframe
+        :returns:
+        features: list = the features to be used for training/prediction
+        """
         column_names = dataframe.columns
         features = [c for c in column_names if '%' in c]
         if not features:
@@ -600,17 +628,19 @@ class FreqaiDataKitchen:
             n_jobs=-1,
         )
 
+        self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]
+
         do_predict = np.where(
-            distance.min(axis=0) / self.data["avg_mean_dist"]
+            self.DI_values
             < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
             1,
             0,
         )
 
-        # logger.info(
-        #     "Distance checker tossed %s predictions for being too far from training data",
-        #     len(do_predict) - do_predict.sum(),
-        # )
+        logger.info(
+            "DI tossed %s predictions for being too far from training data",
+            len(do_predict) - do_predict.sum(),
+        )
 
         self.do_predict += do_predict
         self.do_predict -= 1
@@ -638,6 +668,7 @@ class FreqaiDataKitchen:
 
         self.full_predictions = np.append(self.full_predictions, predictions)
         self.full_do_predict = np.append(self.full_do_predict, do_predict)
+        self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
         self.full_target_mean = np.append(self.full_target_mean, target_mean)
         self.full_target_std = np.append(self.full_target_std, target_std)
 
@@ -652,6 +683,7 @@ class FreqaiDataKitchen:
         filler = np.zeros(len_dataframe - len(self.full_predictions))  # startup_candle_count
         self.full_predictions = np.append(filler, self.full_predictions)
         self.full_do_predict = np.append(filler, self.full_do_predict)
+        self.full_DI_values = np.append(filler, self.full_DI_values)
         self.full_target_mean = np.append(filler, self.full_target_mean)
         self.full_target_std = np.append(filler, self.full_target_std)
 
@@ -711,6 +743,8 @@ class FreqaiDataKitchen:
                 logger.warning('FreqAI could not detect max timeframe and therefore may not '
                                'download the proper amount of data for training')
 
+        # logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY} days')
+
         if trained_timestamp != 0:
             elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
             retrain = elapsed_time > self.freqai_config.get('backtest_period')
@@ -764,61 +798,176 @@ class FreqaiDataKitchen:
         # enables persistence, but not fully implemented into save/load data yer
         # self.data['live_trained_timerange'] = str(int(trained_timerange.stopts))
 
-    def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict,
-                                         strategy: IStrategy) -> None:
+    # SUPERSEDED
+    # def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict,
+    #                                      strategy: IStrategy) -> None:
 
+    #     exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
+    #                                               self.config, validate=False, freqai=True)
+    #     # exchange = strategy.dp._exchange # closes ccxt session
+    #     pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', []))
+    #     if str(metadata['pair']) not in pairs:
+    #         pairs.append(str(metadata['pair']))
+
+    #     refresh_backtest_ohlcv_data(
+    #                     exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'),
+    #                     datadir=self.config['datadir'], timerange=timerange,
+    #                     new_pairs_days=self.config['new_pairs_days'],
+    #                     erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'),
+    #                     trading_mode=self.config.get('trading_mode', 'spot'),
+    #                     prepend=self.config.get('prepend_data', False)
+    #                 )
+
+    def download_all_data_for_training(self, timerange: TimeRange) -> None:
+        """
+        Called only once upon start of bot to download the necessary data for
+        populating indicators and training the model.
+        :params:
+        timerange: TimeRange = The full data timerange for populating the indicators
+        and training the model.
+        """
         exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
                                                   self.config, validate=False, freqai=True)
-        # exchange = strategy.dp._exchange # closes ccxt session
-        pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', []))
-        if str(metadata['pair']) not in pairs:
-            pairs.append(str(metadata['pair']))
+
+        new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY)
 
         refresh_backtest_ohlcv_data(
-                        exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'),
+                        exchange, pairs=self.all_pairs,
+                        timeframes=self.freqai_config.get('timeframes'),
                         datadir=self.config['datadir'], timerange=timerange,
-                        new_pairs_days=self.config['new_pairs_days'],
+                        new_pairs_days=new_pairs_days,
                         erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'),
                         trading_mode=self.config.get('trading_mode', 'spot'),
                         prepend=self.config.get('prepend_data', False)
                     )
 
-    def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any],
-                                                                                  DataFrame]:
+    def update_historic_data(self, strategy: IStrategy) -> None:
+        """
+        Append the newest candle to our stored historic data (in memory) so that
+        we do not need to reload candle history from disk or ping the exchange
+        multiple times for the same candle.
+        :params:
+        strategy: IStrategy = user defined strategy object (its dp supplies the candles)
+        """
+
+        history_data = self.data_drawer.historic_data
+        # iloc[-1:] keeps the last candle as a one-row DataFrame (iloc[-1] would be a Series)
+        for pair in self.all_pairs:
+            for tf in self.freqai_config.get('timeframes'):
+                history_data[pair][tf] = pd.concat(
+                                            [history_data[pair][tf],
+                                             strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]],
+                                            axis=0, ignore_index=True
+                                            )
+
+    def set_all_pairs(self) -> None:
+        """Build self.all_pairs: corr_pairlist plus any whitelist pairs not already in it."""
+        self.all_pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', []))
+        for pair in self.config.get('exchange', {}).get('pair_whitelist', []):
+            if pair not in self.all_pairs:
+                self.all_pairs.append(pair)
+
+    def load_all_pair_histories(self, timerange: TimeRange) -> None:
+        """
+        Load pair histories for all whitelist and corr_pairlist pairs.
+        Only called once upon startup of bot.
+        :params:
+        timerange: TimeRange = full timerange required to populate all indicators
+        for training according to user defined train_period
+        """
+        history_data = self.data_drawer.historic_data
+
+        for pair in self.all_pairs:
+            if pair not in history_data:
+                history_data[pair] = {}
+            for tf in self.freqai_config.get('timeframes'):
+                history_data[pair][tf] = load_pair_history(datadir=self.config['datadir'],
+                                                           timeframe=tf,
+                                                           pair=pair, timerange=timerange,
+                                                           data_format=self.config.get(
+                                                            'dataformat_ohlcv', 'json'),
+                                                           candle_type=self.config.get(
+                                                            'trading_mode', 'spot'))
+
+    def get_base_and_corr_dataframes(self, timerange: TimeRange,
+                                     metadata: dict) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
+        """
+        Searches through our historic_data in memory and returns the dataframes relevant
+        to the present pair.
+        :params:
+        timerange: TimeRange = full timerange required to populate all indicators
+        for training according to user defined train_period
+        metadata: dict = strategy furnished pair metadata
+        """
         corr_dataframes: Dict[Any, Any] = {}
         base_dataframes: Dict[Any, Any] = {}
-        pairs = self.freqai_config.get('corr_pairlist', [])  # + [metadata['pair']]
-        # timerange = TimeRange.parse_timerange(new_timerange)
+        historic_data = self.data_drawer.historic_data
+        pairs = self.freqai_config.get('corr_pairlist', [])
 
         for tf in self.freqai_config.get('timeframes'):
-            base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
-                                                    timeframe=tf,
-                                                    pair=metadata['pair'], timerange=timerange,
-                                                    data_format=self.config.get(
-                                                    'dataformat_ohlcv', 'json'),
-                                                    candle_type=self.config.get(
-                                                    'trading_mode', 'spot'))
+            base_dataframes[tf] = self.slice_dataframe(
+                                                       timerange,
+                                                       historic_data[metadata['pair']][tf]
+                                                       )
             if pairs:
                 for p in pairs:
                     if metadata['pair'] in p:
                         continue  # dont repeat anything from whitelist
                     if p not in corr_dataframes:
                         corr_dataframes[p] = {}
-                    corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
-                                                               timeframe=tf,
-                                                               pair=p, timerange=timerange,
-                                                               data_format=self.config.get(
-                                                               'dataformat_ohlcv', 'json'),
-                                                               candle_type=self.config.get(
-                                                               'trading_mode', 'spot'))
+                    corr_dataframes[p][tf] = self.slice_dataframe(timerange, historic_data[p][tf])
 
         return corr_dataframes, base_dataframes
 
+    # SUPERSEDED
+    # def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any],
+    #                                                                               DataFrame]:
+    #     corr_dataframes: Dict[Any, Any] = {}
+    #     base_dataframes: Dict[Any, Any] = {}
+    #     pairs = self.freqai_config.get('corr_pairlist', [])  # + [metadata['pair']]
+    #     # timerange = TimeRange.parse_timerange(new_timerange)
+
+    #     for tf in self.freqai_config.get('timeframes'):
+    #         base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
+    #                                                 timeframe=tf,
+    #                                                 pair=metadata['pair'], timerange=timerange,
+    #                                                 data_format=self.config.get(
+    #                                                 'dataformat_ohlcv', 'json'),
+    #                                                 candle_type=self.config.get(
+    #                                                 'trading_mode', 'spot'))
+    #         if pairs:
+    #             for p in pairs:
+    #                 if metadata['pair'] in p:
+    #                     continue  # dont repeat anything from whitelist
+    #                 if p not in corr_dataframes:
+    #                     corr_dataframes[p] = {}
+    #                 corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
+    #                                                            timeframe=tf,
+    #                                                            pair=p, timerange=timerange,
+    #                                                            data_format=self.config.get(
+    #                                                            'dataformat_ohlcv', 'json'),
+    #                                                            candle_type=self.config.get(
+    #                                                            'trading_mode', 'spot'))
+
+    #     return corr_dataframes, base_dataframes
+
     def use_strategy_to_populate_indicators(self, strategy: IStrategy,
                                             corr_dataframes: dict,
                                             base_dataframes: dict,
                                             metadata: dict) -> DataFrame:
-
+        """
+        Use the user defined strategy for populating indicators during
+        retrain
+        :params:
+        strategy: IStrategy = user defined strategy object
+        corr_dataframes: dict = dict containing the informative pair dataframes
+        (for user defined timeframes)
+        base_dataframes: dict = dict containing the current pair dataframes
+        (for user defined timeframes)
+        metadata: dict = strategy furnished pair metadata
+        :returns:
+        dataframe: DataFrame = dataframe containing populated indicators
+        """
         dataframe = base_dataframes[self.config['timeframe']].copy()
         pairs = self.freqai_config.get("corr_pairlist", [])
 
@@ -847,6 +996,9 @@ class FreqaiDataKitchen:
         return dataframe
 
     def fit_labels(self) -> None:
+        """
+        Fit the labels with a gaussian distribution
+        """
         import scipy as spy
 
         f = spy.stats.norm.fit(self.data_dictionary["train_labels"])
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 9682ff818..04e819cc4 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -44,9 +44,9 @@ class IFreqaiModel(ABC):
         self.config = config
         self.assert_config(self.config)
         self.freqai_info = config["freqai"]
-        self.data_split_parameters = config["freqai"]["data_split_parameters"]
-        self.model_training_parameters = config["freqai"]["model_training_parameters"]
-        self.feature_parameters = config["freqai"]["feature_parameters"]
+        self.data_split_parameters = config.get('freqai', {}).get("data_split_parameters")
+        self.model_training_parameters = config.get("freqai", {}).get("model_training_parameters")
+        self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
         self.time_last_trained = None
         self.current_time = None
         self.model = None
@@ -54,6 +54,7 @@ class IFreqaiModel(ABC):
         self.training_on_separate_thread = False
         self.retrain = False
         self.first = True
+        self.update_historic_data = 0
         self.set_full_path()
         self.follow_mode = self.freqai_info.get('follow_mode', False)
         self.data_drawer = FreqaiDataDrawer(Path(self.full_path),
@@ -95,15 +96,12 @@ class IFreqaiModel(ABC):
 
                 self.dh = FreqaiDataKitchen(self.config, self.data_drawer,
                                             self.live, metadata["pair"])
-                dh = self.start_live(dataframe, metadata, strategy, self.dh)
+                dh = self.start_live(dataframe, metadata, strategy, self.dh, trainable=True)
             else:
                 # we will have at max 2 separate instances of the kitchen at once.
                 self.dh_fg = FreqaiDataKitchen(self.config, self.data_drawer,
                                                self.live, metadata["pair"])
-                dh = self.start_live(dataframe, metadata, strategy, self.dh_fg)
-
-            # return (dh.full_predictions, dh.full_do_predict,
-            #         dh.full_target_mean, dh.full_target_std)
+                dh = self.start_live(dataframe, metadata, strategy, self.dh_fg, trainable=False)
 
         # For backtesting, each pair enters and then gets trained for each window along the
         # sliding window defined by "train_period" (training window) and "backtest_period"
@@ -115,8 +113,9 @@ class IFreqaiModel(ABC):
             logger.info(f'Training {len(self.dh.training_timeranges)} timeranges')
             dh = self.start_backtesting(dataframe, metadata, self.dh)
 
-        return (dh.full_predictions, dh.full_do_predict,
-                dh.full_target_mean, dh.full_target_std)
+        return self.return_values(dataframe, dh)
+        # return (dh.full_predictions, dh.full_do_predict,
+        #         dh.full_target_mean, dh.full_target_std)
 
     def start_backtesting(self, dataframe: DataFrame, metadata: dict,
                           dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
@@ -185,7 +184,8 @@ class IFreqaiModel(ABC):
         return dh
 
     def start_live(self, dataframe: DataFrame, metadata: dict,
-                   strategy: IStrategy, dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
+                   strategy: IStrategy, dh: FreqaiDataKitchen,
+                   trainable: bool) -> FreqaiDataKitchen:
         """
         The main broad execution for dry/live. This function will check if a retraining should be
         performed, and if so, retrain and reset the model.
@@ -198,25 +198,35 @@ class IFreqaiModel(ABC):
         dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only
         """
 
+        # update follower
         if self.follow_mode:
-            # follower needs to load from disk to get any changes made by leader to pair_dict
-            self.data_drawer.load_drawer_from_disk()
-            if self.freqai_info.get('purge_old_models', False):
-                self.data_drawer.purge_old_models()
+            self.data_drawer.update_follower_metadata()
 
+        # get the model metadata associated with the current pair
         (model_filename,
          trained_timestamp,
          coin_first,
          return_null_array) = self.data_drawer.get_pair_dict_info(metadata)
 
-        # if the files do not yet exist, the follower returns null arrays to strategy
+        # if the metadata doesnt exist, the follower returns null arrays to strategy
         if self.follow_mode and return_null_array:
             logger.info('Returning null array from follower to strategy')
             self.data_drawer.return_null_values_to_strategy(dataframe, dh)
             return dh
 
-        if (not self.training_on_separate_thread and not self.follow_mode
-                and self.data_drawer.pair_dict[metadata['pair']]['priority'] == 1) or coin_first:
+        # append the historic data once per round
+        if (self.data_drawer.historic_data and
+                self.update_historic_data >= len(self.config.get('exchange', {})
+                                                 .get('pair_whitelist', []))):
+            dh.update_historic_data(strategy)
+            self.update_historic_data = 1
+        else:
+            self.update_historic_data += 1
+
+        # if trainable, check if model needs training, if so compute new timerange,
+        # then save model and metadata.
+        # if not trainable, load existing data
+        if (trainable and not self.follow_mode) or coin_first:
             file_exists = False
 
             if trained_timestamp != 0:  # historical model available
@@ -231,6 +241,15 @@ class IFreqaiModel(ABC):
              data_load_timerange) = dh.check_if_new_training_required(trained_timestamp)
             dh.set_paths(metadata, new_trained_timerange.stopts)
 
+            # download candle history if it is not already in memory
+            if not self.data_drawer.historic_data:
+                logger.info('Downloading all training data for all pairs in whitelist and '
+                            'corr_pairlist, this may take a while if you do not have the '
+                            'data saved')
+                dh.download_all_data_for_training(data_load_timerange)
+                dh.load_all_pair_histories(data_load_timerange)
+
+            # train the model on the trained timerange
             if self.retrain or not file_exists:
                 if coin_first:
                     self.train_model_in_series(new_trained_timerange, metadata,
@@ -241,17 +260,24 @@ class IFreqaiModel(ABC):
                                                           metadata, strategy,
                                                           dh, data_load_timerange)
 
-        elif self.training_on_separate_thread and not self.follow_mode:
-            logger.info("FreqAI training a new model on background thread.")
+        elif not trainable and not self.follow_mode:
+            logger.info(f'{metadata["pair"]} holds spot '
+                        f'{self.data_drawer.pair_dict[metadata["pair"]]["priority"]} '
+                        'in training queue')
         elif self.follow_mode:
             dh.set_paths(metadata, trained_timestamp)
             logger.info('FreqAI instance set to follow_mode, finding existing pair'
                         f'using { self.identifier }')
 
+        # load the model and associated data into the data kitchen
         self.model = dh.load_data(coin=metadata['pair'])
 
+        # ensure user is feeding the correct indicators to the model
         self.check_if_feature_list_matches_strategy(dataframe, dh)
 
+        # hold the historical predictions in memory so we are sending back
+        # correct array to strategy FIXME currently broken, but only affecting
+        # FreqUI reporting. Signals remain unaffected.
         if metadata['pair'] not in self.data_drawer.model_return_values:
             preds, do_preds = self.predict(dataframe, dh)
             dh.append_predictions(preds, do_preds, len(dataframe))
@@ -268,6 +294,13 @@ class IFreqaiModel(ABC):
 
     def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
                                                dh: FreqaiDataKitchen) -> None:
+        """
+        Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
+        to a folder holding existing models.
+        :params:
+        dataframe: DataFrame = strategy provided dataframe
+        dh: FreqaiDataKitchen = non-persistent data container/analyzer for current coin/bot loop
+        """
         strategy_provided_features = dh.find_features(dataframe)
         if 'training_features_list_raw' in dh.data:
             feature_list = dh.data['training_features_list_raw']
@@ -356,11 +389,24 @@ class IFreqaiModel(ABC):
     def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict,
                                          strategy: IStrategy, dh: FreqaiDataKitchen,
                                          data_load_timerange: TimeRange):
+        """
+        Retrieve data and train model on separate thread. Always called if the model folder already
+        contains a full set of trained models.
+        :params:
+        new_trained_timerange: TimeRange = the timerange to train the model on
+        metadata: dict = strategy provided metadata
+        strategy: IStrategy = user defined strategy object
+        dh: FreqaiDataKitchen = non-persistent data container for current coin/loop
+        data_load_timerange: TimeRange = the amount of data to be loaded for populate_any_indicators
+        (larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs)
+        """
 
         # with nostdout():
-        dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
-        corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
-                                                                   metadata)
+        # dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
+        # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
+        #                                                           metadata)
+        corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
+                                                                           metadata)
 
         # protecting from common benign errors associated with grabbing new data from exchange:
         try:
@@ -408,10 +454,22 @@ class IFreqaiModel(ABC):
     def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,
                               strategy: IStrategy, dh: FreqaiDataKitchen,
                               data_load_timerange: TimeRange):
-
-        dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
-        corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
-                                                                   metadata)
+        """
+        Retrieve data and train model in single threaded mode (only used if model directory is empty
+        upon startup for dry/live )
+        :params:
+        new_trained_timerange: TimeRange = the timerange to train the model on
+        metadata: dict = strategy provided metadata
+        strategy: IStrategy = user defined strategy object
+        dh: FreqaiDataKitchen = non-persistent data container for current coin/loop
+        data_load_timerange: TimeRange = the amount of data to be loaded for populate_any_indicators
+        (larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs)
+        """
+        # dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
+        # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
+        #                                                          metadata)
+        corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
+                                                                           metadata)
 
         unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy,
                                                                       corr_dataframes,
@@ -481,3 +539,17 @@ class IFreqaiModel(ABC):
         """
 
         return
+
+    @abstractmethod
+    def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame:
+        """
+        User defines the dataframe to be returned to strategy here.
+        :params:
+        dataframe: DataFrame = the full dataframe for the current prediction (live)
+        or --timerange (backtesting)
+        dh: FreqaiDataKitchen = Data management/analysis tool associated with present pair only
+        :returns:
+        dataframe: DataFrame = dataframe filled with user defined data
+        """
+
+        return
diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
index 5147faf0c..9a5059bcf 100644
--- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
@@ -18,6 +18,17 @@ class CatboostPredictionModel(IFreqaiModel):
     has its own DataHandler where data is held, saved, loaded, and managed.
     """
 
+    def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame:
+
+        dataframe["prediction"] = dh.full_predictions
+        dataframe["do_predict"] = dh.full_do_predict
+        dataframe["target_mean"] = dh.full_target_mean
+        dataframe["target_std"] = dh.full_target_std
+        if self.freqai_info('feature_parameters', {}).get('DI-threshold', 0) > 0:
+            dataframe["DI"] = dh.full_DI_values
+
+        return dataframe
+
     def make_labels(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame:
         """
         User defines the labels here (target values).
diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py
index d9dc38f0d..cf04bfa6e 100644
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -45,7 +45,7 @@ class FreqaiExampleStrategy(IStrategy):
 
     process_only_new_candles = False
     stoploss = -0.05
-    use_sell_signal = True
+    use_exit_signal = True
     startup_candle_count: int = 300
     can_short = False
 
@@ -176,12 +176,7 @@ class FreqaiExampleStrategy(IStrategy):
         # the model will return 4 values, its prediction, an indication of whether or not the
         # prediction should be accepted, the target mean/std values from the labels used during
         # each training period.
-        (
-            dataframe["prediction"],
-            dataframe["do_predict"],
-            dataframe["target_mean"],
-            dataframe["target_std"],
-        ) = self.model.bridge.start(dataframe, metadata, self)
+        dataframe = self.model.bridge.start(dataframe, metadata, self)
 
         dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"]
         dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"]

From f2762e3b4bccd03f2e0afe79fa40cf795db5b8c1 Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Fri, 3 Jun 2022 16:58:51 +0200
Subject: [PATCH 02/14] fix bug in return_values()

---
 freqtrade/freqai/prediction_models/CatboostPredictionModel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
index 9a5059bcf..3cea0eb9b 100644
--- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
@@ -24,7 +24,7 @@ class CatboostPredictionModel(IFreqaiModel):
         dataframe["do_predict"] = dh.full_do_predict
         dataframe["target_mean"] = dh.full_target_mean
         dataframe["target_std"] = dh.full_target_std
-        if self.freqai_info('feature_parameters', {}).get('DI-threshold', 0) > 0:
+        if self.freqai_info.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
             dataframe["DI"] = dh.full_DI_values
 
         return dataframe

From e8c0dcf9f352620fd1cf2f7e7d7265e5eb1b6713 Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Fri, 3 Jun 2022 17:14:07 +0200
Subject: [PATCH 03/14] add debug message to timerange

---
 freqtrade/freqai/data_kitchen.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 4e2fb6cc9..5c26b0598 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -743,7 +743,7 @@ class FreqaiDataKitchen:
                 logger.warning('FreqAI could not detect max timeframe and therefore may not '
                                'download the proper amount of data for training')
 
-        # logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY} days')
+        logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY} days')
 
         if trained_timestamp != 0:
             elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
@@ -770,6 +770,13 @@ class FreqaiDataKitchen:
             data_load_timerange.stopts = int(time)
             retrain = True
 
+        logger.info(f'Total data download needed '
+                    f'{(data_load_timerange.stopts - data_load_timerange.startts)/SECONDS_IN_DAY} '
+                    ' days')
+        logger.info(f'Total training timerange '
+                    f'{(trained_timerange.stopts - trained_timerange.startts)/SECONDS_IN_DAY} '
+                    ' days')
+
         # if retrain:
         #     coin, _ = metadata['pair'].split("/")
         #     # set the new data_path

From d6b8801f413be4f1c405314473a9d5efda3b959b Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Sun, 5 Jun 2022 04:40:58 +0200
Subject: [PATCH 04/14] fix follower bug

---
 freqtrade/freqai/data_kitchen.py     | 28 ++++++++++++++--------------
 freqtrade/freqai/freqai_interface.py |  6 +++---
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 5c26b0598..4f68326fc 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -638,8 +638,8 @@ class FreqaiDataKitchen:
         )
 
         logger.info(
-            "DI tossed %s predictions for being too far from training data",
-            len(do_predict) - do_predict.sum(),
+            f'DI tossed {len(do_predict) - do_predict.sum():.2f} predictions for '
+            'being too far from training data'
         )
 
         self.do_predict += do_predict
@@ -743,7 +743,7 @@ class FreqaiDataKitchen:
                 logger.warning('FreqAI could not detect max timeframe and therefore may not '
                                'download the proper amount of data for training')
 
-        logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY} days')
+        logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days')
 
         if trained_timestamp != 0:
             elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
@@ -770,12 +770,13 @@ class FreqaiDataKitchen:
             data_load_timerange.stopts = int(time)
             retrain = True
 
-        logger.info(f'Total data download needed '
-                    f'{(data_load_timerange.stopts - data_load_timerange.startts)/SECONDS_IN_DAY} '
-                    ' days')
-        logger.info(f'Total training timerange '
-                    f'{(trained_timerange.stopts - trained_timerange.startts)/SECONDS_IN_DAY} '
-                    ' days')
+        # logger.info(
+        #     f'Total data download needed '
+        #     f'{(data_load_timerange.stopts - data_load_timerange.startts)/SECONDS_IN_DAY:.2f}'
+        #     ' days')
+        # logger.info(f'Total training timerange '
+        #             f'{(trained_timerange.stopts - trained_timerange.startts)/SECONDS_IN_DAY} '
+        #             ' days')
 
         # if retrain:
         #     coin, _ = metadata['pair'].split("/")
@@ -861,11 +862,10 @@ class FreqaiDataKitchen:
 
         for pair in self.all_pairs:
             for tf in self.freqai_config.get('timeframes'):
-                history_data[pair][tf] = pd.concat(
-                                            [history_data[pair][tf],
-                                             strategy.dp.get_pair_dataframe(pair, tf).iloc[-1]],
-                                            axis=0
-                                            )
+                lh = len(history_data[pair][tf].index)
+                history_data[pair][tf].loc[lh] = strategy.dp.get_pair_dataframe(pair, tf).iloc[-1]
+
+        logger.info(f'Length of history data {len(history_data[pair][tf])}')
 
     def set_all_pairs(self) -> None:
 
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 04e819cc4..e192f9b0a 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -226,7 +226,7 @@ class IFreqaiModel(ABC):
         # if trainable, check if model needs training, if so compute new timerange,
         # then save model and metadata.
         # if not trainable, load existing data
-        if (trainable and not self.follow_mode) or coin_first:
+        if (trainable or coin_first) and not self.follow_mode:
             file_exists = False
 
             if trained_timestamp != 0:  # historical model available
@@ -416,8 +416,8 @@ class IFreqaiModel(ABC):
                                                                           metadata)
             unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe)
 
-        except Exception:
-            logger.warning('Mismatched sizes encountered in strategy')
+        except Exception as err:
+            logger.exception(err)
             # self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
             self.training_on_separate_thread = False
             self.retrain = False

From 2451ed8c88eb079b9293af9b802caabd8167610d Mon Sep 17 00:00:00 2001
From: Robert Caulk <rob.caulk@gmail.com>
Date: Mon, 6 Jun 2022 15:11:54 -0600
Subject: [PATCH 05/14] Quick bug fix

---
 freqtrade/freqai/data_drawer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py
index 4e52ac711..740998caf 100644
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -266,7 +266,7 @@ class FreqaiDataDrawer:
     def update_follower_metadata(self):
         # follower needs to load from disk to get any changes made by leader to pair_dict
         self.load_drawer_from_disk()
-        if self.config.get('freqai', {})('purge_old_models', False):
+        if self.config.get('freqai', {}).get('purge_old_models', False):
             self.purge_old_models()
 
     # to be used if we want to send predictions directly to the follower instead of forcing

From bf19055e53332d19ac2dbacc0de171daa609b9a5 Mon Sep 17 00:00:00 2001
From: Robert Caulk <rob.caulk@gmail.com>
Date: Mon, 6 Jun 2022 15:56:12 -0600
Subject: [PATCH 06/14] Update function spelling

---
 freqtrade/templates/FreqaiExampleStrategy.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py
index cf04bfa6e..21dd4a39b 100644
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -251,7 +251,7 @@ class FreqaiExampleStrategy(IStrategy):
                 if not follow_mode:
                     self.model.bridge.data_drawer.save_drawer_to_disk()
                 else:
-                    self.model.bridge.data_drawer.save_follower_dict_to_dist()
+                    self.model.bridge.data_drawer.save_follower_dict_to_disk()
         else:
             if pair_dict[pair]['prediction' + entry_tag] > 0:
                 roi_price = abs(trade_candle['prediction'])
@@ -261,7 +261,7 @@ class FreqaiExampleStrategy(IStrategy):
                     if not follow_mode:
                         self.model.bridge.data_drawer.save_drawer_to_disk()
                     else:
-                        self.model.bridge.data_drawer.save_follower_dict_to_dist()
+                        self.model.bridge.data_drawer.save_follower_dict_to_disk()
 
         roi_price = abs(trade_candle['prediction'])
         roi_time = self.max_roi_time_long.value
@@ -295,7 +295,7 @@ class FreqaiExampleStrategy(IStrategy):
             if not follow_mode:
                 self.model.bridge.data_drawer.save_drawer_to_disk()
             else:
-                self.model.bridge.data_drawer.save_follower_dict_to_dist()
+                self.model.bridge.data_drawer.save_follower_dict_to_disk()
 
         return True
 

From e6c5e737a263ced74c4889a3ed5e23ab90214c0b Mon Sep 17 00:00:00 2001
From: Robert Caulk <rob.caulk@gmail.com>
Date: Mon, 6 Jun 2022 16:24:32 -0600
Subject: [PATCH 07/14] Fix other bugs

---
 freqtrade/templates/FreqaiExampleStrategy.py | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py
index 21dd4a39b..4775b1554 100644
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -28,7 +28,7 @@ class FreqaiExampleStrategy(IStrategy):
     canonical freqtrade configuration file under config['freqai'].
     """
 
-    minimal_roi = {"0": 0.01, "240": -1}
+    minimal_roi = {"0": 0.1, "240": -1}
 
     plot_config = {
         "main_plot": {},
@@ -43,7 +43,7 @@ class FreqaiExampleStrategy(IStrategy):
         },
     }
 
-    process_only_new_candles = False
+    process_only_new_candles = True
     stoploss = -0.05
     use_exit_signal = True
     startup_candle_count: int = 300
@@ -245,7 +245,7 @@ class FreqaiExampleStrategy(IStrategy):
 
         entry_tag = trade.enter_tag
 
-        if 'prediction' + entry_tag not in pair_dict[pair]:
+        if 'prediction' + entry_tag not in pair_dict[pair] or pair_dict[pair]['prediction' + entry_tag] > 0::
             with self.model.bridge.lock:
                 pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['prediction'])
                 if not follow_mode:
@@ -253,15 +253,8 @@ class FreqaiExampleStrategy(IStrategy):
                 else:
                     self.model.bridge.data_drawer.save_follower_dict_to_disk()
         else:
-            if pair_dict[pair]['prediction' + entry_tag] > 0:
-                roi_price = abs(trade_candle['prediction'])
-            else:
-                with self.model.bridge.lock:
-                    pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['prediction'])
-                    if not follow_mode:
-                        self.model.bridge.data_drawer.save_drawer_to_disk()
-                    else:
-                        self.model.bridge.data_drawer.save_follower_dict_to_disk()
+            roi_price = abs(trade_candle['prediction'])
+
 
         roi_price = abs(trade_candle['prediction'])
         roi_time = self.max_roi_time_long.value

From 3c2e314ee54a8ed00d80fc69ce3232f5a4b25404 Mon Sep 17 00:00:00 2001
From: Robert Caulk <rob.caulk@gmail.com>
Date: Mon, 6 Jun 2022 16:26:07 -0600
Subject: [PATCH 08/14] Fix bugs

---
 freqtrade/templates/FreqaiExampleStrategy.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py
index 4775b1554..1aaf63dcf 100644
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -252,11 +252,8 @@ class FreqaiExampleStrategy(IStrategy):
                     self.model.bridge.data_drawer.save_drawer_to_disk()
                 else:
                     self.model.bridge.data_drawer.save_follower_dict_to_disk()
-        else:
-            roi_price = abs(trade_candle['prediction'])
 
-
-        roi_price = abs(trade_candle['prediction'])
+        roi_price = pair_dict[pair]['prediction' + entry_tag]
         roi_time = self.max_roi_time_long.value
 
         roi_decay = roi_price * (1 - ((current_time - trade.open_date_utc).seconds) /

From 4b26b6aaec9f4b397c42bfbba44186cbe545eaf0 Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Tue, 7 Jun 2022 00:54:18 +0200
Subject: [PATCH 09/14] add lock to any historic data access

---
 freqtrade/freqai/data_kitchen.py     | 14 ++++++++------
 freqtrade/freqai/freqai_interface.py |  5 +++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 4f68326fc..32ea0e6ac 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -858,14 +858,16 @@ class FreqaiDataKitchen:
         dataframe: DataFrame = strategy provided dataframe
         """
 
-        history_data = self.data_drawer.historic_data
+        with self.data_drawer.history_lock:
+            history_data = self.data_drawer.historic_data
 
-        for pair in self.all_pairs:
-            for tf in self.freqai_config.get('timeframes'):
-                lh = len(history_data[pair][tf].index)
-                history_data[pair][tf].loc[lh] = strategy.dp.get_pair_dataframe(pair, tf).iloc[-1]
+            for pair in self.all_pairs:
+                for tf in self.freqai_config.get('timeframes'):
+                    lh = len(history_data[pair][tf].index)
+                    history_data[pair][tf].loc[lh] = strategy.dp.get_pair_dataframe(pair,
+                                                                                    tf).iloc[-1]
 
-        logger.info(f'Length of history data {len(history_data[pair][tf])}')
+            logger.info(f'Length of history data {len(history_data[pair][tf])}')
 
     def set_all_pairs(self) -> None:
 
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index e192f9b0a..e74f2b92d 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -405,8 +405,9 @@ class IFreqaiModel(ABC):
         # dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
         # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
         #                                                           metadata)
-        corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
-                                                                           metadata)
+        with self.data_drawer.history_lock:
+            corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
+                                                                               metadata)
 
         # protecting from common benign errors associated with grabbing new data from exchange:
         try:

From cab8f517b4137aa8914c18771d44e6b022c47d88 Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Tue, 7 Jun 2022 01:07:30 +0200
Subject: [PATCH 10/14] add lock to datadrawer

---
 freqtrade/freqai/data_drawer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py
index 740998caf..0fb399b58 100644
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -5,6 +5,7 @@ import json
 import logging
 import re
 import shutil
+import threading
 from pathlib import Path
 from typing import Any, Dict, Tuple
 
@@ -44,6 +45,7 @@ class FreqaiDataDrawer:
             self.create_follower_dict()
         self.load_drawer_from_disk()
         self.training_queue: Dict[str, int] = {}
+        self.history_lock = threading.Lock()
         # self.create_training_queue(pair_whitelist)
 
     def load_drawer_from_disk(self):

From 15d049cffe8e1b5462978424bc7501d714cdfb8e Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Tue, 7 Jun 2022 19:49:20 +0200
Subject: [PATCH 11/14] detect if upper tf candles are new or not, append if
 so. Correct the epoch for candle update check

---
 freqtrade/freqai/data_kitchen.py              | 63 +++++++++++--------
 freqtrade/freqai/freqai_interface.py          | 15 ++---
 .../CatboostPredictionModel.py                |  2 +-
 3 files changed, 44 insertions(+), 36 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 32ea0e6ac..fafeda49d 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -297,7 +297,7 @@ class FreqaiDataKitchen:
             )
             if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live:
                 logger.warning(
-                    f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100} percent'
+                    f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.2f} percent'
                     ' of training data dropped due to NaNs, model may perform inconsistent'
                     'with expectations'
                 )
@@ -538,9 +538,10 @@ class FreqaiDataKitchen:
         for prediction confidence in the Dissimilarity Index
         """
         logger.info("computing average mean distance for all training points")
-        pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1)
+        tc = self.freqai_config.get('model_training_parameters', {}).get('thread_count', -1)
+        pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=tc)
         avg_mean_dist = pairwise.mean(axis=1).mean()
-        logger.info("avg_mean_dist %s", avg_mean_dist)
+        logger.info(f'avg_mean_dist {avg_mean_dist:.2f}')
 
         return avg_mean_dist
 
@@ -668,7 +669,8 @@ class FreqaiDataKitchen:
 
         self.full_predictions = np.append(self.full_predictions, predictions)
         self.full_do_predict = np.append(self.full_do_predict, do_predict)
-        self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
+        if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
+            self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
         self.full_target_mean = np.append(self.full_target_mean, target_mean)
         self.full_target_std = np.append(self.full_target_std, target_std)
 
@@ -683,7 +685,8 @@ class FreqaiDataKitchen:
         filler = np.zeros(len_dataframe - len(self.full_predictions))  # startup_candle_count
         self.full_predictions = np.append(filler, self.full_predictions)
         self.full_do_predict = np.append(filler, self.full_do_predict)
-        self.full_DI_values = np.append(filler, self.full_DI_values)
+        if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
+            self.full_DI_values = np.append(filler, self.full_DI_values)
         self.full_target_mean = np.append(filler, self.full_target_mean)
         self.full_target_std = np.append(filler, self.full_target_std)
 
@@ -728,7 +731,7 @@ class FreqaiDataKitchen:
         # find the max indicator length required
         max_timeframe_chars = self.freqai_config.get('timeframes')[-1]
         max_period = self.freqai_config.get('feature_parameters', {}).get(
-                                            'indicator_max_period', 20)
+                                            'indicator_max_period', 50)
         additional_seconds = 0
         if max_timeframe_chars[-1] == 'd':
             additional_seconds = max_period * SECONDS_IN_DAY * int(max_timeframe_chars[-2])
@@ -863,9 +866,17 @@ class FreqaiDataKitchen:
 
             for pair in self.all_pairs:
                 for tf in self.freqai_config.get('timeframes'):
-                    lh = len(history_data[pair][tf].index)
-                    history_data[pair][tf].loc[lh] = strategy.dp.get_pair_dataframe(pair,
-                                                                                    tf).iloc[-1]
+                    # check if newest candle is already appended
+                    if (
+                         str(history_data[pair][tf].iloc[-1]['date']) ==
+                         str(strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]['date'].iloc[-1])
+                         ):
+                        continue
+                    history_data[pair][tf] = pd.concat(
+                                            [history_data[pair][tf],
+                                             strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]],
+                                            ignore_index=True, axis=0
+                                                )
 
             logger.info(f'Length of history data {len(history_data[pair][tf])}')
 
@@ -908,23 +919,25 @@ class FreqaiDataKitchen:
         for training according to user defined train_period
         metadata: dict = strategy furnished pair metadata
         """
-        corr_dataframes: Dict[Any, Any] = {}
-        base_dataframes: Dict[Any, Any] = {}
-        historic_data = self.data_drawer.historic_data
-        pairs = self.freqai_config.get('corr_pairlist', [])
+        with self.data_drawer.history_lock:
+            corr_dataframes: Dict[Any, Any] = {}
+            base_dataframes: Dict[Any, Any] = {}
+            historic_data = self.data_drawer.historic_data
+            pairs = self.freqai_config.get('corr_pairlist', [])
 
-        for tf in self.freqai_config.get('timeframes'):
-            base_dataframes[tf] = self.slice_dataframe(
-                                                       timerange,
-                                                       historic_data[metadata['pair']][tf]
-                                                       )
-            if pairs:
-                for p in pairs:
-                    if metadata['pair'] in p:
-                        continue  # dont repeat anything from whitelist
-                    if p not in corr_dataframes:
-                        corr_dataframes[p] = {}
-                    corr_dataframes[p][tf] = self.slice_dataframe(timerange, historic_data[p][tf])
+            for tf in self.freqai_config.get('timeframes'):
+                base_dataframes[tf] = self.slice_dataframe(
+                                                        timerange,
+                                                        historic_data[metadata['pair']][tf]
+                                                        )
+                if pairs:
+                    for p in pairs:
+                        if metadata['pair'] in p:
+                            continue  # dont repeat anything from whitelist
+                        if p not in corr_dataframes:
+                            corr_dataframes[p] = {}
+                        corr_dataframes[p][tf] = self.slice_dataframe(timerange,
+                                                                      historic_data[p][tf])
 
         return corr_dataframes, base_dataframes
 
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index e74f2b92d..86f873f3a 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -216,12 +216,9 @@ class IFreqaiModel(ABC):
 
         # append the historic data once per round
         if (self.data_drawer.historic_data and
-                self.update_historic_data >= len(self.config.get('exchange', '')
-                                                 .get('pair_whitelist'))):
+                self.config.get('exchange', '').get('pair_whitelist').index(metadata['pair']) == 1):
             dh.update_historic_data(strategy)
-            self.update_historic_data = 1
-        else:
-            self.update_historic_data += 1
+            logger.info(f'Updating historic data on pair {metadata["pair"]}')
 
         # if trainable, check if model needs training, if so compute new timerange,
         # then save model and metadata.
@@ -405,9 +402,9 @@ class IFreqaiModel(ABC):
         # dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
         # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
         #                                                           metadata)
-        with self.data_drawer.history_lock:
-            corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
-                                                                               metadata)
+
+        corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
+                                                                           metadata)
 
         # protecting from common benign errors associated with grabbing new data from exchange:
         try:
@@ -419,7 +416,6 @@ class IFreqaiModel(ABC):
 
         except Exception as err:
             logger.exception(err)
-            # self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
             self.training_on_separate_thread = False
             self.retrain = False
             return
@@ -428,7 +424,6 @@ class IFreqaiModel(ABC):
             model = self.train(unfiltered_dataframe, metadata, dh)
         except ValueError:
             logger.warning('Value error encountered during training')
-            # self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
             self.training_on_separate_thread = False
             self.retrain = False
             return
diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
index 3cea0eb9b..519109213 100644
--- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
@@ -59,7 +59,7 @@ class CatboostPredictionModel(IFreqaiModel):
         :model: Trained model which can be used to inference (self.predict)
         """
 
-        logger.info('--------------------Starting training'
+        logger.info('--------------------Starting training '
                     f'{metadata["pair"]} --------------------')
 
         # create the full feature list based on user config info

From f8f25e36efd63e42041bceb7ec3ddcb2b1ab1c4f Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Tue, 7 Jun 2022 19:54:45 +0200
Subject: [PATCH 12/14] update example config/strat

---
 config_examples/config_freqai_futures.example.json | 10 +++++-----
 freqtrade/templates/FreqaiExampleStrategy.py       |  9 +++++----
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/config_examples/config_freqai_futures.example.json b/config_examples/config_freqai_futures.example.json
index 30eb6fc3e..e5207a906 100644
--- a/config_examples/config_freqai_futures.example.json
+++ b/config_examples/config_freqai_futures.example.json
@@ -66,8 +66,8 @@
             "1h"
         ],
         "train_period": 20,
-        "backtest_period": 2,
-        "identifier": "example2",
+        "backtest_period": 0.001,
+        "identifier": "constant_retrain_live",
         "live_trained_timestamp": 0,
         "corr_pairlist": [
             "BTC/USDT:USDT",
@@ -76,20 +76,20 @@
         "feature_parameters": {
             "period": 20,
             "shift": 2,
-            "DI_threshold": 0,
+            "DI_threshold": 0.9,
             "weight_factor": 0.9,
             "principal_component_analysis": false,
             "use_SVM_to_remove_outliers": true,
             "stratify": 0,
             "indicator_max_period": 20,
-            "indicator_periods": [10, 20, 30]
+            "indicator_periods": [10, 20]
         },
         "data_split_parameters": {
             "test_size": 0.33,
             "random_state": 1
         },
         "model_training_parameters": {
-            "n_estimators": 200,
+            "n_estimators": 1000,
             "task_type": "CPU"
         }
     },
diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py
index 1aaf63dcf..608f24239 100644
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -47,7 +47,7 @@ class FreqaiExampleStrategy(IStrategy):
     stoploss = -0.05
     use_exit_signal = True
     startup_candle_count: int = 300
-    can_short = False
+    can_short = True
 
     linear_roi_offset = DecimalParameter(0.00, 0.02, default=0.005, space='sell',
                                          optimize=False, load=True)
@@ -178,8 +178,8 @@ class FreqaiExampleStrategy(IStrategy):
         # each training period.
         dataframe = self.model.bridge.start(dataframe, metadata, self)
 
-        dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"]
-        dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"]
+        dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.25
+        dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1.25
         return dataframe
 
     def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
@@ -245,7 +245,8 @@ class FreqaiExampleStrategy(IStrategy):
 
         entry_tag = trade.enter_tag
 
-        if 'prediction' + entry_tag not in pair_dict[pair] or pair_dict[pair]['prediction' + entry_tag] > 0::
+        if ('prediction' + entry_tag not in pair_dict[pair] or
+                pair_dict[pair]['prediction' + entry_tag] > 0):
             with self.model.bridge.lock:
                 pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['prediction'])
                 if not follow_mode:

From 66800c7a45fb69a70d5e6542a462697cc80b41a5 Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Tue, 7 Jun 2022 20:24:23 +0200
Subject: [PATCH 13/14] ensure newest candles are always appended

---
 freqtrade/freqai/freqai_interface.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 86f873f3a..1f194860d 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -215,8 +215,7 @@ class IFreqaiModel(ABC):
             return dh
 
         # append the historic data once per round
-        if (self.data_drawer.historic_data and
-                self.config.get('exchange', '').get('pair_whitelist').index(metadata['pair']) == 1):
+        if self.data_drawer.historic_data:
             dh.update_historic_data(strategy)
             logger.info(f'Updating historic data on pair {metadata["pair"]}')
 

From d9b79d94e46fab456e4a0699ad372f2839a29b33 Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Tue, 7 Jun 2022 20:57:10 +0200
Subject: [PATCH 14/14] increase candle update flexibility to allow long
 sequential trainings that may last more than one candle

---
 freqtrade/freqai/data_kitchen.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index fafeda49d..11c9142a6 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -866,15 +866,22 @@ class FreqaiDataKitchen:
 
             for pair in self.all_pairs:
                 for tf in self.freqai_config.get('timeframes'):
+
                     # check if newest candle is already appended
+                    df_dp = strategy.dp.get_pair_dataframe(pair, tf)
                     if (
                          str(history_data[pair][tf].iloc[-1]['date']) ==
-                         str(strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]['date'].iloc[-1])
+                         str(df_dp.iloc[-1:]['date'].iloc[-1])
                          ):
                         continue
+
+                    index = df_dp.loc[
+                                df_dp['date'] ==
+                                history_data[pair][tf].iloc[-1]['date']
+                                ].index[0] + 1
                     history_data[pair][tf] = pd.concat(
                                             [history_data[pair][tf],
-                                             strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]],
+                                             strategy.dp.get_pair_dataframe(pair, tf).iloc[index:]],
                                             ignore_index=True, axis=0
                                                 )