improve model youth by constantly scanning pairs in dry/live and always training new models. Fix bug in DI return values

2022-06-17 16:06:51 +02:00
parent 61040c9f8e f631ae911b
commit 5e914d5756
7 changed files with 203 additions and 118 deletions
@@ -452,6 +452,24 @@ config:

 which will automatically purge all models older than the two most recently trained ones.

+## Defining model expirations
+
+During dry/live, FreqAI trains each pair sequentially (on separate threads/GPU from the main
+Freqtrade bot). This means there is always an age discrepancy between models. If a user is training
+on 50 pairs, and each pair requires 5 minutes to train, the oldest model will be over 4 hours old.
+This may be undesirable if the characteristic time scale (i.e. the target trade duration) for a
+strategy is much less than 4 hours. The user can decide to only make trade entries if the model is
+less than a certain number of hours old by setting `expiration_hours` in the config file:
+
+```json
+    "freqai": {
+        "expiration_hours": 0.5,
+    }
+```
+
+In the present example, the user will only allow predictions on models that are less than half an
+hour old.
+
 <!-- ## Dynamic target expectation

 The labels used for model training have a unique statistical distribution for each separate model training. 
@@ -30,6 +30,7 @@ class FreqaiDataDrawer:
    def __init__(self, full_path: Path, config: dict, follow_mode: bool = False):

        self.config = config
+        self.freqai_info = config.get('freqai', {})
        # dictionary holding all pair metadata necessary to load in from disk
        self.pair_dict: Dict[str, Any] = {}
        # dictionary holding all actively inferenced models in memory given a model filename
@@ -107,7 +108,7 @@ class FreqaiDataDrawer:
        if isinstance(object, np.generic):
            return object.item()

-    def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool, bool]:
+    def get_pair_dict_info(self, pair: str) -> Tuple[str, int, bool, bool]:
        """
        Locate and load existing model metadata from persistent storage. If not located,
        create a new one and append the current pair to it and prepare it for its first
@@ -120,22 +121,22 @@ class FreqaiDataDrawer:
        coin_first: bool = If the coin is fresh without metadata
        return_null_array: bool = Follower could not find pair metadata
        """
-        pair_in_dict = self.pair_dict.get(metadata['pair'])
-        data_path_set = self.pair_dict.get(metadata['pair'], {}).get('data_path', None)
+        pair_in_dict = self.pair_dict.get(pair)
+        data_path_set = self.pair_dict.get(pair, {}).get('data_path', None)
        return_null_array = False

        if pair_in_dict:
-            model_filename = self.pair_dict[metadata['pair']]['model_filename']
-            trained_timestamp = self.pair_dict[metadata['pair']]['trained_timestamp']
-            coin_first = self.pair_dict[metadata['pair']]['first']
+            model_filename = self.pair_dict[pair]['model_filename']
+            trained_timestamp = self.pair_dict[pair]['trained_timestamp']
+            coin_first = self.pair_dict[pair]['first']
        elif not self.follow_mode:
-            self.pair_dict[metadata['pair']] = {}
-            model_filename = self.pair_dict[metadata['pair']]['model_filename'] = ''
-            coin_first = self.pair_dict[metadata['pair']]['first'] = True
-            trained_timestamp = self.pair_dict[metadata['pair']]['trained_timestamp'] = 0
+            self.pair_dict[pair] = {}
+            model_filename = self.pair_dict[pair]['model_filename'] = ''
+            coin_first = self.pair_dict[pair]['first'] = True
+            trained_timestamp = self.pair_dict[pair]['trained_timestamp'] = 0

        if not data_path_set and self.follow_mode:
-            logger.warning(f'Follower could not find current pair {metadata["pair"]} in '
+            logger.warning(f'Follower could not find current pair {pair} in '
                           f'pair_dictionary at path {self.full_path}, sending null values '
                           'back to strategy.')
            return_null_array = True
@@ -168,7 +169,8 @@ class FreqaiDataDrawer:
        self.model_return_values[pair]['do_preds'] = dh.full_do_predict
        self.model_return_values[pair]['target_mean'] = dh.full_target_mean
        self.model_return_values[pair]['target_std'] = dh.full_target_std
-        self.model_return_values[pair]['DI_values'] = dh.full_DI_values
+        if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
+            self.model_return_values[pair]['DI_values'] = dh.full_DI_values

        # if not self.follow_mode:
        #     self.save_model_return_values_to_disk()
@@ -189,8 +191,9 @@ class FreqaiDataDrawer:

        self.model_return_values[pair]['predictions'] = np.append(
            self.model_return_values[pair]['predictions'][i:], predictions[-1])
-        self.model_return_values[pair]['DI_values'] = np.append(
-            self.model_return_values[pair]['DI_values'][i:], dh.DI_values[-1])
+        if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
+            self.model_return_values[pair]['DI_values'] = np.append(
+                self.model_return_values[pair]['DI_values'][i:], dh.DI_values[-1])
        self.model_return_values[pair]['do_preds'] = np.append(
            self.model_return_values[pair]['do_preds'][i:], do_preds[-1])
        self.model_return_values[pair]['target_mean'] = np.append(
@@ -202,8 +205,9 @@ class FreqaiDataDrawer:
            prepend = np.zeros(abs(length_difference) - 1)
            self.model_return_values[pair]['predictions'] = np.insert(
                self.model_return_values[pair]['predictions'], 0, prepend)
-            self.model_return_values[pair]['DI_values'] = np.insert(
-                self.model_return_values[pair]['DI_values'], 0, prepend)
+            if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
+                self.model_return_values[pair]['DI_values'] = np.insert(
+                    self.model_return_values[pair]['DI_values'], 0, prepend)
            self.model_return_values[pair]['do_preds'] = np.insert(
                self.model_return_values[pair]['do_preds'], 0, prepend)
            self.model_return_values[pair]['target_mean'] = np.insert(
@@ -215,7 +219,8 @@ class FreqaiDataDrawer:
        dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds'])
        dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean'])
        dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std'])
-        dh.full_DI_values = copy.deepcopy(self.model_return_values[pair]['DI_values'])
+        if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
+            dh.full_DI_values = copy.deepcopy(self.model_return_values[pair]['DI_values'])

        # if not self.follow_mode:
        #     self.save_model_return_values_to_disk()
@@ -227,7 +232,8 @@ class FreqaiDataDrawer:
        dh.full_do_predict = np.zeros(len_df)
        dh.full_target_mean = np.zeros(len_df)
        dh.full_target_std = np.zeros(len_df)
-        dh.full_DI_values = np.zeros(len_df)
+        if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
+            dh.full_DI_values = np.zeros(len_df)

    def purge_old_models(self) -> None:

@@ -71,7 +71,7 @@ class FreqaiDataKitchen:

        self.data_drawer = data_drawer

-    def set_paths(self, metadata: dict, trained_timestamp: int = None,) -> None:
+    def set_paths(self, pair: str, trained_timestamp: int = None,) -> None:
        """
        Set the paths to the data for the present coin/botloop
        :params:
@@ -83,7 +83,7 @@ class FreqaiDataKitchen:
                              str(self.freqai_config.get('identifier')))

        self.data_path = Path(self.full_path / str("sub-train" + "-" +
-                                                   metadata['pair'].split("/")[0] +
+                                                   pair.split("/")[0] +
                                                   str(trained_timestamp)))

        return
@@ -151,6 +151,9 @@ class FreqaiDataKitchen:
        :model: User trained model which can be inferenced for new predictions
        """

+        if not self.data_drawer.pair_dict[coin]['model_filename']:
+            return None
+
        if self.live:
            self.model_filename = self.data_drawer.pair_dict[coin]['model_filename']
            self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path'])
@@ -670,7 +673,7 @@ class FreqaiDataKitchen:

        self.full_predictions = np.append(self.full_predictions, predictions)
        self.full_do_predict = np.append(self.full_do_predict, do_predict)
-        if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
+        if self.freqai_config.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
            self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
        self.full_target_mean = np.append(self.full_target_mean, target_mean)
        self.full_target_std = np.append(self.full_target_std, target_std)
@@ -686,7 +689,7 @@ class FreqaiDataKitchen:
        filler = np.zeros(len_dataframe - len(self.full_predictions))  # startup_candle_count
        self.full_predictions = np.append(filler, self.full_predictions)
        self.full_do_predict = np.append(filler, self.full_do_predict)
-        if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
+        if self.freqai_config.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
            self.full_DI_values = np.append(filler, self.full_DI_values)
        self.full_target_mean = np.append(filler, self.full_target_mean)
        self.full_target_std = np.append(filler, self.full_target_std)
@@ -722,6 +725,12 @@ class FreqaiDataKitchen:

        return full_timerange

+    def check_if_model_expired(self, trained_timestamp: int) -> bool:
+        """
+        Check whether the model trained at `trained_timestamp` is older than the user
+        configured `expiration_hours`.
+        :params:
+        trained_timestamp: int = epoch timestamp (seconds) of the model's last training
+        :returns:
+        bool = True if the model is older than `expiration_hours`. Always False when
+        `expiration_hours` is unset or <= 0, i.e. models never expire unless configured.
+        """
+        max_time = self.freqai_config.get('expiration_hours', 0)
+        if max_time <= 0:
+            # expiry is opt-in: without this guard the default of 0 expires every model
+            return False
+        time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
+        elapsed_time = (time - trained_timestamp) / 3600  # hours
+        return elapsed_time > max_time
+
    def check_if_new_training_required(self, trained_timestamp: int) -> Tuple[bool,
                                                                              TimeRange, TimeRange]:

@@ -747,7 +756,7 @@ class FreqaiDataKitchen:
                logger.warning('FreqAI could not detect max timeframe and therefore may not '
                               'download the proper amount of data for training')

-        logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days')
+        # logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days')

        if trained_timestamp != 0:
            elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
@@ -796,12 +805,12 @@ class FreqaiDataKitchen:

        return retrain, trained_timerange, data_load_timerange

-    def set_new_model_names(self, metadata: dict, trained_timerange: TimeRange):
+    def set_new_model_names(self, pair: str, trained_timerange: TimeRange):

-        coin, _ = metadata['pair'].split("/")
+        coin, _ = pair.split("/")
        # set the new data_path
        self.data_path = Path(self.full_path / str("sub-train" + "-" +
-                              metadata['pair'].split("/")[0] +
+                              pair.split("/")[0] +
                              str(int(trained_timerange.stopts))))

        self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts))
@@ -870,6 +879,8 @@ class FreqaiDataKitchen:

                    # check if newest candle is already appended
                    df_dp = strategy.dp.get_pair_dataframe(pair, tf)
+                    if len(df_dp.index) == 0:
+                        continue
                    if (
                         str(history_data[pair][tf].iloc[-1]['date']) ==
                         str(df_dp.iloc[-1:]['date'].iloc[-1])
@@ -918,7 +929,7 @@ class FreqaiDataKitchen:
                                                            'trading_mode', 'spot'))

    def get_base_and_corr_dataframes(self, timerange: TimeRange,
-                                     metadata: dict) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
+                                     pair: str) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
        """
        Searches through our historic_data in memory and returns the dataframes relevant
        to the present pair.
@@ -927,6 +938,7 @@ class FreqaiDataKitchen:
        for training according to user defined train_period
        metadata: dict = strategy furnished pair metadata
        """
+
        with self.data_drawer.history_lock:
            corr_dataframes: Dict[Any, Any] = {}
            base_dataframes: Dict[Any, Any] = {}
@@ -936,11 +948,11 @@ class FreqaiDataKitchen:
            for tf in self.freqai_config.get('timeframes'):
                base_dataframes[tf] = self.slice_dataframe(
                                                        timerange,
-                                                        historic_data[metadata['pair']][tf]
+                                                        historic_data[pair][tf]
                                                        )
                if pairs:
                    for p in pairs:
-                        if metadata['pair'] in p:
+                        if pair in p:
                            continue  # dont repeat anything from whitelist
                        if p not in corr_dataframes:
                            corr_dataframes[p] = {}
@@ -984,7 +996,7 @@ class FreqaiDataKitchen:
    def use_strategy_to_populate_indicators(self, strategy: IStrategy,
                                            corr_dataframes: dict,
                                            base_dataframes: dict,
-                                            metadata: dict) -> DataFrame:
+                                            pair: str) -> DataFrame:
        """
        Use the user defined strategy for populating indicators during
        retrain
@@ -1003,19 +1015,19 @@ class FreqaiDataKitchen:

        for tf in self.freqai_config.get("timeframes"):
            dataframe = strategy.populate_any_indicators(
-                                                         metadata,
-                                                         metadata['pair'],
+                                                         pair,
+                                                         pair,
                                                         dataframe.copy(),
                                                         tf,
                                                         base_dataframes[tf],
-                                                         coin=metadata['pair'].split("/")[0] + "-"
+                                                         coin=pair.split("/")[0] + "-"
                                                         )
            if pairs:
                for i in pairs:
-                    if metadata['pair'] in i:
+                    if pair in i:
                        continue  # dont repeat anything from whitelist
                    dataframe = strategy.populate_any_indicators(
-                                                                 metadata,
+                                                                 pair,
                                                                 i,
                                                                 dataframe.copy(),
                                                                 tf,
@@ -8,6 +8,7 @@ from abc import ABC, abstractmethod
 from pathlib import Path
 from typing import Any, Dict, Tuple

+import numpy as np
 import numpy.typing as npt
 import pandas as pd
 from pandas import DataFrame
@@ -63,6 +64,9 @@ class IFreqaiModel(ABC):
        self.lock = threading.Lock()
        self.follow_mode = self.freqai_info.get('follow_mode', False)
        self.identifier = self.freqai_info.get('identifier', 'no_id_provided')
+        self.scanning = False
+        self.ready_to_scan = False
+        self.first = True

    def assert_config(self, config: Dict[str, Any]) -> None:

@@ -91,17 +95,9 @@ class IFreqaiModel(ABC):
        # and we keep the flag self.training_on_separate_threaad in the current object to help
        # determine what the current pair will do
        if self.live:
-            if (not self.training_on_separate_thread and
-                    self.data_drawer.pair_dict[metadata['pair']]['priority'] == 1):
-
-                self.dh = FreqaiDataKitchen(self.config, self.data_drawer,
-                                            self.live, metadata["pair"])
-                dh = self.start_live(dataframe, metadata, strategy, self.dh, trainable=True)
-            else:
-                # we will have at max 2 separate instances of the kitchen at once.
-                self.dh_fg = FreqaiDataKitchen(self.config, self.data_drawer,
-                                               self.live, metadata["pair"])
-                dh = self.start_live(dataframe, metadata, strategy, self.dh_fg, trainable=False)
+            self.dh = FreqaiDataKitchen(self.config, self.data_drawer,
+                                        self.live, metadata["pair"])
+            dh = self.start_live(dataframe, metadata, strategy, self.dh)

        # For backtesting, each pair enters and then gets trained for each window along the
        # sliding window defined by "train_period" (training window) and "backtest_period"
@@ -114,8 +110,37 @@ class IFreqaiModel(ABC):
            dh = self.start_backtesting(dataframe, metadata, self.dh)

        return self.return_values(dataframe, dh)
-        # return (dh.full_predictions, dh.full_do_predict,
-        #         dh.full_target_mean, dh.full_target_std)
+
+    @threaded
+    def start_scanning(self, strategy: IStrategy) -> None:
+        """
+        Background loop for dry/live that keeps models young. It sweeps the exchange pair
+        whitelist forever and, for the pair holding spot 1 in the training queue, trains a
+        new model whenever a retrain is due or no model file exists on disk.
+        :params:
+        strategy: IStrategy = user defined strategy, forwarded to the training routine
+        """
+        import time  # local import: keeps this method independent of module-level imports
+
+        while 1:
+            # pause between sweeps so an idle training queue does not pin a CPU core
+            time.sleep(1)
+            # `or []` guards against a missing/None whitelist, which is not iterable
+            for pair in self.config.get('exchange', {}).get('pair_whitelist') or []:
+                if self.data_drawer.pair_dict[pair]['priority'] != 1:
+                    continue
+                dh = FreqaiDataKitchen(self.config, self.data_drawer,
+                                       self.live, pair)
+
+                (model_filename,
+                 trained_timestamp,
+                 _, _) = self.data_drawer.get_pair_dict_info(pair)
+
+                # point the kitchen at the currently stored model to see if it is on disk
+                dh.set_paths(pair, trained_timestamp)
+                file_exists = self.model_exists(pair,
+                                                dh,
+                                                trained_timestamp=trained_timestamp,
+                                                model_filename=model_filename,
+                                                scanning=True)
+
+                (retrain,
+                 new_trained_timerange,
+                 data_load_timerange) = dh.check_if_new_training_required(trained_timestamp)
+                # re-point the kitchen at the location of the model about to be trained
+                dh.set_paths(pair, new_trained_timerange.stopts)
+
+                if retrain or not file_exists:
+                    # a failed training must not kill the only scanning thread, otherwise
+                    # no model would ever be retrained again (self.scanning stays True)
+                    try:
+                        self.train_model_in_series(new_trained_timerange, pair,
+                                                   strategy, dh, data_load_timerange)
+                    except Exception as err:
+                        logger.warning(f'Training {pair} raised exception '
+                                       f'{err.__class__.__name__}. Message: {err}, skipping.')

    def start_backtesting(self, dataframe: DataFrame, metadata: dict,
                          dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
@@ -142,7 +167,7 @@ class IFreqaiModel(ABC):
        for tr_train, tr_backtest in zip(
            dh.training_timeranges, dh.backtesting_timeranges
        ):
-            (_, _, _, _) = self.data_drawer.get_pair_dict_info(metadata)
+            (_, _, _, _) = self.data_drawer.get_pair_dict_info(metadata['pair'])
            gc.collect()
            dh.data = {}  # clean the pair specific data between training window sliding
            self.training_timerange = tr_train
@@ -163,7 +188,7 @@ class IFreqaiModel(ABC):
                                    str(int(trained_timestamp.stopts))))
            if not self.model_exists(metadata["pair"], dh,
                                     trained_timestamp=trained_timestamp.stopts):
-                self.model = self.train(dataframe_train, metadata, dh)
+                self.model = self.train(dataframe_train, metadata['pair'], dh)
                self.data_drawer.pair_dict[metadata['pair']][
                                        'trained_timestamp'] = trained_timestamp.stopts
                dh.set_new_model_names(metadata, trained_timestamp)
@@ -184,8 +209,7 @@ class IFreqaiModel(ABC):
        return dh

    def start_live(self, dataframe: DataFrame, metadata: dict,
-                   strategy: IStrategy, dh: FreqaiDataKitchen,
-                   trainable: bool) -> FreqaiDataKitchen:
+                   strategy: IStrategy, dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
        """
        The main broad execution for dry/live. This function will check if a retraining should be
        performed, and if so, retrain and reset the model.
@@ -203,10 +227,10 @@ class IFreqaiModel(ABC):
            self.data_drawer.update_follower_metadata()

        # get the model metadata associated with the current pair
-        (model_filename,
+        (_,
         trained_timestamp,
-         coin_first,
-         return_null_array) = self.data_drawer.get_pair_dict_info(metadata)
+         _,
+         return_null_array) = self.data_drawer.get_pair_dict_info(metadata['pair'])

        # if the metadata doesnt exist, the follower returns null arrays to strategy
        if self.follow_mode and return_null_array:
@@ -222,20 +246,18 @@ class IFreqaiModel(ABC):
        # if trainable, check if model needs training, if so compute new timerange,
        # then save model and metadata.
        # if not trainable, load existing data
-        if (trainable or coin_first) and not self.follow_mode:
-            file_exists = False
+        if not self.follow_mode:
+            # if trained_timestamp != 0:  # historical model available
+            #     dh.set_paths(metadata['pair'], trained_timestamp)
+            #     # file_exists = self.model_exists(metadata['pair'],
+            #     #                                 dh,
+            #     #                                 trained_timestamp=trained_timestamp,
+            #     #                                 model_filename=model_filename)

-            if trained_timestamp != 0:  # historical model available
-                dh.set_paths(metadata, trained_timestamp)
-                file_exists = self.model_exists(metadata['pair'],
-                                                dh,
-                                                trained_timestamp=trained_timestamp,
-                                                model_filename=model_filename)
-
-            (self.retrain,
+            (_,
             new_trained_timerange,
             data_load_timerange) = dh.check_if_new_training_required(trained_timestamp)
-            dh.set_paths(metadata, new_trained_timerange.stopts)
+            dh.set_paths(metadata['pair'], new_trained_timerange.stopts)

            # download candle history if it is not already in memory
            if not self.data_drawer.historic_data:
@@ -245,21 +267,22 @@ class IFreqaiModel(ABC):
                dh.download_all_data_for_training(data_load_timerange)
                dh.load_all_pair_histories(data_load_timerange)

-            # train the model on the trained timerange
-            if self.retrain or not file_exists:
-                if coin_first:
-                    self.train_model_in_series(new_trained_timerange, metadata,
-                                               strategy, dh, data_load_timerange)
-                else:
-                    self.training_on_separate_thread = True  # acts like a lock
-                    self.retrain_model_on_separate_thread(new_trained_timerange,
-                                                          metadata, strategy,
-                                                          dh, data_load_timerange)
+            if not self.scanning:
+                self.scanning = True
+                self.start_scanning(strategy)

-        elif not trainable and not self.follow_mode:
-            logger.info(f'{metadata["pair"]} holds spot '
-                        f'{self.data_drawer.pair_dict[metadata["pair"]]["priority"]} '
-                        'in training queue')
+            # train the model on the trained timerange
+            # if coin_first and not self.scanning:
+            #     self.train_model_in_series(new_trained_timerange, metadata['pair'],
+            #                                strategy, dh, data_load_timerange)
+            # elif not coin_first and not self.scanning:
+            #     self.scanning = True
+            #     self.start_scanning(strategy)
+
+        # elif not trainable and not self.follow_mode:
+        #     logger.info(f'{metadata["pair"]} holds spot '
+        #                 f'{self.data_drawer.pair_dict[metadata["pair"]]["priority"]} '
+        #                 'in training queue')
        elif self.follow_mode:
            dh.set_paths(metadata, trained_timestamp)
            logger.info('FreqAI instance set to follow_mode, finding existing pair'
@@ -268,25 +291,46 @@ class IFreqaiModel(ABC):
        # load the model and associated data into the data kitchen
        self.model = dh.load_data(coin=metadata['pair'])

+        if not self.model:
+            logger.warning('No model ready, returning null values to strategy.')
+            self.data_drawer.return_null_values_to_strategy(dataframe, dh)
+            return dh
+
        # ensure user is feeding the correct indicators to the model
        self.check_if_feature_list_matches_strategy(dataframe, dh)

+        self.build_strategy_return_arrays(dataframe, dh, metadata['pair'], trained_timestamp)
+
+        return dh
+
+    def build_strategy_return_arrays(self, dataframe: DataFrame,
+                                     dh: FreqaiDataKitchen, pair: str,
+                                     trained_timestamp: int) -> None:
+        """
+        Build or extend the arrays that are handed back to the strategy for `pair`.
+        If the pair has no stored return values yet, predict on the full dataframe and
+        store the initial return values. Otherwise append a single new entry: null values
+        (prediction == 0, do_predict == 2) when the model is expired, else the prediction
+        made on the last two candles.
+        :params:
+        dataframe: DataFrame = strategy provided dataframe to predict on
+        dh: FreqaiDataKitchen = data kitchen for the current pair
+        pair: str = current pair
+        trained_timestamp: int = training timestamp of the model, used for the expiry check
+        """
+
        # hold the historical predictions in memory so we are sending back
        # correct array to strategy FIXME currently broken, but only affecting
        # Frequi reporting. Signals remain unaffeted.
-        if metadata['pair'] not in self.data_drawer.model_return_values:
+
+        if pair not in self.data_drawer.model_return_values:
            preds, do_preds = self.predict(dataframe, dh)
            dh.append_predictions(preds, do_preds, len(dataframe))
            dh.fill_predictions(len(dataframe))
-            self.data_drawer.set_initial_return_values(metadata['pair'], dh)
+            self.data_drawer.set_initial_return_values(pair, dh)
+            return
+        # use the kitchen passed in rather than self.dh so the method only depends on its args
+        elif dh.check_if_model_expired(trained_timestamp):
+            preds, do_preds, dh.DI_values = np.zeros(2), np.ones(2) * 2, np.zeros(2)
+            logger.warning('Model expired, returning null values to strategy. Strategy '
+                           'construction should take care to consider this event with '
+                           'prediction == 0 and do_predict == 2')
        else:
            preds, do_preds = self.predict(dataframe.iloc[-2:], dh)
-            self.data_drawer.append_model_predictions(metadata['pair'], preds, do_preds,
-                                                      dh.data["target_mean"],
-                                                      dh.data["target_std"], dh,
-                                                      len(dataframe))

-        return dh
+        self.data_drawer.append_model_predictions(pair, preds, do_preds,
+                                                  dh.data["target_mean"],
+                                                  dh.data["target_std"],
+                                                  dh,
+                                                  len(dataframe))
+        return

    def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
                                               dh: FreqaiDataKitchen) -> None:
@@ -357,7 +401,7 @@ class IFreqaiModel(ABC):
        #     dh.remove_outliers(predict=True)  # creates dropped index

    def model_exists(self, pair: str, dh: FreqaiDataKitchen, trained_timestamp: int = None,
-                     model_filename: str = '') -> bool:
+                     model_filename: str = '', scanning: bool = False) -> bool:
        """
        Given a pair and path, check if a model already exists
        :param pair: pair e.g. BTC/USD
@@ -370,9 +414,9 @@ class IFreqaiModel(ABC):

        path_to_modelfile = Path(dh.data_path / str(model_filename + "_model.joblib"))
        file_exists = path_to_modelfile.is_file()
-        if file_exists:
+        if file_exists and not scanning:
            logger.info("Found model at %s", dh.data_path / dh.model_filename)
-        else:
+        elif not scanning:
            logger.info("Could not find model at %s", dh.data_path / dh.model_filename)
        return file_exists

@@ -382,7 +426,7 @@ class IFreqaiModel(ABC):
                              str(self.freqai_info.get('identifier')))

    @threaded
-    def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict,
+    def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, pair: str,
                                         strategy: IStrategy, dh: FreqaiDataKitchen,
                                         data_load_timerange: TimeRange):
        """
@@ -403,14 +447,14 @@ class IFreqaiModel(ABC):
        #                                                           metadata)

        corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
-                                                                           metadata)
+                                                                           pair)

        # protecting from common benign errors associated with grabbing new data from exchange:
        try:
            unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy,
                                                                          corr_dataframes,
                                                                          base_dataframes,
-                                                                          metadata)
+                                                                          pair)
            unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe)

        except Exception as err:
@@ -420,25 +464,25 @@ class IFreqaiModel(ABC):
            return

        try:
-            model = self.train(unfiltered_dataframe, metadata, dh)
+            model = self.train(unfiltered_dataframe, pair, dh)
        except ValueError:
            logger.warning('Value error encountered during training')
            self.training_on_separate_thread = False
            self.retrain = False
            return

-        self.data_drawer.pair_dict[metadata['pair']][
+        self.data_drawer.pair_dict[pair][
                                   'trained_timestamp'] = new_trained_timerange.stopts
-        dh.set_new_model_names(metadata, new_trained_timerange)
+        dh.set_new_model_names(pair, new_trained_timerange)
        # logger.info('Training queue'
        #             f'{sorted(self.data_drawer.pair_dict.items(), key=lambda item: item[1])}')

-        if self.data_drawer.pair_dict[metadata['pair']]['priority'] == 1:
+        if self.data_drawer.pair_dict[pair]['priority'] == 1:
            with self.lock:
-                self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
-        dh.save_data(model, coin=metadata['pair'])
-        self.training_on_separate_thread = False
-        self.retrain = False
+                self.data_drawer.pair_to_end_of_training_queue(pair)
+        dh.save_data(model, coin=pair)
+        # self.training_on_separate_thread = False
+        # self.retrain = False

        # each time we finish a training, we check the directory to purge old models.
        if self.freqai_info.get('purge_old_models', False):
@@ -446,7 +490,7 @@ class IFreqaiModel(ABC):

        return

-    def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,
+    def train_model_in_series(self, new_trained_timerange: TimeRange, pair: str,
                              strategy: IStrategy, dh: FreqaiDataKitchen,
                              data_load_timerange: TimeRange):
        """
@@ -464,29 +508,35 @@ class IFreqaiModel(ABC):
        # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
        #                                                          metadata)
        corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
-                                                                           metadata)
+                                                                           pair)

        unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy,
                                                                      corr_dataframes,
                                                                      base_dataframes,
-                                                                      metadata)
+                                                                      pair)

        unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe)

-        model = self.train(unfiltered_dataframe, metadata, dh)
+        model = self.train(unfiltered_dataframe, pair, dh)

-        self.data_drawer.pair_dict[metadata['pair']][
+        self.data_drawer.pair_dict[pair][
                                   'trained_timestamp'] = new_trained_timerange.stopts
-        dh.set_new_model_names(metadata, new_trained_timerange)
-        self.data_drawer.pair_dict[metadata['pair']]['first'] = False
-        dh.save_data(model, coin=metadata['pair'])
-        self.retrain = False
+        dh.set_new_model_names(pair, new_trained_timerange)
+        self.data_drawer.pair_dict[pair]['first'] = False
+        if self.data_drawer.pair_dict[pair]['priority'] == 1 and self.scanning:
+            with self.lock:
+                self.data_drawer.pair_to_end_of_training_queue(pair)
+        dh.save_data(model, coin=pair)
+
+        if self.freqai_info.get('purge_old_models', False):
+            self.data_drawer.purge_old_models()
+        # self.retrain = False

    # Following methods which are overridden by user made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example.

    @abstractmethod
-    def train(self, unfiltered_dataframe: DataFrame, metadata: dict, dh: FreqaiDataKitchen) -> Any:
+    def train(self, unfiltered_dataframe: DataFrame, pair: str, dh: FreqaiDataKitchen) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datahandler
        for storing, saving, loading, and analyzing the data.
@@ -24,7 +24,7 @@ class CatboostPredictionModel(IFreqaiModel):
        dataframe["do_predict"] = dh.full_do_predict
        dataframe["target_mean"] = dh.full_target_mean
        dataframe["target_std"] = dh.full_target_std
-        if self.freqai_info.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
+        if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
            dataframe["DI"] = dh.full_DI_values

        return dataframe
@@ -48,7 +48,7 @@ class CatboostPredictionModel(IFreqaiModel):
        return dataframe["s"]

    def train(self, unfiltered_dataframe: DataFrame,
-              metadata: dict, dh: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]:
+              pair: str, dh: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datakitchen
        for storing, saving, loading, and analyzing the data.
@@ -60,7 +60,7 @@ class CatboostPredictionModel(IFreqaiModel):
        """

        logger.info('--------------------Starting training '
-                    f'{metadata["pair"]} --------------------')
+                    f'{pair} --------------------')

        # create the full feature list based on user config info
        dh.training_features_list = dh.find_features(unfiltered_dataframe)
@@ -88,7 +88,7 @@ class CatboostPredictionModel(IFreqaiModel):

        model = self.fit(data_dictionary)

-        logger.info(f'--------------------done training {metadata["pair"]}--------------------')
+        logger.info(f'--------------------done training {pair}--------------------')

        return model

@@ -532,7 +532,7 @@ class IStrategy(ABC, HyperStrategyMixin):
        """
        return None

-    def populate_any_indicators(self, metadata: dict, pair: str, df: DataFrame, tf: str,
+    def populate_any_indicators(self, basepair: str, pair: str, df: DataFrame, tf: str,
                                informative: DataFrame = None, coin: str = "") -> DataFrame:
        """
        Function designed to automatically generate, name and merge features
@@ -116,7 +116,6 @@ class FreqaiExampleStrategy(IStrategy):
                    informative[f"{coin}bb_upperband-period_{t}"]
                    - informative[f"{coin}bb_lowerband-period_{t}"]
                ) / informative[f"{coin}bb_middleband-period_{t}"]
-
                informative[f"%-{coin}close-bb_lower-period_{t}"] = (
                    informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"]
                )
@@ -153,7 +152,7 @@ class FreqaiExampleStrategy(IStrategy):
            # Add generalized indicators here (because in live, it will call this
            # function to populate indicators during training). Notice how we ensure not to
            # add them multiple times
-            if pair == metadata["pair"] and tf == self.timeframe:
+            if pair == self.freqai_info['corr_pairlist'][0] and tf == self.timeframe:
                df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
                df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25