Merge branch 'feat/freqai' of https://github.com/lolongcovas/freqtrade into feat/freqai

2022-07-11 22:53:37 +02:00 · 2022-07-11 22:53:37 +02:00 · bb1ab0be2a
commit bb1ab0be2a
parent 7c850a4c2d 8ce6b18318
14 changed files with 489 additions and 510 deletions
--- a/config_examples/config_freqai_futures.example.json
+++ b/config_examples/config_freqai_futures.example.json
@ -15,7 +15,7 @@
        "exit": 30
    },
    "exchange": {
-        "name": "okx",
+        "name": "binance",
        "key": "",
        "secret": "",
        "ccxt_config": {
@ -26,15 +26,8 @@
            "rateLimit": 200
        },
        "pair_whitelist": [
-            "AGLD/USDT:USDT",
-            "1INCH/USDT:USDT",
-            "AAVE/USDT:USDT",
-            "ALGO/USDT:USDT",
-            "ALPHA/USDT:USDT",
-            "API3/USDT:USDT",
-            "AVAX/USDT:USDT",
-            "AXS/USDT:USDT",
-            "BCH/USDT:USDT"
+            "1INCH/USDT",
+            "ALGO/USDT"
        ],
        "pair_blacklist": []
    },
@ -60,29 +53,31 @@
    ],
    "freqai": {
        "startup_candles": 10000,
-        "timeframes": [
-            "3m",
-            "15m",
-            "1h"
-        ],
-        "train_period": 20,
-        "backtest_period": 0.001,
-        "identifier": "constant_retrain_live",
+        "purge_old_models": true,
+        "train_period_days": 15,
+        "backtest_period_days": 7,
+        "live_retrain_hours": 0,
+        "identifier": "uniqe-id6",
        "live_trained_timestamp": 0,
-        "corr_pairlist": [
-            "BTC/USDT:USDT",
-            "ETH/USDT:USDT"
-        ],
        "feature_parameters": {
-            "period": 20,
-            "shift": 2,
+            "include_timeframes": [
+                "3m",
+                "15m",
+                "1h"
+            ],
+            "include_corr_pairlist": [
+                "BTC/USDT",
+                "ETH/USDT"
+            ],
+            "label_period_candles": 20,
+            "include_shifted_candles": 2,
            "DI_threshold": 0.9,
            "weight_factor": 0.9,
            "principal_component_analysis": false,
            "use_SVM_to_remove_outliers": true,
-            "stratify": 0,
-            "indicator_max_period": 20,
-            "indicator_periods": [10, 20]
+            "stratify_training_data": 0,
+            "indicator_max_period_candles": 20,
+            "indicator_periods_candles": [10, 20]
        },
        "data_split_parameters": {
            "test_size": 0.33,
--- a/config_examples/config_freqai_spot.example.json
+++ b/config_examples/config_freqai_spot.example.json
@ -52,32 +52,31 @@
    ],
    "freqai": {
        "startup_candles": 10000,
-        "timeframes": [
-            "5m",
-            "15m",
-            "4h"
-        ],
-        "train_period": 30,
-        "backtest_period": 7,
+
+        "train_period_days": 30,
+        "backtest_period_days": 7,
+        "live_retrain_hours": 1,
        "identifier": "example",
        "live_trained_timestamp": 0,
-        "corr_pairlist": [
-            "BTC/USDT",
-            "ETH/USDT",
-            "DOT/USDT",
-            "MATIC/USDT",
-            "SOL/USDT"
-        ],
        "feature_parameters": {
-            "period": 500,
-            "shift": 1,
+            "include_timeframes": [
+                "5m",
+                "15m",
+                "4h"
+            ],
+            "include_corr_pairlist": [
+                "BTC/USDT",
+                "ETH/USDT"
+            ],
+            "label_period_candles": 500,
+            "include_shifted_candles": 1,
            "DI_threshold": 0,
            "weight_factor": 0.9,
            "principal_component_analysis": false,
            "use_SVM_to_remove_outliers": false,
-            "stratify": 0,
-            "indicator_max_period": 50,
-            "indicator_periods": [10, 20]
+            "stratify_training_data": 0,
+            "indicator_max_period_candles": 50,
+            "indicator_periods_candles": [10, 20]
        },
        "data_split_parameters": {
            "test_size": 0.33,
--- a/docs/freqai.md
+++ b/docs/freqai.md
@ -77,19 +77,22 @@ config setup includes:
 ```json
    "freqai": {
                "startup_candles": 10000,
-                "timeframes" : ["5m","15m","4h"],
-                "train_period" : 30,
-                "backtest_period" : 7,
+                "purge_old_models": true,
+                "train_period_days" : 30,
+                "backtest_period_days" : 7,
                "identifier" :  "unique-id",
-                "corr_pairlist": [
-                        "ETH/USD",
-                        "LINK/USD",
-                        "BNB/USD"
-                ],
                "feature_parameters" : {
-                        "period": 24,
-                        "shift": 2,
-                        "weight_factor":  0,
+                    "include_timeframes" : ["5m","15m","4h"],
+                    "include_corr_pairlist": [
+                            "ETH/USD",
+                            "LINK/USD",
+                            "BNB/USD"
+                    ],
+                    "label_period_candles": 24,
+                    "include_shifted_candles": 2,
+                    "weight_factor":  0,
+                    "indicator_max_period_candles": 20,
+                    "indicator_periods_candles": [10, 20]
                },
                "data_split_parameters" : {
                    "test_size": 0.25,
@ -106,40 +109,99 @@ config setup includes:

 ### Building the feature set

-!! slightly out of date, please refer to templates/FreqaiExampleStrategy.py for updated method !!
 Features are added by the user inside the `populate_any_indicators()` method of the strategy 
-by prepending indicators with `%`:
+by prepending indicators with `%`, and labels are added by prepending `&`. There are some important
+components/structures that the user *must* include when building their feature set. As shown below,
+`with self.model.bridge.lock:` must be used to ensure thread safety - especially when using third 
+party libraries for indicator construction such as TA-lib. Another structure to consider is the 
+location of the labels at the bottom of the example function (below `if set_generalized_indicators:`).
+This is where the user will add single features and labels to their feature set to avoid duplication from 
+various configuration parameters which multiply the feature set such as `include_timeframes`.

 ```python
-    def populate_any_indicators(self, metadata, pair, df, tf, informative=None, coin=""):
-        informative['%-' + coin + "rsi"] = ta.RSI(informative, timeperiod=14)
-        informative['%-' + coin + "mfi"] = ta.MFI(informative, timeperiod=25)
-        informative['%-' + coin + "adx"] = ta.ADX(informative, window=20)
-        bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=14, stds=2.2)
-        informative[coin + "bb_lowerband"] = bollinger["lower"]
-        informative[coin + "bb_middleband"] = bollinger["mid"]
-        informative[coin + "bb_upperband"] = bollinger["upper"]
-        informative['%-' + coin + "bb_width"] = (
-            informative[coin + "bb_upperband"] - informative[coin + "bb_lowerband"]
-        ) / informative[coin + "bb_middleband"]
+    def populate_any_indicators(
+        self, metadata, pair, df, tf, informative=None, coin="", set_generalized_indicators=False
+    ):
+        """
+        Function designed to automatically generate, name and merge features
+        from user indicated timeframes in the configuration file. User controls the indicators
+        passed to the training/prediction by prepending indicators with `'%-' + coin `
+        (see convention below). I.e. user should not prepend any supporting metrics
+        (e.g. bb_lowerband below) with % unless they explicitly want to pass that metric to the
+        model.
+        :params:
+        :pair: pair to be used as informative
+        :df: strategy dataframe which will receive merges from informatives
+        :tf: timeframe of the dataframe which will modify the feature names
+        :informative: the dataframe associated with the informative pair
+        :coin: the name of the coin which will modify the feature names.
+        """

+        with self.model.bridge.lock:
+            if informative is None:
+                informative = self.dp.get_pair_dataframe(pair, tf)

-        
-        # The following code automatically adds features according to the `shift` parameter passed
-        # in the config. Do not remove
-        indicators = [col for col in informative if col.startswith('%')]
-        for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
-            if n == 0:
-                continue
-            informative_shift = informative[indicators].shift(n)
-            informative_shift = informative_shift.add_suffix("_shift-" + str(n))
-            informative = pd.concat((informative, informative_shift), axis=1)
+            # first loop is automatically duplicating indicators for time periods
+            for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:
+                t = int(t)
+                informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
+                informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
+                informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, window=t)

-        # The following code safely merges into the base timeframe.
-        # Do not remove.
-        df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
-        skip_columns = [(s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]]
-        df = df.drop(columns=skip_columns)
+                bollinger = qtpylib.bollinger_bands(
+                    qtpylib.typical_price(informative), window=t, stds=2.2
+                )
+                informative[f"{coin}bb_lowerband-period_{t}"] = bollinger["lower"]
+                informative[f"{coin}bb_middleband-period_{t}"] = bollinger["mid"]
+                informative[f"{coin}bb_upperband-period_{t}"] = bollinger["upper"]
+
+                informative[f"%-{coin}bb_width-period_{t}"] = (
+                    informative[f"{coin}bb_upperband-period_{t}"]
+                    - informative[f"{coin}bb_lowerband-period_{t}"]
+                ) / informative[f"{coin}bb_middleband-period_{t}"]
+                informative[f"%-{coin}close-bb_lower-period_{t}"] = (
+                    informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"]
+                )
+
+                informative[f"%-{coin}relative_volume-period_{t}"] = (
+                    informative["volume"] / informative["volume"].rolling(t).mean()
+                )
+
+            indicators = [col for col in informative if col.startswith("%")]
+            # This loop duplicates and shifts all indicators to add a sense of recency to data
+            for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1):
+                if n == 0:
+                    continue
+                informative_shift = informative[indicators].shift(n)
+                informative_shift = informative_shift.add_suffix("_shift-" + str(n))
+                informative = pd.concat((informative, informative_shift), axis=1)
+
+            df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
+            skip_columns = [
+                (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]
+            ]
+            df = df.drop(columns=skip_columns)
+
+            # Add generalized indicators here (because in live, it will call this
+            # function to populate indicators during training). Notice how we ensure not to
+            # add them multiple times
+            if set_generalized_indicators:
+                df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
+                df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25
+
+                # user adds targets here by prepending them with &- (see convention below)
+                # If user wishes to use multiple targets, a multioutput prediction model
+                # needs to be used such as templates/CatboostPredictionMultiModel.py
+                df["&-s_close"] = (
+                    df["close"]
+                    .shift(-self.freqai_info["feature_parameters"]["label_period_candles"])
+                    .rolling(self.freqai_info["feature_parameters"]["label_period_candles"])
+                    .mean()
+                    / df["close"]
+                    - 1
+                )
+
+        return df
 ```
 The user of the present example does not want to pass the `bb_lowerband` as a feature to the model, 
 and has therefore not prepended it with `%`. The user does, however, wish to pass `bb_width` to the
@ -153,6 +215,7 @@ a specific pair or timeframe, they should use the following structure inside `po
 ```python
    def populate_any_indicators(self, metadata, pair, df, tf, informative=None, coin=""):

+        ...

        # Add generalized indicators here (because in live, it will call only this function to populate 
        # indicators for retraining). Notice how we ensure not to add them multiple times by associating
@ -160,35 +223,47 @@ a specific pair or timeframe, they should use the following structure inside `po
        if pair == metadata['pair'] and tf == self.timeframe:
            df['%-day_of_week'] = (df["date"].dt.dayofweek + 1) / 7
            df['%-hour_of_day'] = (df['date'].dt.hour + 1) / 25
+
+            # user adds targets here by prepending them with &- (see convention below)
+            # If user wishes to use multiple targets, a multioutput prediction model
+            # needs to be used such as templates/CatboostPredictionMultiModel.py
+            df["&-s_close"] = (
+                df["close"]
+                .shift(-self.freqai_info["feature_parameters"]["label_period_candles"])
+                .rolling(self.freqai_info["feature_parameters"]["label_period_candles"])
+                .mean()
+                / df["close"]
+                - 1
+                )
 ```

 (Please see the example script located in `freqtrade/templates/FreqaiExampleStrategy.py` for a full example of `populate_any_indicators()`)

-The `timeframes` from the example config above are the timeframes of each `populate_any_indicator()`
+The `include_timeframes` from the example config above are the timeframes of each `populate_any_indicator()`
 included metric for inclusion in the feature set. In the present case, the user is asking for the
 `5m`, `15m`, and `4h` timeframes of the `rsi`, `mfi`, `roc`, and `bb_width` to be included
 in the feature set.

 In addition, the user can ask for each of these features to be included from
-informative pairs using the `corr_pairlist`. This means that the present feature
-set will include all the `base_features` on all the `timeframes` for each of
+informative pairs using the `include_corr_pairlist`. This means that the present feature
+set will include all the features from `populate_any_indicators` on all the `include_timeframes` for each of
 `ETH/USD`, `LINK/USD`, and `BNB/USD`.

-`shift` is another user controlled parameter which indicates the number of previous
-candles to include in the present feature set. In other words, `shift: 2`, tells
+`include_shifted_candles` is another user controlled parameter which indicates the number of previous
+candles to include in the present feature set. In other words, `include_shifted_candles: 2`, tells
 Freqai to include the the past 2 candles for each of the features included
 in the dataset.

 In total, the number of features the present user has created is:_

-no. `timeframes` * no. `base_features` * no. `corr_pairlist` * no. `shift`_
-3 * 3 * 3 * 2 = 54._
+length of `include_timeframes` * no. features in `populate_any_indicators()` * length of `include_corr_pairlist` * no. `include_shifted_candles` * length of `indicator_periods_candles`_
+3 * 3 * 3 * 2 * 2 = 108._

 ### Deciding the sliding training window and backtesting duration

 Users define the backtesting timerange with the typical `--timerange` parameter in the user
-configuration file. `train_period` is the duration of the sliding training window, while
-`backtest_period` is the sliding backtesting window, both in number of days (backtest_period can be
+configuration file. `train_period_days` is the duration of the sliding training window, while
+`backtest_period_days` is the sliding backtesting window, both in number of days (backtest_period_days can be
 a float to indicate sub daily retraining in live/dry mode). In the present example,
 the user is asking Freqai to use a training period of 30 days and backtest the subsequent 7 days.
 This means that if the user sets `--timerange 20210501-20210701`, 
@ -203,9 +278,9 @@ the user must manually enter the required number of `startup_candles` in the con
 is used to increase the available data to FreqAI and should be sufficient to enable all indicators 
 to be NaN free at the beginning of the first training timerange. This boils down to identifying the 
 highest timeframe (`4h` in present example)  and the longest indicator period (25 in present example)
-and adding this to the `train_period`. The units need to be in the base candle time frame:_
+and adding this to the `train_period_days`. The units need to be in the base candle time frame:_

-`startup_candles` = ( 4 hours * 25 max period * 60 minutes/hour + 30 day train_period * 1440 minutes per day ) / 5 min (base time frame) = 1488.
+`startup_candles` = ( 4 hours * 25 max period * 60 minutes/hour + 30 day train_period_days * 1440 minutes per day ) / 5 min (base time frame) = 9840.

 !!! Note
    In dry/live, this is all precomputed and handled automatically. Thus, `startup_candle` has no influence on dry/live.
@ -242,9 +317,9 @@ The Freqai strategy requires the user to include the following lines of code in

    def informative_pairs(self):
        whitelist_pairs = self.dp.current_whitelist()
-        corr_pairs = self.config["freqai"]["corr_pairlist"]
+        corr_pairs = self.config["freqai"]["feature_parameters"]["include_corr_pairlist"]
        informative_pairs = []
-        for tf in self.config["freqai"]["timeframes"]:
+        for tf in self.config["freqai"]["feature_parameters"]["include_timeframes"]:
            for pair in whitelist_pairs:
                informative_pairs.append((pair, tf))
            for pair in corr_pairs:
@ -257,21 +332,37 @@ The Freqai strategy requires the user to include the following lines of code in
        self.model = CustomModel(self.config)

    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
-            self.freqai_info = self.config['freqai']

-            # the following loops are necessary for building the features 
-            # indicated by the user in the configuration file.
-            for tf in self.freqai_info['timeframes']:
-                    for i in self.freqai_info['corr_pairlist']:
-                    dataframe = self.populate_any_indicators(i,
-                                    dataframe.copy(), tf, coin=i.split("/")[0]+'-')
+        self.freqai_info = self.config["freqai"]
+        self.pair = metadata["pair"]
+        sgi = True
+        # the following loops are necessary for building the features
+        # indicated by the user in the configuration file.
+        # All indicators must be populated by populate_any_indicators() for live functionality
+        # to work correctly.
+        for tf in self.freqai_info["feature_parameters"]["include_timeframes"]:
+            dataframe = self.populate_any_indicators(
+                metadata,
+                self.pair,
+                dataframe.copy(),
+                tf,
+                coin=self.pair.split("/")[0] + "-",
+                set_generalized_indicators=sgi,
+            )
+            sgi = False
+            for pair in self.freqai_info["feature_parameters"]["include_corr_pairlist"]:
+                if metadata["pair"] in pair:
+                    continue  # do not include whitelisted pair twice if it is in corr_pairlist
+                dataframe = self.populate_any_indicators(
+                    metadata, pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
+                )

-            # the model will return 4 values, its prediction, an indication of whether or not the prediction 
-            # should be accepted, the target mean/std values from the labels used during each training period.
-            (dataframe['prediction'], dataframe['do_predict'], 
-                    dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata)
+        # the model will return 4 values, its prediction, an indication of whether or not the
+        # prediction should be accepted, the target mean/std values from the labels used during
+        # each training period.
+        dataframe = self.model.bridge.start(dataframe, metadata, self)

-            return dataframe
+        return dataframe
 ```

 The user should also include `populate_any_indicators()` from `templates/FreqaiExampleStrategy.py` which builds 
@ -280,8 +371,7 @@ the feature set with a proper naming convention for the IFreqaiModel to use late
 ### Building an IFreqaiModel

 Freqai has an example prediction model based on the popular `Catboost` regression (`freqai/prediction_models/CatboostPredictionModel.py`). However, users can customize and create
-their own prediction models using the `IFreqaiModel` class. Users are encouraged to inherit `train()`, `predict()`, 
-and `make_labels()` to let them customize various aspects of their training procedures.
+their own prediction models using the `IFreqaiModel` class. Users are encouraged to inherit `train()` and `predict()` to let them customize various aspects of their training procedures.

 ### Running the model live

@ -293,10 +383,10 @@ freqtrade trade --strategy FreqaiExampleStrategy --config config_freqai.example.

 By default, Freqai will not find find any existing models and will start by training a new one 
 given the user configuration settings. Following training, it will use that model to predict for the
-duration of `backtest_period`. After a full `backtest_period` has elapsed, Freqai will auto retrain 
+duration of `backtest_period_days`. After a full `backtest_period_days` has elapsed, Freqai will auto retrain 
 a new model, and begin making predictions with the updated model. FreqAI backtesting and live both
-permit the user to use fractional days (i.e. 0.1) in the `backtest_period`, which enables more frequent 
-retraining. But the user should be careful that using a fractional `backtest_period` with a large
+permit the user to use fractional days (i.e. 0.1) in the `backtest_period_days`, which enables more frequent 
+retraining. But the user should be careful that using a fractional `backtest_period_days` with a large
 `--timerange` in backtesting will result in a huge amount of required trainings/models.

 If the user wishes to start dry/live from a backtested saved model, the user only needs to reuse
@ -305,12 +395,14 @@ the same `identifier` parameter
 ```json
    "freqai": {
        "identifier": "example",
+        "live_retrain_hours": 1
    }
 ```

 In this case, although Freqai will initiate with a 
 pre-trained model, it will still check to see how much time has elapsed since the model was trained,
-and if a full `backtest_period` has elapsed since the end of the loaded model, FreqAI will self retrain.
+and if a full `live_retrain_hours` has elapsed since the end of the loaded model, FreqAI will self retrain. 
+It is common to want constant retraining, in which case the user should set `live_retrain_hours` to 0.

 ## Data anylsis techniques

@ -412,7 +504,7 @@ The user can stratify the training/testing data using:
 ```json
    "freqai": {
        "feature_parameters" : {
-            "stratify": 3
+            "stratify_training_data": 3
        }
    }
 ```
@ -470,6 +562,28 @@ a certain number of hours in age by setting the `expiration_hours` in the config
 In the present example, the user will only allow predictions on models that are less than 1/2 hours
 old. 

+## Choosing the calculation of the `target_roi`
+
+As shown in `templates/FreqaiExampleStrategy.py`, the `target_roi` is based on two metrics computed
+by FreqAI: `label_mean` and `label_std`. These are the statistics associated with the labels used 
+*during the most recent training*. This allows the model to know what magnitude of a target to be 
+expecting since it is directly stemming from the training data. By default, FreqAI computes this based 
+on training data and it assumes the labels are Gaussian distributed. These are big assumptions 
+that the user should consider when creating their labels. If the user wants to consider the population
+of *historical predictions* for creating the dynamic target instead of the trained labels, the user 
+can do so by setting `fit_live_prediction_candles` to the number of historical prediction candles
+the user wishes to use to generate target statistics. 
+
+```json
+    "freqai": {
+        "fit_live_prediction_candles": 300
+    }
+```
+
+If the user sets this value, FreqAI will initially use the predictions from the training data set
+and then subsequently begin introducing real prediction data as it is generated. FreqAI will save 
+this historical data to be reloaded if the user stops and restarts with the same `identifier`.
+
 <!-- ## Dynamic target expectation

 The labels used for model training have a unique statistical distribution for each separate model training. 
--- a/freqtrade/configuration/config_validation.py
+++ b/freqtrade/configuration/config_validation.py
@ -174,9 +174,10 @@ def _validate_freqai(conf: Dict[str, Any]) -> None:

    for param in constants.SCHEMA_FREQAI_REQUIRED:
        if param not in conf.get('freqai', {}):
-            raise OperationalException(
-                f'{param} not found in Freqai config'
-            )
+            if param not in conf.get('freqai', {}).get('feature_parameters', {}):
+                raise OperationalException(
+                    f'{param} not found in Freqai config'
+                )


 def _validate_whitelist(conf: Dict[str, Any]) -> None:
--- a/freqtrade/constants.py
+++ b/freqtrade/constants.py
@ -477,16 +477,16 @@ CONF_SCHEMA = {
        "freqai": {
            "type": "object",
            "properties": {
-                "timeframes": {"type": "list"},
-                "train_period": {"type": "integer", "default": 0},
-                "backtest_period": {"type": "float", "default": 7},
+                "train_period_days": {"type": "integer", "default": 0},
+                "backtest_period_days": {"type": "float", "default": 7},
                "identifier": {"type": "str", "default": "example"},
-                "corr_pairlist": {"type": "list"},
                "feature_parameters": {
                    "type": "object",
                    "properties": {
-                        "period": {"type": "integer"},
-                        "shift": {"type": "integer", "default": 0},
+                        "include_corr_pairlist": {"type": "list"},
+                        "include_timeframes": {"type": "list"},
+                        "label_period_candles": {"type": "integer"},
+                        "include_shifted_candles": {"type": "integer", "default": 0},
                        "DI_threshold": {"type": "float", "default": 0},
                        "weight_factor": {"type": "number", "default": 0},
                        "principal_component_analysis": {"type": "boolean", "default": False},
@ -555,11 +555,11 @@ SCHEMA_MINIMAL_REQUIRED = [
 ]

 SCHEMA_FREQAI_REQUIRED = [
-    'timeframes',
-    'train_period',
-    'backtest_period',
+    'include_timeframes',
+    'train_period_days',
+    'backtest_period_days',
    'identifier',
-    'corr_pairlist',
+    'include_corr_pairlist',
    'feature_parameters',
    'data_split_parameters',
    'model_training_parameters'
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@ -38,12 +38,14 @@ class FreqaiDataDrawer:
        self.model_return_values: Dict[str, Any] = {}
        self.pair_data_dict: Dict[str, Any] = {}
        self.historic_data: Dict[str, Any] = {}
+        self.historic_predictions: Dict[str, Any] = {}
        self.follower_dict: Dict[str, Any] = {}
        self.full_path = full_path
        self.follow_mode = follow_mode
        if follow_mode:
            self.create_follower_dict()
        self.load_drawer_from_disk()
+        self.load_historic_predictions_from_disk()
        self.training_queue: Dict[str, int] = {}
        self.history_lock = threading.Lock()

@ -68,6 +70,29 @@ class FreqaiDataDrawer:

        return exists

+    def load_historic_predictions_from_disk(self):
+        """
+        Locate previously saved historic predictions and load them into
+        `self.historic_predictions` (the dict written by save_historic_predictions_to_disk()).
+        :returns: exists: bool = whether or not the historic predictions file was located
+        """
+        exists = Path(self.full_path / str("historic_predictions.json")).resolve().exists()
+        if exists:
+            with open(self.full_path / str("historic_predictions.json"), "r") as fp:
+                self.historic_predictions = json.load(fp)  # must not overwrite self.pair_dict
+            logger.info(f"Found existing historic predictions at {self.full_path}, but beware of "
+                        "that statistics may be inaccurate if the bot has been offline for "
+                        "an extended period of time.")
+        elif not self.follow_mode:
+            logger.info("Could not find existing historic_predictions, starting from scratch")
+        else:
+            logger.warning(
+                f"Follower could not find historic predictions at {self.full_path} "
+                "sending null values back to strategy"
+            )
+
+        return exists
+
    def save_drawer_to_disk(self):
        """
        Save data drawer full of all pair model metadata in present model folder.
@ -75,6 +100,13 @@ class FreqaiDataDrawer:
        with open(self.full_path / str("pair_dictionary.json"), "w") as fp:
            json.dump(self.pair_dict, fp, default=self.np_encoder)

+    def save_historic_predictions_to_disk(self):
+        """
+        Save self.historic_predictions as json in the present model folder.
+        """
+        with open(self.full_path / str("historic_predictions.json"), "w") as fp:
+            json.dump(self.historic_predictions, fp, default=self.np_encoder)
+
    def save_follower_dict_to_disk(self):
        """
        Save follower dictionary to disk (used by strategy for persistent prediction targets)
@ -176,16 +208,18 @@ class FreqaiDataDrawer:
        historical candles, and also stores historical predictions despite retrainings (so stored
        predictions are true predictions, not just inferencing on trained data)
        """
-        self.model_return_values[pair] = pd.DataFrame()
+        # dynamic df returned to strategy and plotted in frequi
+        mrv_df = self.model_return_values[pair] = pd.DataFrame()
+
        for label in dk.label_list:
-            self.model_return_values[pair][label] = pred_df[label]
-            self.model_return_values[pair][f"{label}_mean"] = dk.data["labels_mean"][label]
-            self.model_return_values[pair][f"{label}_std"] = dk.data["labels_std"][label]
+            mrv_df[label] = pred_df[label]
+            mrv_df[f"{label}_mean"] = dk.data["labels_mean"][label]
+            mrv_df[f"{label}_std"] = dk.data["labels_std"][label]

        if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
-            self.model_return_values[pair]["DI_values"] = dk.DI_values
+            mrv_df["DI_values"] = dk.DI_values

-        self.model_return_values[pair]["do_predict"] = do_preds
+        mrv_df["do_predict"] = do_preds

    def append_model_predictions(self, pair: str, predictions, do_preds, dk, len_df) -> None:

@ -201,6 +235,13 @@ class FreqaiDataDrawer:
            i = length_difference + 1

        df = self.model_return_values[pair] = self.model_return_values[pair].shift(-i)
+        hp_df = self.historic_predictions[pair]
+
+        # here are some pandas hula hoops to accommodate the possibility of a series
+        # or dataframe depending on the number of labels requested by user
+        nan_df = pd.DataFrame(np.nan, index=hp_df.index[-2:] + 2, columns=hp_df.columns)
+        hp_df = pd.concat([hp_df, nan_df], ignore_index=True, axis=0)
+        hp_df = pd.concat([hp_df, nan_df[-2:-1]], axis=0)

        for label in dk.label_list:
            df[label].iloc[-1] = predictions[label].iloc[-1]
@ -212,6 +253,9 @@ class FreqaiDataDrawer:
        if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
            df["DI_values"].iloc[-1] = dk.DI_values[-1]

+        # append the new predictions to persistent storage
+        hp_df.iloc[-1] = df[label].iloc[-1]
+
        if length_difference < 0:
            prepend_df = pd.DataFrame(
                np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@ -26,6 +26,7 @@ from freqtrade.strategy.interface import IStrategy


 SECONDS_IN_DAY = 86400
+SECONDS_IN_HOUR = 3600

 logger = logging.getLogger(__name__)

@ -59,13 +60,13 @@ class FreqaiDataKitchen:
        self.set_all_pairs()
        if not self.live:
            self.full_timerange = self.create_fulltimerange(
-                self.config["timerange"], self.freqai_config.get("train_period")
+                self.config["timerange"], self.freqai_config.get("train_period_days")
            )

            (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
                self.full_timerange,
-                config["freqai"]["train_period"],
-                config["freqai"]["backtest_period"],
+                config["freqai"]["train_period_days"],
+                config["freqai"]["backtest_period_days"],
            )
        # self.strat_dataframe: DataFrame = strat_dataframe
        self.dd = data_drawer
@ -137,19 +138,6 @@ class FreqaiDataKitchen:
        self.dd.pair_dict[coin]["data_path"] = str(self.data_path)
        self.dd.save_drawer_to_disk()

-        # TODO add a helper function to let user save/load any data they are custom adding. We
-        # do not want them having to edit the default save/load methods here. Below is an example
-        # of what we do NOT want.
-
-        # if self.freqai_config.get('feature_parameters','determine_statistical_distributions'):
-        #     self.data_dictionary["upper_quantiles"].to_pickle(
-        #         save_path / str(self.model_filename + "_upper_quantiles.pkl")
-        #     )
-
-        #     self.data_dictionary["lower_quantiles"].to_pickle(
-        #         save_path / str(self.model_filename + "_lower_quantiles.pkl")
-        #     )
-
        return

    def load_data(self, coin: str = "", keras_model=False) -> Any:
@ -183,22 +171,6 @@ class FreqaiDataKitchen:
            self.data_path / str(self.model_filename + "_trained_df.pkl")
        )

-        # TODO add a helper function to let user save/load any data they are custom adding. We
-        # do not want them having to edit the default save/load methods here. Below is an example
-        # of what we do NOT want.
-
-        # if self.freqai_config.get('feature_parameters','determine_statistical_distributions'):
-        #     self.data_dictionary["upper_quantiles"] = pd.read_pickle(
-        #         self.data_path / str(self.model_filename + "_upper_quantiles.pkl")
-        #     )
-
-        #     self.data_dictionary["lower_quantiles"] = pd.read_pickle(
-        #         self.data_path / str(self.model_filename + "_lower_quantiles.pkl")
-        #     )
-
-        # self.data_path = Path(self.data["data_path"])
-        # self.model_filename = self.data["model_filename"]
-
        # try to access model in memory instead of loading object from disk to save time
        if self.live and self.model_filename in self.dd.model_dictionary:
            model = self.dd.model_dictionary[self.model_filename]
@ -206,7 +178,6 @@ class FreqaiDataKitchen:
            model = load(self.data_path / str(self.model_filename + "_model.joblib"))
        else:
            from tensorflow import keras
-
            model = keras.models.load_model(self.data_path / str(self.model_filename + "_model.h5"))

        if Path(self.data_path / str(self.model_filename + "_svm_model.joblib")).resolve().exists():
@ -234,17 +205,18 @@ class FreqaiDataKitchen:
        :filtered_dataframe: cleaned dataframe ready to be split.
        :labels: cleaned labels ready to be split.
        """
+        feat_dict = self.freqai_config.get("feature_parameters", {})

        weights: npt.ArrayLike
-        if self.freqai_config["feature_parameters"].get("weight_factor", 0) > 0:
+        if feat_dict.get("weight_factor", 0) > 0:
            weights = self.set_weights_higher_recent(len(filtered_dataframe))
        else:
            weights = np.ones(len(filtered_dataframe))

-        if self.freqai_config["feature_parameters"].get("stratify", 0) > 0:
+        if feat_dict.get("stratify_training_data", 0) > 0:
            stratification = np.zeros(len(filtered_dataframe))
            for i in range(1, len(stratification)):
-                if i % self.freqai_config.get("feature_parameters", {}).get("stratify", 0) == 0:
+                if i % feat_dict.get("stratify_training_data", 0) == 0:
                    stratification[i] = 1
        else:
            stratification = None
@ -261,7 +233,6 @@ class FreqaiDataKitchen:
            labels,
            weights,
            stratify=stratification,
-            # shuffle=False,
            **self.config["freqai"]["data_split_parameters"],
        )

@ -274,7 +245,6 @@ class FreqaiDataKitchen:
        unfiltered_dataframe: DataFrame,
        training_feature_list: List,
        label_list: List = list(),
-        # labels: DataFrame = pd.DataFrame(),
        training_filter: bool = True,
    ) -> Tuple[DataFrame, DataFrame]:
        """
@ -439,7 +409,7 @@ class FreqaiDataKitchen:
        bt_split: the backtesting length (days). Specified in user configuration file
        """

-        train_period = train_split * SECONDS_IN_DAY
+        train_period_days = train_split * SECONDS_IN_DAY
        bt_period = bt_split * SECONDS_IN_DAY

        full_timerange = TimeRange.parse_timerange(tr)
@ -460,7 +430,7 @@ class FreqaiDataKitchen:
        while True:
            if not first:
                timerange_train.startts = timerange_train.startts + bt_period
-            timerange_train.stopts = timerange_train.startts + train_period
+            timerange_train.stopts = timerange_train.startts + train_period_days

            first = False
            start = datetime.datetime.utcfromtimestamp(timerange_train.startts)
@ -763,7 +733,7 @@ class FreqaiDataKitchen:

        return

-    def create_fulltimerange(self, backtest_tr: str, backtest_period: int) -> str:
+    def create_fulltimerange(self, backtest_tr: str, backtest_period_days: int) -> str:
        backtest_timerange = TimeRange.parse_timerange(backtest_tr)

        if backtest_timerange.stopts == 0:
@ -771,7 +741,8 @@ class FreqaiDataKitchen:
                datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
            )

-        backtest_timerange.startts = backtest_timerange.startts - backtest_period * SECONDS_IN_DAY
+        backtest_timerange.startts = (backtest_timerange.startts
+                                      - backtest_period_days * SECONDS_IN_DAY)
        start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts)
        stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts)
        full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
@ -817,7 +788,8 @@ class FreqaiDataKitchen:
        data_load_timerange = TimeRange()

        # find the max indicator length required
-        max_timeframe_chars = self.freqai_config.get("timeframes")[-1]
+        max_timeframe_chars = self.freqai_config.get(
+            "feature_parameters", {}).get("include_timeframes")[-1]
        max_period = self.freqai_config.get("feature_parameters", {}).get(
            "indicator_max_period", 50
        )
@ -840,11 +812,11 @@ class FreqaiDataKitchen:
        # logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days')

        if trained_timestamp != 0:
-            elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
-            retrain = elapsed_time > self.freqai_config.get("backtest_period")
+            elapsed_time = (time - trained_timestamp) / SECONDS_IN_HOUR
+            retrain = elapsed_time > self.freqai_config.get("live_retrain_hours", 0)
            if retrain:
                trained_timerange.startts = int(
-                    time - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
+                    time - self.freqai_config.get("train_period_days", 0) * SECONDS_IN_DAY
                )
                trained_timerange.stopts = int(time)
                # we want to load/populate indicators on more data than we plan to train on so
@ -852,19 +824,19 @@ class FreqaiDataKitchen:
                # unless they have data further back in time before the start of the train period
                data_load_timerange.startts = int(
                    time
-                    - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
+                    - self.freqai_config.get("train_period_days", 0) * SECONDS_IN_DAY
                    - additional_seconds
                )
                data_load_timerange.stopts = int(time)
        else:  # user passed no live_trained_timerange in config
            trained_timerange.startts = int(
-                time - self.freqai_config.get("train_period") * SECONDS_IN_DAY
+                time - self.freqai_config.get("train_period_days") * SECONDS_IN_DAY
            )
            trained_timerange.stopts = int(time)

            data_load_timerange.startts = int(
                time
-                - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
+                - self.freqai_config.get("train_period_days", 0) * SECONDS_IN_DAY
                - additional_seconds
            )
            data_load_timerange.stopts = int(time)
@ -930,7 +902,7 @@ class FreqaiDataKitchen:
        refresh_backtest_ohlcv_data(
            exchange,
            pairs=self.all_pairs,
-            timeframes=self.freqai_config.get("timeframes"),
+            timeframes=self.freqai_config.get("feature_parameters", {}).get("include_timeframes"),
            datadir=self.config["datadir"],
            timerange=timerange,
            new_pairs_days=new_pairs_days,
@ -948,12 +920,12 @@ class FreqaiDataKitchen:
        :params:
        dataframe: DataFrame = strategy provided dataframe
        """
-
+        feat_params = self.freqai_config.get("feature_parameters", {})
        with self.dd.history_lock:
            history_data = self.dd.historic_data

            for pair in self.all_pairs:
-                for tf in self.freqai_config.get("timeframes"):
+                for tf in feat_params.get("include_timeframes"):

                    # check if newest candle is already appended
                    df_dp = strategy.dp.get_pair_dataframe(pair, tf)
@ -992,7 +964,8 @@ class FreqaiDataKitchen:

    def set_all_pairs(self) -> None:

-        self.all_pairs = copy.deepcopy(self.freqai_config.get("corr_pairlist", []))
+        self.all_pairs = copy.deepcopy(self.freqai_config.get(
+            'feature_parameters', {}).get('include_corr_pairlist', []))
        for pair in self.config.get("exchange", "").get("pair_whitelist"):
            if pair not in self.all_pairs:
                self.all_pairs.append(pair)
@ -1003,14 +976,14 @@ class FreqaiDataKitchen:
        Only called once upon startup of bot.
        :params:
        timerange: TimeRange = full timerange required to populate all indicators
-        for training according to user defined train_period
+        for training according to user defined train_period_days
        """
        history_data = self.dd.historic_data

        for pair in self.all_pairs:
            if pair not in history_data:
                history_data[pair] = {}
-            for tf in self.freqai_config.get("timeframes"):
+            for tf in self.freqai_config.get("feature_parameters", {}).get("include_timeframes"):
                history_data[pair][tf] = load_pair_history(
                    datadir=self.config["datadir"],
                    timeframe=tf,
@ -1028,7 +1001,7 @@ class FreqaiDataKitchen:
        to the present pair.
        :params:
        timerange: TimeRange = full timerange required to populate all indicators
-        for training according to user defined train_period
+        for training according to user defined train_period_days
        metadata: dict = strategy furnished pair metadata
        """

@ -1036,9 +1009,10 @@ class FreqaiDataKitchen:
            corr_dataframes: Dict[Any, Any] = {}
            base_dataframes: Dict[Any, Any] = {}
            historic_data = self.dd.historic_data
-            pairs = self.freqai_config.get("corr_pairlist", [])
+            pairs = self.freqai_config.get('feature_parameters', {}).get(
+                'include_corr_pairlist', [])

-            for tf in self.freqai_config.get("timeframes"):
+            for tf in self.freqai_config.get("feature_parameters", {}).get("include_timeframes"):
                base_dataframes[tf] = self.slice_dataframe(timerange, historic_data[pair][tf])
                if pairs:
                    for p in pairs:
@ -1057,7 +1031,7 @@ class FreqaiDataKitchen:
    #                                                                               DataFrame]:
    #     corr_dataframes: Dict[Any, Any] = {}
    #     base_dataframes: Dict[Any, Any] = {}
-    #     pairs = self.freqai_config.get('corr_pairlist', [])  # + [metadata['pair']]
+    #     pairs = self.freqai_config.get('include_corr_pairlist', [])  # + [metadata['pair']]
    #     # timerange = TimeRange.parse_timerange(new_timerange)

    #     for tf in self.freqai_config.get('timeframes'):
@ -1101,9 +1075,9 @@ class FreqaiDataKitchen:
        dataframe: DataFrame = dataframe containing populated indicators
        """
        dataframe = base_dataframes[self.config["timeframe"]].copy()
-        pairs = self.freqai_config.get("corr_pairlist", [])
+        pairs = self.freqai_config.get('feature_parameters', {}).get('include_corr_pairlist', [])
        sgi = True
-        for tf in self.freqai_config.get("timeframes"):
+        for tf in self.freqai_config.get("feature_parameters", {}).get("include_timeframes"):
            dataframe = strategy.populate_any_indicators(
                pair,
                pair,
@ -1129,6 +1103,19 @@ class FreqaiDataKitchen:

        return dataframe

+    def fit_live_predictions(self) -> None:
+        """
+        Fit a gaussian distribution to the most recent historic predictions of each label
+        """
+        import scipy as spy
+        num_candles = self.freqai_config.get('fit_live_predictions_candles', 100)
+        self.data["labels_mean"], self.data["labels_std"] = {}, {}
+        for label in self.label_list:
+            f = spy.stats.norm.fit(self.dd.historic_predictions[self.pair][label].tail(num_candles))
+            self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1]
+
+        return
+
    def fit_labels(self) -> None:
        """
        Fit the labels with a gaussian distribution
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@ -1,4 +1,5 @@
 # import contextlib
+import copy
 import datetime
 import gc
 import logging
@ -95,7 +96,7 @@ class IFreqaiModel(ABC):
            dk = self.start_live(dataframe, metadata, strategy, self.dk)

        # For backtesting, each pair enters and then gets trained for each window along the
-        # sliding window defined by "train_period" (training window) and "backtest_period"
+        # sliding window defined by "train_period_days" (training window) and "backtest_period_days"
        # (backtest window, i.e. window immediately following the training window).
        # FreqAI slides the window and sequentially builds the backtesting results before returning
        # the concatenated results for the full backtesting period back to the strategy.
@ -143,11 +144,11 @@ class IFreqaiModel(ABC):
    ) -> FreqaiDataKitchen:
        """
        The main broad execution for backtesting. For backtesting, each pair enters and then gets
-        trained for each window along the sliding window defined by "train_period" (training window)
-        and "backtest_period" (backtest window, i.e. window immediately following the
-        training window). FreqAI slides the window and sequentially builds the backtesting results
-        before returning the concatenated results for the full backtesting period back to the
-        strategy.
+        trained for each window along the sliding window defined by "train_period_days"
+        (training window) and "backtest_period_days" (backtest window, i.e. window immediately
+        following the training window). FreqAI slides the window and sequentially builds
+        the backtesting results before returning the concatenated results for the full
+        backtesting period back to the strategy.
        :params:
        dataframe: DataFrame = strategy passed dataframe
        metadata: Dict = pair metadata
@ -484,6 +485,20 @@ class IFreqaiModel(ABC):
            self.dd.purge_old_models()
        # self.retrain = False

+    def set_initial_historic_predictions(self, df: DataFrame, model: Any,
+                                         dk: FreqaiDataKitchen, pair: str) -> None:
+        trained_predictions = model.predict(df)
+        pred_df = DataFrame(trained_predictions, columns=dk.label_list)
+        for label in dk.label_list:
+            pred_df[label] = (
+                (pred_df[label] + 1)
+                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
+                / 2
+            ) + dk.data["labels_min"][label]
+
+        self.dd.historic_predictions[pair] = pd.DataFrame()
+        self.dd.historic_predictions[pair] = copy.deepcopy(pred_df)
+
    # Following methods which are overridden by user made prediction models.
    # See freqai/prediction_models/CatboostPredictionModlel.py for an example.

--- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py
+++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py
@ -0,0 +1,112 @@
+import logging
+from typing import Tuple
+
+from pandas import DataFrame
+
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+from freqtrade.freqai.freqai_interface import IFreqaiModel
+
+
+logger = logging.getLogger(__name__)
+
+
+class BaseRegressionModel(IFreqaiModel):
+    """
+    Base class for regression prediction models. It implements train() and predict(),
+    so a subclass (e.g. CatboostPredictionModel) only needs to provide fit().
+    The class inherits from IFreqaiModel.
+    """
+
+    def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
+        """
+        User uses this function to add any additional return values to the dataframe.
+        e.g.
+        dataframe['volatility'] = dk.volatility_values
+        """
+
+        return dataframe
+
+    def train(
+        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
+    ) -> Tuple[DataFrame, DataFrame]:
+        """
+        Filter the training data and train a model to it. Train makes heavy use of the datakitchen
+        for storing, saving, loading, and analyzing the data.
+        :params:
+        :unfiltered_dataframe: Full dataframe for the current training period
+        :pair: the pair the model is being trained for.
+        :dk: FreqaiDataKitchen used to filter, split and normalize the data.
+        :returns:
+        :model: Trained model which can be used to inference (self.predict)
+        """
+
+        logger.info("--------------------Starting training " f"{pair} --------------------")
+
+        # filter the features requested by user in the configuration file and elegantly handle NaNs
+        features_filtered, labels_filtered = dk.filter_features(
+            unfiltered_dataframe,
+            dk.training_features_list,
+            dk.label_list,
+            training_filter=True,
+        )
+
+        # split data into train/test data.
+        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
+        if not self.freqai_info.get('fit_live_predictions', 0):
+            dk.fit_labels()
+        # normalize all data based on train_dataset only
+        data_dictionary = dk.normalize_data(data_dictionary)
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_train(dk)
+
+        logger.info(
+            f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
+        )
+        logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
+
+        model = self.fit(data_dictionary)
+
+        if pair not in self.dd.historic_predictions:
+            self.set_initial_historic_predictions(
+                data_dictionary['train_features'], model, dk, pair)
+        elif self.freqai_info.get('fit_live_predictions_candles', 0):
+            dk.fit_live_predictions()
+            self.dd.save_historic_predictions_to_disk()
+
+        logger.info(f"--------------------done training {pair}--------------------")
+
+        return model
+
+    def predict(
+        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
+    ) -> Tuple[DataFrame, DataFrame]:
+        """
+        Filter the prediction features data and predict with it.
+        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
+        :return:
+        :pred_df: dataframe containing the predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        dk.find_features(unfiltered_dataframe)
+        filtered_dataframe, _ = dk.filter_features(
+            unfiltered_dataframe, dk.training_features_list, training_filter=False
+        )
+        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
+        dk.data_dictionary["prediction_features"] = filtered_dataframe
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_predict(dk, filtered_dataframe)
+
+        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
+        pred_df = DataFrame(predictions, columns=dk.label_list)
+
+        for label in dk.label_list:
+            pred_df[label] = (
+                (pred_df[label] + 1)
+                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
+                / 2
+            ) + dk.data["labels_min"][label]
+
+        return (pred_df, dk.do_predict)
--- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
@ -1,94 +1,21 @@
 import logging
-from typing import Any, Dict, Tuple
+from typing import Any, Dict

 from catboost import CatBoostRegressor, Pool
-from pandas import DataFrame

-from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from freqtrade.freqai.freqai_interface import IFreqaiModel
+from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel


 logger = logging.getLogger(__name__)


-class CatboostPredictionModel(IFreqaiModel):
+class CatboostPredictionModel(BaseRegressionModel):
    """
    User created prediction model. The class needs to override three necessary
    functions, predict(), train(), fit(). The class inherits ModelHandler which
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

-    def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
-        """
-        User uses this function to add any additional return values to the dataframe.
-        e.g.
-        dataframe['volatility'] = dk.volatility_values
-        """
-
-        return dataframe
-
-    def make_labels(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
-        """
-        User defines the labels here (target values).
-        :params:
-        :dataframe: the full dataframe for the present training period
-        """
-
-        dataframe["s"] = (
-            dataframe["close"]
-            .shift(-self.feature_parameters["period"])
-            .rolling(self.feature_parameters["period"])
-            .mean()
-            / dataframe["close"]
-            - 1
-        )
-
-        return dataframe["s"]
-
-    def train(
-        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
-    ) -> Tuple[DataFrame, DataFrame]:
-        """
-        Filter the training data and train a model to it. Train makes heavy use of the datahkitchen
-        for storing, saving, loading, and analyzing the data.
-        :params:
-        :unfiltered_dataframe: Full dataframe for the current training period
-        :metadata: pair metadata from strategy.
-        :returns:
-        :model: Trained model which can be used to inference (self.predict)
-        """
-
-        logger.info("--------------------Starting training " f"{pair} --------------------")
-
-        # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
-        # filter the features requested by user in the configuration file and elegantly handle NaNs
-        features_filtered, labels_filtered = dk.filter_features(
-            unfiltered_dataframe,
-            dk.training_features_list,
-            dk.label_list,
-            training_filter=True,
-        )
-
-        # split data into train/test data.
-        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
-        dk.fit_labels()  # fit labels to a cauchy distribution so we know what to expect in strategy
-        # normalize all data based on train_dataset only
-        data_dictionary = dk.normalize_data(data_dictionary)
-
-        # optional additional data cleaning/analysis
-        self.data_cleaning_train(dk)
-
-        logger.info(
-            f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
-        )
-        logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
-
-        model = self.fit(data_dictionary)
-
-        logger.info(f"--------------------done training {pair}--------------------")
-
-        return model
-
    def fit(self, data_dictionary: Dict) -> Any:
        """
        User sets up the training and test data to fit their desired model here
@ -118,37 +45,3 @@ class CatboostPredictionModel(IFreqaiModel):
        model.fit(X=train_data, eval_set=test_data)

        return model
-
-    def predict(
-        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
-    ) -> Tuple[DataFrame, DataFrame]:
-        """
-        Filter the prediction features data and predict with it.
-        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
-        :return:
-        :pred_df: dataframe containing the predictions
-        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
-        data (NaNs) or felt uncertain about data (PCA and DI index)
-        """
-
-        dk.find_features(unfiltered_dataframe)
-        filtered_dataframe, _ = dk.filter_features(
-            unfiltered_dataframe, dk.training_features_list, training_filter=False
-        )
-        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
-        dk.data_dictionary["prediction_features"] = filtered_dataframe
-
-        # optional additional data cleaning/analysis
-        self.data_cleaning_predict(dk, filtered_dataframe)
-
-        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
-        pred_df = DataFrame(predictions, columns=dk.label_list)
-
-        for label in dk.label_list:
-            pred_df[label] = (
-                (pred_df[label] + 1)
-                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
-                / 2
-            ) + dk.data["labels_min"][label]
-
-        return (pred_df, dk.do_predict)
--- a/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py
@ -1,77 +1,22 @@
 import logging
-from typing import Any, Dict, Tuple
+from typing import Any, Dict

 from catboost import CatBoostRegressor  # , Pool
-from pandas import DataFrame
 from sklearn.multioutput import MultiOutputRegressor

-from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from freqtrade.freqai.freqai_interface import IFreqaiModel
+from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel


 logger = logging.getLogger(__name__)


-class CatboostPredictionMultiModel(IFreqaiModel):
+class CatboostPredictionMultiModel(BaseRegressionModel):
    """
    User created prediction model. The class needs to override three necessary
    functions, predict(), train(), fit(). The class inherits ModelHandler which
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

-    def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
-        """
-        User uses this function to add any additional return values to the dataframe.
-        e.g.
-        dataframe['volatility'] = dk.volatility_values
-        """
-
-        return dataframe
-
-    def train(
-        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
-    ) -> Tuple[DataFrame, DataFrame]:
-        """
-        Filter the training data and train a model to it. Train makes heavy use of the datahkitchen
-        for storing, saving, loading, and analyzing the data.
-        :params:
-        :unfiltered_dataframe: Full dataframe for the current training period
-        :metadata: pair metadata from strategy.
-        :returns:
-        :model: Trained model which can be used to inference (self.predict)
-        """
-
-        logger.info("--------------------Starting training " f"{pair} --------------------")
-
-        # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
-        # filter the features requested by user in the configuration file and elegantly handle NaNs
-        features_filtered, labels_filtered = dk.filter_features(
-            unfiltered_dataframe,
-            dk.training_features_list,
-            dk.label_list,
-            training_filter=True,
-        )
-
-        # split data into train/test data.
-        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
-        dk.fit_labels()  # fit labels to a cauchy distribution so we know what to expect in strategy
-        # normalize all data based on train_dataset only
-        data_dictionary = dk.normalize_data(data_dictionary)
-
-        # optional additional data cleaning/analysis
-        self.data_cleaning_train(dk)
-
-        logger.info(
-            f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
-        )
-        logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
-
-        model = self.fit(data_dictionary)
-
-        logger.info(f"--------------------done training {pair}--------------------")
-
-        return model
-
    def fit(self, data_dictionary: Dict) -> Any:
        """
        User sets up the training and test data to fit their desired model here
@ -99,37 +44,3 @@ class CatboostPredictionMultiModel(IFreqaiModel):
        test_score = model.score(*eval_set)
        logger.info(f"Train score {train_score}, Test score {test_score}")
        return model
-
-    def predict(
-        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
-    ) -> Tuple[DataFrame, DataFrame]:
-        """
-        Filter the prediction features data and predict with it.
-        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
-        :return:
-        :pred_df: dataframe containing the predictions
-        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
-        data (NaNs) or felt uncertain about data (PCA and DI index)
-        """
-
-        dk.find_features(unfiltered_dataframe)
-        filtered_dataframe, _ = dk.filter_features(
-            unfiltered_dataframe, dk.training_features_list, training_filter=False
-        )
-        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
-        dk.data_dictionary["prediction_features"] = filtered_dataframe
-
-        # optional additional data cleaning/analysis
-        self.data_cleaning_predict(dk, filtered_dataframe)
-
-        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
-        pred_df = DataFrame(predictions, columns=dk.label_list)
-
-        for label in dk.label_list:
-            pred_df[label] = (
-                (pred_df[label] + 1)
-                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
-                / 2
-            ) + dk.data["labels_min"][label]
-
-        return (pred_df, dk.do_predict)
--- a/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py
@ -1,76 +1,21 @@
 import logging
-from typing import Any, Dict, Tuple
+from typing import Any, Dict

 from lightgbm import LGBMRegressor
-from pandas import DataFrame

-from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from freqtrade.freqai.freqai_interface import IFreqaiModel
+from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel


 logger = logging.getLogger(__name__)


-class LightGBMPredictionModel(IFreqaiModel):
+class LightGBMPredictionModel(BaseRegressionModel):
    """
    User created prediction model. The class needs to override three necessary
    functions, predict(), train(), fit(). The class inherits ModelHandler which
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

-    def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
-        """
-        User uses this function to add any additional return values to the dataframe.
-        e.g.
-        dataframe['volatility'] = dk.volatility_values
-        """
-
-        return dataframe
-
-    def train(
-        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
-    ) -> Tuple[DataFrame, DataFrame]:
-        """
-        Filter the training data and train a model to it. Train makes heavy use of the datahkitchen
-        for storing, saving, loading, and analyzing the data.
-        :params:
-        :unfiltered_dataframe: Full dataframe for the current training period
-        :metadata: pair metadata from strategy.
-        :returns:
-        :model: Trained model which can be used to inference (self.predict)
-        """
-
-        logger.info("--------------------Starting training " f"{pair} --------------------")
-
-        # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
-        # filter the features requested by user in the configuration file and elegantly handle NaNs
-        features_filtered, labels_filtered = dk.filter_features(
-            unfiltered_dataframe,
-            dk.training_features_list,
-            dk.label_list,
-            training_filter=True,
-        )
-
-        # split data into train/test data.
-        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
-        dk.fit_labels()  # fit labels to a cauchy distribution so we know what to expect in strategy
-        # normalize all data based on train_dataset only
-        data_dictionary = dk.normalize_data(data_dictionary)
-
-        # optional additional data cleaning/analysis
-        self.data_cleaning_train(dk)
-
-        logger.info(
-            f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
-        )
-        logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
-
-        model = self.fit(data_dictionary)
-
-        logger.info(f"--------------------done training {pair}--------------------")
-
-        return model
-
    def fit(self, data_dictionary: Dict) -> Any:
        """
        Most regressors use the same function names and arguments e.g. user
@ -89,39 +34,3 @@ class LightGBMPredictionModel(IFreqaiModel):
        model.fit(X=X, y=y, eval_set=eval_set)

        return model
-
-    def predict(
-        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen
-    ) -> Tuple[DataFrame, DataFrame]:
-        """
-        Filter the prediction features data and predict with it.
-        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
-        :return:
-        :predictions: np.array of predictions
-        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
-        data (NaNs) or felt uncertain about data (PCA and DI index)
-        """
-
-        # logger.info("--------------------Starting prediction--------------------")
-
-        dk.find_features(unfiltered_dataframe)
-        filtered_dataframe, _ = dk.filter_features(
-            unfiltered_dataframe, dk.training_features_list, training_filter=False
-        )
-        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
-        dk.data_dictionary["prediction_features"] = filtered_dataframe
-
-        # optional additional data cleaning/analysis
-        self.data_cleaning_predict(dk, filtered_dataframe)
-
-        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
-        pred_df = DataFrame(predictions, columns=dk.label_list)
-
-        for label in dk.label_list:
-            pred_df[label] = (
-                (pred_df[label] + 1)
-                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
-                / 2
-            ) + dk.data["labels_min"][label]
-
-        return (pred_df, dk.do_predict)
--- a/freqtrade/plugins/pairlist/pairlist_helpers.py
+++ b/freqtrade/plugins/pairlist/pairlist_helpers.py
@ -44,7 +44,8 @@ def expand_pairlist(wildcardpl: List[str], available_pairs: List[str],

 def dynamic_expand_pairlist(config: dict, markets: list) -> List[str]:
    if config.get('freqai', {}):
-        full_pairs = config['pairs'] + [pair for pair in config['freqai']['corr_pairlist']
+        corr_pairlist = config['freqai']['feature_parameters']['include_corr_pairlist']
+        full_pairs = config['pairs'] + [pair for pair in corr_pairlist
                                        if pair not in config['pairs']]
        expanded_pairs = expand_pairlist(full_pairs, markets)
    else:
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@ -56,9 +56,9 @@ class FreqaiExampleStrategy(IStrategy):

    def informative_pairs(self):
        whitelist_pairs = self.dp.current_whitelist()
-        corr_pairs = self.config["freqai"]["corr_pairlist"]
+        corr_pairs = self.config["freqai"]["feature_parameters"]["include_corr_pairlist"]
        informative_pairs = []
-        for tf in self.config["freqai"]["timeframes"]:
+        for tf in self.config["freqai"]["feature_parameters"]["include_timeframes"]:
            for pair in whitelist_pairs:
                informative_pairs.append((pair, tf))
            for pair in corr_pairs:
@ -93,7 +93,7 @@ class FreqaiExampleStrategy(IStrategy):
                informative = self.dp.get_pair_dataframe(pair, tf)

            # first loop is automatically duplicating indicators for time periods
-            for t in self.freqai_info["feature_parameters"]["indicator_periods"]:
+            for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:

                t = int(t)
                informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
@ -123,8 +123,6 @@ class FreqaiExampleStrategy(IStrategy):
                )

                informative[f"%-{coin}roc-period_{t}"] = ta.ROC(informative, timeperiod=t)
-                macd = ta.MACD(informative, timeperiod=t)
-                informative[f"%-{coin}macd-period_{t}"] = macd["macd"]

                informative[f"%-{coin}relative_volume-period_{t}"] = (
                    informative["volume"] / informative["volume"].rolling(t).mean()
@ -136,7 +134,7 @@ class FreqaiExampleStrategy(IStrategy):

            indicators = [col for col in informative if col.startswith("%")]
            # This loop duplicates and shifts all indicators to add a sense of recency to data
-            for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
+            for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1):
                if n == 0:
                    continue
                informative_shift = informative[indicators].shift(n)
@ -161,8 +159,8 @@ class FreqaiExampleStrategy(IStrategy):
                # needs to be used such as templates/CatboostPredictionMultiModel.py
                df["&-s_close"] = (
                    df["close"]
-                    .shift(-self.freqai_info["feature_parameters"]["period"])
-                    .rolling(self.freqai_info["feature_parameters"]["period"])
+                    .shift(-self.freqai_info["feature_parameters"]["label_period_candles"])
+                    .rolling(self.freqai_info["feature_parameters"]["label_period_candles"])
                    .mean()
                    / df["close"]
                    - 1
@ -179,7 +177,7 @@ class FreqaiExampleStrategy(IStrategy):
        # indicated by the user in the configuration file.
        # All indicators must be populated by populate_any_indicators() for live functionality
        # to work correctly.
-        for tf in self.freqai_info["timeframes"]:
+        for tf in self.freqai_info["feature_parameters"]["include_timeframes"]:
            dataframe = self.populate_any_indicators(
                metadata,
                self.pair,
@ -189,7 +187,7 @@ class FreqaiExampleStrategy(IStrategy):
                set_generalized_indicators=sgi,
            )
            sgi = False
-            for pair in self.freqai_info["corr_pairlist"]:
+            for pair in self.freqai_info["feature_parameters"]["include_corr_pairlist"]:
                if metadata["pair"] in pair:
                    continue  # do not include whitelisted pair twice if it is in corr_pairlist
                dataframe = self.populate_any_indicators(