Merge branch 'develop' into dev-merge-rl

2022-09-14 22:49:11 +02:00
parent 69b3fcfd32 075748b21a
commit 81417cb795
59 changed files with 1259 additions and 679 deletions
--- a/freqtrade/freqai/prediction_models/BaseClassifierModel.py
+++ b/freqtrade/freqai/prediction_models/BaseClassifierModel.py
@@ -21,12 +21,12 @@ class BaseClassifierModel(IFreqaiModel):
    """

    def train(
-        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
+        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
    ) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datakitchen
        for storing, saving, loading, and analyzing the data.
-        :param unfiltered_dataframe: Full dataframe for the current training period
+        :param unfiltered_df: Full dataframe for the current training period
        :param metadata: pair metadata from strategy.
        :return:
        :model: Trained model which can be used to inference (self.predict)
@@ -36,14 +36,14 @@ class BaseClassifierModel(IFreqaiModel):

        # filter the features requested by user in the configuration file and elegantly handle NaNs
        features_filtered, labels_filtered = dk.filter_features(
-            unfiltered_dataframe,
+            unfiltered_df,
            dk.training_features_list,
            dk.label_list,
            training_filter=True,
        )

-        start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
-        end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
+        start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
+        end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
        logger.info(f"-------------------- Training on data from {start_date} to "
                    f"{end_date}--------------------")
        # split data into train/test data.
@@ -61,32 +61,32 @@ class BaseClassifierModel(IFreqaiModel):
            f' features and {len(data_dictionary["train_features"])} data points'
        )

-        model = self.fit(data_dictionary)
+        model = self.fit(data_dictionary, dk)

        logger.info(f"--------------------done training {pair}--------------------")

        return model

    def predict(
-        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
+        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
-        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
+        :param: unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

-        dk.find_features(unfiltered_dataframe)
-        filtered_dataframe, _ = dk.filter_features(
-            unfiltered_dataframe, dk.training_features_list, training_filter=False
+        dk.find_features(unfiltered_df)
+        filtered_df, _ = dk.filter_features(
+            unfiltered_df, dk.training_features_list, training_filter=False
        )
-        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
-        dk.data_dictionary["prediction_features"] = filtered_dataframe
+        filtered_df = dk.normalize_data_from_metadata(filtered_df)
+        dk.data_dictionary["prediction_features"] = filtered_df

-        self.data_cleaning_predict(dk, filtered_dataframe)
+        self.data_cleaning_predict(dk, filtered_df)

        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
        pred_df = DataFrame(predictions, columns=dk.label_list)
--- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py
+++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py
@@ -20,12 +20,12 @@ class BaseRegressionModel(IFreqaiModel):
    """

    def train(
-        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
+        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
    ) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datakitchen
        for storing, saving, loading, and analyzing the data.
-        :param unfiltered_dataframe: Full dataframe for the current training period
+        :param unfiltered_df: Full dataframe for the current training period
        :param metadata: pair metadata from strategy.
        :return:
        :model: Trained model which can be used to inference (self.predict)
@@ -35,14 +35,14 @@ class BaseRegressionModel(IFreqaiModel):

        # filter the features requested by user in the configuration file and elegantly handle NaNs
        features_filtered, labels_filtered = dk.filter_features(
-            unfiltered_dataframe,
+            unfiltered_df,
            dk.training_features_list,
            dk.label_list,
            training_filter=True,
        )

-        start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
-        end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
+        start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
+        end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
        logger.info(f"-------------------- Training on data from {start_date} to "
                    f"{end_date}--------------------")
        # split data into train/test data.
@@ -60,33 +60,33 @@ class BaseRegressionModel(IFreqaiModel):
            f' features and {len(data_dictionary["train_features"])} data points'
        )

-        model = self.fit(data_dictionary)
+        model = self.fit(data_dictionary, dk)

        logger.info(f"--------------------done training {pair}--------------------")

        return model

    def predict(
-        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
+        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
-        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
+        :param: unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

-        dk.find_features(unfiltered_dataframe)
-        filtered_dataframe, _ = dk.filter_features(
-            unfiltered_dataframe, dk.training_features_list, training_filter=False
+        dk.find_features(unfiltered_df)
+        filtered_df, _ = dk.filter_features(
+            unfiltered_df, dk.training_features_list, training_filter=False
        )
-        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
-        dk.data_dictionary["prediction_features"] = filtered_dataframe
+        filtered_df = dk.normalize_data_from_metadata(filtered_df)
+        dk.data_dictionary["prediction_features"] = filtered_df

        # optional additional data cleaning/analysis
-        self.data_cleaning_predict(dk, filtered_dataframe)
+        self.data_cleaning_predict(dk, filtered_df)

        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
        pred_df = DataFrame(predictions, columns=dk.label_list)
--- a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py
+++ b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py
@@ -17,12 +17,12 @@ class BaseTensorFlowModel(IFreqaiModel):
    """

    def train(
-        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
+        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
    ) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datakitchen
        for storing, saving, loading, and analyzing the data.
-        :param unfiltered_dataframe: Full dataframe for the current training period
+        :param unfiltered_df: Full dataframe for the current training period
        :param metadata: pair metadata from strategy.
        :return:
        :model: Trained model which can be used to inference (self.predict)
@@ -32,14 +32,14 @@ class BaseTensorFlowModel(IFreqaiModel):

        # filter the features requested by user in the configuration file and elegantly handle NaNs
        features_filtered, labels_filtered = dk.filter_features(
-            unfiltered_dataframe,
+            unfiltered_df,
            dk.training_features_list,
            dk.label_list,
            training_filter=True,
        )

-        start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
-        end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
+        start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
+        end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
        logger.info(f"-------------------- Training on data from {start_date} to "
                    f"{end_date}--------------------")
        # split data into train/test data.
@@ -57,7 +57,7 @@ class BaseTensorFlowModel(IFreqaiModel):
            f' features and {len(data_dictionary["train_features"])} data points'
        )

-        model = self.fit(data_dictionary)
+        model = self.fit(data_dictionary, dk)

        logger.info(f"--------------------done training {pair}--------------------")

--- a/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py
+++ b/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py
@@ -0,0 +1,65 @@
+
+from joblib import Parallel
+from sklearn.multioutput import MultiOutputRegressor, _fit_estimator
+from sklearn.utils.fixes import delayed
+from sklearn.utils.validation import has_fit_parameter
+
+
+class FreqaiMultiOutputRegressor(MultiOutputRegressor):
+    """
+    MultiOutputRegressor whose ``fit`` accepts a separate dict of fit
+    parameters for each output column of ``y``.
+    """
+
+    def fit(self, X, y, sample_weight=None, fit_params=None):
+        """Fit the model to data, separately for each output variable.
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            The input data.
+        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
+            Multi-output targets. An indicator matrix turns on multilabel
+            estimation.
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights. If `None`, then samples are equally weighted.
+            Only supported if the underlying regressor supports sample
+            weights.
+        fit_params : list of dict, default=None
+            One dict per output column, in the same order as the columns
+            of ``y``. Entry ``i`` is unpacked as keyword arguments into the
+            fit of the estimator for column ``i``. Each dict may contain
+            same or different values (e.g. different eval_sets or
+            init_models). When omitted or empty, no extra keyword
+            arguments are passed. A ``None`` entry is treated as ``{}``.
+        Returns
+        -------
+        self : object
+            Returns a fitted instance.
+        Raises
+        ------
+        ValueError
+            If the base estimator has no ``fit`` method, if ``y`` is
+            one-dimensional, or if ``sample_weight`` is given but the base
+            estimator does not accept it.
+        """
+
+        if not hasattr(self.estimator, "fit"):
+            raise ValueError("The base estimator should implement a fit method")
+
+        y = self._validate_data(X="no_validation", y=y, multi_output=True)
+
+        if y.ndim == 1:
+            raise ValueError(
+                "y must have at least two dimensions for "
+                "multi-output regression but has only one."
+            )
+
+        if sample_weight is not None and not has_fit_parameter(
+            self.estimator, "sample_weight"
+        ):
+            raise ValueError("Underlying estimator does not support sample weights.")
+
+        n_outputs = y.shape[1]
+        if not fit_params:
+            # ``**None`` raises TypeError, so the fallback must be one empty
+            # kwargs dict per output rather than a list of ``None``.
+            fit_params = [{} for _ in range(n_outputs)]
+
+        # One fit call per output column; results are collected in column order.
+        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
+            delayed(_fit_estimator)(
+                self.estimator, X, y[:, i], sample_weight, **(fit_params[i] or {})
+            )
+            for i in range(n_outputs)
+        )
+
+        # Mirror the input metadata of the first fitted estimator, when present.
+        if hasattr(self.estimators_[0], "n_features_in_"):
+            self.n_features_in_ = self.estimators_[0].n_features_in_
+        if hasattr(self.estimators_[0], "feature_names_in_"):
+            self.feature_names_in_ = self.estimators_[0].feature_names_in_
+
+        # Return the fitted instance, as documented and as scikit-learn callers expect.
+        return self
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -76,6 +76,8 @@ class FreqaiDataDrawer:
            self.full_path / f"follower_dictionary-{self.follower_name}.json"
        )
        self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
+        self.historic_predictions_bkp_path = Path(
+            self.full_path / "historic_predictions.backup.pkl")
        self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
        self.follow_mode = follow_mode
        if follow_mode:
@@ -119,13 +121,21 @@ class FreqaiDataDrawer:
        """
        exists = self.historic_predictions_path.is_file()
        if exists:
-            with open(self.historic_predictions_path, "rb") as fp:
-                self.historic_predictions = cloudpickle.load(fp)
-            logger.info(
-                f"Found existing historic predictions at {self.full_path}, but beware "
-                "that statistics may be inaccurate if the bot has been offline for "
-                "an extended period of time."
-            )
+            try:
+                with open(self.historic_predictions_path, "rb") as fp:
+                    self.historic_predictions = cloudpickle.load(fp)
+                logger.info(
+                    f"Found existing historic predictions at {self.full_path}, but beware "
+                    "that statistics may be inaccurate if the bot has been offline for "
+                    "an extended period of time."
+                )
+            except EOFError:
+                logger.warning(
+                    'Historical prediction file was corrupted. Trying to load backup file.')
+                with open(self.historic_predictions_bkp_path, "rb") as fp:
+                    self.historic_predictions = cloudpickle.load(fp)
+                logger.warning('FreqAI successfully loaded the backup historical predictions file.')
+
        elif not self.follow_mode:
            logger.info("Could not find existing historic_predictions, starting from scratch")
        else:
@@ -143,6 +153,9 @@ class FreqaiDataDrawer:
        with open(self.historic_predictions_path, "wb") as fp:
            cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)

+        # create a backup
+        shutil.copy(self.historic_predictions_path, self.historic_predictions_bkp_path)
+
    def save_drawer_to_disk(self):
        """
        Save data drawer full of all pair model metadata in present model folder.
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -1,7 +1,8 @@
 import copy
-import datetime
 import logging
 import shutil
+from datetime import datetime, timezone
+from math import cos, sin
 from pathlib import Path
 from typing import Any, Dict, List, Tuple

@@ -9,6 +10,7 @@ import numpy as np
 import numpy.typing as npt
 import pandas as pd
 from pandas import DataFrame
+from scipy import stats
 from sklearn import linear_model
 from sklearn.cluster import DBSCAN
 from sklearn.metrics.pairwise import pairwise_distances
@@ -182,7 +184,7 @@ class FreqaiDataKitchen:

    def filter_features(
        self,
-        unfiltered_dataframe: DataFrame,
+        unfiltered_df: DataFrame,
        training_feature_list: List,
        label_list: List = list(),
        training_filter: bool = True,
@@ -193,31 +195,35 @@ class FreqaiDataKitchen:
        0s in the prediction dataset. However, prediction dataset do_predict will reflect any
        row that had a NaN and will shield user from that prediction.
        :params:
-        :unfiltered_dataframe: the full dataframe for the present training period
+        :unfiltered_df: the full dataframe for the present training period
        :training_feature_list: list, the training feature list constructed by
        self.build_feature_list() according to user specified parameters in the configuration file.
        :labels: the labels for the dataset
        :training_filter: boolean which lets the function know if it is training data or
        prediction data to be filtered.
        :returns:
-        :filtered_dataframe: dataframe cleaned of NaNs and only containing the user
+        :filtered_df: dataframe cleaned of NaNs and only containing the user
        requested feature set.
        :labels: labels cleaned of NaNs.
        """
-        filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1)
-        filtered_dataframe = filtered_dataframe.replace([np.inf, -np.inf], np.nan)
+        filtered_df = unfiltered_df.filter(training_feature_list, axis=1)
+        filtered_df = filtered_df.replace([np.inf, -np.inf], np.nan)

-        drop_index = pd.isnull(filtered_dataframe).any(1)  # get the rows that have NaNs,
+        drop_index = pd.isnull(filtered_df).any(1)  # get the rows that have NaNs,
        drop_index = drop_index.replace(True, 1).replace(False, 0)  # pep8 requirement.
        if (training_filter):
+            const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
+            if const_cols:
+                filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
+                logger.warning(f"Removed features {const_cols} with constant values.")
            # we don't care about total row number (total no. datapoints) in training, we only care
            # about removing any row with NaNs
            # if labels has multiple columns (user wants to train multiple modelEs), we detect here
-            labels = unfiltered_dataframe.filter(label_list, axis=1)
+            labels = unfiltered_df.filter(label_list, axis=1)
            drop_index_labels = pd.isnull(labels).any(1)
            drop_index_labels = drop_index_labels.replace(True, 1).replace(False, 0)
-            dates = unfiltered_dataframe['date']
-            filtered_dataframe = filtered_dataframe[
+            dates = unfiltered_df['date']
+            filtered_df = filtered_df[
                (drop_index == 0) & (drop_index_labels == 0)
            ]  # dropping values
            labels = labels[
@@ -227,13 +233,13 @@ class FreqaiDataKitchen:
                (drop_index == 0) & (drop_index_labels == 0)
            ]
            logger.info(
-                f"dropped {len(unfiltered_dataframe) - len(filtered_dataframe)} training points"
-                f" due to NaNs in populated dataset {len(unfiltered_dataframe)}."
+                f"dropped {len(unfiltered_df) - len(filtered_df)} training points"
+                f" due to NaNs in populated dataset {len(unfiltered_df)}."
            )
-            if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live:
-                worst_indicator = str(unfiltered_dataframe.count().idxmin())
+            if (1 - len(filtered_df) / len(unfiltered_df)) > 0.1 and self.live:
+                worst_indicator = str(unfiltered_df.count().idxmin())
                logger.warning(
-                    f" {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.0f} percent "
+                    f" {(1 - len(filtered_df)/len(unfiltered_df)) * 100:.0f} percent "
                    " of training data dropped due to NaNs, model may perform inconsistent "
                    f"with expectations. Verify {worst_indicator}"
                )
@@ -242,9 +248,9 @@ class FreqaiDataKitchen:
        else:
            # we are backtesting so we need to preserve row number to send back to strategy,
            # so now we use do_predict to avoid any prediction based on a NaN
-            drop_index = pd.isnull(filtered_dataframe).any(1)
+            drop_index = pd.isnull(filtered_df).any(1)
            self.data["filter_drop_index_prediction"] = drop_index
-            filtered_dataframe.fillna(0, inplace=True)
+            filtered_df.fillna(0, inplace=True)
            # replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction
            # that was based on a single NaN is ultimately protected from buys with do_predict
            drop_index = ~drop_index
@@ -253,11 +259,11 @@ class FreqaiDataKitchen:
                logger.info(
                    "dropped %s of %s prediction data points due to NaNs.",
                    len(self.do_predict) - self.do_predict.sum(),
-                    len(filtered_dataframe),
+                    len(filtered_df),
                )
            labels = []

-        return filtered_dataframe, labels
+        return filtered_df, labels

    def build_data_dictionary(
        self,
@@ -360,7 +366,7 @@ class FreqaiDataKitchen:

    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
        """
-        Normalize a set of data using the mean and standard deviation from
+        Denormalize a set of data using the mean and standard deviation from
        the associated training data.
        :param df: Dataframe of predictions to be denormalized
        """
@@ -399,7 +405,7 @@ class FreqaiDataKitchen:
        config_timerange = TimeRange.parse_timerange(self.config["timerange"])
        if config_timerange.stopts == 0:
            config_timerange.stopts = int(
-                datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
+                datetime.now(tz=timezone.utc).timestamp()
            )
        timerange_train = copy.deepcopy(full_timerange)
        timerange_backtest = copy.deepcopy(full_timerange)
@@ -416,8 +422,8 @@ class FreqaiDataKitchen:
            timerange_train.stopts = timerange_train.startts + train_period_days

            first = False
-            start = datetime.datetime.utcfromtimestamp(timerange_train.startts)
-            stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts)
+            start = datetime.fromtimestamp(timerange_train.startts, tz=timezone.utc)
+            stop = datetime.fromtimestamp(timerange_train.stopts, tz=timezone.utc)
            tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
            tr_training_list_timerange.append(copy.deepcopy(timerange_train))

@@ -430,8 +436,8 @@ class FreqaiDataKitchen:
            if timerange_backtest.stopts > config_timerange.stopts:
                timerange_backtest.stopts = config_timerange.stopts

-            start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts)
-            stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts)
+            start = datetime.fromtimestamp(timerange_backtest.startts, tz=timezone.utc)
+            stop = datetime.fromtimestamp(timerange_backtest.stopts, tz=timezone.utc)
            tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
            tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest))

@@ -451,13 +457,35 @@ class FreqaiDataKitchen:
                   it is sliced down to just the present training period.
        """

-        start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
-        stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
+        start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
+        stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
        df = df.loc[df["date"] >= start, :]
-        df = df.loc[df["date"] <= stop, :]
+        if not self.live:
+            df = df.loc[df["date"] < stop, :]

        return df

+    def remove_training_from_backtesting(
+        self
+    ) -> DataFrame:
+        """
+        Trim self.return_dataframe to the configured backtesting timerange.
+        When "startup_candle_count" is set in the config, the start of the
+        range is first adjusted by that many candles of the configured
+        timeframe (via TimeRange.subtract_start; presumably this moves the
+        start earlier so the startup candles are kept - confirm in TimeRange).
+        :return: rows of self.return_dataframe whose "date" is on or after
+        the resulting start of the timerange.
+        """
+        config = self.config
+        warmup_candles = config.get('startup_candle_count', 0)
+        timeframe = config['timeframe']
+        backtest_range = TimeRange.parse_timerange(config["timerange"])
+
+        if warmup_candles > 0 and backtest_range:
+            backtest_range.subtract_start(timeframe_to_seconds(timeframe) * warmup_candles)
+
+        # timezone-aware UTC cutoff built from the start timestamp
+        cutoff = datetime.fromtimestamp(backtest_range.startts, tz=timezone.utc)
+        frame = self.return_dataframe
+        return frame.loc[frame["date"] >= cutoff, :]
+
+
    def principal_component_analysis(self) -> None:
        """
        Performs Principal Component Analysis on the data for dimensionality reduction
@@ -652,8 +680,6 @@ class FreqaiDataKitchen:
        is an outlier.
        """

-        from math import cos, sin
-
        if predict:
            if not self.data['DBSCAN_eps']:
                return
@@ -746,6 +772,111 @@ class FreqaiDataKitchen:

        return

+    def compute_inlier_metric(self, set_='train') -> None:
+        """
+        Compute inlier metric from backwards distance distributions.
+        This metric defines how well features from a timepoint fit
+        into previous timepoints.
+
+        The result is appended to the chosen feature set in
+        self.data_dictionary as a new column named 'inlier_metric'.
+        :param set_: 'train' or 'test' selects the matching '<set_>_features'
+        entry; any other value selects 'prediction_features'.
+        For 'train'/'test' the first "inlier_metric_window" rows are removed
+        from the features, weights and labels of that set. For predictions
+        no rows are removed and missing metric values are filled with 0.
+        """
+
+        # number of preceding points each point is compared against
+        no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"]
+
+        # work on a copy so the stored features are untouched until the end
+        if set_ == 'train':
+            compute_df = copy.deepcopy(self.data_dictionary['train_features'])
+        elif set_ == 'test':
+            compute_df = copy.deepcopy(self.data_dictionary['test_features'])
+        else:
+            compute_df = copy.deepcopy(self.data_dictionary['prediction_features'])
+
+        # reverse the row order (last row first); assuming rows are
+        # chronological, this puts the most recent timepoint first
+        compute_df_reindexed = compute_df.reindex(
+            index=np.flip(compute_df.index)
+        )
+
+        # pairwise distances between all rows; np.triu zeroes everything below
+        # the diagonal, so in the reversed ordering each row only keeps the
+        # distances to itself and to the rows that follow it
+        pairwise = pd.DataFrame(
+            np.triu(
+                pairwise_distances(compute_df_reindexed, n_jobs=self.thread_count)
+            ),
+            columns=compute_df_reindexed.index,
+            index=compute_df_reindexed.index
+        )
+        pairwise = pairwise.round(5)
+
+        # d1 ... dN: one column per retained distance
+        column_labels = [
+            '{}{}'.format('d', i) for i in range(1, no_prev_pts + 1)
+        ]
+        distances = pd.DataFrame(
+            columns=column_labels, index=compute_df.index
+        )
+
+        # the first no_prev_pts index labels are skipped and stay NaN
+        for index in compute_df.index[no_prev_pts:]:
+            current_row = pairwise.loc[[index]]
+            # drop the zero entries (masked triangle, self-distance and any
+            # distance that rounded to 0), then keep the first no_prev_pts
+            current_row_no_zeros = current_row.loc[
+                :, (current_row != 0).any(axis=0)
+            ]
+            distances.loc[[index]] = current_row_no_zeros.iloc[
+                :, :no_prev_pts
+            ]
+        distances = distances.replace([np.inf, -np.inf], np.nan)
+        # axis is passed by keyword: the positional form is deprecated in
+        # pandas 1.x and rejected by pandas 2.x
+        drop_index = pd.isnull(distances).any(axis=1)
+        distances = distances[drop_index == 0]
+
+        # per distance column: fit a Weibull (min) distribution and map each
+        # distance to its CDF value under that fit
+        inliers = pd.DataFrame(index=distances.index)
+        for key in distances.keys():
+            current_distances = distances[key].dropna()
+            fit_params = stats.weibull_min.fit(current_distances)
+            quantiles = stats.weibull_min.cdf(current_distances, *fit_params)
+
+            df_inlier = pd.DataFrame(
+                {key: quantiles}, index=distances.index
+            )
+            inliers = pd.concat(
+                [inliers, df_inlier], axis=1
+            )
+
+        # average the CDF values over the window, aligned to the full index
+        inlier_metric = pd.DataFrame(
+            data=inliers.sum(axis=1) / no_prev_pts,
+            columns=['inlier_metric'],
+            index=compute_df.index
+        )
+
+        # min-max rescale to the range [-1, 1]
+        inlier_metric = (2 * (inlier_metric - inlier_metric.min()) /
+                         (inlier_metric.max() - inlier_metric.min()) - 1)
+
+        if set_ in ('train', 'test'):
+            # the leading rows have no metric, so cut them from the metric,
+            # the features and the stored weights/labels alike
+            inlier_metric = inlier_metric.iloc[no_prev_pts:]
+            compute_df = compute_df.iloc[no_prev_pts:]
+            self.remove_beginning_points_from_data_dict(set_, no_prev_pts)
+            self.data_dictionary[f'{set_}_features'] = pd.concat(
+                [compute_df, inlier_metric], axis=1)
+        else:
+            # predictions keep every row; missing values are filled with 0
+            self.data_dictionary['prediction_features'] = pd.concat(
+                [compute_df, inlier_metric], axis=1)
+            self.data_dictionary['prediction_features'].fillna(0, inplace=True)
+
+        logger.info('Inlier metric computed and added to features.')
+
+        return None
+
+    def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10):
+        """
+        Drop the first no_prev_pts entries from the features, weights and
+        labels stored in self.data_dictionary for the given set.
+        :param set_: prefix of the '<set_>_features', '<set_>_weights' and
+        '<set_>_labels' entries to trim.
+        :param no_prev_pts: number of leading entries to remove.
+        """
+        store = self.data_dictionary
+        feat_key, weight_key, label_key = (
+            f'{set_}_features', f'{set_}_weights', f'{set_}_labels'
+        )
+        # read all three entries before writing any of them back
+        feats, wts, labs = store[feat_key], store[weight_key], store[label_key]
+        # weights are sliced directly; features and labels are sliced by position
+        store[weight_key] = wts[no_prev_pts:]
+        store[feat_key] = feats.iloc[no_prev_pts:]
+        store[label_key] = labs.iloc[no_prev_pts:]
+
+    def add_noise_to_training_features(self) -> None:
+        """
+        Perturb the stored train features with zero-mean Gaussian noise to
+        reduce the risk of overfitting. The standard deviation is read from
+        "noise_standard_deviation" in the feature parameters; one noise value
+        is drawn per cell of the train feature set.
+        """
+        sigma = self.freqai_config["feature_parameters"]["noise_standard_deviation"]
+        n_rows, n_cols = self.data_dictionary['train_features'].shape[:2]
+        # the mean is fixed at 0 so the features are not shifted
+        noise = np.random.normal(0, sigma, [n_rows, n_cols])
+        self.data_dictionary['train_features'] += noise
+
    def find_features(self, dataframe: DataFrame) -> None:
        """
        Find features in the strategy provided dataframe
@@ -848,6 +979,7 @@ class FreqaiDataKitchen:
        to_keep = [col for col in dataframe.columns if not col.startswith("&")]
        self.return_dataframe = pd.concat([dataframe[to_keep], self.full_df], axis=1)

+        self.return_dataframe = self.remove_training_from_backtesting()
        self.full_df = DataFrame()

        return
@@ -871,14 +1003,14 @@ class FreqaiDataKitchen:
                                       "Please indicate the end date of your desired backtesting. "
                                       "timerange.")
            # backtest_timerange.stopts = int(
-            #     datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
+            #     datetime.now(tz=timezone.utc).timestamp()
            # )

        backtest_timerange.startts = (
            backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY
        )
-        start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts)
-        stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts)
+        start = datetime.fromtimestamp(backtest_timerange.startts, tz=timezone.utc)
+        stop = datetime.fromtimestamp(backtest_timerange.stopts, tz=timezone.utc)
        full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")

        self.full_path = Path(
@@ -904,7 +1036,7 @@ class FreqaiDataKitchen:
        :return:
            bool = If the model is expired or not.
        """
-        time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
+        time = datetime.now(tz=timezone.utc).timestamp()
        elapsed_time = (time - trained_timestamp) / 3600  # hours
        max_time = self.freqai_config.get("expiration_hours", 0)
        if max_time > 0:
@@ -916,7 +1048,7 @@ class FreqaiDataKitchen:
        self, trained_timestamp: int
    ) -> Tuple[bool, TimeRange, TimeRange]:

-        time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
+        time = datetime.now(tz=timezone.utc).timestamp()
        trained_timerange = TimeRange()
        data_load_timerange = TimeRange()

@@ -1094,7 +1226,6 @@ class FreqaiDataKitchen:
    def save_backtesting_prediction(
        self, append_df: DataFrame
    ) -> None:
-
        """
        Save prediction dataframe from backtesting to h5 file format
        :param append_df: dataframe for backtesting period
@@ -1108,7 +1239,6 @@ class FreqaiDataKitchen:
    def get_backtesting_prediction(
        self
    ) -> DataFrame:
-
        """
        Get prediction dataframe from h5 file format
        """
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -1,13 +1,12 @@
-# import contextlib
-import datetime
 import logging
 import shutil
 import threading
 import time
 from abc import ABC, abstractmethod
+from datetime import datetime, timezone
 from pathlib import Path
 from threading import Lock
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple

 import numpy as np
 import pandas as pd
@@ -15,6 +14,7 @@ from numpy.typing import NDArray
 from pandas import DataFrame

 from freqtrade.configuration import TimeRange
+from freqtrade.constants import DATETIME_PRINT_FORMAT
 from freqtrade.enums import RunMode
 from freqtrade.exceptions import OperationalException
 from freqtrade.exchange import timeframe_to_seconds
@@ -27,13 +27,6 @@ pd.options.mode.chained_assignment = None
 logger = logging.getLogger(__name__)


-def threaded(fn):
-    def wrapper(*args, **kwargs):
-        threading.Thread(target=fn, args=args, kwargs=kwargs).start()
-
-    return wrapper
-
-
 class IFreqaiModel(ABC):
    """
    Class containing all tools for training and prediction in the strategy.
@@ -66,7 +59,6 @@ class IFreqaiModel(ABC):
            "data_split_parameters", {})
        self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
            "model_training_parameters", {})
-        self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
        self.retrain = False
        self.first = True
        self.set_full_path()
@@ -77,11 +69,14 @@ class IFreqaiModel(ABC):
        self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
        self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
        self.scanning = False
+        self.ft_params = self.freqai_info["feature_parameters"]
        self.keras: bool = self.freqai_info.get("keras", False)
-        if self.keras and self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0):
-            self.freqai_info["feature_parameters"]["DI_threshold"] = 0
+        if self.keras and self.ft_params.get("DI_threshold", 0):
+            self.ft_params["DI_threshold"] = 0
            logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
        self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
+        if self.ft_params.get("inlier_metric_window", 0):
+            self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
        self.pair_it = 0
        self.pair_it_train = 0
        self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
@@ -93,6 +88,16 @@ class IFreqaiModel(ABC):
        self.begin_time: float = 0
        self.begin_time_train: float = 0
        self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
+        self.continual_learning = self.freqai_info.get('continual_learning', False)
+
+        self._threads: List[threading.Thread] = []
+        self._stop_event = threading.Event()
         self.strategy: Optional[IStrategy] = None
+
+    def __getstate__(self):
+        """
+        Return an empty state so that pickling (done in hyperopt) stores no attributes
+        """
+        return ({})

    def assert_config(self, config: Dict[str, Any]) -> None:
@@ -148,15 +153,34 @@ class IFreqaiModel(ABC):
        self.model = None
        self.dk = None

-    @threaded
-    def start_scanning(self, strategy: IStrategy) -> None:
+    def shutdown(self):
+        """
+        Set the stop event, then block until every thread registered in
+        self._threads has returned, so a running training iteration can finish.
+        """
+        logger.info("Stopping FreqAI")
+        self._stop_event.set()
+
+        logger.info("Waiting on Training iteration")
+        for worker in self._threads:
+            worker.join()
+
+    def start_scanning(self, *args, **kwargs) -> None:
+        """
+        Run `self._start_scanning` on a new thread and record it in self._threads
+        """
+        scanner = threading.Thread(target=self._start_scanning, args=args, kwargs=kwargs)
+        self._threads.append(scanner)
+        scanner.start()
+
+    def _start_scanning(self, strategy: IStrategy) -> None:
        """
        Function designed to constantly scan pairs for retraining on a separate thread (intracandle)
        to improve model youth. This function is agnostic to data preparation/collection/storage,
        it simply trains on what ever data is available in the self.dd.
        :param strategy: IStrategy = The user defined strategy class
        """
-        while 1:
+        while not self._stop_event.is_set():
            time.sleep(1)
            for pair in self.config.get("exchange", {}).get("pair_whitelist"):

@@ -175,7 +199,7 @@ class IFreqaiModel(ABC):

                if retrain:
                    self.train_timer('start')
-                    self.train_model_in_series(
+                    self.extract_data_and_train_model(
                        new_trained_timerange, pair, strategy, dk, data_load_timerange
                    )
                    self.train_timer('stop')
@@ -215,12 +239,12 @@ class IFreqaiModel(ABC):
            dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)

            trained_timestamp = tr_train
-            tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime(
-                "%Y-%m-%d %H:%M:%S"
-            )
-            tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime(
-                "%Y-%m-%d %H:%M:%S"
-            )
+            tr_train_startts_str = datetime.fromtimestamp(
+                                                tr_train.startts,
+                                                tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
+            tr_train_stopts_str = datetime.fromtimestamp(
+                                                tr_train.stopts,
+                                                tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
            logger.info(
                f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs"
                f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
@@ -405,24 +429,30 @@ class IFreqaiModel(ABC):

    def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
        """
-        Base data cleaning method for train
-        Any function inside this method should drop training data points from the filtered_dataframe
-        based on user decided logic. See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an
-        example of how outlier data points are dropped from the dataframe used for training.
+        Base data cleaning method for train.
+        Functions here improve/modify the input data by identifying outliers,
+        computing additional metrics, adding noise, reducing dimensionality etc.
        """

-        if self.freqai_info["feature_parameters"].get(
+        ft_params = self.freqai_info["feature_parameters"]
+
+        if ft_params.get('inlier_metric_window', 0):
+            dk.compute_inlier_metric(set_='train')
+            if self.freqai_info["data_split_parameters"]["test_size"] > 0:
+                dk.compute_inlier_metric(set_='test')
+
+        if ft_params.get(
            "principal_component_analysis", False
        ):
            dk.principal_component_analysis()

-        if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
+        if ft_params.get("use_SVM_to_remove_outliers", False):
            dk.use_SVM_to_remove_outliers(predict=False)

-        if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
+        if ft_params.get("DI_threshold", 0):
            dk.data["avg_mean_dist"] = dk.compute_distances()

-        if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False):
+        if ft_params.get("use_DBSCAN_to_remove_outliers", False):
            if dk.pair in self.dd.old_DBSCAN_eps:
                eps = self.dd.old_DBSCAN_eps[dk.pair]
            else:
@@ -430,29 +460,31 @@ class IFreqaiModel(ABC):
            dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps)
            self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps']

+        if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0):
+            dk.add_noise_to_training_features()
+
    def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
        """
        Base data cleaning method for predict.
-        These functions each modify dk.do_predict, which is a dataframe with equal length
-        to the number of candles coming from and returning to the strategy. Inside do_predict,
-         1 allows prediction and < 0 signals to the strategy that the model is not confident in
-         the prediction.
-         See FreqaiDataKitchen::remove_outliers() for an example
-        of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
-        for buy signals.
+        Functions here are complementary to the functions of data_cleaning_train.
        """
-        if self.freqai_info["feature_parameters"].get(
+        ft_params = self.freqai_info["feature_parameters"]
+
+        if ft_params.get('inlier_metric_window', 0):
+            dk.compute_inlier_metric(set_='predict')
+
+        if ft_params.get(
            "principal_component_analysis", False
        ):
-            dk.pca_transform(dataframe)
+            dk.pca_transform(self.dk.data_dictionary['prediction_features'])

-        if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
+        if ft_params.get("use_SVM_to_remove_outliers", False):
            dk.use_SVM_to_remove_outliers(predict=True)

-        if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
+        if ft_params.get("DI_threshold", 0):
            dk.check_if_pred_in_training_spaces()

-        if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False):
+        if ft_params.get("use_DBSCAN_to_remove_outliers", False):
            dk.use_DBSCAN_to_remove_outliers(predict=True)

    def model_exists(
@@ -488,7 +520,7 @@ class IFreqaiModel(ABC):
            Path(self.full_path, Path(self.config["config_files"][0]).name),
        )

-    def train_model_in_series(
+    def extract_data_and_train_model(
        self,
        new_trained_timerange: TimeRange,
        pair: str,
@@ -580,7 +612,7 @@ class IFreqaiModel(ABC):

        # # for keras type models, the conv_window needs to be prepended so
        # # viewing is correct in frequi
-        if self.freqai_info.get('keras', False):
+        if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0):
            n_lost_points = self.freqai_info.get('conv_width', 2)
            zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))),
                                 columns=hist_preds_df.columns)
@@ -646,21 +678,30 @@ class IFreqaiModel(ABC):
                self.train_time = 0
        return

+    def get_init_model(self, pair: str) -> Any:
+        """
+        Return the model stored for `pair` when continual learning is enabled, else None.
+        """
+        if pair in self.dd.model_dictionary and self.continual_learning:
+            return self.dd.model_dictionary[pair]
+        return None
+
    # Following methods which are overridden by user made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example.

    @abstractmethod
-    def train(self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen) -> Any:
+    def train(self, unfiltered_df: DataFrame, pair: str,
+              dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datahandler
        for storing, saving, loading, and analyzing the data.
-        :param unfiltered_dataframe: Full dataframe for the current training period
+        :param unfiltered_df: Full dataframe for the current training period
        :param metadata: pair metadata from strategy.
        :return: Trained model which can be used to inference (self.predict)
        """

    @abstractmethod
-    def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
+    def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Most regressors use the same function names and arguments e.g. user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
@@ -673,11 +714,11 @@ class IFreqaiModel(ABC):

    @abstractmethod
    def predict(
-        self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True
+        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
-        :param unfiltered_dataframe: Full dataframe for the current backtest period.
+        :param unfiltered_df: Full dataframe for the current backtest period.
        :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
        :param first: boolean = whether this is the first prediction or not.
        :return:
--- a/freqtrade/freqai/prediction_models/CatboostClassifier.py
+++ b/freqtrade/freqai/prediction_models/CatboostClassifier.py
@@ -3,7 +3,8 @@ from typing import Any, Dict

 from catboost import CatBoostClassifier, Pool

-from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
+from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


 logger = logging.getLogger(__name__)
@@ -16,7 +17,7 @@ class CatboostClassifier(BaseClassifierModel):
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

-    def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :params:
@@ -36,6 +37,8 @@ class CatboostClassifier(BaseClassifierModel):
            **self.model_training_parameters,
        )

-        cbr.fit(train_data)
+        init_model = self.get_init_model(dk.pair)
+
+        cbr.fit(train_data, init_model=init_model)

        return cbr
--- a/freqtrade/freqai/prediction_models/CatboostRegressor.py
+++ b/freqtrade/freqai/prediction_models/CatboostRegressor.py
@@ -1,10 +1,10 @@
-import gc
 import logging
 from typing import Any, Dict

 from catboost import CatBoostRegressor, Pool

-from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


 logger = logging.getLogger(__name__)
@@ -17,7 +17,7 @@ class CatboostRegressor(BaseRegressionModel):
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

-    def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary constructed by DataHandler to hold
@@ -38,16 +38,13 @@ class CatboostRegressor(BaseRegressionModel):
                weight=data_dictionary["test_weights"],
            )

+        init_model = self.get_init_model(dk.pair)
+
        model = CatBoostRegressor(
            allow_writing_files=False,
            **self.model_training_parameters,
        )

-        model.fit(X=train_data, eval_set=test_data)
-
-        # some evidence that catboost pools have memory leaks:
-        # https://github.com/catboost/catboost/issues/1835
-        del train_data, test_data
-        gc.collect()
+        model.fit(X=train_data, eval_set=test_data, init_model=init_model)

        return model
--- a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py
+++ b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py
@@ -1,10 +1,11 @@
 import logging
 from typing import Any, Dict

-from catboost import CatBoostRegressor  # , Pool
-from sklearn.multioutput import MultiOutputRegressor
+from catboost import CatBoostRegressor, Pool

-from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


 logger = logging.getLogger(__name__)
@@ -17,7 +18,7 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

-    def fit(self, data_dictionary: Dict) -> Any:
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary constructed by DataHandler to hold
@@ -31,14 +32,37 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):

        X = data_dictionary["train_features"]
        y = data_dictionary["train_labels"]
-        eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
+
        sample_weight = data_dictionary["train_weights"]

-        model = MultiOutputRegressor(estimator=cbr)
-        model.fit(X=X, y=y, sample_weight=sample_weight)  # , eval_set=eval_set)
+        eval_sets = [None] * y.shape[1]

        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-            train_score = model.score(X, y)
-            test_score = model.score(*eval_set)
-            logger.info(f"Train score {train_score}, Test score {test_score}")
+            eval_sets = [None] * data_dictionary['test_labels'].shape[1]
+
+            for i in range(data_dictionary['test_labels'].shape[1]):
+                eval_sets[i] = Pool(
+                    data=data_dictionary["test_features"],
+                    label=data_dictionary["test_labels"].iloc[:, i],
+                    weight=data_dictionary["test_weights"],
+                )
+
+        init_model = self.get_init_model(dk.pair)
+
+        if init_model:
+            init_models = init_model.estimators_
+        else:
+            init_models = [None] * y.shape[1]
+
+        fit_params = []
+        for i in range(len(eval_sets)):
+            fit_params.append(
+                {'eval_set': eval_sets[i],  'init_model': init_models[i]})
+
+        model = FreqaiMultiOutputRegressor(estimator=cbr)
+        thread_training = self.freqai_info.get('multitarget_parallel_training', False)
+        if thread_training:
+            model.n_jobs = y.shape[1]
+        model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
+
        return model
--- a/freqtrade/freqai/prediction_models/LightGBMClassifier.py
+++ b/freqtrade/freqai/prediction_models/LightGBMClassifier.py
@@ -3,7 +3,8 @@ from typing import Any, Dict

 from lightgbm import LGBMClassifier

-from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
+from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


 logger = logging.getLogger(__name__)
@@ -16,7 +17,7 @@ class LightGBMClassifier(BaseClassifierModel):
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

-    def fit(self, data_dictionary: Dict) -> Any:
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :params:
@@ -35,9 +36,11 @@ class LightGBMClassifier(BaseClassifierModel):
        y = data_dictionary["train_labels"].to_numpy()[:, 0]
        train_weights = data_dictionary["train_weights"]

+        init_model = self.get_init_model(dk.pair)
+
        model = LGBMClassifier(**self.model_training_parameters)

        model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
-                  eval_sample_weight=[test_weights])
+                  eval_sample_weight=[test_weights], init_model=init_model)

        return model
--- a/freqtrade/freqai/prediction_models/LightGBMRegressor.py
+++ b/freqtrade/freqai/prediction_models/LightGBMRegressor.py
@@ -3,7 +3,8 @@ from typing import Any, Dict

 from lightgbm import LGBMRegressor

-from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


 logger = logging.getLogger(__name__)
@@ -16,7 +17,7 @@ class LightGBMRegressor(BaseRegressionModel):
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

-    def fit(self, data_dictionary: Dict) -> Any:
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Most regressors use the same function names and arguments e.g. user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
@@ -35,9 +36,11 @@ class LightGBMRegressor(BaseRegressionModel):
        y = data_dictionary["train_labels"]
        train_weights = data_dictionary["train_weights"]

+        init_model = self.get_init_model(dk.pair)
+
        model = LGBMRegressor(**self.model_training_parameters)

        model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
-                  eval_sample_weight=[eval_weights])
+                  eval_sample_weight=[eval_weights], init_model=init_model)

        return model
--- a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py
+++ b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py
@@ -2,9 +2,10 @@ import logging
 from typing import Any, Dict

 from lightgbm import LGBMRegressor
-from sklearn.multioutput import MultiOutputRegressor

-from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


 logger = logging.getLogger(__name__)
@@ -17,7 +18,7 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

-    def fit(self, data_dictionary: Dict) -> Any:
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary constructed by DataHandler to hold
@@ -28,12 +29,36 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):

        X = data_dictionary["train_features"]
        y = data_dictionary["train_labels"]
-        eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
        sample_weight = data_dictionary["train_weights"]

-        model = MultiOutputRegressor(estimator=lgb)
-        model.fit(X=X, y=y, sample_weight=sample_weight)  # , eval_set=eval_set)
-        train_score = model.score(X, y)
-        test_score = model.score(*eval_set)
-        logger.info(f"Train score {train_score}, Test score {test_score}")
+        eval_weights = None
+        eval_sets = [None] * y.shape[1]
+
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            eval_weights = [data_dictionary["test_weights"]]
+            eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1]  # type: ignore
+            for i in range(data_dictionary['test_labels'].shape[1]):
+                eval_sets[i] = (  # type: ignore
+                    data_dictionary["test_features"],
+                    data_dictionary["test_labels"].iloc[:, i]
+                )
+
+        init_model = self.get_init_model(dk.pair)
+        if init_model:
+            init_models = init_model.estimators_
+        else:
+            init_models = [None] * y.shape[1]
+
+        fit_params = []
+        for i in range(len(eval_sets)):
+            fit_params.append(
+                {'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights,
+                 'init_model': init_models[i]})
+
+        model = FreqaiMultiOutputRegressor(estimator=lgb)
+        thread_training = self.freqai_info.get('multitarget_parallel_training', False)
+        if thread_training:
+            model.n_jobs = y.shape[1]
+        model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
+
        return model
--- a/freqtrade/freqai/prediction_models/XGBoostRegressor.py
+++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py
@@ -0,0 +1,45 @@
+import logging
+from typing import Any, Dict
+
+from xgboost import XGBRegressor
+
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+
+
+logger = logging.getLogger(__name__)
+
+
+class XGBoostRegressor(BaseRegressionModel):
+    """
+    XGBoost regression model for FreqAI, built on BaseRegressionModel.
+    Only fit() is defined in this class.
+    """
+
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
+        """
+        Fit an XGBRegressor on the training split held in data_dictionary.
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+                                all the training and test data/labels.
+        :param dk: data kitchen of the current pair; dk.pair selects the init model.
+        :return: the fitted XGBRegressor
+        """
+        X = data_dictionary["train_features"]
+        y = data_dictionary["train_labels"]
+        sample_weight = data_dictionary["train_weights"]
+
+        # Both evaluation inputs stay None when there is no test split (test_size == 0);
+        # eval_weights was previously unbound in that case (UnboundLocalError at model.fit).
+        eval_set = None
+        eval_weights = None
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
+            eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
+            eval_weights = [data_dictionary['test_weights']]
+
+        # model to continue training from, as returned by get_init_model() (may be None)
+        xgb_model = self.get_init_model(dk.pair)
+
+        model = XGBRegressor(**self.model_training_parameters)
+        model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
+                  sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
+        return model
--- a/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py
+++ b/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py
@@ -0,0 +1,63 @@
+import logging
+from typing import Any, Dict
+
+from xgboost import XGBRegressor
+
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+
+
+logger = logging.getLogger(__name__)
+
+
+class XGBoostRegressorMultiTarget(BaseRegressionModel):
+    """
+    Multi-target XGBoost regression model: an XGBRegressor wrapped in a
+    FreqaiMultiOutputRegressor. Only fit() is defined in this class.
+    """
+
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
+        """
+        Build one set of fit parameters per label column and train the wrapped regressor.
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+                                all the training and test data/labels.
+        :param dk: data kitchen of the current pair; dk.pair selects the init model.
+        :return: the fitted FreqaiMultiOutputRegressor
+        """
+
+        base_estimator = XGBRegressor(**self.model_training_parameters)
+
+        X = data_dictionary["train_features"]
+        y = data_dictionary["train_labels"]
+        sample_weight = data_dictionary["train_weights"]
+        n_targets = y.shape[1]
+
+        # without a test split every target gets eval_set=None and no eval weights
+        eval_weights = None
+        eval_sets = [None] * n_targets
+        split_params = self.freqai_info.get('data_split_parameters', {})
+        if split_params.get('test_size', 0.1) != 0:
+            eval_weights = [data_dictionary["test_weights"]]
+            test_labels = data_dictionary['test_labels']
+            for col in range(test_labels.shape[1]):
+                eval_sets[col] = [(  # type: ignore
+                    data_dictionary["test_features"], test_labels.iloc[:, col]
+                )]
+
+        # per-target initial models, when get_init_model() returns a previous model
+        init_model = self.get_init_model(dk.pair)
+        init_models = init_model.estimators_ if init_model else [None] * n_targets
+
+        fit_params = [
+            {'eval_set': eval_set, 'sample_weight_eval_set': eval_weights,
+             'xgb_model': init_models[idx]}
+            for idx, eval_set in enumerate(eval_sets)
+        ]
+
+        model = FreqaiMultiOutputRegressor(estimator=base_estimator)
+        if self.freqai_info.get('multitarget_parallel_training', False):
+            model.n_jobs = n_targets
+        model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
+
+        return model