diff --git a/config_examples/config_freqai.example.json b/config_examples/config_freqai.example.json
index 648f36917..a895a7341 100644
--- a/config_examples/config_freqai.example.json
+++ b/config_examples/config_freqai.example.json
@@ -57,8 +57,8 @@
         "train_period": 30,
         "backtest_period": 7,
         "identifier": "example",
-        "live_trained_timerange": "20220330-20220429",
-        "live_full_backtestrange": "20220302-20220501",
+        "live_trained_timerange": "",
+        "live_full_backtestrange": "",
         "corr_pairlist": [
             "BTC/USDT",
             "ETH/USDT",
@@ -68,20 +68,19 @@
         "feature_parameters": {
             "period": 12,
             "shift": 1,
-            "drop_features": false,
             "DI_threshold": 1,
             "weight_factor": 0,
             "principal_component_analysis": false,
-            "remove_outliers": false
+            "use_SVM_to_remove_outliers": false
         },
         "data_split_parameters": {
             "test_size": 0.25,
             "random_state": 1
         },
         "model_training_parameters": {
-            "n_estimators": 2000,
+            "n_estimators": 1000,
             "random_state": 1,
-            "learning_rate": 0.02,
+            "learning_rate": 0.1,
             "task_type": "CPU"
         }
     },
diff --git a/docs/freqai.md b/docs/freqai.md
index df41846a4..8a37e7d66 100644
--- a/docs/freqai.md
+++ b/docs/freqai.md
@@ -331,21 +331,21 @@ Users can reduce the dimensionality of their features by activating the `princip
 
 Which will perform PCA on the features and reduce the dimensionality of the data so that the
 explained variance of the data set is >= 0.999.
 
-### Removing outliers based on feature statistical distributions
+### Removing outliers using a Support Vector Machine (SVM)
 
 The user can tell Freqai to remove outlier data points from the training/test data sets by setting:
 
 ```json
     "freqai": {
         "feature_parameters" : {
-            "remove_outliers": true
+            "use_SVM_to_remove_outliers": true
        }
    }
 ```
 
-Freqai will check the statistical distributions of each feature (or component if the user activated
-`principal_component_analysis`) and remove any data point that sits more than 3 standard deviations away
-from the mean.
+Freqai will train an SVM on the training data (or components if the user activated
+`principal_component_analysis`) and remove any data point that it deems to lie beyond the
+feature space.
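+
+For those curious, a minimal standalone sketch of the idea (illustrative only — random data
+stands in for the real feature matrix, and `nu=0.1` mirrors the value currently hardcoded in
+`data_kitchen.py`):
+
+```python
+import numpy as np
+from sklearn import linear_model
+
+train_features = np.random.rand(1000, 10)  # stand-in for the training features
+
+# fit a one-class SVM; predict() then returns +1 for inliers and -1 for outliers
+svm = linear_model.SGDOneClassSVM(nu=0.1).fit(train_features)
+y_pred = svm.predict(train_features)
+
+inliers = train_features[y_pred == 1]
+print(f"tossed {(y_pred == -1).sum()} of {len(y_pred)} points")
+```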
 
 ## Additional information
diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index e35243f6a..f589a1c89 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -10,8 +10,9 @@ from typing import Any, Dict, List, Tuple
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
-from joblib import dump, load
+from joblib import dump, load  # , Parallel, delayed  # used for auto distribution assignment
 from pandas import DataFrame
+from sklearn import linear_model
 from sklearn.metrics.pairwise import pairwise_distances
 from sklearn.model_selection import train_test_split
@@ -22,6 +23,9 @@ from freqtrade.resolvers import ExchangeResolver
 from freqtrade.strategy.interface import IStrategy
 
 
+# import scipy as spy  # used for auto distribution assignment
+
+
 SECONDS_IN_DAY = 86400
 
 logger = logging.getLogger(__name__)
@@ -52,6 +56,7 @@ class FreqaiDataKitchen:
         self.model_filename: str = ""
         self.model_dictionary: Dict[Any, Any] = {}
         self.live = live
+        self.svm_model: linear_model.SGDOneClassSVM = None
         if not self.live:
             self.full_timerange = self.create_fulltimerange(self.config["timerange"],
                                                             self.freqai_config["train_period"]
@@ -89,6 +94,10 @@
 
         # Save the trained model
         dump(model, save_path / str(self.model_filename + "_model.joblib"))
+
+        if self.svm_model is not None:
+            dump(self.svm_model, save_path / str(self.model_filename + "_svm_model.joblib"))
+
         self.data["model_path"] = str(self.model_path)
         self.data["model_filename"] = str(self.model_filename)
         self.data["training_features_list"] = list(self.data_dictionary["train_features"].columns)
@@ -104,6 +113,19 @@
         if self.live:
             self.model_dictionary[self.model_filename] = model
 
+        # TODO add a helper function to let user save/load any data they are custom adding. We
+        # do not want them having to edit the default save/load methods here. Below is an example
+        # of what we do NOT want.
+
+        # if self.freqai_config['feature_parameters']['determine_statistical_distributions']:
+        #     self.data_dictionary["upper_quantiles"].to_pickle(
+        #         save_path / str(self.model_filename + "_upper_quantiles.pkl")
+        #     )
+
+        #     self.data_dictionary["lower_quantiles"].to_pickle(
+        #         save_path / str(self.model_filename + "_lower_quantiles.pkl")
+        #     )
+
         return
 
     def load_data(self) -> Any:
@@ -121,6 +143,19 @@
             self.model_path / str(self.model_filename + "_trained_df.pkl")
         )
 
+        # TODO add a helper function to let user save/load any data they are custom adding. We
+        # do not want them having to edit the default save/load methods here. Below is an example
+        # of what we do NOT want.
+
+        # if self.freqai_config['feature_parameters']['determine_statistical_distributions']:
+        #     self.data_dictionary["upper_quantiles"] = pd.read_pickle(
+        #         self.model_path / str(self.model_filename + "_upper_quantiles.pkl")
+        #     )
+
+        #     self.data_dictionary["lower_quantiles"] = pd.read_pickle(
+        #         self.model_path / str(self.model_filename + "_lower_quantiles.pkl")
+        #     )
+
         self.model_path = Path(self.data["model_path"])
         self.model_filename = self.data["model_filename"]
@@ -130,6 +165,10 @@
         else:
             model = load(self.model_path / str(self.model_filename + "_model.joblib"))
 
+        if Path(self.model_path / str(self.model_filename +
+                                      "_svm_model.joblib")).resolve().exists():
+            self.svm_model = load(self.model_path / str(self.model_filename + "_svm_model.joblib"))
+
         assert model, (
             f"Unable to load model, ensure model exists at "
             f"{self.model_path} "
@@ -159,6 +198,12 @@
         else:
             weights = np.ones(len(filtered_dataframe))
 
+        stratification = None  # keeps train_test_split unstratified when "stratify" is 0
+        if self.config["freqai"]["feature_parameters"]["stratify"] > 0:
+            stratification = np.zeros(len(filtered_dataframe))
+            for i in range(1, len(stratification)):
+                if i % self.config["freqai"]["feature_parameters"]["stratify"] == 0:
+                    stratification[i] = 1
+
         (
             train_features,
             test_features,
@@ -170,6 +215,8 @@
             filtered_dataframe[: filtered_dataframe.shape[0]],
             labels,
             weights,
+            stratify=stratification,
+            # shuffle=False,
             **self.config["freqai"]["data_split_parameters"]
         )
 
@@ -261,9 +308,9 @@
 
         return self.data_dictionary
 
-    def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
+    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
         """
-        Standardize all data in the data_dictionary according to the training dataset
+        Normalize all data in the data_dictionary according to the training dataset
         :params:
         :data_dictionary: dictionary containing the cleaned and split training/test data/labels
         :returns:
@@ -297,6 +344,42 @@
 
         return data_dictionary
 
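+    # NOTE: normalize_data() above applies a z-score, x' = (x - mean) / std, while
+    # standardize_data() below maps each column to [-1, 1] via
+    # x' = 2 * (x - min) / (max - min) - 1. The inverse of the latter,
+    # x = (x' + 1) * (max - min) / 2 + min, is how CatboostPredictionModel.predict()
+    # recovers non-standardized predictions from labels_max/labels_min.
+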
+ """ + # standardize the data by training stats + train_max = data_dictionary["train_features"].max() + train_min = data_dictionary["train_features"].min() + data_dictionary["train_features"] = 2 * ( + data_dictionary["train_features"] - train_min + ) / (train_max - train_min) - 1 + data_dictionary["test_features"] = 2 * ( + data_dictionary["test_features"] - train_min + ) / (train_max - train_min) - 1 + + train_labels_max = data_dictionary["train_labels"].max() + train_labels_min = data_dictionary["train_labels"].min() + data_dictionary["train_labels"] = 2 * ( + data_dictionary["train_labels"] - train_labels_min + ) / (train_labels_max - train_labels_min) - 1 + data_dictionary["test_labels"] = 2 * ( + data_dictionary["test_labels"] - train_labels_min + ) / (train_labels_max - train_labels_min) - 1 + + for item in train_max.keys(): + self.data[item + "_max"] = train_max[item] + self.data[item + "_min"] = train_min[item] + + self.data["labels_max"] = train_labels_max + self.data["labels_min"] = train_labels_min + + return data_dictionary + def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame: """ Standardizes a set of data using the mean and standard deviation from @@ -305,6 +388,20 @@ class FreqaiDataKitchen: :df: Dataframe to be standardized """ + for item in df.keys(): + df[item] = 2 * (df[item] - self.data[item + "_min"]) / (self.data[item + "_max"] - + self.data[item + '_min']) - 1 + + return df + + def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame: + """ + Normalizes a set of data using the mean and standard deviation from + the associated training data. + :params: + :df: Dataframe to be standardized + """ + for item in df.keys(): df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"] @@ -420,6 +517,8 @@ class FreqaiDataKitchen: self.data["n_kept_components"] = n_keep_components self.pca = pca2 + logger.info(f'PCA reduced total features from {n_components} to {n_keep_components}') + if not self.model_path.is_dir(): self.model_path.mkdir(parents=True, exist_ok=True) pk.dump(pca2, open(self.model_path / str(self.model_filename + "_pca_object.pkl"), "wb")) @@ -434,70 +533,53 @@ class FreqaiDataKitchen: return avg_mean_dist - def remove_outliers(self, predict: bool) -> None: - """ - Remove data that looks like an outlier based on the distribution of each - variable. - :params: - :predict: boolean which tells the function if this is prediction data or - training data coming in. 
- """ - - lower_quantile = self.data_dictionary["train_features"].quantile(0.001) - upper_quantile = self.data_dictionary["train_features"].quantile(0.999) + def use_SVM_to_remove_outliers(self, predict: bool) -> None: if predict: - - df = self.data_dictionary["prediction_features"][ - (self.data_dictionary["prediction_features"] < upper_quantile) - & (self.data_dictionary["prediction_features"] > lower_quantile) - ] - drop_index = pd.isnull(df).any(1) - self.data_dictionary["prediction_features"].fillna(0, inplace=True) - drop_index = ~drop_index - do_predict = np.array(drop_index.replace(True, 1).replace(False, 0)) + assert self.svm_model, "No svm model available for outlier removal" + y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"]) + do_predict = np.where(y_pred == -1, 0, y_pred) logger.info( - "remove_outliers() tossed %s predictions", - len(do_predict) - do_predict.sum(), + f'svm_remove_outliers() tossed {len(do_predict) - do_predict.sum()} predictions' ) self.do_predict += do_predict self.do_predict -= 1 else: + # use SGDOneClassSVM to increase speed? + self.svm_model = linear_model.SGDOneClassSVM(nu=0.1).fit( + self.data_dictionary["train_features"] + ) + y_pred = self.svm_model.predict(self.data_dictionary["train_features"]) + dropped_points = np.where(y_pred == -1, 0, y_pred) + # keep_index = np.where(y_pred == 1) + self.data_dictionary["train_features"] = self.data_dictionary[ + "train_features"][(y_pred == 1)] + self.data_dictionary["train_labels"] = self.data_dictionary[ + "train_labels"][(y_pred == 1)] + self.data_dictionary["train_weights"] = self.data_dictionary[ + "train_weights"][(y_pred == 1)] - filter_train_df = self.data_dictionary["train_features"][ - (self.data_dictionary["train_features"] < upper_quantile) - & (self.data_dictionary["train_features"] > lower_quantile) - ] - drop_index = pd.isnull(filter_train_df).any(1) - drop_index = drop_index.replace(True, 1).replace(False, 0) - self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ - (drop_index == 0) - ] - self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ - (drop_index == 0) - ] - self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ - (drop_index == 0) - ] + logger.info( + f'svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}' + f' train points from {len(y_pred)}' + ) - # do the same for the test data - filter_test_df = self.data_dictionary["test_features"][ - (self.data_dictionary["test_features"] < upper_quantile) - & (self.data_dictionary["test_features"] > lower_quantile) - ] - drop_index = pd.isnull(filter_test_df).any(1) - drop_index = drop_index.replace(True, 1).replace(False, 0) - self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][ - (drop_index == 0) - ] - self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ - (drop_index == 0) - ] - self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ - (drop_index == 0) - ] + # same for test data + y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) + dropped_points = np.where(y_pred == -1, 0, y_pred) + self.data_dictionary["test_features"] = self.data_dictionary[ + "test_features"][(y_pred == 1)] + self.data_dictionary["test_labels"] = self.data_dictionary[ + "test_labels"][(y_pred == 1)] + self.data_dictionary["test_weights"] = self.data_dictionary[ + "test_weights"][(y_pred == 1)] + + logger.info( + f'svm_remove_outliers() tossed {len(y_pred) - 
+                f' test points from {len(y_pred)}'
+            )
 
         return
 
@@ -507,32 +589,6 @@
         assert features, ("Could not find any features!")
         return features
 
-    # def build_feature_list(self, config: dict, metadata: dict) -> list:
-    #     """
-    #     SUPERCEDED BY self.find_features()
-    #     Build the list of features that will be used to filter
-    #     the full dataframe. Feature list is construced from the
-    #     user configuration file.
-    #     :params:
-    #     :config: Canonical freqtrade config file containing all
-    #     user defined input in config['freqai] dictionary.
-    #     """
-    #     features = []
-    #     for tf in config["freqai"]["timeframes"]:
-    #         for ft in config["freqai"]["base_features"]:
-    #             for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
-    #                 shift = ""
-    #                 if n > 0:
-    #                     shift = "_shift-" + str(n)
-    #                 features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
-    #                 for p in config["freqai"]["corr_pairlist"]:
-    #                     if metadata['pair'] in p:
-    #                         continue  # avoid duplicate features
-    #                     features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
-
-    #     # logger.info("number of features %s", len(features))
-    #     return features
-
     def check_if_pred_in_training_spaces(self) -> None:
         """
         Compares the distance from each prediction point to each training data
@@ -568,7 +624,7 @@
         training than older data.
         """
 
-        weights = np.zeros_like(num_weights)
+        weights = np.zeros(num_weights)
         for i in range(1, len(weights)):
             weights[len(weights) - i] = np.exp(
                 -i / (self.config["freqai"]["feature_parameters"]["weight_factor"] * num_weights)
@@ -638,19 +694,23 @@
 
         time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
 
-        trained_timerange = TimeRange.parse_timerange(training_timerange)
+        if training_timerange:
+            trained_timerange = TimeRange.parse_timerange(training_timerange)
+            elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY
+            trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
+            trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
+            retrain = elapsed_time > self.freqai_config['backtest_period']
+        else:  # user passed no live_trained_timerange in config
+            trained_timerange = TimeRange.parse_timerange("20000101-20000201")
+            trained_timerange.startts = int(time - self.freqai_config['train_period'] *
+                                            SECONDS_IN_DAY)
+            trained_timerange.stopts = int(time)
+            retrain = True
 
-        elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY
-
-        trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
-        trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
 
         start = datetime.datetime.utcfromtimestamp(trained_timerange.startts)
         stop = datetime.datetime.utcfromtimestamp(trained_timerange.stopts)
-
         new_trained_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
 
-        retrain = elapsed_time > self.freqai_config['backtest_period']
-
         if retrain:
             coin, _ = metadata['pair'].split("/")
             # set the new model_path
@@ -738,3 +798,141 @@
     def np_encoder(self, object):
         if isinstance(object, np.generic):
             return object.item()
+
+    # Functions containing useful data manipulation examples, but not actively in use.
+
+    # def build_feature_list(self, config: dict, metadata: dict) -> list:
+    #     """
+    #     SUPERSEDED BY self.find_features()
+    #     Build the list of features that will be used to filter
+    #     the full dataframe. Feature list is constructed from the
+    #     user configuration file.
+    #     :params:
+    #     :config: Canonical freqtrade config file containing all
+    #     user defined input in config['freqai'] dictionary.
+    #     """
+    #     features = []
+    #     for tf in config["freqai"]["timeframes"]:
+    #         for ft in config["freqai"]["base_features"]:
+    #             for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
+    #                 shift = ""
+    #                 if n > 0:
+    #                     shift = "_shift-" + str(n)
+    #                 features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
+    #                 for p in config["freqai"]["corr_pairlist"]:
+    #                     if metadata['pair'] in p:
+    #                         continue  # avoid duplicate features
+    #                     features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
+
+    #     # logger.info("number of features %s", len(features))
+    #     return features
+
+    # Possibly phasing out these outlier removal methods below in favor of
+    # use_SVM_to_remove_outliers (computationally more efficient and apparently
+    # higher-performing). But these have good data manipulation examples, so keep
+    # them commented here for now.
+
+    # def determine_statistical_distributions(self) -> None:
+    #     from fitter import Fitter
+
+    #     logger.info('Determining best model for all features, may take some time')
+
+    #     def compute_quantiles(ft):
+    #         f = Fitter(self.data_dictionary["train_features"][ft],
+    #                    distributions=['gamma', 'cauchy', 'laplace',
+    #                                   'beta', 'uniform', 'lognorm'])
+    #         f.fit()
+    #         # f.summary()
+    #         dist = list(f.get_best().items())[0][0]
+    #         params = f.get_best()[dist]
+    #         upper_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.999, **params)
+    #         lower_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.001, **params)
+
+    #         return ft, upper_q, lower_q, dist
+
+    #     quantiles_tuple = Parallel(n_jobs=-1)(
+    #         delayed(compute_quantiles)(ft) for ft in self.data_dictionary[
+    #             'train_features'].columns)
+
+    #     df = pd.DataFrame(quantiles_tuple, columns=['features', 'upper_quantiles',
+    #                                                 'lower_quantiles', 'dist'])
+    #     self.data_dictionary['upper_quantiles'] = df['upper_quantiles']
+    #     self.data_dictionary['lower_quantiles'] = df['lower_quantiles']
+
+    #     return
+
+    # def remove_outliers(self, predict: bool) -> None:
+    #     """
+    #     Remove data that looks like an outlier based on the distribution of each
+    #     variable.
+    #     :params:
+    #     :predict: boolean which tells the function if this is prediction data or
+    #     training data coming in.
+ # """ + + # lower_quantile = self.data_dictionary["lower_quantiles"].to_numpy() + # upper_quantile = self.data_dictionary["upper_quantiles"].to_numpy() + + # if predict: + + # df = self.data_dictionary["prediction_features"][ + # (self.data_dictionary["prediction_features"] < upper_quantile) + # & (self.data_dictionary["prediction_features"] > lower_quantile) + # ] + # drop_index = pd.isnull(df).any(1) + # self.data_dictionary["prediction_features"].fillna(0, inplace=True) + # drop_index = ~drop_index + # do_predict = np.array(drop_index.replace(True, 1).replace(False, 0)) + + # logger.info( + # "remove_outliers() tossed %s predictions", + # len(do_predict) - do_predict.sum(), + # ) + # self.do_predict += do_predict + # self.do_predict -= 1 + + # else: + + # filter_train_df = self.data_dictionary["train_features"][ + # (self.data_dictionary["train_features"] < upper_quantile) + # & (self.data_dictionary["train_features"] > lower_quantile) + # ] + # drop_index = pd.isnull(filter_train_df).any(1) + # drop_index = drop_index.replace(True, 1).replace(False, 0) + # self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ + # (drop_index == 0) + # ] + # self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ + # (drop_index == 0) + # ] + # self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ + # (drop_index == 0) + # ] + + # logger.info( + # f'remove_outliers() tossed {drop_index.sum()}' + # f' training points from {len(filter_train_df)}' + # ) + + # # do the same for the test data + # filter_test_df = self.data_dictionary["test_features"][ + # (self.data_dictionary["test_features"] < upper_quantile) + # & (self.data_dictionary["test_features"] > lower_quantile) + # ] + # drop_index = pd.isnull(filter_test_df).any(1) + # drop_index = drop_index.replace(True, 1).replace(False, 0) + # self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][ + # (drop_index == 0) + # ] + # self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ + # (drop_index == 0) + # ] + # self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ + # (drop_index == 0) + # ] + + # logger.info( + # f'remove_outliers() tossed {drop_index.sum()}' + # f' test points from {len(filter_test_df)}' + # ) + + # return diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 2523cd561..f1dd5550a 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -62,6 +62,7 @@ class IFreqaiModel(ABC): self.predictions = None self.training_on_separate_thread = False self.retrain = False + self.first = True def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame: """ @@ -80,12 +81,12 @@ class IFreqaiModel(ABC): :metadata: pair metadata coming from strategy. 
""" - live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE) + self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE) self.pair = metadata["pair"] - self.dh = FreqaiDataKitchen(self.config, dataframe, live) + self.dh = FreqaiDataKitchen(self.config, dataframe, self.live) - if live: + if self.live: # logger.info('testing live') self.start_live(dataframe, metadata, strategy) @@ -115,11 +116,12 @@ class IFreqaiModel(ABC): self.dh.save_data(self.model) else: self.model = self.dh.load_data() - strategy_provided_features = self.dh.find_features(dataframe_train) - if strategy_provided_features != self.dh.training_features_list: - logger.info("User changed input features, retraining model.") - self.model = self.train(dataframe_train, metadata) - self.dh.save_data(self.model) + # strategy_provided_features = self.dh.find_features(dataframe_train) + # # TOFIX doesnt work with PCA + # if strategy_provided_features != self.dh.training_features_list: + # logger.info("User changed input features, retraining model.") + # self.model = self.train(dataframe_train, metadata) + # self.dh.save_data(self.model) preds, do_preds = self.predict(dataframe_backtest, metadata) @@ -148,7 +150,7 @@ class IFreqaiModel(ABC): if not self.training_on_separate_thread: # this will also prevent other pairs from trying to train simultaneously. (self.retrain, - new_trained_timerange) = self.dh.check_if_new_training_required(self.freqai_info[ + self.new_trained_timerange) = self.dh.check_if_new_training_required(self.freqai_info[ 'live_trained_timerange'], metadata) else: @@ -156,14 +158,19 @@ class IFreqaiModel(ABC): self.retrain = False if self.retrain or not file_exists: - self.training_on_separate_thread = True # acts like a lock - self.retrain_model_on_separate_thread(new_trained_timerange, metadata, strategy) + if self.first: + self.train_model_in_series(self.new_trained_timerange, metadata, strategy) + self.first = False + else: + self.training_on_separate_thread = True # acts like a lock + self.retrain_model_on_separate_thread(self.new_trained_timerange, + metadata, strategy) self.model = self.dh.load_data() strategy_provided_features = self.dh.find_features(dataframe) if strategy_provided_features != self.dh.training_features_list: - self.train_model_in_series(new_trained_timerange, metadata, strategy) + self.train_model_in_series(self.new_trained_timerange, metadata, strategy) preds, do_preds = self.predict(dataframe, metadata) self.dh.append_predictions(preds, do_preds, len(dataframe)) @@ -215,12 +222,36 @@ class IFreqaiModel(ABC): data (NaNs) or felt uncertain about data (PCA and DI index) """ + @abstractmethod + def data_cleaning_train(self) -> None: + """ + User can add data analysis and cleaning here. + Any function inside this method should drop training data points from the filtered_dataframe + based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example + of how outlier data points are dropped from the dataframe used for training. + """ + + @abstractmethod + def data_cleaning_predict(self) -> None: + """ + User can add data analysis and cleaning here. + These functions each modify self.dh.do_predict, which is a dataframe with equal length + to the number of candles coming from and returning to the strategy. Inside do_predict, + 1 allows prediction and < 0 signals to the strategy that the model is not confident in + the prediction. + See FreqaiDataKitchen::remove_outliers() for an example + of how the do_predict vector is modified. 
+        for buy signals.
+        """
+
     def model_exists(self, pair: str, training_timerange: str) -> bool:
         """
         Given a pair and path, check if a model already exists
         :param pair: pair e.g. BTC/USD
         :param path: path to model
         """
+        if self.live and not training_timerange:
+            return False
         coin, _ = pair.split("/")
         self.dh.model_filename = "cb_" + coin.lower() + "_" + training_timerange
         path_to_modelfile = Path(self.dh.model_path / str(self.dh.model_filename + "_model.joblib"))
@@ -265,3 +296,4 @@
 
         self.model = self.train(unfiltered_dataframe, metadata)
         self.dh.save_data(self.model)
+        self.retrain = False
diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
index e2ba6bd29..8550f3f15 100644
--- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
@@ -29,7 +29,7 @@ class CatboostPredictionModel(IFreqaiModel):
             dataframe["close"]
             .shift(-self.feature_parameters["period"])
             .rolling(self.feature_parameters["period"])
-            .max()
+            .mean()
             / dataframe["close"]
             - 1
         )
@@ -68,15 +68,11 @@
         # standardize all data based on train_dataset only
         data_dictionary = self.dh.standardize_data(data_dictionary)
 
-        # optional additional data cleaning
-        if self.feature_parameters["principal_component_analysis"]:
-            self.dh.principal_component_analysis()
-        if self.feature_parameters["remove_outliers"]:
-            self.dh.remove_outliers(predict=False)
-        if self.feature_parameters["DI_threshold"]:
-            self.dh.data["avg_mean_dist"] = self.dh.compute_distances()
+        # optional additional data cleaning/analysis
+        self.data_cleaning_train()
 
-        logger.info("length of train data %s", len(data_dictionary["train_features"]))
+        logger.info(f'Training model on {len(self.dh.training_features_list)} features')
+        logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
 
         model = self.fit(data_dictionary)
 
@@ -86,9 +82,7 @@
 
     def fit(self, data_dictionary: Dict) -> Any:
         """
-        Most regressors use the same function names and arguments e.g. user
-        can drop in LGBMRegressor in place of CatBoostRegressor and all data
-        management will be properly handled by Freqai.
+        User sets up the training and test data to fit their desired model here
         :params:
         :data_dictionary: the dictionary constructed by DataHandler to hold
         all the training and test data/labels.
@@ -133,7 +127,51 @@
         filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe)
         self.dh.data_dictionary["prediction_features"] = filtered_dataframe
 
-        # optional additional data cleaning
+        # optional additional data cleaning/analysis
+        self.data_cleaning_predict(filtered_dataframe)
+
+        predictions = self.model.predict(self.dh.data_dictionary["prediction_features"])
+
+        # compute the non-standardized predictions
+        self.dh.predictions = (predictions + 1) * (self.dh.data["labels_max"] -
+                                                   self.dh.data["labels_min"]) / 2 + self.dh.data[
+                                                       "labels_min"]
+
+        # logger.info("--------------------Finished prediction--------------------")
+
+        return (self.dh.predictions, self.dh.do_predict)
+
+    def data_cleaning_train(self) -> None:
+        """
+        User can add data analysis and cleaning here.
+        Any function inside this method should drop training data points from the
+        filtered_dataframe based on user-decided logic.
+        See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an example
+        of how outlier data points are dropped from the dataframe used for training.
+        """
+        if self.feature_parameters["principal_component_analysis"]:
+            self.dh.principal_component_analysis()
+
+        # if self.feature_parameters["determine_statistical_distributions"]:
+        #     self.dh.determine_statistical_distributions()
+        # if self.feature_parameters["remove_outliers"]:
+        #     self.dh.remove_outliers(predict=False)
+
+        if self.feature_parameters["use_SVM_to_remove_outliers"]:
+            self.dh.use_SVM_to_remove_outliers(predict=False)
+        if self.feature_parameters["DI_threshold"]:
+            self.dh.data["avg_mean_dist"] = self.dh.compute_distances()
+
+    def data_cleaning_predict(self, filtered_dataframe: DataFrame) -> None:
+        """
+        User can add data analysis and cleaning here.
+        These functions each modify self.dh.do_predict, which is a dataframe with equal length
+        to the number of candles coming from and returning to the strategy. Inside do_predict,
+        1 allows prediction and < 0 signals to the strategy that the model is not confident in
+        the prediction.
+        See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an example
+        of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
+        for buy signals.
+        """
         if self.feature_parameters["principal_component_analysis"]:
             pca_components = self.dh.pca.transform(filtered_dataframe)
             self.dh.data_dictionary["prediction_features"] = pd.DataFrame(
                 data=pca_components,
                 columns=["PC" + str(i) for i in range(0, self.dh.data["n_kept_components"])],
                 index=filtered_dataframe.index,
             )
 
-        if self.feature_parameters["remove_outliers"]:
-            self.dh.remove_outliers(predict=True)  # creates dropped index
+        # if self.feature_parameters["determine_statistical_distributions"]:
+        #     self.dh.determine_statistical_distributions()
+        # if self.feature_parameters["remove_outliers"]:
+        #     self.dh.remove_outliers(predict=True)  # creates dropped index
+
+        if self.feature_parameters["use_SVM_to_remove_outliers"]:
+            self.dh.use_SVM_to_remove_outliers(predict=True)
 
         if self.feature_parameters["DI_threshold"]:
             self.dh.check_if_pred_in_training_spaces()  # sets do_predict
-
-        predictions = self.model.predict(self.dh.data_dictionary["prediction_features"])
-
-        # compute the non-standardized predictions
-        self.dh.predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"]
-
-        # logger.info("--------------------Finished prediction--------------------")
-
-        return (self.dh.predictions, self.dh.do_predict)
diff --git a/freqtrade/templates/ExamplePredictionModel.py b/freqtrade/templates/ExamplePredictionModel.py
deleted file mode 100644
index 3d2b7a808..000000000
--- a/freqtrade/templates/ExamplePredictionModel.py
+++ /dev/null
@@ -1,159 +0,0 @@
-import logging
-from typing import Any, Dict, Tuple
-
-import pandas as pd
-from catboost import CatBoostRegressor, Pool
-from pandas import DataFrame
-
-from freqtrade.freqai.freqai_interface import IFreqaiModel
-
-
-logger = logging.getLogger(__name__)
-
-
-class ExamplePredictionModel(IFreqaiModel):
-    """
-    User created prediction model. The class needs to override three necessary
-    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed.
-    """
-
-    def make_labels(self, dataframe: DataFrame) -> DataFrame:
-        """
-        User defines the labels here (target values).
-        :params:
-        :dataframe: the full dataframe for the present training period
-        """
-
-        dataframe["s"] = (
-            dataframe["close"]
-            .shift(-self.feature_parameters["period"])
-            .rolling(self.feature_parameters["period"])
-            .max()
-            / dataframe["close"]
-            - 1
-        )
-        self.dh.data["s_mean"] = dataframe["s"].mean()
-        self.dh.data["s_std"] = dataframe["s"].std()
-
-        # logger.info("label mean", self.dh.data["s_mean"], "label std", self.dh.data["s_std"])
-
-        return dataframe["s"]
-
-    def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Tuple[DataFrame, DataFrame]:
-        """
-        Filter the training data and train a model to it. Train makes heavy use of the datakitchen
-        for storing, saving, loading, and analyzing the data.
-        :params:
-        :unfiltered_dataframe: Full dataframe for the current training period
-        :metadata: pair metadata from strategy.
-        :returns:
-        :model: Trained model which can be used to inference (self.predict)
-        """
-        logger.info("--------------------Starting training--------------------")
-
-        # create the full feature list based on user config info
-        self.dh.training_features_list = self.dh.build_feature_list(self.config, metadata)
-        unfiltered_labels = self.make_labels(unfiltered_dataframe)
-
-        # filter the features requested by user in the configuration file and elegantly handle NaNs
-        features_filtered, labels_filtered = self.dh.filter_features(
-            unfiltered_dataframe,
-            self.dh.training_features_list,
-            unfiltered_labels,
-            training_filter=True,
-        )
-
-        # split data into train/test data.
-        data_dictionary = self.dh.make_train_test_datasets(features_filtered, labels_filtered)
-        # standardize all data based on train_dataset only
-        data_dictionary = self.dh.standardize_data(data_dictionary)
-
-        # optional additional data cleaning
-        if self.feature_parameters["principal_component_analysis"]:
-            self.dh.principal_component_analysis()
-        if self.feature_parameters["remove_outliers"]:
-            self.dh.remove_outliers(predict=False)
-        if self.feature_parameters["DI_threshold"]:
-            self.dh.data["avg_mean_dist"] = self.dh.compute_distances()
-
-        logger.info("length of train data %s", len(data_dictionary["train_features"]))
-
-        model = self.fit(data_dictionary)
-
-        logger.info(f'--------------------done training {metadata["pair"]}--------------------')
-
-        return model
-
-    def fit(self, data_dictionary: Dict) -> Any:
-        """
-        Most regressors use the same function names and arguments e.g. user
-        can drop in LGBMRegressor in place of CatBoostRegressor and all data
-        management will be properly handled by Freqai.
-        :params:
-        :data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
-        """
-
-        train_data = Pool(
-            data=data_dictionary["train_features"],
-            label=data_dictionary["train_labels"],
-            weight=data_dictionary["train_weights"],
-        )
-
-        test_data = Pool(
-            data=data_dictionary["test_features"],
-            label=data_dictionary["test_labels"],
-            weight=data_dictionary["test_weights"],
-        )
-
-        model = CatBoostRegressor(
-            verbose=100, early_stopping_rounds=400, **self.model_training_parameters
-        )
-        model.fit(X=train_data, eval_set=test_data)
-
-        return model
-
-    def predict(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Tuple[DataFrame,
-                                                                                DataFrame]:
-        """
-        Filter the prediction features data and predict with it.
-        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
-        :return:
-        :predictions: np.array of predictions
-        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
-        data (NaNs) or felt uncertain about data (PCA and DI index)
-        """
-
-        # logger.info("--------------------Starting prediction--------------------")
-
-        original_feature_list = self.dh.build_feature_list(self.config, metadata)
-        filtered_dataframe, _ = self.dh.filter_features(
-            unfiltered_dataframe, original_feature_list, training_filter=False
-        )
-        filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe)
-        self.dh.data_dictionary["prediction_features"] = filtered_dataframe
-
-        # optional additional data cleaning
-        if self.feature_parameters["principal_component_analysis"]:
-            pca_components = self.dh.pca.transform(filtered_dataframe)
-            self.dh.data_dictionary["prediction_features"] = pd.DataFrame(
-                data=pca_components,
-                columns=["PC" + str(i) for i in range(0, self.dh.data["n_kept_components"])],
-                index=filtered_dataframe.index,
-            )
-
-        if self.feature_parameters["remove_outliers"]:
-            self.dh.remove_outliers(predict=True)  # creates dropped index
-
-        if self.feature_parameters["DI_threshold"]:
-            self.dh.check_if_pred_in_training_spaces()  # sets do_predict
-
-        predictions = self.model.predict(self.dh.data_dictionary["prediction_features"])
-
-        # compute the non-standardized predictions
-        self.dh.predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"]
-
-        # logger.info("--------------------Finished prediction--------------------")
-
-        return (self.dh.predictions, self.dh.do_predict)
diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py
index c8befebcf..a76ea2303 100644
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -166,8 +166,8 @@ class FreqaiExampleStrategy(IStrategy):
             dataframe["target_std"],
         ) = self.model.bridge.start(dataframe, metadata, self)
 
-        dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 0.5
-        dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1.5
+        dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.5
+        dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1
         return dataframe
 
     def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
@@ -183,7 +183,7 @@
 
    def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
         sell_conditions = [
-            (dataframe["prediction"] < dataframe["sell_roi"]) & (dataframe["do_predict"] == 1)
+            (dataframe["do_predict"] <= 0)
         ]
         if sell_conditions:
             dataframe.loc[reduce(lambda x, y: x | y, sell_conditions), "sell"] = 1
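
A minimal, self-contained sketch of what the new `stratify` option in
`make_train_test_datasets()` does (illustrative only — the data and `k` here are made up;
the real code reads `k` from `feature_parameters["stratify"]`):

```python
import numpy as np
from sklearn.model_selection import train_test_split

features = np.random.rand(100, 5)  # stand-in for filtered_dataframe
labels = np.random.rand(100)

# mark every k-th candle with a 1, as the loop in data_kitchen.py does
k = 10
stratification = np.zeros(len(features))
stratification[k::k] = 1

train_f, test_f, train_l, test_l = train_test_split(
    features, labels, stratify=stratification, test_size=0.25, random_state=1
)
# stratify splits the marked candles in the same 0.75/0.25 proportion as the
# unmarked ones, so the periodic samples are spread evenly across train and test
print(len(train_f), len(test_f))
```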