flake8 passing; use pathlib in lieu of os.path to accommodate Windows/macOS

robcaulk 2022-05-04 17:42:34 +02:00
parent 2600ba4e74
commit 99f7e44c30
7 changed files with 593 additions and 439 deletions

View File

@@ -6,17 +6,19 @@
"fiat_display_currency": "USD",
"dry_run": true,
"timeframe": "5m",
"dry_run_wallet":1000,
"dry_run_wallet": 1000,
"cancel_open_orders_on_exit": true,
"unfilledtimeout": {
"entry": 10,
"exit": 30
},
"exchange": {
"name": "ftx",
"key": "",
"secret": "",
"ccxt_config": {"enableRateLimit": true},
"ccxt_config": {
"enableRateLimit": true
},
"ccxt_async_config": {
"enableRateLimit": true,
"rateLimit": 200
@@ -24,8 +26,7 @@
"pair_whitelist": [
"BTC/USDT"
],
"pair_blacklist": [
]
"pair_blacklist": []
},
"entry_pricing": {
"price_side": "same",
@@ -43,54 +44,57 @@
"order_book_top": 1
},
"pairlists": [
{"method": "StaticPairList"}
{
"method": "StaticPairList"
}
],
"freqai": {
"btc_pair" : "BTC/USDT",
"timeframes" : ["5m","15m","1h"],
"full_timerange" : "20210601-20220101",
"train_period" : 30,
"backtest_period" : 7,
"identifier" : "example",
"base_features": [
"rsi",
"close_over_20sma",
"relative_volume",
"bb_width",
"mfi",
"roc",
"pct-change",
"adx",
"macd"
],
"corr_pairlist": [
"ETH/USDT",
"LINK/USDT",
"DOT/USDT"
],
"training_timerange" : "20211220-20220117",
"feature_parameters" : {
"period": 12,
"shift": 2,
"drop_features": false,
"DI_threshold": 1,
"weight_factor": 0,
"principal_component_analysis": false,
"remove_outliers": false
},
"data_split_parameters" : {
"test_size": 0.25,
"random_state": 1
},
"model_training_parameters" : {
"n_estimators": 2000,
"random_state": 1,
"learning_rate": 0.02,
"task_type": "CPU"
}
"btc_pair": "BTC/USDT",
"timeframes": [
"5m",
"15m"
],
"full_timerange": "20210601-20210901",
"train_period": 30,
"backtest_period": 7,
"identifier": "example",
"base_features": [
"rsi",
"close_over_20sma",
"relative_volume",
"bb_width",
"mfi",
"roc",
"pct-change",
"adx",
"macd"
],
"corr_pairlist": [
"ETH/USDT",
"LINK/USDT",
"DOT/USDT"
],
"training_timerange": "20211220-20220117",
"feature_parameters": {
"period": 12,
"shift": 1,
"drop_features": false,
"DI_threshold": 1,
"weight_factor": 0,
"principal_component_analysis": false,
"remove_outliers": false
},
"data_split_parameters": {
"test_size": 0.25,
"random_state": 1
},
"model_training_parameters": {
"n_estimators": 2000,
"random_state": 1,
"learning_rate": 0.02,
"task_type": "CPU"
}
},
"bot_name": "",
"initial_state": "running",
"forcebuy_enable": false,

View File

@@ -1,64 +1,77 @@
import json
import os
import copy
import datetime
import json
import pickle as pk
from pathlib import Path
from typing import Any, Dict, List, Tuple
import numpy as np
import pandas as pd
from joblib import dump, load
from pandas import DataFrame
from joblib import dump
from joblib import load
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import pairwise_distances
import datetime
from typing import Any, Dict, List, Tuple
import pickle as pk
from sklearn.model_selection import train_test_split
from freqtrade.configuration import TimeRange
SECONDS_IN_DAY = 86400
class DataHandler:
"""
Class designed to handle all the data for the IFreqaiModel class model.
Functionalities include holding, saving, loading, and analyzing the data.
author: Robert Caulk, rob.caulk@gmail.com
"""
def __init__(self, config: Dict[str, Any], dataframe: DataFrame, data: List):
def __init__(self, config: Dict[str, Any], dataframe: DataFrame):
self.full_dataframe = dataframe
(self.training_timeranges,
self.backtesting_timeranges) = self.split_timerange(
config['freqai']['full_timerange'],
config['freqai']['train_period'],
config['freqai']['backtest_period'])
self.data = data
self.data_dictionary = {}
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
config["freqai"]["full_timerange"],
config["freqai"]["train_period"],
config["freqai"]["backtest_period"],
)
self.data: Dict[Any, Any] = {}
self.config = config
self.freq_config = config['freqai']
self.freq_config = config["freqai"]
self.predictions = np.array([])
self.do_predict = np.array([])
self.target_mean = np.array([])
self.target_std = np.array([])
self.model_path = Path()
self.model_filename = ""
def save_data(self, model: Any) -> None:
"""
Saves all data associated with a model for a single sub-train time range
:params:
:model: User trained model which can be reused for inference to generate
predictions
"""
if not os.path.exists(self.model_path): os.mkdir(self.model_path)
save_path = self.model_path + self.model_filename
if not self.model_path.is_dir():
self.model_path.mkdir(parents=True, exist_ok=True)
save_path = Path(self.model_path)
# if not os.path.exists(self.model_path):
# os.mkdir(self.model_path)
# save_path = self.model_path + self.model_filename
# Save the trained model
dump(model, save_path+"_model.joblib")
self.data['model_path'] = self.model_path
self.data['model_filename'] = self.model_filename
self.data['training_features_list'] = list(self.data_dictionary['train_features'].columns)
dump(model, save_path / str(self.model_filename + "_model.joblib"))
self.data["model_path"] = self.model_path
self.data["model_filename"] = self.model_filename
self.data["training_features_list"] = list(self.data_dictionary["train_features"].columns)
# store the metadata
with open(save_path+"_metadata.json", 'w') as fp:
json.dump(self.data, fp, default=self.np_encoder)
with open(save_path / str(self.model_filename + "_metadata.json"), "w") as fp:
json.dump(self.data, fp, default=self.np_encoder)
# save the train data to file so we can check preds for area of applicability later
self.data_dictionary['train_features'].to_pickle(save_path+"_trained_df.pkl")
self.data_dictionary["train_features"].to_pickle(
save_path / str(self.model_filename + "_trained_df.pkl")
)
return
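
save_data() above writes three artifacts per sub-train window. A small sketch of the resulting layout, using assumed (hypothetical) path and filename values:

from pathlib import Path

model_path = Path("user_data/models/example/sub-train-20210601-20210701")  # assumed value
model_filename = "cb_btc_20210601-20210701"  # assumed; naming comes from model_exists() below
for suffix in ("_model.joblib", "_metadata.json", "_trained_df.pkl"):
    print(model_path / (model_filename + suffix))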
@@ -68,156 +81,210 @@ class DataHandler:
:returns:
:model: User trained model which can be used to infer new predictions
"""
model = load(self.model_path+self.model_filename+"_model.joblib")
model = load(self.model_path / str(self.model_filename + "_model.joblib"))
with open(self.model_path+self.model_filename+"_metadata.json", 'r') as fp:
with open(self.model_path / str(self.model_filename + "_metadata.json"), "r") as fp:
self.data = json.load(fp)
if self.data.get('training_features_list'):
self.training_features_list = [*self.data.get('training_features_list')]
self.training_features_list = self.data["training_features_list"]
# if self.data.get("training_features_list"):
# self.training_features_list = [*self.data.get("training_features_list")]
self.data_dictionary['train_features'] = pd.read_pickle(self.model_path+
self.model_filename+"_trained_df.pkl")
self.data_dictionary["train_features"] = pd.read_pickle(
self.model_path / str(self.model_filename + "_trained_df.pkl")
)
self.model_path = self.data['model_path']
self.model_filename = self.data['model_filename']
if self.config['freqai']['feature_parameters']['principal_component_analysis']:
self.pca = pk.load(open(self.model_path+self.model_filename+"_pca_object.pkl","rb"))
self.model_path = self.data["model_path"]
self.model_filename = self.data["model_filename"]
if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
self.pca = pk.load(
open(self.model_path / str(self.model_filename + "_pca_object.pkl"), "rb")
)
return model
def make_train_test_datasets(self, filtered_dataframe: DataFrame, labels: DataFrame) -> None:
'''
Given the dataframe for the full history for training, split the data into
training and test data according to user specified parameters in configuration
file.
def make_train_test_datasets(
self, filtered_dataframe: DataFrame, labels: DataFrame
) -> Dict[Any, Any]:
"""
Given the dataframe for the full history for training, split the data into
training and test data according to user specified parameters in configuration
file.
:filtered_dataframe: cleaned dataframe ready to be split.
:labels: cleaned labels ready to be split.
"""
if self.config['freqai']['feature_parameters']['weight_factor'] > 0:
if self.config["freqai"]["feature_parameters"]["weight_factor"] > 0:
weights = self.set_weights_higher_recent(len(filtered_dataframe))
else: weights = np.ones(len(filtered_dataframe))
else:
weights = np.ones(len(filtered_dataframe))
(train_features, test_features, train_labels,
test_labels, train_weights, test_weights) = train_test_split(
filtered_dataframe[:filtered_dataframe.shape[0]],
(
train_features,
test_features,
train_labels,
test_labels,
train_weights,
test_weights,
) = train_test_split(
filtered_dataframe[: filtered_dataframe.shape[0]],
labels,
weights,
**self.config['freqai']['data_split_parameters']
**self.config["freqai"]["data_split_parameters"]
)
return self.build_data_dictionary(
train_features,test_features,
train_labels,test_labels,
train_weights,test_weights)
train_features, test_features, train_labels, test_labels, train_weights, test_weights
)
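
The split above leans on train_test_split accepting any number of aligned arrays and splitting them all with the same indices. A standalone sketch with toy data and the example config's split parameters:

import numpy as np
from sklearn.model_selection import train_test_split

features = np.arange(20).reshape(10, 2)
labels = np.arange(10)
weights = np.ones(10)
(train_x, test_x, train_y, test_y, train_w, test_w) = train_test_split(
    features, labels, weights, test_size=0.25, random_state=1
)
print(train_x.shape, test_x.shape)  # (7, 2) (3, 2)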
def filter_features(self, unfiltered_dataframe: DataFrame, training_feature_list: List,
labels: DataFrame = None, training_filter: bool=True) -> Tuple[DataFrame, DataFrame]:
'''
Filter the unfiltered dataframe to extract the user requested features and properly
remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
def filter_features(
self,
unfiltered_dataframe: DataFrame,
training_feature_list: List,
labels: DataFrame = pd.DataFrame(),
training_filter: bool = True,
) -> Tuple[DataFrame, DataFrame]:
"""
Filter the unfiltered dataframe to extract the user requested features and properly
remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
row that had a NaN and will shield user from that prediction.
:params:
:unfiltered_dataframe: the full dataframe for the present training period
:training_feature_list: list, the training feature list constructed by self.build_feature_list()
according to user specified parameters in the configuration file.
:training_feature_list: list, the training feature list constructed by
self.build_feature_list() according to user specified parameters in the configuration file.
:labels: the labels for the dataset
:training_filter: boolean which lets the function know if it is training data or
prediction data to be filtered.
:returns:
:filtered_dataframe: dataframe cleaned of NaNs and only containing the user
requested feature set.
:labels: labels cleaned of NaNs.
"""
filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1)
drop_index = pd.isnull(filtered_dataframe).any(1) # get the rows that have NaNs,
if training_filter: # we don't care about total row number (total no. datapoints) in training, we only care about removing any row with NaNs
drop_index = pd.isnull(filtered_dataframe).any(1) # get the rows that have NaNs,
drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement.
if (
training_filter
): # we don't care about total row number (total no. datapoints) in training, we only care
# about removing any row with NaNs
drop_index_labels = pd.isnull(labels)
filtered_dataframe = filtered_dataframe[(drop_index==False) & (drop_index_labels==False)] # dropping values
labels = labels[(drop_index==False) & (drop_index_labels==False)] # assuming the labels depend entirely on the dataframe here.
print('dropped',len(unfiltered_dataframe)-len(filtered_dataframe),
'training data points due to NaNs, ensure you have downloaded all historical training data')
self.data['filter_drop_index_training'] = drop_index
drop_index_labels = drop_index_labels.replace(True, 1).replace(False, 0)
filtered_dataframe = filtered_dataframe[
(drop_index == 0) & (drop_index_labels == 0)
] # dropping values
labels = labels[
(drop_index == 0) & (drop_index_labels == 0)
] # assuming the labels depend entirely on the dataframe here.
print(
"dropped",
len(unfiltered_dataframe) - len(filtered_dataframe),
"training data points due to NaNs, ensure you have downloaded",
"all historical training data",
)
self.data["filter_drop_index_training"] = drop_index
else: # we are backtesting so we need to preserve row number to send back to strategy, so now we use do_predict to avoid any prediction based on a NaN
else:
# we are backtesting so we need to preserve row number to send back to strategy,
# so now we use do_predict to avoid any prediction based on a NaN
drop_index = pd.isnull(filtered_dataframe).any(1)
self.data['filter_drop_index_prediction'] = drop_index
filtered_dataframe.fillna(0, inplace=True) # replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction that was based on a single NaN is ultimately protected from buys with do_predict
self.data["filter_drop_index_prediction"] = drop_index
filtered_dataframe.fillna(0, inplace=True)
# replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction
# that was based on a single NaN is ultimately protected from buys with do_predict
drop_index = ~drop_index
self.do_predict = np.array(drop_index.replace(True,1).replace(False,0))
print('dropped',len(self.do_predict) - self.do_predict.sum(),'of',len(filtered_dataframe),
'prediction data points due to NaNs. These are protected from prediction with do_predict vector returned to strategy.')
self.do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))
print(
"dropped",
len(self.do_predict) - self.do_predict.sum(),
"of",
len(filtered_dataframe),
"prediction data points due to NaNs. These are protected from prediction",
"with do_predict vector returned to strategy.",
)
return filtered_dataframe, labels
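
A minimal sketch of the NaN handling above: training rows containing any NaN are dropped outright, while prediction rows are kept but flagged through do_predict so the strategy can ignore them.

import numpy as np
import pandas as pd

df = pd.DataFrame({"rsi_5m": [30.0, np.nan, 55.0], "roc_5m": [0.1, 0.2, np.nan]})
drop_index = pd.isnull(df).any(axis=1)            # True where a row has any NaN
train_df = df[~drop_index]                        # training path: rows dropped
do_predict = np.array((~drop_index).astype(int))  # prediction path: 0 marks unusable rows
print(len(train_df), do_predict)                  # 1 [1 0 0]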
def build_data_dictionary(self, train_df: DataFrame, test_df: DataFrame,
train_labels: DataFrame, test_labels: DataFrame,
train_weights: Any, test_weights: Any) -> Dict:
def build_data_dictionary(
self,
train_df: DataFrame,
test_df: DataFrame,
train_labels: DataFrame,
test_labels: DataFrame,
train_weights: Any,
test_weights: Any,
) -> Dict:
self.data_dictionary = {'train_features': train_df,
'test_features': test_df,
'train_labels': train_labels,
'test_labels': test_labels,
'train_weights': train_weights,
'test_weights': test_weights}
self.data_dictionary = {
"train_features": train_df,
"test_features": test_df,
"train_labels": train_labels,
"test_labels": test_labels,
"train_weights": train_weights,
"test_weights": test_weights,
}
return self.data_dictionary
def standardize_data(self, data_dictionary: Dict) -> None:
'''
def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
"""
Standardize all data in the data_dictionary according to the training dataset
:params:
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
:returns:
:data_dictionary: updated dictionary with standardized values.
"""
# standardize the data by training stats
train_mean = data_dictionary['train_features'].mean()
train_std = data_dictionary['train_features'].std()
data_dictionary['train_features'] = (data_dictionary['train_features'] - train_mean) / train_std
data_dictionary['test_features'] = (data_dictionary['test_features'] - train_mean) / train_std
train_mean = data_dictionary["train_features"].mean()
train_std = data_dictionary["train_features"].std()
data_dictionary["train_features"] = (
data_dictionary["train_features"] - train_mean
) / train_std
data_dictionary["test_features"] = (
data_dictionary["test_features"] - train_mean
) / train_std
train_labels_std = data_dictionary['train_labels'].std()
train_labels_mean = data_dictionary['train_labels'].mean()
data_dictionary['train_labels'] = (data_dictionary['train_labels'] - train_labels_mean) / train_labels_std
data_dictionary['test_labels'] = (data_dictionary['test_labels'] - train_labels_mean) / train_labels_std
train_labels_std = data_dictionary["train_labels"].std()
train_labels_mean = data_dictionary["train_labels"].mean()
data_dictionary["train_labels"] = (
data_dictionary["train_labels"] - train_labels_mean
) / train_labels_std
data_dictionary["test_labels"] = (
data_dictionary["test_labels"] - train_labels_mean
) / train_labels_std
for item in train_std.keys():
self.data[item+'_std'] = train_std[item]
self.data[item+'_mean'] = train_mean[item]
self.data[item + "_std"] = train_std[item]
self.data[item + "_mean"] = train_mean[item]
self.data['labels_std'] = train_labels_std
self.data['labels_mean'] = train_labels_mean
self.data["labels_std"] = train_labels_std
self.data["labels_mean"] = train_labels_mean
return data_dictionary
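
Both splits are standardized with the training statistics only, and those statistics are stored so predictions can be mapped back later. The transform and its inverse in isolation:

import pandas as pd

train = pd.Series([1.0, 2.0, 3.0])
mean, std = train.mean(), train.std()
z = (train - mean) / std   # forward transform using training stats
restored = z * std + mean  # inverse applied to predictions in predict()
print(list(restored))      # [1.0, 2.0, 3.0]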
def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
"""
Standardizes a set of data using the mean and standard deviation from
the associated training data.
:params:
:df: Dataframe to be standardized
"""
for item in df.keys():
df[item] = (df[item] - self.data[item+'_mean']) / self.data[item+'_std']
df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
return df
def split_timerange(self, tr: Dict, train_split: int=28, bt_split: int=7) -> list:
'''
def split_timerange(
self, tr: str, train_split: int = 28, bt_split: int = 7
) -> Tuple[list, list]:
"""
Function which takes a single time range (tr) and splits it
into sub timeranges to train and backtest on based on user input
tr: str, full timerange to train on
train_split: the period length for each training (days). Specified in user
configuration file
bt_split: the backtesting length (days). Specified in user configuration file
"""
train_period = train_split * SECONDS_IN_DAY
bt_period = bt_split * SECONDS_IN_DAY
@@ -230,22 +297,24 @@ class DataHandler:
tr_backtesting_list = []
first = True
while True:
if not first: timerange_train.startts = timerange_train.startts + bt_period
if not first:
timerange_train.startts = timerange_train.startts + bt_period
timerange_train.stopts = timerange_train.startts + train_period
# if a full training period doesn't fit, we stop
if timerange_train.stopts > full_timerange.stopts: break
if timerange_train.stopts > full_timerange.stopts:
break
first = False
start = datetime.datetime.utcfromtimestamp(timerange_train.startts)
stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts)
tr_training_list.append(start.strftime("%Y%m%d")+'-'+stop.strftime("%Y%m%d"))
tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
# associated backtest period
timerange_backtest.startts = timerange_train.stopts
timerange_backtest.stopts = timerange_backtest.startts + bt_period
start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts)
stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts)
tr_backtesting_list.append(start.strftime("%Y%m%d")+'-'+stop.strftime("%Y%m%d"))
tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
return tr_training_list, tr_backtesting_list
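
For the example config above ("20210601-20210901", train_period 30, backtest_period 7), the generated windows pair a 30 day training range with the 7 days that follow it, both sliding forward 7 days per iteration. The same arithmetic, re-implemented compactly for illustration:

import datetime

start, stop = datetime.date(2021, 6, 1), datetime.date(2021, 9, 1)
train, bt = datetime.timedelta(days=30), datetime.timedelta(days=7)
t0 = start
while t0 + train <= stop:  # stop once a full training window no longer fits
    print(f"train {t0}..{t0 + train}  backtest {t0 + train}..{t0 + train + bt}")
    t0 += bt  # both windows slide by the backtest length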
@@ -260,8 +329,8 @@ class DataHandler:
timerange = TimeRange.parse_timerange(tr)
start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
df = df.loc[df['date'] >= start, :]
df = df.loc[df['date'] <= stop, :]
df = df.loc[df["date"] >= start, :]
df = df.loc[df["date"] <= stop, :]
return df
@@ -272,128 +341,171 @@ class DataHandler:
No parameters or returns, it acts on the data_dictionary held by the DataHandler.
"""
from sklearn.decomposition import PCA  # avoid importing if we don't need it
n_components = self.data_dictionary['train_features'].shape[1]
n_components = self.data_dictionary["train_features"].shape[1]
pca = PCA(n_components=n_components)
pca = pca.fit(self.data_dictionary['train_features'])
pca = pca.fit(self.data_dictionary["train_features"])
n_keep_components = np.argmin(pca.explained_variance_ratio_.cumsum() < 0.999)
pca2 = PCA(n_components=n_keep_components)
self.data['n_kept_components'] = n_keep_components
pca2 = pca2.fit(self.data_dictionary['train_features'])
print('reduced feature dimension by',n_components-n_keep_components)
print("explained variance",np.sum(pca2.explained_variance_ratio_))
train_components = pca2.transform(self.data_dictionary['train_features'])
test_components = pca2.transform(self.data_dictionary['test_features'])
self.data["n_kept_components"] = n_keep_components
pca2 = pca2.fit(self.data_dictionary["train_features"])
print("reduced feature dimension by", n_components - n_keep_components)
print("explained variance", np.sum(pca2.explained_variance_ratio_))
train_components = pca2.transform(self.data_dictionary["train_features"])
test_components = pca2.transform(self.data_dictionary["test_features"])
self.data_dictionary['train_features'] = pd.DataFrame(data=train_components,
columns = ['PC'+str(i) for i in range(0,n_keep_components)],
index = self.data_dictionary['train_features'].index)
self.data_dictionary["train_features"] = pd.DataFrame(
data=train_components,
columns=["PC" + str(i) for i in range(0, n_keep_components)],
index=self.data_dictionary["train_features"].index,
)
self.data_dictionary['test_features'] = pd.DataFrame(data=test_components,
columns = ['PC'+str(i) for i in range(0,n_keep_components)],
index = self.data_dictionary['test_features'].index)
self.data_dictionary["test_features"] = pd.DataFrame(
data=test_components,
columns=["PC" + str(i) for i in range(0, n_keep_components)],
index=self.data_dictionary["test_features"].index,
)
self.data['n_kept_components'] = n_keep_components
self.data["n_kept_components"] = n_keep_components
self.pca = pca2
if not os.path.exists(self.model_path): os.mkdir(self.model_path)
pk.dump(pca2, open(self.model_path + self.model_filename+"_pca_object.pkl","wb"))
if not self.model_path.is_dir():
self.model_path.mkdir(parents=True, exist_ok=True)
pk.dump(pca2, open(self.model_path / str(self.model_filename + "_pca_object.pkl"), "wb"))
return None
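
The component count above falls out of np.argmin over a boolean mask: since False < True, it returns the first index where the cumulative explained variance reaches 99.9 %. In isolation:

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(1).rand(100, 10)
pca = PCA(n_components=10).fit(X)
n_keep = np.argmin(pca.explained_variance_ratio_.cumsum() < 0.999)  # first False index
print("keeping", n_keep, "of", X.shape[1], "components")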
def compute_distances(self) -> float:
print('computing average mean distance for all training points')
pairwise = pairwise_distances(self.data_dictionary['train_features'],n_jobs=-1)
print("computing average mean distance for all training points")
pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1)
avg_mean_dist = pairwise.mean(axis=1).mean()
print('avg_mean_dist',avg_mean_dist)
print("avg_mean_dist", avg_mean_dist)
return avg_mean_dist
def remove_outliers(self,predict: bool) -> None:
def remove_outliers(self, predict: bool) -> None:
"""
Remove data that looks like an outlier based on the distribution of each
variable.
:params:
:predict: boolean which tells the function if this is prediction data or
training data coming in.
"""
lower_quantile = self.data_dictionary['train_features'].quantile(0.001)
upper_quantile = self.data_dictionary['train_features'].quantile(0.999)
lower_quantile = self.data_dictionary["train_features"].quantile(0.001)
upper_quantile = self.data_dictionary["train_features"].quantile(0.999)
if predict:
df = self.data_dictionary['prediction_features'][(self.data_dictionary['prediction_features']<upper_quantile) & (self.data_dictionary['prediction_features']>lower_quantile)]
df = self.data_dictionary["prediction_features"][
(self.data_dictionary["prediction_features"] < upper_quantile)
& (self.data_dictionary["prediction_features"] > lower_quantile)
]
drop_index = pd.isnull(df).any(1)
self.data_dictionary['prediction_features'].fillna(0,inplace=True)
self.data_dictionary["prediction_features"].fillna(0, inplace=True)
drop_index = ~drop_index
do_predict = np.array(drop_index.replace(True,1).replace(False,0))
print('remove_outliers() tossed',len(do_predict)-do_predict.sum(),'predictions because they were beyond 3 std deviations from training data.')
do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))
print(
"remove_outliers() tossed",
len(do_predict) - do_predict.sum(),
"predictions because they were beyond 3 std deviations from training data.",
)
self.do_predict += do_predict
self.do_predict -= 1
else:
filter_train_df = self.data_dictionary['train_features'][(self.data_dictionary['train_features']<upper_quantile) & (self.data_dictionary['train_features']>lower_quantile)]
filter_train_df = self.data_dictionary["train_features"][
(self.data_dictionary["train_features"] < upper_quantile)
& (self.data_dictionary["train_features"] > lower_quantile)
]
drop_index = pd.isnull(filter_train_df).any(1)
self.data_dictionary['train_features'] = self.data_dictionary['train_features'][(drop_index==False)]
self.data_dictionary['train_labels'] = self.data_dictionary['train_labels'][(drop_index==False)]
self.data_dictionary['train_weights'] = self.data_dictionary['train_weights'][(drop_index==False)]
drop_index = drop_index.replace(True, 1).replace(False, 0)
self.data_dictionary["train_features"] = self.data_dictionary["train_features"][
(drop_index == 0)
]
self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][
(drop_index == 0)
]
self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][
(drop_index == 0)
]
# do the same for the test data
filter_test_df = self.data_dictionary['test_features'][(self.data_dictionary['test_features']<upper_quantile) & (self.data_dictionary['test_features']>lower_quantile)]
filter_test_df = self.data_dictionary["test_features"][
(self.data_dictionary["test_features"] < upper_quantile)
& (self.data_dictionary["test_features"] > lower_quantile)
]
drop_index = pd.isnull(filter_test_df).any(1)
#pdb.set_trace()
self.data_dictionary['test_labels'] = self.data_dictionary['test_labels'][(drop_index==False)]
self.data_dictionary['test_features'] = self.data_dictionary['test_features'][(drop_index==False)]
self.data_dictionary['test_weights'] = self.data_dictionary['test_weights'][(drop_index==False)]
drop_index = drop_index.replace(True, 1).replace(False, 0)
self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][
(drop_index == 0)
]
self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
(drop_index == 0)
]
self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
(drop_index == 0)
]
return
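
The outlier rule above in miniature: values outside the 0.1 %..99.9 % quantile band of the training features become NaN under the comparison mask, and the affected rows are then dropped (training) or flagged via do_predict (prediction).

import numpy as np
import pandas as pd

train = pd.DataFrame({"f": np.arange(1000.0)})
lower, upper = train.quantile(0.001), train.quantile(0.999)
masked = train[(train < upper) & (train > lower)]  # out-of-band values become NaN
drop_index = pd.isnull(masked).any(axis=1)
print(int(drop_index.sum()), "rows flagged as outliers")  # 2, one from each tail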
def build_feature_list(self, config: dict) -> int:
def build_feature_list(self, config: dict) -> list:
"""
Build the list of features that will be used to filter
the full dataframe. Feature list is constructed from the
user configuration file.
:params:
:config: Canonical freqtrade config file containing all
user defined input in config['freqai'] dictionary.
"""
features = []
for tf in config['freqai']['timeframes']:
for ft in config['freqai']['base_features']:
for n in range(config['freqai']['feature_parameters']['shift']+1):
shift=''
if n>0: shift = '_shift-'+str(n)
features.append(ft+shift+'_'+tf)
for p in config['freqai']['corr_pairlist']:
features.append(p.split("/")[0]+'-'+ft+shift+'_'+tf)
for tf in config["freqai"]["timeframes"]:
for ft in config["freqai"]["base_features"]:
for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
shift = ""
if n > 0:
shift = "_shift-" + str(n)
features.append(ft + shift + "_" + tf)
for p in config["freqai"]["corr_pairlist"]:
features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
print('number of features',len(features))
print("number of features", len(features))
return features
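
For the example config the expansion above yields names such as rsi_5m, rsi_shift-1_5m, and ETH-rsi_5m. The same loop on a trimmed-down config, for illustration:

tfs, base, shift, pairs = ["5m"], ["rsi"], 1, ["ETH/USDT"]
features = []
for tf in tfs:
    for ft in base:
        for n in range(shift + 1):
            suffix = "" if n == 0 else "_shift-" + str(n)
            features.append(ft + suffix + "_" + tf)
            for p in pairs:
                features.append(p.split("/")[0] + "-" + ft + suffix + "_" + tf)
print(features)  # ['rsi_5m', 'ETH-rsi_5m', 'rsi_shift-1_5m', 'ETH-rsi_shift-1_5m']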
def check_if_pred_in_training_spaces(self) -> None:
"""
Compares the distance from each prediction point to each training data
point. It uses this information to estimate a Dissimilarity Index (DI)
and avoid making predictions on any points that are too far away
from the training data set.
"""
print('checking if prediction features are in AOA')
distance = pairwise_distances(self.data_dictionary['train_features'],
self.data_dictionary['prediction_features'],n_jobs=-1)
print("checking if prediction features are in AOA")
distance = pairwise_distances(
self.data_dictionary["train_features"],
self.data_dictionary["prediction_features"],
n_jobs=-1,
)
do_predict = np.where(distance.min(axis=0) /
self.data['avg_mean_dist'] < self.config['freqai']['feature_parameters']['DI_threshold'],1,0)
do_predict = np.where(
distance.min(axis=0) / self.data["avg_mean_dist"]
< self.config["freqai"]["feature_parameters"]["DI_threshold"],
1,
0,
)
print('Distance checker tossed',len(do_predict)-do_predict.sum(),
'predictions for being too far from training data')
print(
"Distance checker tossed",
len(do_predict) - do_predict.sum(),
"predictions for being too far from training data",
)
self.do_predict += do_predict
self.do_predict -= 1
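
A self-contained sketch of the Dissimilarity Index test above, on synthetic data: each prediction point's distance to its nearest training point is normalized by the training set's average mean pairwise distance, and a deliberately distant point is rejected (DI_threshold of 1, as in the example config).

import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

train = np.random.RandomState(0).rand(50, 3)
preds = np.vstack([train[:3] + 0.01, [[10.0, 10.0, 10.0]]])  # last point is far away
avg_mean_dist = pairwise_distances(train).mean(axis=1).mean()
distance = pairwise_distances(train, preds)
do_predict = np.where(distance.min(axis=0) / avg_mean_dist < 1, 1, 0)
print(do_predict)  # [1 1 1 0]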
def set_weights_higher_recent(self, num_weights: int) -> int:
"""
Set weights so that recent data is more heavily weighted during
@@ -401,8 +513,9 @@ class DataHandler:
"""
weights = np.zeros(num_weights)
for i in range(1, len(weights)):
weights[len(weights) - i] = np.exp(-i/
(self.config['freqai']['feature_parameters']['weight_factor']*num_weights))
weights[len(weights) - i] = np.exp(
-i / (self.config["freqai"]["feature_parameters"]["weight_factor"] * num_weights)
)
return weights
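
The weighting above is an exponential decay indexed from the end of the array, so the most recent rows approach exp(0) and older rows fall toward zero. In isolation, with an illustrative weight_factor:

import numpy as np

num_weights, weight_factor = 6, 0.5  # weight_factor would come from the user config
weights = np.zeros(num_weights)
for i in range(1, num_weights):
    weights[num_weights - i] = np.exp(-i / (weight_factor * num_weights))
print(weights.round(3))  # [0.    0.189 0.264 0.368 0.513 0.717]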
def append_predictions(self, predictions, do_predict, len_dataframe):
@@ -411,12 +524,12 @@
"""
ones = np.ones(len_dataframe)
s_mean, s_std = ones*self.data['s_mean'], ones*self.data['s_std']
s_mean, s_std = ones * self.data["s_mean"], ones * self.data["s_std"]
self.predictions = np.append(self.predictions,predictions)
self.do_predict = np.append(self.do_predict,do_predict)
self.target_mean = np.append(self.target_mean,s_mean)
self.target_std = np.append(self.target_std,s_std)
self.predictions = np.append(self.predictions, predictions)
self.do_predict = np.append(self.do_predict, do_predict)
self.target_mean = np.append(self.target_mean, s_mean)
self.target_std = np.append(self.target_std, s_std)
return
@@ -426,14 +539,14 @@
when it goes back to the strategy. These rows are not included in the backtest.
"""
filler = np.zeros(len_dataframe -len(self.predictions)) # startup_candle_count
self.predictions = np.append(filler,self.predictions)
self.do_predict = np.append(filler,self.do_predict)
self.target_mean = np.append(filler,self.target_mean)
self.target_std = np.append(filler,self.target_std)
filler = np.zeros(len_dataframe - len(self.predictions)) # startup_candle_count
self.predictions = np.append(filler, self.predictions)
self.do_predict = np.append(filler, self.do_predict)
self.target_mean = np.append(filler, self.target_mean)
self.target_std = np.append(filler, self.target_std)
return
def np_encoder(self, object):
if isinstance(object, np.generic):
return object.item()

View File

@@ -1,20 +1,23 @@
import gc
import shutil
from abc import ABC
from pathlib import Path
from typing import Any, Dict, Tuple
import os
import numpy as np
import pandas as pd
from pandas import DataFrame
import shutil
import gc
from typing import Any, Dict, Optional, Tuple
from abc import ABC
from freqtrade.freqai.data_handler import DataHandler
pd.options.mode.chained_assignment = None
class IFreqaiModel(ABC):
"""
Class containing all tools for training and prediction in the strategy.
User models should inherit from this class as shown in
templates/ExamplePredictionModel.py where the user overrides
train(), predict(), fit(), and make_labels().
Author: Robert Caulk, rob.caulk@gmail.com
@@ -23,61 +26,71 @@ class IFreqaiModel(ABC):
def __init__(self, config: Dict[str, Any]) -> None:
self.config = config
self.freqai_info = config['freqai']
self.data_split_parameters = config['freqai']['data_split_parameters']
self.model_training_parameters = config['freqai']['model_training_parameters']
self.feature_parameters = config['freqai']['feature_parameters']
self.full_path = (str(config['user_data_dir'])+
"/models/"+self.freqai_info['full_timerange']+
'-'+self.freqai_info['identifier'])
self.metadata = {}
self.data = {}
self.freqai_info = config["freqai"]
self.data_split_parameters = config["freqai"]["data_split_parameters"]
self.model_training_parameters = config["freqai"]["model_training_parameters"]
self.feature_parameters = config["freqai"]["feature_parameters"]
self.full_path = Path(
config["user_data_dir"]
/ "models"
/ str(self.freqai_info["full_timerange"] + self.freqai_info["identifier"])
)
self.time_last_trained = None
self.current_time = None
self.model = None
self.predictions = None
if not os.path.exists(self.full_path):
os.mkdir(self.full_path)
shutil.copy(self.config['config_files'][0],self.full_path+"/"+self.config['config_files'][0])
if not self.full_path.is_dir():
self.full_path.mkdir(parents=True, exist_ok=True)
shutil.copy(
self.config["config_files"][0],
Path(self.full_path / self.config["config_files"][0]),
)
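
The heart of this commit: pathlib's "/" operator composes path segments portably where the old code concatenated strings with "/". A short sketch with hypothetical values:

from pathlib import Path

user_data_dir = Path("user_data")  # freqtrade supplies this as a Path already
full_path = user_data_dir / "models" / ("20210601-20210901" + "example")
print(full_path)  # user_data/models/20210601-20210901example, with OS-appropriate separators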
def start(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
"""
Entry point to the FreqaiModel, it will train a new model if
necessary before making the prediction.
The backtesting and training paradigm is a sliding training window
with a following backtest window. Both windows slide according to the
length of the backtest window. This function is not intended to be
overridden by children of IFreqaiModel, but technically, it can be
if the user wishes to make deeper changes to the sliding window
logic.
:params:
:dataframe: Full dataframe coming from strategy - it contains entire
backtesting timerange + additional historical data necessary to train
the model.
:metadata: pair metadata coming from strategy.
"""
self.pair = metadata['pair']
self.dh = DataHandler(self.config, dataframe, self.data)
self.pair = metadata["pair"]
self.dh = DataHandler(self.config, dataframe)
print('going to train',len(self.dh.training_timeranges),
'timeranges:',self.dh.training_timeranges)
print(
"going to train",
len(self.dh.training_timeranges),
"timeranges:",
self.dh.training_timeranges,
)
# Loop enforcing the sliding window training/backtesting paradigm
# tr_train is the training time range e.g. 1 historical month
# tr_backtest is the backtesting time range e.g. the week directly
# following tr_train. Both of these windows slide through the
# entire backtest
for tr_train, tr_backtest in zip(self.dh.training_timeranges,
self.dh.backtesting_timeranges):
for tr_train, tr_backtest in zip(
self.dh.training_timeranges, self.dh.backtesting_timeranges
):
gc.collect()
#self.config['timerange'] = tr_train
self.dh.data = {} # clean the pair specific data between models
self.freqai_info['training_timerange'] = tr_train
# self.config['timerange'] = tr_train
self.dh.data = {} # clean the pair specific data between models
self.freqai_info["training_timerange"] = tr_train
dataframe_train = self.dh.slice_dataframe(tr_train, dataframe)
dataframe_backtest = self.dh.slice_dataframe(tr_backtest, dataframe)
print("training",self.pair,"for",tr_train)
self.dh.model_path = self.full_path+"/"+ 'sub-train'+'-'+str(tr_train)+'/'
print("training", self.pair, "for", tr_train)
# self.dh.model_path = self.full_path + "/" + "sub-train" + "-" + str(tr_train) + "/"
self.dh.model_path = Path(self.full_path / str("sub-train" + "-" + str(tr_train)))
if not self.model_exists(self.pair, training_timerange=tr_train):
self.model = self.train(dataframe_train, metadata)
self.dh.save_data(self.model)
@@ -86,8 +99,8 @@ class IFreqaiModel(ABC):
preds, do_preds = self.predict(dataframe_backtest)
self.dh.append_predictions(preds,do_preds,len(dataframe_backtest))
self.dh.append_predictions(preds, do_preds, len(dataframe_backtest))
self.dh.fill_predictions(len(dataframe))
return self.dh.predictions, self.dh.do_predict, self.dh.target_mean, self.dh.target_std
@@ -107,7 +120,7 @@
for storing, saving, loading, and analyzing the data.
:params:
:unfiltered_dataframe: Full dataframe for the current training period
:metadata: pair metadata from strategy.
:returns:
:model: Trained model which can be used for inference (self.predict)
"""
@@ -116,40 +129,40 @@
def fit(self) -> Any:
"""
Most regressors use the same function names and arguments e.g. user
can drop in LGBMRegressor in place of CatBoostRegressor and all data
management will be properly handled by Freqai.
:params:
:data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
return None
def predict(self) -> Optional[Tuple[DataFrame, DataFrame]]:
return Any
def predict(self, dataframe: DataFrame) -> Tuple[np.array, np.array]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:return:
:predictions: np.array of predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
return None
return np.array([]), np.array([])
def model_exists(self, pair: str, training_timerange: str = None) -> bool:
def model_exists(self, pair: str, training_timerange: str) -> bool:
"""
Given a pair and path, check if a model already exists
:param pair: pair e.g. BTC/USD
:param path: path to model
"""
coin,_ = pair.split('/')
self.dh.model_filename = f"cb_"+coin.lower()+"_"+training_timerange
file_exists = os.path.isfile(self.dh.model_path+
self.dh.model_filename+"_model.joblib")
coin, _ = pair.split("/")
self.dh.model_filename = "cb_" + coin.lower() + "_" + training_timerange
path_to_modelfile = Path(self.dh.model_path / str(self.dh.model_filename + "_model.joblib"))
file_exists = path_to_modelfile.is_file()
if file_exists:
print("Found model at", self.dh.model_path+self.dh.model_filename)
else: print("Could not find model at",
self.dh.model_path+self.dh.model_filename)
print("Found model at", self.dh.model_path / self.dh.model_filename)
else:
print("Could not find model at", self.dh.model_path / self.dh.model_filename)
return file_exists

View File

@@ -3,10 +3,10 @@ from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver
class CustomModel:
"""
A bridge between the user defined IFreqaiModel class
and the strategy.
"""
def __init__(self,config):
def __init__(self, config):
self.bridge = FreqaiModelResolver.load_freqaimodel(config)

View File

@@ -12,6 +12,7 @@ from freqtrade.exceptions import OperationalException
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.resolvers import IResolver
logger = logging.getLogger(__name__)
@@ -19,10 +20,11 @@ class FreqaiModelResolver(IResolver):
"""
This class contains all the logic to load a custom FreqaiModel class
"""
object_type = IFreqaiModel
object_type_str = "FreqaiModel"
user_subdir = USERPATH_FREQAIMODELS
initial_search_path = Path(__file__).parent.parent.joinpath('optimize').resolve()
initial_search_path = Path(__file__).parent.parent.joinpath("optimize").resolve()
@staticmethod
def load_freqaimodel(config: Dict) -> IFreqaiModel:
@@ -31,15 +33,17 @@ class FreqaiModelResolver(IResolver):
:param config: configuration dictionary
"""
freqaimodel_name = config.get('freqaimodel')
freqaimodel_name = config.get("freqaimodel")
if not freqaimodel_name:
raise OperationalException(
"No freqaimodel set. Please use `--freqaimodel` to "
"specify the FreqaiModel class to use.\n"
)
freqaimodel = FreqaiModelResolver.load_object(freqaimodel_name,
config, kwargs={'config': config},
extra_dir=config.get('freqaimodel_path'))
freqaimodel = FreqaiModelResolver.load_object(
freqaimodel_name,
config,
kwargs={"config": config},
extra_dir=config.get("freqaimodel_path"),
)
return freqaimodel

View File

@@ -1,15 +1,17 @@
import numpy as np
from typing import Any, Dict, Tuple
import pandas as pd
from catboost import CatBoostRegressor, Pool
from pandas import DataFrame
from typing import Any, Dict, Tuple
from freqtrade.freqai.freqai_interface import IFreqaiModel
class ExamplePredictionModel(IFreqaiModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits IFreqaiModel, which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def make_labels(self, dataframe: DataFrame) -> DataFrame:
@@ -19,15 +21,20 @@ class ExamplePredictionModel(IFreqaiModel):
:dataframe: the full dataframe for the present training period
"""
dataframe['s'] = (dataframe['close'].shift(-self.feature_parameters['period']).rolling(
self.feature_parameters['period']).max() / dataframe['close'] - 1)
self.dh.data['s_mean'] = dataframe['s'].mean()
self.dh.data['s_std'] = dataframe['s'].std()
dataframe["s"] = (
dataframe["close"]
.shift(-self.feature_parameters["period"])
.rolling(self.feature_parameters["period"])
.max()
/ dataframe["close"]
- 1
)
self.dh.data["s_mean"] = dataframe["s"].mean()
self.dh.data["s_std"] = dataframe["s"].std()
print('label mean',self.dh.data['s_mean'],'label std',self.dh.data['s_std'])
return dataframe['s']
print("label mean", self.dh.data["s_mean"], "label std", self.dh.data["s_std"])
return dataframe["s"]
def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Tuple[DataFrame, DataFrame]:
"""
@@ -35,7 +42,7 @@ class ExamplePredictionModel(IFreqaiModel):
for storing, saving, loading, and managing the data.
:params:
:unfiltered_dataframe: Full dataframe for the current training period
:metadata: pair metadata from strategy.
:returns:
:model: Trained model which can be used for inference (self.predict)
"""
@@ -46,8 +53,12 @@ class ExamplePredictionModel(IFreqaiModel):
unfiltered_labels = self.make_labels(unfiltered_dataframe)
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = self.dh.filter_features(unfiltered_dataframe,
self.dh.training_features_list, unfiltered_labels, training_filter=True)
features_filtered, labels_filtered = self.dh.filter_features(
unfiltered_dataframe,
self.dh.training_features_list,
unfiltered_labels,
training_filter=True,
)
# split data into train/test data.
data_dictionary = self.dh.make_train_test_datasets(features_filtered, labels_filtered)
@@ -55,46 +66,47 @@ class ExamplePredictionModel(IFreqaiModel):
data_dictionary = self.dh.standardize_data(data_dictionary)
# optional additional data cleaning
if self.feature_parameters['principal_component_analysis']:
if self.feature_parameters["principal_component_analysis"]:
self.dh.principal_component_analysis()
if self.feature_parameters["remove_outliers"]:
self.dh.remove_outliers(predict=False)
if self.feature_parameters['DI_threshold']:
self.dh.data['avg_mean_dist'] = self.dh.compute_distances()
if self.feature_parameters["DI_threshold"]:
self.dh.data["avg_mean_dist"] = self.dh.compute_distances()
print("length of train data", len(data_dictionary['train_features']))
print("length of train data", len(data_dictionary["train_features"]))
model = self.fit(data_dictionary)
print('Finished training')
print("Finished training")
print(f'--------------------done training {metadata["pair"]}--------------------')
return model
def fit(self, data_dictionary: Dict) -> Any:
"""
Most regressors use the same function names and arguments e.g. user
can drop in LGBMRegressor in place of CatBoostRegressor and all data
management will be properly handled by Freqai.
:params:
:data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
train_data = Pool(
data=data_dictionary['train_features'],
label=data_dictionary['train_labels'],
weight=data_dictionary['train_weights']
data=data_dictionary["train_features"],
label=data_dictionary["train_labels"],
weight=data_dictionary["train_weights"],
)
test_data = Pool(
data=data_dictionary['test_features'],
label=data_dictionary['test_labels'],
weight=data_dictionary['test_weights']
data=data_dictionary["test_features"],
label=data_dictionary["test_labels"],
weight=data_dictionary["test_weights"],
)
model = CatBoostRegressor(verbose=100, early_stopping_rounds=400,
**self.model_training_parameters)
model = CatBoostRegressor(
verbose=100, early_stopping_rounds=400, **self.model_training_parameters
)
model.fit(X=train_data, eval_set=test_data)
return model
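
The docstring above promises that another regressor can be dropped in. A hedged sketch of the same fit() with LGBMRegressor; lightgbm is not part of this commit, and CatBoost-only keys such as task_type must be stripped first:

from lightgbm import LGBMRegressor

def fit_lgbm(data_dictionary, model_training_parameters):
    # task_type is CatBoost-specific and unknown to LightGBM
    params = {k: v for k, v in model_training_parameters.items() if k != "task_type"}
    model = LGBMRegressor(**params)
    model.fit(
        X=data_dictionary["train_features"],
        y=data_dictionary["train_labels"],
        sample_weight=data_dictionary["train_weights"],
        eval_set=[(data_dictionary["test_features"], data_dictionary["test_labels"])],
    )
    return model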
@@ -103,7 +115,7 @@ class ExamplePredictionModel(IFreqaiModel):
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:return:
:predictions: np.array of predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
@@ -112,27 +124,31 @@ class ExamplePredictionModel(IFreqaiModel):
print("--------------------Starting prediction--------------------")
original_feature_list = self.dh.build_feature_list(self.config)
filtered_dataframe, _ = self.dh.filter_features(unfiltered_dataframe, original_feature_list, training_filter=False)
filtered_dataframe, _ = self.dh.filter_features(
unfiltered_dataframe, original_feature_list, training_filter=False
)
filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe)
self.dh.data_dictionary['prediction_features'] = filtered_dataframe
self.dh.data_dictionary["prediction_features"] = filtered_dataframe
# optional additional data cleaning
if self.feature_parameters['principal_component_analysis']:
# optional additional data cleaning
if self.feature_parameters["principal_component_analysis"]:
pca_components = self.dh.pca.transform(filtered_dataframe)
self.dh.data_dictionary['prediction_features'] = pd.DataFrame(data=pca_components,
columns = ['PC'+str(i) for i in range(0,self.dh.data['n_kept_components'])],
index = filtered_dataframe.index)
self.dh.data_dictionary["prediction_features"] = pd.DataFrame(
data=pca_components,
columns=["PC" + str(i) for i in range(0, self.dh.data["n_kept_components"])],
index=filtered_dataframe.index,
)
if self.feature_parameters["remove_outliers"]:
self.dh.remove_outliers(predict=True)  # creates dropped index
if self.feature_parameters['DI_threshold']:
self.dh.check_if_pred_in_training_spaces() # sets do_predict
if self.feature_parameters["DI_threshold"]:
self.dh.check_if_pred_in_training_spaces() # sets do_predict
predictions = self.model.predict(self.dh.data_dictionary['prediction_features'])
predictions = self.model.predict(self.dh.data_dictionary["prediction_features"])
# compute the non-standardized predictions
predictions = predictions * self.dh.data['labels_std'] + self.dh.data['labels_mean']
predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"]
print("--------------------Finished prediction--------------------")

View File

@@ -1,61 +1,59 @@
import logging
from functools import reduce
import numpy as np
import pandas as pd
import talib.abstract as ta
from pandas import DataFrame
import pandas as pd
from technical import qtpylib
import numpy as np
from freqtrade.strategy import (merge_informative_pair)
from freqtrade.strategy.interface import IStrategy
from freqtrade.freqai.strategy_bridge import CustomModel
from functools import reduce
from freqtrade.strategy import merge_informative_pair
from freqtrade.strategy.interface import IStrategy
logger = logging.getLogger(__name__)
class FreqaiExampleStrategy(IStrategy):
"""
Example strategy showing how the user connects their own
IFreqaiModel to the strategy. Namely, the user uses:
self.model = CustomModel(self.config)
self.model.bridge.start(dataframe, metadata)
to make predictions on their data. populate_any_indicators() automatically
generates the variety of features indicated by the user in the
canonical freqtrade configuration file under config['freqai'].
"""
minimal_roi = {
"0": 0.01,
"240": -1
}
minimal_roi = {"0": 0.01, "240": -1}
plot_config = {
'main_plot': {
"main_plot": {},
"subplots": {
"prediction": {"prediction": {"color": "blue"}},
"target_roi": {
"target_roi": {"color": "brown"},
},
"do_predict": {
"do_predict": {"color": "brown"},
},
},
'subplots': {
"prediction":{
'prediction':{'color':'blue'}
},
"target_roi":{
'target_roi':{'color':'brown'},
},
"do_predict":{
'do_predict':{'color':'brown'},
},
}
}
stoploss = -0.05
use_sell_signal = True
startup_candle_count: int = 1000
def informative_pairs(self):
pairs = self.freqai_info['corr_pairlist']
pairs = self.freqai_info["corr_pairlist"]
informative_pairs = []
for tf in self.timeframes:
informative_pairs.append([(pair, tf) for pair in pairs])
return informative_pairs
def populate_any_indicators(self, pair, df, tf, informative=None,coin=''):
def populate_any_indicators(self, pair, df, tf, informative=None, coin=""):
"""
Function designed to automatically generate, name and merge features
from user indicated timeframes in the configuration file. User can add
@@ -70,110 +68,116 @@ class FreqaiExampleStrategy(IStrategy):
if informative is None:
informative = self.dp.get_pair_dataframe(pair, tf)
informative[coin+'rsi'] = ta.RSI(informative, timeperiod=14)
informative[coin+'mfi'] = ta.MFI(informative, timeperiod=25)
informative[coin+'adx'] = ta.ADX(informative, window=20)
informative[coin + "rsi"] = ta.RSI(informative, timeperiod=14)
informative[coin + "mfi"] = ta.MFI(informative, timeperiod=25)
informative[coin + "adx"] = ta.ADX(informative, window=20)
informative[coin+'20sma'] = ta.SMA(informative,timeperiod=20)
informative[coin+'21ema'] = ta.EMA(informative,timeperiod=21)
informative[coin+'bmsb'] = np.where(informative[coin+'20sma'].lt(informative[coin+'21ema']),1,0)
informative[coin+'close_over_20sma'] = informative['close']/informative[coin+'20sma']
informative[coin + "20sma"] = ta.SMA(informative, timeperiod=20)
informative[coin + "21ema"] = ta.EMA(informative, timeperiod=21)
informative[coin + "bmsb"] = np.where(
informative[coin + "20sma"].lt(informative[coin + "21ema"]), 1, 0
)
informative[coin + "close_over_20sma"] = informative["close"] / informative[coin + "20sma"]
informative[coin+'mfi'] = ta.MFI(informative, timeperiod=25)
informative[coin + "mfi"] = ta.MFI(informative, timeperiod=25)
informative[coin+'ema21'] = ta.EMA(informative, timeperiod=21)
informative[coin+'sma20'] = ta.SMA(informative, timeperiod=20)
informative[coin + "ema21"] = ta.EMA(informative, timeperiod=21)
informative[coin + "sma20"] = ta.SMA(informative, timeperiod=20)
stoch = ta.STOCHRSI(informative, 15, 20, 2, 2)
informative[coin+'srsi-fk'] = stoch['fastk']
informative[coin+'srsi-fd'] = stoch['fastd']
informative[coin + "srsi-fk"] = stoch["fastk"]
informative[coin + "srsi-fd"] = stoch["fastd"]
bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=14, stds=2.2)
informative[coin+'bb_lowerband'] = bollinger['lower']
informative[coin+'bb_middleband'] = bollinger['mid']
informative[coin+'bb_upperband'] = bollinger['upper']
informative[coin+'bb_width'] = ((informative[coin+"bb_upperband"] - informative[coin+"bb_lowerband"]) / informative[coin+"bb_middleband"])
informative[coin+'close-bb_lower'] = informative['close'] / informative[coin+'bb_lowerband']
informative[coin + "bb_lowerband"] = bollinger["lower"]
informative[coin + "bb_middleband"] = bollinger["mid"]
informative[coin + "bb_upperband"] = bollinger["upper"]
informative[coin + "bb_width"] = (
informative[coin + "bb_upperband"] - informative[coin + "bb_lowerband"]
) / informative[coin + "bb_middleband"]
informative[coin + "close-bb_lower"] = (
informative["close"] / informative[coin + "bb_lowerband"]
)
informative[coin+'roc'] = ta.ROC(informative, timeperiod=3)
informative[coin+'adx'] = ta.ADX(informative, window=14)
informative[coin + "roc"] = ta.ROC(informative, timeperiod=3)
informative[coin + "adx"] = ta.ADX(informative, window=14)
macd = ta.MACD(informative)
informative[coin+'macd'] = macd['macd']
informative[coin+'pct-change'] = informative['close'].pct_change()
informative[coin+'relative_volume'] = informative['volume'] / informative['volume'].rolling(10).mean()
informative[coin + "macd"] = macd["macd"]
informative[coin + "pct-change"] = informative["close"].pct_change()
informative[coin + "relative_volume"] = (
informative["volume"] / informative["volume"].rolling(10).mean()
)
informative[coin+'pct-change'] = informative['close'].pct_change()
informative[coin + "pct-change"] = informative["close"].pct_change()
indicators = [col for col in informative if col.startswith(coin)]
for n in range(self.freqai_info['feature_parameters']['shift']+1):
if n==0: continue
for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
if n == 0:
continue
informative_shift = informative[indicators].shift(n)
informative_shift = informative_shift.add_suffix('_shift-'+str(n))
informative = pd.concat((informative,informative_shift),axis=1)
informative_shift = informative_shift.add_suffix("_shift-" + str(n))
informative = pd.concat((informative, informative_shift), axis=1)
df = merge_informative_pair(df, informative, self.config['timeframe'], tf, ffill=True)
skip_columns = [(s + '_'+tf) for s in
['date', 'open', 'high', 'low', 'close', 'volume']]
df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
skip_columns = [(s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]]
df = df.drop(columns=skip_columns)
return df
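
The shift loop above duplicates each indicator column n candles back and tags it with a suffix, which is where names like rsi_shift-1_5m in build_feature_list() come from. Reduced to a single column:

import pandas as pd

informative = pd.DataFrame({"rsi": [30, 40, 50, 60]})
indicators = [col for col in informative if col.startswith("rsi")]
for n in range(1, 2):  # shift = 1, as in the example config
    shifted = informative[indicators].shift(n).add_suffix("_shift-" + str(n))
    informative = pd.concat((informative, shifted), axis=1)
print(informative.columns.tolist())  # ['rsi', 'rsi_shift-1']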
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
# the configuration file parameters are stored here
self.freqai_info = self.config['freqai']
self.freqai_info = self.config["freqai"]
# the model is instantiated here
self.model = CustomModel(self.config)
print('Populating indicators...')
print("Populating indicators...")
# the following loops are necessary for building the features
# the following loops are necessary for building the features
for tf in self.freqai_info['timeframes']:
dataframe = self.populate_any_indicators(metadata['pair'],
dataframe.copy(), tf)
for i in self.freqai_info['corr_pairlist']:
dataframe = self.populate_any_indicators(i,
dataframe.copy(), tf, coin=i.split("/")[0]+'-')
for tf in self.freqai_info["timeframes"]:
dataframe = self.populate_any_indicators(metadata["pair"], dataframe.copy(), tf)
for i in self.freqai_info["corr_pairlist"]:
dataframe = self.populate_any_indicators(
i, dataframe.copy(), tf, coin=i.split("/")[0] + "-"
)
# the model will return 4 values, its prediction, an indication of whether or not the prediction
# should be accepted, the target mean/std values from the labels used during each training period.
(dataframe['prediction'], dataframe['do_predict'],
dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata)
# the model will return 4 values, its prediction, an indication of whether or not the
# prediction should be accepted, the target mean/std values from the labels used during
# each training period.
(
dataframe["prediction"],
dataframe["do_predict"],
dataframe["target_mean"],
dataframe["target_std"],
) = self.model.bridge.start(dataframe, metadata)
dataframe['target_roi'] = dataframe['target_mean']+dataframe['target_std']*0.5
dataframe['sell_roi'] = dataframe['target_mean']-dataframe['target_std']*1.5
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 0.5
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1.5
return dataframe
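
A quick numeric check of the two thresholds above, with illustrative mean/std values:

target_mean, target_std = 0.02, 0.04  # illustrative values only
target_roi = target_mean + target_std * 0.5  # 0.04: entries need a prediction above this
sell_roi = target_mean - target_std * 1.5    # -0.04: exits trigger below this
print(target_roi, sell_roi)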
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
buy_conditions = [
(dataframe['prediction'] > dataframe['target_roi'])
&
(dataframe['do_predict'] == 1)
(dataframe["prediction"] > dataframe["target_roi"]) & (dataframe["do_predict"] == 1)
]
if buy_conditions:
dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), 'buy'] = 1
dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), "buy"] = 1
return dataframe
def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
# sell_goal = eval('self.'+metadata['pair'].split("/")[0]+'_sell_goal.value')
sell_conditions = [
(dataframe['prediction'] < dataframe['sell_roi'])
&
(dataframe['do_predict'] == 1)
(dataframe["prediction"] < dataframe["sell_roi"]) & (dataframe["do_predict"] == 1)
]
if sell_conditions:
dataframe.loc[reduce(lambda x, y: x | y, sell_conditions), 'sell'] = 1
dataframe.loc[reduce(lambda x, y: x | y, sell_conditions), "sell"] = 1
return dataframe
def get_ticker_indicator(self):
return int(self.config['timeframe'][:-1])
return int(self.config["timeframe"][:-1])