fix bug for target_mean/std array merging in backtesting

This commit is contained in:
robcaulk 2022-05-26 21:07:50 +02:00
parent ff531c416f
commit 6193205012
6 changed files with 186 additions and 110 deletions

View File

@ -1,7 +1,7 @@
{ {
"max_open_trades": 1, "max_open_trades": 1,
"stake_currency": "USDT", "stake_currency": "USDT",
"stake_amount": 800, "stake_amount": 900,
"tradable_balance_ratio": 1, "tradable_balance_ratio": 1,
"fiat_display_currency": "USD", "fiat_display_currency": "USD",
"dry_run": true, "dry_run": true,
@ -24,8 +24,7 @@
"rateLimit": 200 "rateLimit": 200
}, },
"pair_whitelist": [ "pair_whitelist": [
"BTC/USDT", "BTC/USDT"
"ETH/USDT"
], ],
"pair_blacklist": [] "pair_blacklist": []
}, },
@ -55,7 +54,7 @@
"15m" "15m"
], ],
"train_period": 30, "train_period": 30,
"backtest_period": 10, "backtest_period": 7,
"identifier": "example", "identifier": "example",
"live_trained_timestamp": 0, "live_trained_timestamp": 0,
"corr_pairlist": [ "corr_pairlist": [
@ -64,16 +63,16 @@
"DOT/USDT" "DOT/USDT"
], ],
"feature_parameters": { "feature_parameters": {
"period": 12, "period": 24,
"shift": 1, "shift": 1,
"DI_threshold": 1, "DI_threshold": 0,
"weight_factor": 0, "weight_factor": 0.9,
"principal_component_analysis": false, "principal_component_analysis": false,
"use_SVM_to_remove_outliers": false, "use_SVM_to_remove_outliers": true,
"stratify": 0 "stratify": 3
}, },
"data_split_parameters": { "data_split_parameters": {
"test_size": 0.25, "test_size": 0.33,
"random_state": 1 "random_state": 1
}, },
"model_training_parameters": { "model_training_parameters": {

View File

@ -221,19 +221,29 @@ This way, the user can return to using any model they wish by simply changing th
### Building a freqai strategy ### Building a freqai strategy
The Freqai strategy requires the user to include the following lines of code in `populate_any_indicators()` The Freqai strategy requires the user to include the following lines of code in the strategy:
```python ```python
from freqtrade.freqai.strategy_bridge import CustomModel from freqtrade.freqai.strategy_bridge import CustomModel
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: def informative_pairs(self):
# the configuration file parameters are stored here whitelist_pairs = self.dp.current_whitelist()
self.freqai_info = self.config['freqai'] corr_pairs = self.config["freqai"]["corr_pairlist"]
informative_pairs = []
for tf in self.config["freqai"]["timeframes"]:
for pair in whitelist_pairs:
informative_pairs.append((pair, tf))
for pair in corr_pairs:
if pair in whitelist_pairs:
continue # avoid duplication
informative_pairs.append((pair, tf))
return informative_pairs
# the model is instantiated here def bot_start(self):
self.model = CustomModel(self.config) self.model = CustomModel(self.config)
print('Populating indicators...') def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
self.freqai_info = self.config['freqai']
# the following loops are necessary for building the features # the following loops are necessary for building the features
# indicated by the user in the configuration file. # indicated by the user in the configuration file.
@ -314,7 +324,7 @@ data point and all other training data points:
$$ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} $$ $$ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} $$
where $d_{ab}$ is the distance between the standardized points $a$ and $b$. $p$ where $d_{ab}$ is the distance between the normalized points $a$ and $b$. $p$
is the number of features i.e. the length of the vector $X$. The is the number of features i.e. the length of the vector $X$. The
characteristic distance, $\overline{d}$ for a set of training data points is simply the mean characteristic distance, $\overline{d}$ for a set of training data points is simply the mean
of the average distances: of the average distances:
@ -394,11 +404,61 @@ which will split the data chronologically so that every X data points is a testing
present example, the user is asking for every third data point in the dataframe to be used for present example, the user is asking for every third data point in the dataframe to be used for
testing, the other points are used for training. testing, the other points are used for training.
<!-- ## Dynamic target expectation
The labels used for model training have a unique statistical distribution for each separate model training.
We can use this information to know if our current prediction is in the realm of what the model was trained on,
and if so, what is the statistical probability of the current prediction. With this information, we can
make more informed predictions.
FreqAI builds this label distribution and provides a quantile to the strategy, which can be optionally used as a
dynamic threshold. The `target_quantile: X` means that X% of the labels are below this value. So setting:
```json
"freqai": {
"feature_parameters" : {
"target_quantile": 0.9
}
}
```
This means the strategy will receive the label threshold below which 90% of the labels fell.
An example usage in the strategy may look something like:
```python
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
# ... #
(
dataframe["prediction"],
dataframe["do_predict"],
dataframe["target_upper_quantile"],
dataframe["target_lower_quantile"],
) = self.model.bridge.start(dataframe, metadata, self)
return dataframe
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
buy_conditions = [
(dataframe["prediction"] > dataframe["target_upper_quantile"]) & (dataframe["do_predict"] == 1)
]
if buy_conditions:
dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), "buy"] = 1
return dataframe
``` -->
## Additional information ## Additional information
### Feature standardization ### Feature normalization
The feature set created by the user is automatically standardized to the training The feature set created by the user is automatically normalized to the training
data only. This includes all test data and unseen prediction data (dry/live/backtest). data only. This includes all test data and unseen prediction data (dry/live/backtest).
### File structure ### File structure

View File

@ -141,7 +141,7 @@ class FreqaiDataKitchen:
:model: User trained model which can be inferenced for new predictions :model: User trained model which can be inferenced for new predictions
""" """
# if self.live: if self.live:
self.model_filename = self.data_drawer.pair_dict[coin]['model_filename'] self.model_filename = self.data_drawer.pair_dict[coin]['model_filename']
self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path']) self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path'])
@ -329,42 +329,6 @@ class FreqaiDataKitchen:
:data_dictionary: updated dictionary with standardized values. :data_dictionary: updated dictionary with standardized values.
""" """
# standardize the data by training stats # standardize the data by training stats
train_mean = data_dictionary["train_features"].mean()
train_std = data_dictionary["train_features"].std()
data_dictionary["train_features"] = (
data_dictionary["train_features"] - train_mean
) / train_std
data_dictionary["test_features"] = (
data_dictionary["test_features"] - train_mean
) / train_std
train_labels_std = data_dictionary["train_labels"].std()
train_labels_mean = data_dictionary["train_labels"].mean()
data_dictionary["train_labels"] = (
data_dictionary["train_labels"] - train_labels_mean
) / train_labels_std
data_dictionary["test_labels"] = (
data_dictionary["test_labels"] - train_labels_mean
) / train_labels_std
for item in train_std.keys():
self.data[item + "_std"] = train_std[item]
self.data[item + "_mean"] = train_mean[item]
self.data["labels_std"] = train_labels_std
self.data["labels_mean"] = train_labels_mean
return data_dictionary
def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
"""
Standardize all data in the data_dictionary according to the training dataset
:params:
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
:returns:
:data_dictionary: updated dictionary with standardized values.
"""
# standardize the data by training stats
train_max = data_dictionary["train_features"].max() train_max = data_dictionary["train_features"].max()
train_min = data_dictionary["train_features"].min() train_min = data_dictionary["train_features"].min()
data_dictionary["train_features"] = 2 * ( data_dictionary["train_features"] = 2 * (
@ -392,9 +356,9 @@ class FreqaiDataKitchen:
return data_dictionary return data_dictionary
def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame: def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
""" """
Standardizes a set of data using the mean and standard deviation from Normalize a set of data using the mean and standard deviation from
the associated training data. the associated training data.
:params: :params:
:df: Dataframe to be standardized :df: Dataframe to be standardized
@ -406,19 +370,6 @@ class FreqaiDataKitchen:
return df return df
def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
"""
Normalizes a set of data using the mean and standard deviation from
the associated training data.
:params:
:df: Dataframe to be standardized
"""
for item in df.keys():
df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
return df
def split_timerange( def split_timerange(
self, tr: str, train_split: int = 28, bt_split: int = 7 self, tr: str, train_split: int = 28, bt_split: int = 7
) -> Tuple[list, list]: ) -> Tuple[list, list]:
@ -657,12 +608,12 @@ class FreqaiDataKitchen:
""" """
ones = np.ones(len_dataframe) ones = np.ones(len_dataframe)
s_mean, s_std = ones * self.data["s_mean"], ones * self.data["s_std"] target_mean, target_std = ones * self.data["target_mean"], ones * self.data["target_std"]
self.full_predictions = np.append(self.full_predictions, predictions) self.full_predictions = np.append(self.full_predictions, predictions)
self.full_do_predict = np.append(self.full_do_predict, do_predict) self.full_do_predict = np.append(self.full_do_predict, do_predict)
self.full_target_mean = np.append(self.full_target_mean, s_mean) self.full_target_mean = np.append(self.full_target_mean, target_mean)
self.full_target_std = np.append(self.full_target_std, s_std) self.full_target_std = np.append(self.full_target_std, target_std)
return return
@ -827,6 +778,23 @@ class FreqaiDataKitchen:
return dataframe return dataframe
def fit_labels(self) -> None:
import scipy as spy
f = spy.stats.norm.fit(self.data_dictionary["train_labels"])
# KEEPME incase we want to let user start to grab quantiles.
# upper_q = spy.stats.norm.ppf(self.freqai_config['feature_parameters'][
# 'target_quantile'], *f)
# lower_q = spy.stats.norm.ppf(1 - self.freqai_config['feature_parameters'][
# 'target_quantile'], *f)
self.data["target_mean"], self.data["target_std"] = f[0], f[1]
# self.data["upper_quantile"] = upper_q
# self.data["lower_quantile"] = lower_q
return
def np_encoder(self, object): def np_encoder(self, object):
if isinstance(object, np.generic): if isinstance(object, np.generic):
return object.item() return object.item()
@ -968,3 +936,52 @@ class FreqaiDataKitchen:
# ) # )
# return # return
# def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
# """
# standardize all data in the data_dictionary according to the training dataset
# :params:
# :data_dictionary: dictionary containing the cleaned and split training/test data/labels
# :returns:
# :data_dictionary: updated dictionary with standardized values.
# """
# # standardize the data by training stats
# train_mean = data_dictionary["train_features"].mean()
# train_std = data_dictionary["train_features"].std()
# data_dictionary["train_features"] = (
# data_dictionary["train_features"] - train_mean
# ) / train_std
# data_dictionary["test_features"] = (
# data_dictionary["test_features"] - train_mean
# ) / train_std
# train_labels_std = data_dictionary["train_labels"].std()
# train_labels_mean = data_dictionary["train_labels"].mean()
# data_dictionary["train_labels"] = (
# data_dictionary["train_labels"] - train_labels_mean
# ) / train_labels_std
# data_dictionary["test_labels"] = (
# data_dictionary["test_labels"] - train_labels_mean
# ) / train_labels_std
# for item in train_std.keys():
# self.data[item + "_std"] = train_std[item]
# self.data[item + "_mean"] = train_mean[item]
# self.data["labels_std"] = train_labels_std
# self.data["labels_mean"] = train_labels_mean
# return data_dictionary
# def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
# """
# Normalizes a set of data using the mean and standard deviation from
# the associated training data.
# :params:
# :df: Dataframe to be standardized
# """
# for item in df.keys():
# df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
# return df

View File

@ -158,12 +158,7 @@ class IFreqaiModel(ABC):
else: else:
self.model = dh.load_data(metadata['pair']) self.model = dh.load_data(metadata['pair'])
# strategy_provided_features = self.dh.find_features(dataframe_train) self.check_if_feature_list_matches_strategy(dataframe_train, dh)
# # FIXME doesnt work with PCA
# if strategy_provided_features != self.dh.training_features_list:
# logger.info("User changed input features, retraining model.")
# self.model = self.train(dataframe_train, metadata)
# self.dh.save_data(self.model)
preds, do_preds = self.predict(dataframe_backtest, dh) preds, do_preds = self.predict(dataframe_backtest, dh)
@ -220,16 +215,23 @@ class IFreqaiModel(ABC):
self.model = dh.load_data(coin=metadata['pair']) self.model = dh.load_data(coin=metadata['pair'])
# FIXME self.check_if_feature_list_matches_strategy(dataframe, dh)
# strategy_provided_features = dh.find_features(dataframe)
# if strategy_provided_features != dh.training_features_list:
# self.train_model_in_series(new_trained_timerange, metadata, strategy)
preds, do_preds = self.predict(dataframe, dh) preds, do_preds = self.predict(dataframe, dh)
dh.append_predictions(preds, do_preds, len(dataframe)) dh.append_predictions(preds, do_preds, len(dataframe))
return dh return dh
def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
dh: FreqaiDataKitchen) -> None:
strategy_provided_features = dh.find_features(dataframe)
if strategy_provided_features != dh.training_features_list:
raise OperationalException("Trying to access pretrained model with `identifier` "
"but found different features furnished by current strategy."
"Change `identifer` to train from scratch, or ensure the"
"strategy is furnishing the same features as the pretrained"
"model")
def data_cleaning_train(self, dh: FreqaiDataKitchen) -> None: def data_cleaning_train(self, dh: FreqaiDataKitchen) -> None:
""" """
Base data cleaning method for train Base data cleaning method for train
@ -237,6 +239,7 @@ class IFreqaiModel(ABC):
based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example
of how outlier data points are dropped from the dataframe used for training. of how outlier data points are dropped from the dataframe used for training.
""" """
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'): if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
dh.principal_component_analysis() dh.principal_component_analysis()

View File

@ -33,10 +33,6 @@ class CatboostPredictionModel(IFreqaiModel):
/ dataframe["close"] / dataframe["close"]
- 1 - 1
) )
dh.data["s_mean"] = dataframe["s"].mean()
dh.data["s_std"] = dataframe["s"].std()
# logger.info("label mean", dh.data["s_mean"], "label std", dh.data["s_std"])
return dataframe["s"] return dataframe["s"]
@ -68,8 +64,9 @@ class CatboostPredictionModel(IFreqaiModel):
# split data into train/test data. # split data into train/test data.
data_dictionary = dh.make_train_test_datasets(features_filtered, labels_filtered) data_dictionary = dh.make_train_test_datasets(features_filtered, labels_filtered)
# standardize all data based on train_dataset only dh.fit_labels() # fit labels to a normal distribution so we know what to expect in strategy
data_dictionary = dh.standardize_data(data_dictionary) # normalize all data based on train_dataset only
data_dictionary = dh.normalize_data(data_dictionary)
# optional additional data cleaning/analysis # optional additional data cleaning/analysis
self.data_cleaning_train(dh) self.data_cleaning_train(dh)
@ -128,7 +125,7 @@ class CatboostPredictionModel(IFreqaiModel):
filtered_dataframe, _ = dh.filter_features( filtered_dataframe, _ = dh.filter_features(
unfiltered_dataframe, original_feature_list, training_filter=False unfiltered_dataframe, original_feature_list, training_filter=False
) )
filtered_dataframe = dh.standardize_data_from_metadata(filtered_dataframe) filtered_dataframe = dh.normalize_data_from_metadata(filtered_dataframe)
dh.data_dictionary["prediction_features"] = filtered_dataframe dh.data_dictionary["prediction_features"] = filtered_dataframe
# optional additional data cleaning/analysis # optional additional data cleaning/analysis
@ -136,7 +133,7 @@ class CatboostPredictionModel(IFreqaiModel):
predictions = self.model.predict(dh.data_dictionary["prediction_features"]) predictions = self.model.predict(dh.data_dictionary["prediction_features"])
# compute the non-standardized predictions # compute the non-normalized predictions
dh.predictions = (predictions + 1) * (dh.data["labels_max"] - dh.predictions = (predictions + 1) * (dh.data["labels_max"] -
dh.data["labels_min"]) / 2 + dh.data["labels_min"] dh.data["labels_min"]) / 2 + dh.data["labels_min"]

View File

@ -178,8 +178,8 @@ class FreqaiExampleStrategy(IStrategy):
dataframe["target_std"], dataframe["target_std"],
) = self.model.bridge.start(dataframe, metadata, self) ) = self.model.bridge.start(dataframe, metadata, self)
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.5 dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"]
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1 dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"]
return dataframe return dataframe
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: