Throw a user error if the user tries to load a model but feeds it the wrong features (while using PCA)

2022-05-28 11:11:41 +02:00 · 2022-05-28 11:11:41 +02:00 · c5a16e91fb
commit c5a16e91fb
parent b8f9c3557b
4 changed files with 19 additions and 6 deletions
--- a/docs/freqai.md
+++ b/docs/freqai.md
@ -313,6 +313,8 @@ $$ W_i = \exp(\frac{-i}{\alpha*n}) $$
 where $W_i$ is the weight of data point $i$ in a total set of $n$ data points.
 ![weight-factor](assets/weights_factor.png)
 Finally, `period` defines the offset used for the `labels`. In the present example,
 the user is asking for `labels` that are 24 candles in the future.
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@ -477,6 +477,11 @@ class FreqaiDataKitchen:
            index=self.data_dictionary["train_features"].index,
        )
        # keeping a copy of the non-transformed features so we can check for errors during
        # model load from disk
        self.data['training_features_list_raw'] = copy.deepcopy(self.training_features_list)
        self.training_features_list = self.data_dictionary["train_features"].columns
        self.data_dictionary["test_features"] = pd.DataFrame(
            data=test_components,
            columns=["PC" + str(i) for i in range(0, n_keep_components)],
@ -563,7 +568,8 @@ class FreqaiDataKitchen:
    def find_features(self, dataframe: DataFrame) -> list:
        column_names = dataframe.columns
        features = [c for c in column_names if '%' in c]
-        assert features, ("Could not find any features!")
+        if not features:
            raise OperationalException("Could not find any features!")
        return features
    def check_if_pred_in_training_spaces(self) -> None:
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@ -225,7 +225,11 @@ class IFreqaiModel(ABC):
    def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
                                               dh: FreqaiDataKitchen) -> None:
        strategy_provided_features = dh.find_features(dataframe)
-        if strategy_provided_features != dh.training_features_list:
+        if dh.data['training_features_list_raw']:
            feature_list = dh.data['training_features_list_raw']
        else:
            feature_list = dh.training_features_list
        if strategy_provided_features != feature_list:
            raise OperationalException("Trying to access pretrained model with `identifier` "
                                       "but found different features furnished by current strategy."
                                       "Change `identifer` to train from scratch, or ensure the"
@ -254,7 +258,7 @@ class IFreqaiModel(ABC):
        # if self.feature_parameters["remove_outliers"]:
        #     dh.remove_outliers(predict=False)
-    def data_cleaning_predict(self, dh: FreqaiDataKitchen) -> None:
+    def data_cleaning_predict(self, dh: FreqaiDataKitchen, dataframe: DataFrame) -> None:
        """
        Base data cleaning method for predict.
        These functions each modify dh.do_predict, which is a dataframe with equal length
@ -266,7 +270,7 @@ class IFreqaiModel(ABC):
        for buy signals.
        """
        if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
-            dh.pca_transform()
+            dh.pca_transform(dataframe)
        if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'):
            dh.use_SVM_to_remove_outliers(predict=True)
--- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
@ -71,7 +71,8 @@ class CatboostPredictionModel(IFreqaiModel):
        # optional additional data cleaning/analysis
        self.data_cleaning_train(dh)
-        logger.info(f'Training model on {len(dh.training_features_list)} features')
+        logger.info(f'Training model on {len(dh.data_dictionary["train_features"].columns)}'
                    'features')
        logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
        model = self.fit(data_dictionary)
@ -129,7 +130,7 @@ class CatboostPredictionModel(IFreqaiModel):
        dh.data_dictionary["prediction_features"] = filtered_dataframe
        # optional additional data cleaning/analysis
-        self.data_cleaning_predict(dh)
+        self.data_cleaning_predict(dh, filtered_dataframe)
        predictions = self.model.predict(dh.data_dictionary["prediction_features"])