Fix constant PCA

2022-10-15 23:30:12 +02:00
parent 62ca822597
commit 20fc521771
1 changed files with 12 additions and 5 deletions
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -206,11 +206,15 @@ class FreqaiDataKitchen:
        drop_index = pd.isnull(filtered_df).any(axis=1)  # get the rows that have NaNs,
        drop_index = drop_index.replace(True, 1).replace(False, 0)  # pep8 requirement.
        ft_params = self.freqai_config["feature_parameters"]
        if (training_filter):
-            const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
-            if const_cols:
-                filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
-                logger.warning(f"Removed features {const_cols} with constant values.")
+            if not ft_params.get(
+                "principal_component_analysis", False
+            ):
+                const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
+                if const_cols:
+                    filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
+                    logger.warning(f"Removed features {const_cols} with constant values.")
            # we don't care about total row number (total no. datapoints) in training, we only care
            # about removing any row with NaNs
            # if labels has multiple columns (user wants to train multiple modelEs), we detect here
@@ -241,7 +245,10 @@ class FreqaiDataKitchen:
            self.data["filter_drop_index_training"] = drop_index
        else:
-            filtered_df = self.check_pred_labels(filtered_df)
+            if not ft_params.get(
+                "principal_component_analysis", False
+            ):
+                filtered_df = self.check_pred_labels(filtered_df)
            # we are backtesting so we need to preserve row number to send back to strategy,
            # so now we use do_predict to avoid any prediction based on a NaN
            drop_index = pd.isnull(filtered_df).any(axis=1)