Allow the user to set test_size = 0, which skips the train/test split and trains the prediction models without eval sets

2022-07-25 19:40:13 +02:00 · 2022-07-25 19:40:13 +02:00 · 56b17e6f3c
commit 56b17e6f3c
parent 55cf378ec2
4 changed files with 67 additions and 44 deletions
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@ -243,20 +243,28 @@ class FreqaiDataKitchen:
        else:
            stratification = None

-        (
-            train_features,
-            test_features,
-            train_labels,
-            test_labels,
-            train_weights,
-            test_weights,
-        ) = train_test_split(
-            filtered_dataframe[: filtered_dataframe.shape[0]],
-            labels,
-            weights,
-            stratify=stratification,
-            **self.config["freqai"]["data_split_parameters"],
-        )
+        if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            (
+                train_features,
+                test_features,
+                train_labels,
+                test_labels,
+                train_weights,
+                test_weights,
+            ) = train_test_split(
+                filtered_dataframe[: filtered_dataframe.shape[0]],
+                labels,
+                weights,
+                stratify=stratification,
+                **self.config["freqai"]["data_split_parameters"],
+            )
+        else:
+            test_labels = np.zeros(2)
+            test_features = pd.DataFrame()
+            test_weights = np.zeros(2)
+            train_features = filtered_dataframe
+            train_labels = labels
+            train_weights = weights

        return self.build_data_dictionary(
            train_features, test_features, train_labels, test_labels, train_weights, test_weights
@ -392,12 +400,13 @@ class FreqaiDataKitchen:
                / (train_labels_max - train_labels_min)
                - 1
            )
-            data_dictionary["test_labels"][item] = (
-                2
-                * (data_dictionary["test_labels"][item] - train_labels_min)
-                / (train_labels_max - train_labels_min)
-                - 1
-            )
+            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+                data_dictionary["test_labels"][item] = (
+                    2
+                    * (data_dictionary["test_labels"][item] - train_labels_min)
+                    / (train_labels_max - train_labels_min)
+                    - 1
+                )

            self.data[f"{item}_max"] = train_labels_max  # .to_dict()
            self.data[f"{item}_min"] = train_labels_min  # .to_dict()
@ -555,11 +564,12 @@ class FreqaiDataKitchen:
        self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list)
        self.training_features_list = self.data_dictionary["train_features"].columns

-        self.data_dictionary["test_features"] = pd.DataFrame(
-            data=test_components,
-            columns=["PC" + str(i) for i in range(0, n_keep_components)],
-            index=self.data_dictionary["test_features"].index,
-        )
+        if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            self.data_dictionary["test_features"] = pd.DataFrame(
+                data=test_components,
+                columns=["PC" + str(i) for i in range(0, n_keep_components)],
+                index=self.data_dictionary["test_features"].index,
+            )

        self.data["n_kept_components"] = n_keep_components
        self.pca = pca2
@ -652,15 +662,17 @@ class FreqaiDataKitchen:
            )

            # same for test data
-            y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
-            dropped_points = np.where(y_pred == -1, 0, y_pred)
-            self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
-                (y_pred == 1)
-            ]
-            self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(y_pred == 1)]
-            self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
-                (y_pred == 1)
-            ]
+            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+                y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
+                dropped_points = np.where(y_pred == -1, 0, y_pred)
+                self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
+                    (y_pred == 1)
+                ]
+                self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(
+                    y_pred == 1)]
+                self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
+                    (y_pred == 1)
+                ]

            logger.info(
                f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}"
--- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
@ -28,17 +28,22 @@ class CatboostPredictionModel(BaseRegressionModel):
            label=data_dictionary["train_labels"],
            weight=data_dictionary["train_weights"],
        )
-
-        test_data = Pool(
-            data=data_dictionary["test_features"],
-            label=data_dictionary["test_labels"],
-            weight=data_dictionary["test_weights"],
-        )
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            test_data = None
+        else:
+            test_data = Pool(
+                data=data_dictionary["test_features"],
+                label=data_dictionary["test_labels"],
+                weight=data_dictionary["test_weights"],
+            )

        model = CatBoostRegressor(
            allow_writing_files=False,
            **self.model_training_parameters,
        )
+
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            test_data = None
        model.fit(X=train_data, eval_set=test_data)

        return model
--- a/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py
@ -36,7 +36,9 @@ class CatboostPredictionMultiModel(BaseRegressionModel):

        model = MultiOutputRegressor(estimator=cbr)
        model.fit(X=X, y=y, sample_weight=sample_weight)  # , eval_set=eval_set)
-        train_score = model.score(X, y)
-        test_score = model.score(*eval_set)
-        logger.info(f"Train score {train_score}, Test score {test_score}")
+
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            train_score = model.score(X, y)
+            test_score = model.score(*eval_set)
+            logger.info(f"Train score {train_score}, Test score {test_score}")
        return model
--- a/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py
@ -25,11 +25,15 @@ class LightGBMPredictionModel(BaseRegressionModel):
                                all the training and test data/labels.
        """

-        eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            eval_set = None
+        else:
+            eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
        X = data_dictionary["train_features"]
        y = data_dictionary["train_labels"]

        model = LGBMRegressor(**self.model_training_parameters)
+
        model.fit(X=X, y=y, eval_set=eval_set)

        return model