From 56b17e6f3cc8e8286a582be2bf57db1c59af3725 Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Mon, 25 Jul 2022 19:40:13 +0200
Subject: [PATCH] allow user to pass test_size = 0 and avoid using eval sets in
 prediction models

---
 freqtrade/freqai/data_kitchen.py              | 80 +++++++++++--------
 .../CatboostPredictionModel.py                | 17 ++--
 .../CatboostPredictionMultiModel.py           |  8 +-
 .../LightGBMPredictionModel.py                |  6 +-
 4 files changed, 67 insertions(+), 44 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 5ca64e504..1b0ef7f33 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -243,20 +243,28 @@ class FreqaiDataKitchen:
         else:
             stratification = None
 
-        (
-            train_features,
-            test_features,
-            train_labels,
-            test_labels,
-            train_weights,
-            test_weights,
-        ) = train_test_split(
-            filtered_dataframe[: filtered_dataframe.shape[0]],
-            labels,
-            weights,
-            stratify=stratification,
-            **self.config["freqai"]["data_split_parameters"],
-        )
+        if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            (
+                train_features,
+                test_features,
+                train_labels,
+                test_labels,
+                train_weights,
+                test_weights,
+            ) = train_test_split(
+                filtered_dataframe[: filtered_dataframe.shape[0]],
+                labels,
+                weights,
+                stratify=stratification,
+                **self.config["freqai"]["data_split_parameters"],
+            )
+        else:  # test_size == 0: skip the split and train on every sample
+            test_labels = np.zeros(2)  # placeholder, there is no test set
+            test_features = pd.DataFrame()  # empty frame, there is no test set
+            test_weights = np.zeros(2)  # placeholder, there is no test set
+            train_features = filtered_dataframe
+            train_labels = labels
+            train_weights = weights
 
         return self.build_data_dictionary(
             train_features, test_features, train_labels, test_labels, train_weights, test_weights
@@ -392,12 +400,13 @@ class FreqaiDataKitchen:
                 / (train_labels_max - train_labels_min)
                 - 1
             )
-            data_dictionary["test_labels"][item] = (
-                2
-                * (data_dictionary["test_labels"][item] - train_labels_min)
-                / (train_labels_max - train_labels_min)
-                - 1
-            )
+            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+                data_dictionary["test_labels"][item] = (  # scaled with the train min/max
+                    2
+                    * (data_dictionary["test_labels"][item] - train_labels_min)
+                    / (train_labels_max - train_labels_min)
+                    - 1
+                )
 
             self.data[f"{item}_max"] = train_labels_max  # .to_dict()
             self.data[f"{item}_min"] = train_labels_min  # .to_dict()
@@ -555,11 +564,12 @@ class FreqaiDataKitchen:
         self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list)
         self.training_features_list = self.data_dictionary["train_features"].columns
 
-        self.data_dictionary["test_features"] = pd.DataFrame(
-            data=test_components,
-            columns=["PC" + str(i) for i in range(0, n_keep_components)],
-            index=self.data_dictionary["test_features"].index,
-        )
+        if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            self.data_dictionary["test_features"] = pd.DataFrame(  # only with a test split
+                data=test_components,
+                columns=["PC" + str(i) for i in range(0, n_keep_components)],
+                index=self.data_dictionary["test_features"].index,  # keep original row index
+            )
 
         self.data["n_kept_components"] = n_keep_components
         self.pca = pca2
@@ -652,15 +662,17 @@ class FreqaiDataKitchen:
             )
 
             # same for test data
-            y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
-            dropped_points = np.where(y_pred == -1, 0, y_pred)
-            self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
-                (y_pred == 1)
-            ]
-            self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(y_pred == 1)]
-            self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
-                (y_pred == 1)
-            ]
+            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+                y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
+                dropped_points = np.where(y_pred == -1, 0, y_pred)
+                # keep only the test rows whose SVM prediction is 1
+                keep_mask = y_pred == 1
+                for split_key in ("test_features", "test_labels", "test_weights"):
+                    self.data_dictionary[split_key] = self.data_dictionary[split_key][keep_mask]
+            # NOTE(review): the logger.info call below reads y_pred and dropped_points
+            # even when test_size == 0 skips this branch; they then hold whatever was
+            # assigned earlier in this method (or are undefined) - confirm the logged
+            # counts are still meaningful in that case.
 
             logger.info(
                 f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}"
diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
index f41760472..56b84c08d 100644
--- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
@@ -28,17 +28,22 @@ class CatboostPredictionModel(BaseRegressionModel):
             label=data_dictionary["train_labels"],
             weight=data_dictionary["train_weights"],
         )
-
-        test_data = Pool(
-            data=data_dictionary["test_features"],
-            label=data_dictionary["test_labels"],
-            weight=data_dictionary["test_weights"],
-        )
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            test_data = None
+        else:
+            test_data = Pool(
+                data=data_dictionary["test_features"],
+                label=data_dictionary["test_labels"],
+                weight=data_dictionary["test_weights"],
+            )
 
         model = CatBoostRegressor(
             allow_writing_files=False,
             **self.model_training_parameters,
         )
+
+        # test_data is already None when test_size == 0 (set above), so fit() then
+        # runs without an evaluation set; no second test_size check is needed here.
         model.fit(X=train_data, eval_set=test_data)
 
         return model
diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py
index 17b5e6c68..35a93e808 100644
--- a/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py
@@ -36,7 +36,9 @@ class CatboostPredictionMultiModel(BaseRegressionModel):
 
         model = MultiOutputRegressor(estimator=cbr)
         model.fit(X=X, y=y, sample_weight=sample_weight)  # , eval_set=eval_set)
-        train_score = model.score(X, y)
-        test_score = model.score(*eval_set)
-        logger.info(f"Train score {train_score}, Test score {test_score}")
+        # scores are only computed and logged when a test split exists (test_size != 0)
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            train_score = model.score(X, y)
+            test_score = model.score(*eval_set)
+            logger.info(f"Train score {train_score}, Test score {test_score}")
         return model
diff --git a/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py b/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py
index 525566cf4..c94bc5698 100644
--- a/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py
@@ -25,11 +25,15 @@ class LightGBMPredictionModel(BaseRegressionModel):
                                 all the training and test data/labels.
         """
 
-        eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            eval_set = None  # no test split, so nothing to evaluate against
+        else:
+            eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
         X = data_dictionary["train_features"]
         y = data_dictionary["train_labels"]
 
         model = LGBMRegressor(**self.model_training_parameters)
+
         model.fit(X=X, y=y, eval_set=eval_set)
 
         return model