From 56b17e6f3cc8e8286a582be2bf57db1c59af3725 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 25 Jul 2022 19:40:13 +0200 Subject: [PATCH] allow user to pass test_size = 0 and avoid using eval sets in prediction models --- freqtrade/freqai/data_kitchen.py | 80 +++++++++++-------- .../CatboostPredictionModel.py | 17 ++-- .../CatboostPredictionMultiModel.py | 8 +- .../LightGBMPredictionModel.py | 6 +- 4 files changed, 67 insertions(+), 44 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 5ca64e504..1b0ef7f33 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -243,20 +243,28 @@ class FreqaiDataKitchen: else: stratification = None - ( - train_features, - test_features, - train_labels, - test_labels, - train_weights, - test_weights, - ) = train_test_split( - filtered_dataframe[: filtered_dataframe.shape[0]], - labels, - weights, - stratify=stratification, - **self.config["freqai"]["data_split_parameters"], - ) + if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + ( + train_features, + test_features, + train_labels, + test_labels, + train_weights, + test_weights, + ) = train_test_split( + filtered_dataframe[: filtered_dataframe.shape[0]], + labels, + weights, + stratify=stratification, + **self.config["freqai"]["data_split_parameters"], + ) + else: + test_labels = np.zeros(2) + test_features = pd.DataFrame() + test_weights = np.zeros(2) + train_features = filtered_dataframe + train_labels = labels + train_weights = weights return self.build_data_dictionary( train_features, test_features, train_labels, test_labels, train_weights, test_weights @@ -392,12 +400,13 @@ class FreqaiDataKitchen: / (train_labels_max - train_labels_min) - 1 ) - data_dictionary["test_labels"][item] = ( - 2 - * (data_dictionary["test_labels"][item] - train_labels_min) - / (train_labels_max - train_labels_min) - - 1 - ) + if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + data_dictionary["test_labels"][item] = ( + 2 + * (data_dictionary["test_labels"][item] - train_labels_min) + / (train_labels_max - train_labels_min) + - 1 + ) self.data[f"{item}_max"] = train_labels_max # .to_dict() self.data[f"{item}_min"] = train_labels_min # .to_dict() @@ -555,11 +564,12 @@ class FreqaiDataKitchen: self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list) self.training_features_list = self.data_dictionary["train_features"].columns - self.data_dictionary["test_features"] = pd.DataFrame( - data=test_components, - columns=["PC" + str(i) for i in range(0, n_keep_components)], - index=self.data_dictionary["test_features"].index, - ) + if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + self.data_dictionary["test_features"] = pd.DataFrame( + data=test_components, + columns=["PC" + str(i) for i in range(0, n_keep_components)], + index=self.data_dictionary["test_features"].index, + ) self.data["n_kept_components"] = n_keep_components self.pca = pca2 @@ -652,15 +662,17 @@ class FreqaiDataKitchen: ) # same for test data - y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) - dropped_points = np.where(y_pred == -1, 0, y_pred) - self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ - (y_pred == 1) - ] - self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(y_pred == 1)] - self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ - (y_pred == 1) - ] + if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) + dropped_points = np.where(y_pred == -1, 0, y_pred) + self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ + (y_pred == 1) + ] + self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][( + y_pred == 1)] + self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ + (y_pred == 1) + ] logger.info( f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}" diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py index f41760472..56b84c08d 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -28,17 +28,22 @@ class CatboostPredictionModel(BaseRegressionModel): label=data_dictionary["train_labels"], weight=data_dictionary["train_weights"], ) - - test_data = Pool( - data=data_dictionary["test_features"], - label=data_dictionary["test_labels"], - weight=data_dictionary["test_weights"], - ) + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0: + test_data = None + else: + test_data = Pool( + data=data_dictionary["test_features"], + label=data_dictionary["test_labels"], + weight=data_dictionary["test_weights"], + ) model = CatBoostRegressor( allow_writing_files=False, **self.model_training_parameters, ) + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0: + test_data = None model.fit(X=train_data, eval_set=test_data) return model diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py index 17b5e6c68..35a93e808 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py @@ -36,7 +36,9 @@ class CatboostPredictionMultiModel(BaseRegressionModel): model = MultiOutputRegressor(estimator=cbr) model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) - train_score = model.score(X, y) - test_score = model.score(*eval_set) - logger.info(f"Train score {train_score}, Test score {test_score}") + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + train_score = model.score(X, y) + test_score = model.score(*eval_set) + logger.info(f"Train score {train_score}, Test score {test_score}") return model diff --git a/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py b/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py index 525566cf4..c94bc5698 100644 --- a/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py +++ b/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py @@ -25,11 +25,15 @@ class LightGBMPredictionModel(BaseRegressionModel): all the training and test data/labels. """ - eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0: + eval_set = None + else: + eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) X = data_dictionary["train_features"] y = data_dictionary["train_labels"] model = LGBMRegressor(**self.model_training_parameters) + model.fit(X=X, y=y, eval_set=eval_set) return model