From 1e41c773a067beaf523c7bd3c67717b27fd5e732 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 28 Aug 2022 12:11:29 +0200 Subject: [PATCH] fix outlier protection --- docs/freqai.md | 2 +- freqtrade/freqai/data_kitchen.py | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index 78d71e2f8..c0f764953 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -113,7 +113,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `use_SVM_to_remove_outliers` | Train a support vector machine to detect and remove outliers from the training data set, as well as from incoming data points. See details about how it works [here](#removing-outliers-using-a-support-vector-machine-svm).
**Datatype:** Boolean. | `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. See details about some select parameters [here](#removing-outliers-using-a-support-vector-machine-svm).
**Datatype:** Dictionary. | `use_DBSCAN_to_remove_outliers` | Cluster data using DBSCAN to identify and remove outliers from training and prediction data. See details about how it works [here](#removing-outliers-with-dbscan).
**Datatype:** Boolean. -| `outlier_protection_percentage` | If more than `outlier_protection_percentage` fraction of points are removed as outliers, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact.
**Datatype:** float. Default: `0.3` +| `outlier_protection_percentage` | If more than `outlier_protection_percentage` percent of points are detected as outliers, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact.
**Datatype:** float. Default: `30` | | **Data split parameters** | `data_split_parameters` | Include any additional parameters available from Scikit-learn `test_train_split()`, which are shown [here](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) (external website).
**Datatype:** Dictionary. | `test_size` | Fraction of data that should be used for testing instead of training.
**Datatype:** Positive float < 1. diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index eb9f6beb7..ed3990de0 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -519,7 +519,7 @@ class FreqaiDataKitchen: """ outlier_protection_pct = self.freqai_config["feature_parameters"].get( "outlier_protection_percentage", 30) - outlier_pct = dropped_pts.sum() / len(dropped_pts) + outlier_pct = (dropped_pts.sum() / len(dropped_pts)) * 100 if outlier_pct >= outlier_protection_pct: self.svm_model = None return outlier_pct @@ -563,12 +563,12 @@ class FreqaiDataKitchen: self.data_dictionary["train_features"] ) y_pred = self.svm_model.predict(self.data_dictionary["train_features"]) - dropped_points = np.where(y_pred == -1, 0, y_pred) + kept_points = np.where(y_pred == -1, 0, y_pred) # keep_index = np.where(y_pred == 1) - outlier_ptc = self.get_outlier_percentage(dropped_points) - if outlier_ptc: + outlier_pct = self.get_outlier_percentage(1 - kept_points) + if outlier_pct: logger.warning( - f"SVM detected > {outlier_ptc}% of the points as outliers." + f"SVM detected {outlier_pct:.2f}% of the points as outliers. " f"Keeping original dataset." ) return @@ -584,7 +584,7 @@ class FreqaiDataKitchen: ] logger.info( - f"SVM tossed {len(y_pred) - dropped_points.sum()}" + f"SVM tossed {len(y_pred) - kept_points.sum()}" f" train points from {len(y_pred)} total points." 
) @@ -593,7 +593,7 @@ class FreqaiDataKitchen: # to reduce code duplication if self.freqai_config['data_split_parameters'].get('test_size', 0.1) != 0: y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) - dropped_points = np.where(y_pred == -1, 0, y_pred) + kept_points = np.where(y_pred == -1, 0, y_pred) self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ (y_pred == 1) ] @@ -604,7 +604,7 @@ class FreqaiDataKitchen: ] logger.info( - f"SVM tossed {len(y_pred) - dropped_points.sum()}" + f"SVM tossed {len(y_pred) - kept_points.sum()}" f" test points from {len(y_pred)} total points." ) @@ -688,10 +688,10 @@ class FreqaiDataKitchen: self.data['DBSCAN_min_samples'] = MinPts dropped_points = np.where(clustering.labels_ == -1, 1, 0) - outlier_ptc = self.get_outlier_percentage(dropped_points) - if outlier_ptc: + outlier_pct = self.get_outlier_percentage(dropped_points) + if outlier_pct: logger.warning( - f"DBSCAN detected > {outlier_ptc}% of the points as outliers." + f"DBSCAN detected {outlier_pct:.2f}% of the points as outliers. " f"Keeping original dataset." ) return @@ -751,10 +751,10 @@ class FreqaiDataKitchen: 0, ) - outlier_ptc = self.get_outlier_percentage(1 - do_predict) - if outlier_ptc: + outlier_pct = self.get_outlier_percentage(1 - do_predict) + if outlier_pct: logger.warning( - f"DI detected > {outlier_ptc}% of the points as outliers." + f"DI detected {outlier_pct:.2f}% of the points as outliers. " f"Keeping original dataset." ) return