Add outlier percentage check
This commit is contained in:
parent
b2d664c63c
commit
86c5ac44e4
@ -513,6 +513,19 @@ class FreqaiDataKitchen:
|
||||
|
||||
return avg_mean_dist
|
||||
|
||||
def get_outlier_percentage(self, dropped_pts: npt.ArrayLike) -> float:
    """
    Check if more than X% of points were dropped during outlier detection.

    :param dropped_pts: array-like of 0/1 flags, where 1 marks a point that
        was dropped as an outlier and 0 marks a point that was kept.
    :return: the percentage (0-100) of dropped points when it reaches the
        configured "outlier_protection_percentage" threshold (default 30),
        otherwise 0.0. Callers treat a truthy return as "keep the original
        dataset".
    """
    outlier_protection_pct = self.freqai_config["feature_parameters"].get(
        "outlier_protection_percentage", 30)

    # Accept any array-like (list, tuple, ndarray) rather than relying on
    # the argument already exposing ``.sum()``.
    dropped = np.asarray(dropped_pts)
    if dropped.size == 0:
        # Nothing was evaluated, so nothing can have been dropped; also
        # avoids a division by zero below.
        return 0.0

    # The threshold is expressed in percent, so scale the dropped fraction
    # to the 0-100 range before comparing (and before it gets logged as "%").
    outlier_pct = float(dropped.sum() / dropped.size * 100)

    if outlier_pct >= outlier_protection_pct:
        # Too many points flagged: discard the fitted SVM so it is not reused.
        self.svm_model = None
        return outlier_pct

    return 0.0
|
||||
|
||||
def use_SVM_to_remove_outliers(self, predict: bool) -> None:
|
||||
"""
|
||||
Build/inference a Support Vector Machine to detect outliers
|
||||
@ -552,6 +565,14 @@ class FreqaiDataKitchen:
|
||||
y_pred = self.svm_model.predict(self.data_dictionary["train_features"])
|
||||
dropped_points = np.where(y_pred == -1, 0, y_pred)
|
||||
# keep_index = np.where(y_pred == 1)
|
||||
outlier_ptc = self.get_outlier_percentage(dropped_points)
|
||||
if outlier_ptc:
|
||||
logger.warning(
|
||||
f"SVM detected >{outlier_ptc}% of the points as outliers."
|
||||
f"Keeping original dataset."
|
||||
)
|
||||
return
|
||||
|
||||
self.data_dictionary["train_features"] = self.data_dictionary["train_features"][
|
||||
(y_pred == 1)
|
||||
]
|
||||
@ -667,6 +688,14 @@ class FreqaiDataKitchen:
|
||||
self.data['DBSCAN_min_samples'] = MinPts
|
||||
dropped_points = np.where(clustering.labels_ == -1, 1, 0)
|
||||
|
||||
outlier_ptc = self.get_outlier_percentage(dropped_points)
|
||||
if outlier_ptc:
|
||||
logger.warning(
|
||||
f"DBSCAN detected >{outlier_ptc}% of the points as outliers."
|
||||
f"Keeping original dataset."
|
||||
)
|
||||
return
|
||||
|
||||
self.data_dictionary['train_features'] = self.data_dictionary['train_features'][
|
||||
(clustering.labels_ != -1)
|
||||
]
|
||||
@ -722,6 +751,14 @@ class FreqaiDataKitchen:
|
||||
0,
|
||||
)
|
||||
|
||||
outlier_ptc = self.get_outlier_percentage(1 - do_predict)
|
||||
if outlier_ptc:
|
||||
logger.warning(
|
||||
f"DBSCAN detected >{outlier_ptc}% of the points as outliers."
|
||||
f"Keeping original dataset."
|
||||
)
|
||||
return
|
||||
|
||||
if (len(do_predict) - do_predict.sum()) > 0:
|
||||
logger.info(
|
||||
f"DI tossed {len(do_predict) - do_predict.sum()} predictions for "
|
||||
|
Loading…
Reference in New Issue
Block a user