Fix bug in DI (#7321)

This commit is contained in:
th0rntwig 2022-08-30 12:54:39 +02:00 committed by GitHub
parent 71846ecbf2
commit 50e2808667
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -529,7 +529,6 @@ class FreqaiDataKitchen:
"outlier_protection_percentage", 30) "outlier_protection_percentage", 30)
outlier_pct = (dropped_pts.sum() / len(dropped_pts)) * 100 outlier_pct = (dropped_pts.sum() / len(dropped_pts)) * 100
if outlier_pct >= outlier_protection_pct: if outlier_pct >= outlier_protection_pct:
self.svm_model = None
return outlier_pct return outlier_pct
else: else:
return 0.0 return 0.0
@ -579,6 +578,7 @@ class FreqaiDataKitchen:
f"SVM detected {outlier_pct:.2f}% of the points as outliers. " f"SVM detected {outlier_pct:.2f}% of the points as outliers. "
f"Keeping original dataset." f"Keeping original dataset."
) )
self.svm_model = None
return return
self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ self.data_dictionary["train_features"] = self.data_dictionary["train_features"][
@ -633,6 +633,8 @@ class FreqaiDataKitchen:
from math import cos, sin from math import cos, sin
if predict: if predict:
if not self.data['DBSCAN_eps']:
return
train_ft_df = self.data_dictionary['train_features'] train_ft_df = self.data_dictionary['train_features']
pred_ft_df = self.data_dictionary['prediction_features'] pred_ft_df = self.data_dictionary['prediction_features']
num_preds = len(pred_ft_df) num_preds = len(pred_ft_df)
@ -702,6 +704,7 @@ class FreqaiDataKitchen:
f"DBSCAN detected {outlier_pct:.2f}% of the points as outliers. " f"DBSCAN detected {outlier_pct:.2f}% of the points as outliers. "
f"Keeping original dataset." f"Keeping original dataset."
) )
self.data['DBSCAN_eps'] = 0
return return
self.data_dictionary['train_features'] = self.data_dictionary['train_features'][ self.data_dictionary['train_features'] = self.data_dictionary['train_features'][
@ -759,18 +762,10 @@ class FreqaiDataKitchen:
0, 0,
) )
outlier_pct = self.get_outlier_percentage(1 - do_predict)
if outlier_pct:
logger.warning(
f"DI detected {outlier_pct:.2f}% of the points as outliers. "
f"Keeping original dataset."
)
return
if (len(do_predict) - do_predict.sum()) > 0: if (len(do_predict) - do_predict.sum()) > 0:
logger.info( logger.info(
f"DI tossed {len(do_predict) - do_predict.sum()} predictions for " f"DI tossed {len(do_predict) - do_predict.sum()} predictions for "
"being too far from training data" "being too far from training data."
) )
self.do_predict += do_predict self.do_predict += do_predict