From 51a6b4289ff303ab6a6963baf4d8ca4660299031 Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Thu, 4 Aug 2022 17:41:58 +0200 Subject: [PATCH] improve DBSCAN performance for subsequent trainings --- freqtrade/freqai/data_drawer.py | 1 + freqtrade/freqai/data_kitchen.py | 19 ++++++++++--------- freqtrade/freqai/freqai_interface.py | 7 ++++++- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 42338966d..9a17797ff 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -76,6 +76,7 @@ class FreqaiDataDrawer: self.load_historic_predictions_from_disk() self.training_queue: Dict[str, int] = {} self.history_lock = threading.Lock() + self.old_DBSCAN_eps: Dict[str, float] = {} def load_drawer_from_disk(self): """ diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 823cf2a55..1dfae3ef1 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -582,7 +582,7 @@ class FreqaiDataKitchen: return - def use_DBSCAN_to_remove_outliers(self, predict: bool) -> None: + def use_DBSCAN_to_remove_outliers(self, predict: bool, eps=None) -> None: """ Use DBSCAN to cluster training data and remove "noisy" data (read outliers). User controls this via the config param `DBSCAN_outlier_pct` which indicates the @@ -615,10 +615,10 @@ class FreqaiDataKitchen: else: outlier_target = self.freqai_config['feature_parameters'].get('DBSCAN_outlier_pct') - if 'DBSCAN_eps' in self.data: - eps = self.data['DBSCAN_eps'] + if eps: + epsilon = eps else: - eps = 10 + epsilon = 10 logger.info('DBSCAN starting from high value. This should be faster next train.') error = 1. @@ -628,7 +628,7 @@ class FreqaiDataKitchen: # find optimal value for epsilon using an iterative approach: while abs(np.sqrt(error)) > 0.1: - clustering = DBSCAN(eps=eps, min_samples=MinPts, + clustering = DBSCAN(eps=epsilon, min_samples=MinPts, n_jobs=int(self.thread_count / 2)).fit( self.data_dictionary['train_features'] ) @@ -637,13 +637,14 @@ class FreqaiDataKitchen: multiplier = (outlier_pct - outlier_target) if outlier_pct > 0 else 1 * \ np.sign(outlier_pct - outlier_target) multiplier = 1 + error * multiplier - eps = multiplier * eps + epsilon = multiplier * epsilon logger.info( - f'DBSCAN error {error:.2f} for eps {eps:.2f} and outliet pct {outlier_pct:.2f}') + f'DBSCAN error {error:.2f} for eps {epsilon:.2f}' + f' and outlier pct {outlier_pct:.2f}') - logger.info(f'DBSCAN found eps of {eps}.') + logger.info(f'DBSCAN found eps of {epsilon}.') - self.data['DBSCAN_eps'] = eps + self.data['DBSCAN_eps'] = epsilon self.data['DBSCAN_min_samples'] = MinPts dropped_points = np.where(clustering.labels_ == -1, 1, 0) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 078a574b7..3978fc7b2 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -385,7 +385,12 @@ class IFreqaiModel(ABC): dk.data["avg_mean_dist"] = dk.compute_distances() if self.freqai_info["feature_parameters"].get("DBSCAN_outlier_pct", 0): - dk.use_DBSCAN_to_remove_outliers(predict=False) + if dk.pair in self.dd.old_DBSCAN_eps: + eps = self.dd.old_DBSCAN_eps[dk.pair] + else: + eps = None + dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps) + self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps'] def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None: """