Merge branch 'develop' of github.com:lolongcovas/freqtrade into feat/freqai

2022-08-24 10:39:32 +02:00
parent 7b8a64e846 a6d78a8615
commit 1c5f2d653c
27 changed files with 203 additions and 91 deletions
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -421,7 +421,7 @@ class FreqaiDataDrawer:
            )

        # if self.live:
-        self.model_dictionary[dk.model_filename] = model
+        self.model_dictionary[coin] = model
        self.pair_dict[coin]["model_filename"] = dk.model_filename
        self.pair_dict[coin]["data_path"] = str(dk.data_path)
        self.save_drawer_to_disk()
@@ -460,8 +460,8 @@ class FreqaiDataDrawer:
        )

        # try to access model in memory instead of loading object from disk to save time
-        if dk.live and dk.model_filename in self.model_dictionary:
-            model = self.model_dictionary[dk.model_filename]
+        if dk.live and coin in self.model_dictionary:
+            model = self.model_dictionary[coin]
        elif not dk.keras:
            model = load(dk.data_path / f"{dk.model_filename}_model.joblib")
        else:
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -601,6 +601,8 @@ class FreqaiDataKitchen:
        is an outlier.
        """

+        from math import cos, sin
+
        if predict:
            train_ft_df = self.data_dictionary['train_features']
            pred_ft_df = self.data_dictionary['prediction_features']
@@ -619,23 +621,47 @@ class FreqaiDataKitchen:

        else:

+            def normalise_distances(distances):
+                """
+                Min-max scale the distances onto the interval [0, 1].
+                :param distances: numpy array of distances
+                :return: array of the same shape. When every distance is identical
+                the span is zero and an all-zero array is returned instead of the
+                NaNs a 0/0 division would produce.
+                """
+                lowest = distances.min()
+                span = distances.max() - lowest
+                shifted = distances - lowest
+                if span == 0:
+                    # flat curve: shifted is already all zeros, nothing to scale by
+                    return shifted
+                return shifted / span
+
+            def rotate_point(origin, point, angle):
+                """
+                Rotate a 2D point counterclockwise around an origin.
+                :param origin: (x, y) pair to rotate about
+                :param point: (x, y) pair being rotated
+                :param angle: rotation angle in radians
+                :return: tuple (x, y) holding the rotated coordinates
+                """
+                pivot_x, pivot_y = origin[0], origin[1]
+                offset_x = point[0] - pivot_x
+                offset_y = point[1] - pivot_y
+                rotated_x = pivot_x + cos(angle) * offset_x - sin(angle) * offset_y
+                rotated_y = pivot_y + sin(angle) * offset_x + cos(angle) * offset_y
+                return (rotated_x, rotated_y)
+
            MinPts = len(self.data_dictionary['train_features'].columns) * 2
            # measure pairwise distances to train_features.shape[1]*2 nearest neighbours
            neighbors = NearestNeighbors(
                n_neighbors=MinPts, n_jobs=self.thread_count)
            neighbors_fit = neighbors.fit(self.data_dictionary['train_features'])
            distances, _ = neighbors_fit.kneighbors(self.data_dictionary['train_features'])
-            distances = np.sort(distances, axis=0)
-            index_ten_pct = int(len(distances[:, 1]) * 0.1)
-            distances = distances[index_ten_pct:, 1]
-            epsilon = distances[-1]
+            distances = np.sort(distances, axis=0).mean(axis=1)
+
+            normalised_distances = normalise_distances(distances)
+            x_range = np.linspace(0, 1, len(distances))
+            line = np.linspace(normalised_distances[0],
+                               normalised_distances[-1], len(normalised_distances))
+            deflection = np.abs(normalised_distances - line)
+            max_deflection_loc = np.where(deflection == deflection.max())[0][0]
+            origin = x_range[max_deflection_loc], line[max_deflection_loc]
+            point = x_range[max_deflection_loc], normalised_distances[max_deflection_loc]
+            rot_angle = np.pi / 4
+            elbow_loc = rotate_point(origin, point, rot_angle)
+
+            epsilon = elbow_loc[1] * (distances[-1] - distances[0]) + distances[0]

            clustering = DBSCAN(eps=epsilon, min_samples=MinPts,
                                n_jobs=int(self.thread_count)).fit(
                                                    self.data_dictionary['train_features']
                                                )

-            logger.info(f'DBSCAN found eps of {epsilon}.')
+            logger.info(f'DBSCAN found eps of {epsilon:.2f}.')

            self.data['DBSCAN_eps'] = epsilon
            self.data['DBSCAN_min_samples'] = MinPts
@@ -806,7 +832,7 @@ class FreqaiDataKitchen:

        if (len(do_predict) - do_predict.sum()) > 0:
            logger.info(
-                f"DI tossed {len(do_predict) - do_predict.sum():.2f} predictions for "
+                f"DI tossed {len(do_predict) - do_predict.sum()} predictions for "
                "being too far from training data"
            )

@@ -981,13 +1007,6 @@ class FreqaiDataKitchen:
            data_load_timerange.stopts = int(time)
            retrain = True

-        # logger.info(
-        #     f"downloading data for "
-        #     f"{(data_load_timerange.stopts-data_load_timerange.startts)/SECONDS_IN_DAY:.2f} "
-        #     " days. "
-        #     f"Extension of {additional_seconds/SECONDS_IN_DAY:.2f} days"
-        # )
-
        return retrain, trained_timerange, data_load_timerange

    def set_new_model_names(self, pair: str, trained_timerange: TimeRange):
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -82,12 +82,15 @@ class IFreqaiModel(ABC):
        if self.ft_params.get("inlier_metric_window", 0):
            self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
        self.pair_it = 0
+        self.pair_it_train = 0
        self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
        self.last_trade_database_summary: DataFrame = {}
        self.current_trade_database_summary: DataFrame = {}
        self.analysis_lock = Lock()
        self.inference_time: float = 0
+        self.train_time: float = 0
        self.begin_time: float = 0
+        self.begin_time_train: float = 0
        self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])

    def assert_config(self, config: Dict[str, Any]) -> None:
@@ -130,11 +133,20 @@ class IFreqaiModel(ABC):
            dk = self.start_backtesting(dataframe, metadata, self.dk)

        dataframe = dk.remove_features_from_df(dk.return_dataframe)
-        del dk
+        self.clean_up()
        if self.live:
            self.inference_timer('stop')
        return dataframe

+    def clean_up(self):
+        """
+        Drop the references to the model and the data kitchen held on this
+        instance. The garbage collector is expected to reclaim them between
+        coins anyway; resetting them here makes their non-persistence explicit.
+        """
+        self.model = self.dk = None
+
    @threaded
    def start_scanning(self, strategy: IStrategy) -> None:
        """
@@ -161,9 +173,11 @@ class IFreqaiModel(ABC):
                dk.set_paths(pair, new_trained_timerange.stopts)

                if retrain:
+                    self.train_timer('start')
                    self.train_model_in_series(
                        new_trained_timerange, pair, strategy, dk, data_load_timerange
                    )
+                    self.train_timer('stop')

            self.dd.save_historic_predictions_to_disk()

@@ -490,8 +504,7 @@ class IFreqaiModel(ABC):
        data_load_timerange: TimeRange,
    ):
        """
-        Retrieve data and train model in single threaded mode (only used if model directory is empty
-        upon startup for dry/live )
+        Retrieve data and train model.
        :param new_trained_timerange: TimeRange = the timerange to train the model on
        :param metadata: dict = strategy provided metadata
        :param strategy: IStrategy = user defined strategy object
@@ -622,6 +635,24 @@ class IFreqaiModel(ABC):
                self.inference_time = 0
        return

+    def train_timer(self, do='start'):
+        """
+        Accumulate the wall-clock time spent training and report the total once
+        the count of training runs equals the size of the pairlist.
+        :param do: 'start' increments the training counter and stamps the begin
+        time, 'stop' adds the elapsed time to the running total. Any other value
+        is ignored.
+        """
+        if do == 'start':
+            self.pair_it_train += 1
+            self.begin_time_train = time.time()
+            return
+        if do != 'stop':
+            return
+
+        elapsed = time.time() - self.begin_time_train
+        self.train_time += elapsed
+        if self.pair_it_train != self.total_pairs:
+            return
+
+        # training counter matches the pairlist size: log the total, then reset
+        logger.info(
+            f'Total time spent training pairlist {self.train_time:.2f} seconds')
+        self.pair_it_train = 0
+        self.train_time = 0
+
    # Following methods which are overridden by user made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example.