Detect whether upper-timeframe (tf) candles are new and append them if so. Correct the epoch for the candle update check.

2022-06-07 19:49:20 +02:00 · 2022-06-07 19:49:20 +02:00 · 15d049cffe
commit 15d049cffe
parent cab8f517b4
3 changed files with 44 additions and 36 deletions
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@ -297,7 +297,7 @@ class FreqaiDataKitchen:
            )
            if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live:
                logger.warning(
-                    f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100} percent'
+                    f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.2f} percent'
                    ' of training data dropped due to NaNs, model may perform inconsistent'
                    'with expectations'
                )
@ -538,9 +538,10 @@ class FreqaiDataKitchen:
        for prediction confidence in the Dissimilarity Index
        """
        logger.info("computing average mean distance for all training points")
-        pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1)
+        tc = self.freqai_config.get('model_training_parameters', {}).get('thread_count', -1)
+        pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=tc)
        avg_mean_dist = pairwise.mean(axis=1).mean()
-        logger.info("avg_mean_dist %s", avg_mean_dist)
+        logger.info(f'avg_mean_dist {avg_mean_dist:.2f}')

        return avg_mean_dist

@ -668,7 +669,8 @@ class FreqaiDataKitchen:

        self.full_predictions = np.append(self.full_predictions, predictions)
        self.full_do_predict = np.append(self.full_do_predict, do_predict)
-        self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
+        if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
+            self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
        self.full_target_mean = np.append(self.full_target_mean, target_mean)
        self.full_target_std = np.append(self.full_target_std, target_std)

@ -683,7 +685,8 @@ class FreqaiDataKitchen:
        filler = np.zeros(len_dataframe - len(self.full_predictions))  # startup_candle_count
        self.full_predictions = np.append(filler, self.full_predictions)
        self.full_do_predict = np.append(filler, self.full_do_predict)
-        self.full_DI_values = np.append(filler, self.full_DI_values)
+        if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
+            self.full_DI_values = np.append(filler, self.full_DI_values)
        self.full_target_mean = np.append(filler, self.full_target_mean)
        self.full_target_std = np.append(filler, self.full_target_std)

@ -728,7 +731,7 @@ class FreqaiDataKitchen:
        # find the max indicator length required
        max_timeframe_chars = self.freqai_config.get('timeframes')[-1]
        max_period = self.freqai_config.get('feature_parameters', {}).get(
-                                            'indicator_max_period', 20)
+                                            'indicator_max_period', 50)
        additional_seconds = 0
        if max_timeframe_chars[-1] == 'd':
            additional_seconds = max_period * SECONDS_IN_DAY * int(max_timeframe_chars[-2])
@ -863,9 +866,17 @@ class FreqaiDataKitchen:

            for pair in self.all_pairs:
                for tf in self.freqai_config.get('timeframes'):
-                    lh = len(history_data[pair][tf].index)
-                    history_data[pair][tf].loc[lh] = strategy.dp.get_pair_dataframe(pair,
-                                                                                    tf).iloc[-1]
+                    # check if newest candle is already appended
+                    if (
+                         str(history_data[pair][tf].iloc[-1]['date']) ==
+                         str(strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]['date'].iloc[-1])
+                         ):
+                        continue
+                    history_data[pair][tf] = pd.concat(
+                                            [history_data[pair][tf],
+                                             strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]],
+                                            ignore_index=True, axis=0
+                                                )

            logger.info(f'Length of history data {len(history_data[pair][tf])}')

@ -908,23 +919,25 @@ class FreqaiDataKitchen:
        for training according to user defined train_period
        metadata: dict = strategy furnished pair metadata
        """
-        corr_dataframes: Dict[Any, Any] = {}
-        base_dataframes: Dict[Any, Any] = {}
-        historic_data = self.data_drawer.historic_data
-        pairs = self.freqai_config.get('corr_pairlist', [])
+        with self.data_drawer.history_lock:
+            corr_dataframes: Dict[Any, Any] = {}
+            base_dataframes: Dict[Any, Any] = {}
+            historic_data = self.data_drawer.historic_data
+            pairs = self.freqai_config.get('corr_pairlist', [])

-        for tf in self.freqai_config.get('timeframes'):
-            base_dataframes[tf] = self.slice_dataframe(
-                                                       timerange,
-                                                       historic_data[metadata['pair']][tf]
-                                                       )
-            if pairs:
-                for p in pairs:
-                    if metadata['pair'] in p:
-                        continue  # dont repeat anything from whitelist
-                    if p not in corr_dataframes:
-                        corr_dataframes[p] = {}
-                    corr_dataframes[p][tf] = self.slice_dataframe(timerange, historic_data[p][tf])
+            for tf in self.freqai_config.get('timeframes'):
+                base_dataframes[tf] = self.slice_dataframe(
+                                                        timerange,
+                                                        historic_data[metadata['pair']][tf]
+                                                        )
+                if pairs:
+                    for p in pairs:
+                        if metadata['pair'] in p:
+                            continue  # dont repeat anything from whitelist
+                        if p not in corr_dataframes:
+                            corr_dataframes[p] = {}
+                        corr_dataframes[p][tf] = self.slice_dataframe(timerange,
+                                                                      historic_data[p][tf])

        return corr_dataframes, base_dataframes

--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@ -216,12 +216,9 @@ class IFreqaiModel(ABC):

        # append the historic data once per round
        if (self.data_drawer.historic_data and
-                self.update_historic_data >= len(self.config.get('exchange', '')
-                                                 .get('pair_whitelist'))):
+                self.config.get('exchange', '').get('pair_whitelist').index(metadata['pair']) == 1):
            dh.update_historic_data(strategy)
-            self.update_historic_data = 1
-        else:
-            self.update_historic_data += 1
+            logger.info(f'Updating historic data on pair {metadata["pair"]}')

        # if trainable, check if model needs training, if so compute new timerange,
        # then save model and metadata.
@ -405,9 +402,9 @@ class IFreqaiModel(ABC):
        # dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
        # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
        #                                                           metadata)
-        with self.data_drawer.history_lock:
-            corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
-                                                                               metadata)
+
+        corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
+                                                                           metadata)

        # protecting from common benign errors associated with grabbing new data from exchange:
        try:
@ -419,7 +416,6 @@ class IFreqaiModel(ABC):

        except Exception as err:
            logger.exception(err)
-            # self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
            self.training_on_separate_thread = False
            self.retrain = False
            return
@ -428,7 +424,6 @@ class IFreqaiModel(ABC):
            model = self.train(unfiltered_dataframe, metadata, dh)
        except ValueError:
            logger.warning('Value error encountered during training')
-            # self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
            self.training_on_separate_thread = False
            self.retrain = False
            return
--- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py
@ -59,7 +59,7 @@ class CatboostPredictionModel(IFreqaiModel):
        :model: Trained model which can be used to inference (self.predict)
        """

-        logger.info('--------------------Starting training'
+        logger.info('--------------------Starting training '
                    f'{metadata["pair"]} --------------------')

        # create the full feature list based on user config info