Merge branch 'develop' into add-inlier-metric

2022-09-06 20:40:21 +02:00
parent d44296783e dc4a4bdf09
commit d7585161b2
38 changed files with 664 additions and 349 deletions
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
 from datetime import datetime, timezone
 from pathlib import Path
 from threading import Lock
-from typing import Any, Dict, Tuple
+from typing import Any, Dict, List, Tuple

 import numpy as np
 import pandas as pd
@@ -26,13 +26,6 @@ pd.options.mode.chained_assignment = None
 logger = logging.getLogger(__name__)


-def threaded(fn):
-    def wrapper(*args, **kwargs):
-        threading.Thread(target=fn, args=args, kwargs=kwargs).start()
-
-    return wrapper
-
-
 class IFreqaiModel(ABC):
    """
    Class containing all tools for training and prediction in the strategy.
@@ -69,6 +62,9 @@ class IFreqaiModel(ABC):
        self.first = True
        self.set_full_path()
        self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
+        self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", False)
+        if self.save_backtest_models:
+            logger.info('Backtesting module configured to save all models.')
        self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
        self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
        self.scanning = False
@@ -92,6 +88,9 @@ class IFreqaiModel(ABC):
        self.begin_time_train: float = 0
        self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])

+        self._threads: List[threading.Thread] = []
+        self._stop_event = threading.Event()
+
    def assert_config(self, config: Dict[str, Any]) -> None:

        if not config.get("freqai", {}):
@@ -125,10 +124,9 @@ class IFreqaiModel(ABC):
        elif not self.follow_mode:
            self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
            logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
-            with self.analysis_lock:
-                dataframe = self.dk.use_strategy_to_populate_indicators(
-                    strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
-                )
+            dataframe = self.dk.use_strategy_to_populate_indicators(
+                strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
+            )
            dk = self.start_backtesting(dataframe, metadata, self.dk)

        dataframe = dk.remove_features_from_df(dk.return_dataframe)
@@ -146,15 +144,34 @@ class IFreqaiModel(ABC):
        self.model = None
        self.dk = None

-    @threaded
-    def start_scanning(self, strategy: IStrategy) -> None:
+    def shutdown(self):
+        """
+        Cleans up threads on Shutdown, set stop event. Join threads to wait
+        for current training iteration.
+        """
+        logger.info("Stopping FreqAI")
+        self._stop_event.set()
+
+        logger.info("Waiting on Training iteration")
+        for _thread in self._threads:
+            _thread.join()
+
+    def start_scanning(self, *args, **kwargs) -> None:
+        """
+        Start `self._start_scanning` in a separate thread
+        """
+        _thread = threading.Thread(target=self._start_scanning, args=args, kwargs=kwargs)
+        self._threads.append(_thread)
+        _thread.start()
+
+    def _start_scanning(self, strategy: IStrategy) -> None:
        """
        Function designed to constantly scan pairs for retraining on a separate thread (intracandle)
        to improve model youth. This function is agnostic to data preparation/collection/storage,
        it simply trains on what ever data is available in the self.dd.
        :param strategy: IStrategy = The user defined strategy class
        """
-        while 1:
+        while not self._stop_event.is_set():
            time.sleep(1)
            for pair in self.config.get("exchange", {}).get("pair_whitelist"):

@@ -225,28 +242,39 @@ class IFreqaiModel(ABC):
                "trains"
            )

+            trained_timestamp_int = int(trained_timestamp.stopts)
            dk.data_path = Path(
                dk.full_path
                /
-                f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}"
+                f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
                )
-            if not self.model_exists(
-                metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts)
-            ):
-                dk.find_features(dataframe_train)
-                self.model = self.train(dataframe_train, metadata["pair"], dk)
-                self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
-                    trained_timestamp.stopts)
-                dk.set_new_model_names(metadata["pair"], trained_timestamp)
-                self.dd.save_data(self.model, metadata["pair"], dk)
+
+            dk.set_new_model_names(metadata["pair"], trained_timestamp)
+
+            if dk.check_if_backtest_prediction_exists():
+                append_df = dk.get_backtesting_prediction()
+                dk.append_predictions(append_df)
            else:
-                self.model = self.dd.load_data(metadata["pair"], dk)
+                if not self.model_exists(
+                    metadata["pair"], dk, trained_timestamp=trained_timestamp_int
+                ):
+                    dk.find_features(dataframe_train)
+                    self.model = self.train(dataframe_train, metadata["pair"], dk)
+                    self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
+                        trained_timestamp.stopts)

-            self.check_if_feature_list_matches_strategy(dataframe_train, dk)
+                    if self.save_backtest_models:
+                        logger.info('Saving backtest model to disk.')
+                        self.dd.save_data(self.model, metadata["pair"], dk)
+                else:
+                    self.model = self.dd.load_data(metadata["pair"], dk)

-            pred_df, do_preds = self.predict(dataframe_backtest, dk)
+                self.check_if_feature_list_matches_strategy(dataframe_train, dk)

-            dk.append_predictions(pred_df, do_preds)
+                pred_df, do_preds = self.predict(dataframe_backtest, dk)
+                append_df = dk.get_predictions_to_append(pred_df, do_preds)
+                dk.append_predictions(append_df)
+                dk.save_backtesting_prediction(append_df)

        dk.fill_predictions(dataframe)

@@ -291,14 +319,8 @@ class IFreqaiModel(ABC):
            )
            dk.set_paths(metadata["pair"], new_trained_timerange.stopts)

-            # download candle history if it is not already in memory
+            # load candle history into memory if it is not yet.
            if not self.dd.historic_data:
-                logger.info(
-                    "Downloading all training data for all pairs in whitelist and "
-                    "corr_pairlist, this may take a while if you do not have the "
-                    "data saved"
-                )
-                dk.download_all_data_for_training(data_load_timerange, strategy.dp)
                self.dd.load_all_pair_histories(data_load_timerange, dk)

            if not self.scanning:
@@ -471,11 +493,6 @@ class IFreqaiModel(ABC):
        :return:
        :boolean: whether the model file exists or not.
        """
-        coin, _ = pair.split("/")
-
-        if not self.live:
-            dk.model_filename = model_filename = f"cb_{coin.lower()}_{trained_timestamp}"
-
        path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib")
        file_exists = path_to_modelfile.is_file()
        if file_exists and not scanning:
@@ -628,8 +645,8 @@ class IFreqaiModel(ABC):
                logger.info(
                    f'Total time spent inferencing pairlist {self.inference_time:.2f} seconds')
                if self.inference_time > 0.25 * self.base_tf_seconds:
-                    logger.warning('Inference took over 25/% of the candle time. Reduce pairlist to'
-                                   ' avoid blinding open trades and degrading performance.')
+                    logger.warning("Inference took over 25% of the candle time. Reduce pairlist to"
+                                   " avoid blinding open trades and degrading performance.")
                self.pair_it = 0
                self.inference_time = 0
        return