Merge pull request #7475 from freqtrade/backtest_live_models

FreqAI - Backtest live/ready models
2022-11-07 20:39:49 +01:00
parent 8bc71f2025 884014a4b9
commit 426a26f268
12 changed files with 502 additions and 54 deletions
--- a/docs/freqai-running.md
+++ b/docs/freqai-running.md
@@ -73,12 +73,24 @@ Backtesting mode requires [downloading the necessary data](#downloading-data-to-
 To allow for tweaking your strategy (**not** the features!), FreqAI will automatically save the predictions during backtesting so that they can be reused for future backtests and live runs using the same `identifier` model. This provides a performance enhancement geared towards enabling **high-level hyperopting** of entry/exit criteria.
-An additional directory called `predictions`, which contains all the predictions stored in `hdf` format, will be created in the `unique-id` folder.
+An additional directory called `backtesting_predictions`, which contains all the predictions stored in `hdf` format, will be created in the `unique-id` folder.
 To change your **features**, you **must** set a new `identifier` in the config to signal to FreqAI to train new models.
 To save the models generated during a particular backtest so that you can start a live deployment from one of them instead of training a new model, you must set `save_backtest_models` to `True` in the config.
 ### Backtest live models
 FreqAI allows you to reuse ready models through the backtest parameter `--freqai-backtest-live-models`. This can be useful when you want to reuse models generated in dry/live runs for comparison or other study. For that, you must set `"purge_old_models"` to `True` in the config.
 The `--timerange` parameter must not be provided, as it is calculated automatically from the training end dates of the models.
 Each model has an identifier derived from the training end date. If you have only 1 model trained, FreqAI will backtest from the training end date until the current date. If you have more than 1 model, each model will perform the backtesting according to the training end date until the training end date of the next model and so on. For the last model, the period of the previous model will be used for the execution.
 !!! Note
    Currently, there is no checking for expired models, even if the `expired_hours` parameter is set.
 ### Downloading data to cover the full backtest period
 For live/dry deployments, FreqAI will download the necessary data automatically. However, to use backtesting functionality, you need to download the necessary data using `download-data` (details [here](data-download.md#data-downloading)). You need to pay careful attention to understanding how much *additional* data needs to be downloaded to ensure that there is a sufficient amount of training data *before* the start of the backtesting time range. The amount of additional data can be roughly estimated by moving the start date of the time range backwards by `train_period_days` and the `startup_candle_count` (see the [parameter table](freqai-parameter-table.md) for detailed descriptions of these parameters) from the beginning of the desired backtesting time range. 
--- a/freqtrade/commands/arguments.py
+++ b/freqtrade/commands/arguments.py
@@ -25,7 +25,8 @@ ARGS_COMMON_OPTIMIZE = ["timeframe", "timerange", "dataformat_ohlcv",
 ARGS_BACKTEST = ARGS_COMMON_OPTIMIZE + ["position_stacking", "use_max_market_positions",
                                        "enable_protections", "dry_run_wallet", "timeframe_detail",
                                        "strategy_list", "export", "exportfilename",
-                                        "backtest_breakdown", "backtest_cache"]
+                                        "backtest_breakdown", "backtest_cache",
                                        "freqai_backtest_live_models"]
 ARGS_HYPEROPT = ARGS_COMMON_OPTIMIZE + ["hyperopt", "hyperopt_path",
                                        "position_stacking", "use_max_market_positions",
--- a/freqtrade/commands/cli_options.py
+++ b/freqtrade/commands/cli_options.py
@@ -668,4 +668,9 @@ AVAILABLE_CLI_OPTIONS = {
        help='Specify additional lookup path for freqaimodels.',
        metavar='PATH',
    ),
    "freqai_backtest_live_models": Arg(
        '--freqai-backtest-live-models',
        help='Run backtest with ready models.',
        action='store_true'
    ),
 }
--- a/freqtrade/configuration/config_validation.py
+++ b/freqtrade/configuration/config_validation.py
@@ -86,6 +86,7 @@ def validate_config_consistency(conf: Dict[str, Any], preliminary: bool = False)
    _validate_unlimited_amount(conf)
    _validate_ask_orderbook(conf)
    _validate_freqai_hyperopt(conf)
    _validate_freqai_backtest(conf)
    _validate_freqai_include_timeframes(conf)
    _validate_consumers(conf)
    validate_migrated_strategy_settings(conf)
@@ -355,6 +356,26 @@ def _validate_freqai_include_timeframes(conf: Dict[str, Any]) -> None:
                f"`include_timeframes`.Offending include-timeframes: {', '.join(offending_lines)}")
 def _validate_freqai_backtest(conf: Dict[str, Any]) -> None:
    """
    Check that the FreqAI backtesting options in the configuration are consistent.
    Nothing is validated unless the runmode is BACKTEST.
    :param conf: Configuration dictionary
    :raises OperationalException: if --freqai-backtest-live-models is used without
        FreqAI being enabled, if it is combined with a timerange, or if FreqAI is
        enabled with neither a timerange nor --freqai-backtest-live-models.
    """
    if conf.get('runmode', RunMode.OTHER) != RunMode.BACKTEST:
        return

    uses_freqai = conf.get('freqai', {}).get('enabled', False)
    has_timerange = conf.get('timerange')
    live_models = conf.get('freqai_backtest_live_models', False)

    if live_models:
        if not uses_freqai:
            raise OperationalException(
                'Using --freqai-backtest-live-models parameter is only '
                'supported with a FreqAI strategy.')
        # The timerange is derived from the saved models, so a user-supplied one conflicts.
        if has_timerange:
            raise OperationalException(
                'Using timerange parameter is not supported with '
                '--freqai-backtest-live-models parameter.')
    elif uses_freqai and not has_timerange:
        raise OperationalException(
            'Please pass --timerange if you intend to use FreqAI for backtesting.')
 def _validate_consumers(conf: Dict[str, Any]) -> None:
    emc_conf = conf.get('external_message_consumer', {})
    if emc_conf.get('enabled', False):
--- a/freqtrade/configuration/configuration.py
+++ b/freqtrade/configuration/configuration.py
@@ -279,6 +279,9 @@ class Configuration:
        self._args_to_config(config, argname='disableparamexport',
                             logstring='Parameter --disableparamexport detected: {} ...')
        self._args_to_config(config, argname='freqai_backtest_live_models',
                             logstring='Parameter --freqai-backtest-live-models detected ...')
        # Edge section:
        if 'stoploss_range' in self.args and self.args["stoploss_range"]:
            txt_range = eval(self.args["stoploss_range"])
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -1,7 +1,7 @@
 import copy
 import logging
 import shutil
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from math import cos, sin
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
@@ -80,25 +80,32 @@ class FreqaiDataKitchen:
        self.svm_model: linear_model.SGDOneClassSVM = None
        self.keras: bool = self.freqai_config.get("keras", False)
        self.set_all_pairs()
-        if not self.live:
+        self.backtest_live_models = config.get("freqai_backtest_live_models", False)
            if not self.config["timerange"]:
                raise OperationalException(
                    'Please pass --timerange if you intend to use FreqAI for backtesting.')
            self.full_timerange = self.create_fulltimerange(
                self.config["timerange"], self.freqai_config.get("train_period_days", 0)
            )
-            (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
+        if not self.live:
-                self.full_timerange,
+            self.full_path = self.get_full_models_path(self.config)
-                config["freqai"]["train_period_days"],
+
-                config["freqai"]["backtest_period_days"],
+            if self.backtest_live_models:
-            )
+                if self.pair:
                    self.set_timerange_from_ready_models()
                    (self.training_timeranges,
                     self.backtesting_timeranges) = self.split_timerange_live_models()
            else:
                self.full_timerange = self.create_fulltimerange(
                    self.config["timerange"], self.freqai_config.get("train_period_days", 0)
                )
                (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
                    self.full_timerange,
                    config["freqai"]["train_period_days"],
                    config["freqai"]["backtest_period_days"],
                )
        self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
        self.thread_count = self.freqai_config.get("data_kitchen_thread_count", -1)
        self.train_dates: DataFrame = pd.DataFrame()
        self.unique_classes: Dict[str, list] = {}
        self.unique_class_list: list = []
        self.backtest_live_models_data: Dict[str, Any] = {}
    def set_paths(
        self,
@@ -110,10 +117,7 @@ class FreqaiDataKitchen:
        :param metadata: dict = strategy furnished pair metadata
        :param trained_timestamp: int = timestamp of most recent training
        """
-        self.full_path = Path(
+        self.full_path = self.get_full_models_path(self.config)
            self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
        )
        self.data_path = Path(
            self.full_path
            / f"sub-train-{pair.split('/')[0]}_{trained_timestamp}"
@@ -244,7 +248,7 @@ class FreqaiDataKitchen:
            self.data["filter_drop_index_training"] = drop_index
        else:
-            if len(self.data['constant_features_list']):
+            if 'constant_features_list' in self.data and len(self.data['constant_features_list']):
                filtered_df = self.check_pred_labels(filtered_df)
            # we are backtesting so we need to preserve row number to send back to strategy,
            # so now we use do_predict to avoid any prediction based on a NaN
@@ -455,6 +459,29 @@ class FreqaiDataKitchen:
        # print(tr_training_list, tr_backtesting_list)
        return tr_training_list_timerange, tr_backtesting_list_timerange
    def split_timerange_live_models(
        self
    ) -> Tuple[list, list]:
        """
        Build the backtesting slices for the current pair from its saved models.
        Each slice runs from one model's training end date to the next boundary; the
        last boundary is the stop timestamp of the overall backtesting timerange.
        :return: a Tuple holding the same list of TimeRange slices twice (the training
        and backtesting lists are one and the same object here).
        :raises OperationalException: if no saved model exists for the current pair.
        """
        coin = self.pair.split("/")[0]
        end_dates_by_asset = self.backtest_live_models_data["assets_end_dates"]
        if coin not in end_dates_by_asset:
            raise OperationalException(
                f"Model not available for pair {self.pair}. "
                "Please, try again after removing this pair from the configuration file."
            )

        overall_timerange = self.backtest_live_models_data["backtesting_timerange"]
        # Work on a fresh sorted list so the stored end dates are left untouched.
        boundaries = sorted([*end_dates_by_asset[coin], overall_timerange.stopts])

        slices = [
            TimeRange("date", "date", begin, end)
            for begin, end in zip(boundaries, boundaries[1:])
        ]
        return slices, slices
    def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame:
        """
        Given a full dataframe, extract the user desired window
@@ -962,8 +989,10 @@ class FreqaiDataKitchen:
            append_df[label] = predictions[label]
            if append_df[label].dtype == object:
                continue
-            append_df[f"{label}_mean"] = self.data["labels_mean"][label]
+            if "labels_mean" in self.data:
-            append_df[f"{label}_std"] = self.data["labels_std"][label]
+                append_df[f"{label}_mean"] = self.data["labels_mean"][label]
            if "labels_std" in self.data:
                append_df[f"{label}_std"] = self.data["labels_std"][label]
        for extra_col in self.data["extra_returns_per_train"]:
            append_df[f"{extra_col}"] = self.data["extra_returns_per_train"][extra_col]
@@ -1031,11 +1060,6 @@ class FreqaiDataKitchen:
        start = datetime.fromtimestamp(backtest_timerange.startts, tz=timezone.utc)
        stop = datetime.fromtimestamp(backtest_timerange.stopts, tz=timezone.utc)
        full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
        self.full_path = Path(
            self.config["user_data_dir"] / "models" / f"{self.freqai_config['identifier']}"
        )
        config_path = Path(self.config["config_files"][0])
        if not self.full_path.is_dir():
@@ -1118,15 +1142,15 @@ class FreqaiDataKitchen:
        return retrain, trained_timerange, data_load_timerange
-    def set_new_model_names(self, pair: str, trained_timerange: TimeRange):
+    def set_new_model_names(self, pair: str, timestamp_id: int):
        coin, _ = pair.split("/")
        self.data_path = Path(
            self.full_path
-            / f"sub-train-{pair.split('/')[0]}_{int(trained_timerange.stopts)}"
+            / f"sub-train-{pair.split('/')[0]}_{timestamp_id}"
        )
-        self.model_filename = f"cb_{coin.lower()}_{int(trained_timerange.stopts)}"
+        self.model_filename = f"cb_{coin.lower()}_{timestamp_id}"
    def set_all_pairs(self) -> None:
@@ -1319,11 +1343,11 @@ class FreqaiDataKitchen:
    def check_if_backtest_prediction_is_valid(
        self,
-        length_backtesting_dataframe: int
+        len_backtest_df: int
    ) -> bool:
        """
        Check if a backtesting prediction already exists and if the predictions
-        to append has the same size of backtesting dataframe slice
+        to append have the same size as the backtesting dataframe slice
        :param length_backtesting_dataframe: Length of backtesting dataframe slice
        :return:
        :boolean: whether the prediction file is valid.
@@ -1337,7 +1361,7 @@ class FreqaiDataKitchen:
        if file_exists:
            append_df = self.get_backtesting_prediction()
-            if len(append_df) == length_backtesting_dataframe:
+            if len(append_df) == len_backtest_df:
                logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
                return True
            else:
@@ -1350,6 +1374,109 @@ class FreqaiDataKitchen:
            )
            return False
    def set_timerange_from_ready_models(self):
        """
        Fill `backtest_live_models_data` from the saved models found under `full_path`.
        Stores the overall backtesting timerange and the per-asset training end dates.
        """
        timerange, end_dates = self.get_timerange_and_assets_end_dates_from_ready_models(
            self.full_path)

        self.backtest_live_models_data = {
            "backtesting_timerange": timerange,
            "assets_end_dates": end_dates,
        }
    def get_full_models_path(self, config: Config) -> Path:
        """
        Build the FreqAI model directory path for the configured identifier.
        :param config: Configuration dictionary
        :return: `<user_data_dir>/models/<identifier>` as a Path
        """
        identifier = str(config["freqai"].get("identifier"))
        return Path(config["user_data_dir"] / "models" / identifier)
    def get_timerange_and_assets_end_dates_from_ready_models(
            self, models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]:
        """
        Derive the backtesting timerange from the models saved in a FreqAI model directory
        :param models_path: FreqAI model path
        :return: a Tuple with (TimeRange spanning whole UTC days, and
        a Dict mapping each asset to its model training end timestamps)
        :raises OperationalException: if no saved model is found
        """
        assets_end_dates: Dict[str, Any] = self.get_assets_timestamps_training_from_ready_models(
            models_path)

        # Distinct training end timestamps across all assets, oldest first.
        unique_end_dates = sorted(
            {end_date for end_dates in assets_end_dates.values() for end_date in end_dates}
        )

        if not unique_end_dates:
            raise OperationalException(
                'At least 1 saved model is required to '
                'run backtest with the freqai-backtest-live-models option'
            )

        if len(unique_end_dates) == 1:
            logger.warning(
                "Only 1 model was found. Backtesting will run with the "
                "timerange from the end of the training date to the current date"
            )
            finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp())
        else:
            # After last model end date, use the same period from previous model
            # to finish the backtest
            newest, second_newest = unique_end_dates[-1], unique_end_dates[-2]
            finish_timestamp = newest + (newest - second_newest)

        first_ts = min(unique_end_dates[0], finish_timestamp)
        last_ts = max(unique_end_dates[-1], finish_timestamp)

        # Truncate both ends to midnight UTC.
        start_day = datetime.fromtimestamp(first_ts, timezone.utc).replace(
            hour=0, minute=0, second=0, microsecond=0)
        end_day = datetime.fromtimestamp(last_ts, timezone.utc).replace(
            hour=0, minute=0, second=0, microsecond=0)

        # add 1 day to string timerange to ensure BT module will load all dataframe data
        end_day += timedelta(days=1)

        backtesting_timerange = TimeRange(
            'date', 'date', int(start_day.timestamp()), int(end_day.timestamp())
        )
        return backtesting_timerange, assets_end_dates
    def get_assets_timestamps_training_from_ready_models(
            self, models_path: Path) -> Dict[str, Any]:
        """
        Collect the training end timestamps of every saved model below the models path.
        Only `sub-train-<ASSET>_<timestamp>` folders that contain the matching
        `cb_<asset>_<timestamp>_model.joblib` file are taken into account.
        :param models_path: FreqAI model path
        :return: a Dict mapping each asset to the list of its model end training timestamps
        :raises OperationalException: if models_path is not a directory
        """
        if not models_path.is_dir():
            raise OperationalException(
                'Model folders not found. Saved models are required '
                'to run backtest with the freqai-backtest-live-models option'
            )

        end_dates_by_asset: Dict[str, Any] = {}
        for entry in models_path.iterdir():
            folder = str(entry.name)
            if not folder.startswith("sub-train"):
                continue

            name_parts = folder.split("_")
            trained_until = int(name_parts[1])
            coin = name_parts[0].replace("sub-train-", "")

            model_file_name = f"cb_{folder.replace('sub-train-', '').lower()}_model.joblib"
            # Folders without a saved model file do not count as ready models.
            if Path(entry / model_file_name).is_file():
                end_dates_by_asset.setdefault(coin, []).append(trained_until)

        return end_dates_by_asset
    def remove_special_chars_from_feature_names(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """
        Remove all special characters from feature strings (:)
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -139,7 +139,11 @@ class IFreqaiModel(ABC):
        # the concatenated results for the full backtesting period back to the strategy.
        elif not self.follow_mode:
            self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
-            logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
+            if self.dk.backtest_live_models:
                logger.info(
                    f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (live models)")
            else:
                logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
            dataframe = self.dk.use_strategy_to_populate_indicators(
                strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
            )
@@ -259,23 +263,18 @@ class IFreqaiModel(ABC):
            dataframe_train = dk.slice_dataframe(tr_train, dataframe)
            dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
-            trained_timestamp = tr_train
+            if not self.ensure_data_exists(dataframe_backtest, tr_backtest, pair):
-            tr_train_startts_str = datetime.fromtimestamp(
+                continue
                                                tr_train.startts,
                                                tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
            tr_train_stopts_str = datetime.fromtimestamp(
                                                tr_train.stopts,
                                                tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
            logger.info(
                f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
                f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
                "trains"
            )
-            trained_timestamp_int = int(trained_timestamp.stopts)
+            self.log_backtesting_progress(tr_train, pair, train_it, total_trains)
            dk.set_paths(pair, trained_timestamp_int)
-            dk.set_new_model_names(pair, trained_timestamp)
+            timestamp_model_id = int(tr_train.stopts)
            if dk.backtest_live_models:
                timestamp_model_id = int(tr_backtest.startts)
            dk.set_paths(pair, timestamp_model_id)
            dk.set_new_model_names(pair, timestamp_model_id)
            if dk.check_if_backtest_prediction_is_valid(len(dataframe_backtest)):
                self.dd.load_metadata(dk)
@@ -289,7 +288,7 @@ class IFreqaiModel(ABC):
                    dk.find_labels(dataframe_train)
                    self.model = self.train(dataframe_train, pair, dk)
                    self.dd.pair_dict[pair]["trained_timestamp"] = int(
-                        trained_timestamp.stopts)
+                        tr_train.stopts)
                    if self.plot_features:
                        plot_feature_importance(self.model, pair, dk, self.plot_features)
                    if self.save_backtest_models:
@@ -579,7 +578,7 @@ class IFreqaiModel(ABC):
        model = self.train(unfiltered_dataframe, pair, dk)
        self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
-        dk.set_new_model_names(pair, new_trained_timerange)
+        dk.set_new_model_names(pair, new_trained_timerange.stopts)
        self.dd.save_data(model, pair, dk)
        if self.plot_features:
@@ -779,6 +778,52 @@ class IFreqaiModel(ABC):
            self.pair_it = 1
            self.current_candle = self.dd.current_candle
    def ensure_data_exists(self, dataframe_backtest: DataFrame,
                           tr_backtest: TimeRange, pair: str) -> bool:
        """
        Check whether the backtesting slice holds data when backtesting live models.
        If `freqai_backtest_live_models` is enabled and the slice is empty, the pair and
        the empty window are logged so the user knows why the slice is skipped.
        :param dataframe_backtest: the backtesting dataframe, maybe empty.
        :param tr_backtest: current backtesting timerange.
        :param pair: current pair
        :return: False if live-model backtesting is enabled and the dataframe is empty,
        True otherwise.
        """
        if self.config.get("freqai_backtest_live_models", False) and len(dataframe_backtest) == 0:
            tr_backtest_startts_str = datetime.fromtimestamp(
                                            tr_backtest.startts,
                                            tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
            tr_backtest_stopts_str = datetime.fromtimestamp(
                                            tr_backtest.stopts,
                                            tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
            # The window is reported once; the message used to repeat "from <start>" twice.
            logger.info(f"No data found for pair {pair} "
                        f"from {tr_backtest_startts_str} to {tr_backtest_stopts_str}. "
                        "Probably more than one training within the same candle period.")
            return False
        return True
    def log_backtesting_progress(self, tr_train: TimeRange, pair: str,
                                 train_it: int, total_trains: int):
        """
        Report training progress for the current pair and sliding-window position.
        Nothing is logged when `freqai_backtest_live_models` is enabled.
        :param tr_train: the training timerange
        :param pair: the current pair
        :param train_it: the train iteration for the current pair (the sliding window progress)
        :param total_trains: total trains (total number of slides for the sliding window)
        """
        window_start, window_stop = (
            datetime.fromtimestamp(ts, tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
            for ts in (tr_train.startts, tr_train.stopts)
        )

        if self.config.get("freqai_backtest_live_models", False):
            return

        logger.info(
            f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
            f" from {window_start} "
            f"to {window_stop}, {train_it}/{total_trains} "
            "trains"
        )
    # Following methods which are overridden by user made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example.
--- a/freqtrade/freqai/utils.py
+++ b/freqtrade/freqai/utils.py
@@ -218,3 +218,19 @@ def record_params(config: Dict[str, Any], full_path: Path) -> None:
            default=str,
            number_mode=rapidjson.NM_NATIVE | rapidjson.NM_NAN
        )
 def get_timerange_backtest_live_models(config: Config) -> str:
    """
    Build the timerange string used to backtest live/ready models.
    The range is derived from the models saved under the configured identifier.
    :param config: Configuration dictionary
    :return: a string timerange (format example: '20220801-20220822')
    """
    kitchen = FreqaiDataKitchen(config)
    timerange, _ = kitchen.get_timerange_and_assets_end_dates_from_ready_models(
        kitchen.get_full_models_path(config))

    day_strings = (
        datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y%m%d')
        for ts in (timerange.startts, timerange.stopts)
    )
    return "-".join(day_strings)
--- a/freqtrade/optimize/backtesting.py
+++ b/freqtrade/optimize/backtesting.py
@@ -134,6 +134,10 @@ class Backtesting:
            self.fee = self.exchange.get_fee(symbol=self.pairlists.whitelist[0])
        self.precision_mode = self.exchange.precisionMode
        if self.config.get('freqai_backtest_live_models', False):
            from freqtrade.freqai.utils import get_timerange_backtest_live_models
            self.config['timerange'] = get_timerange_backtest_live_models(self.config)
        self.timerange = TimeRange.parse_timerange(
            None if self.config.get('timerange') is None else str(self.config.get('timerange')))
--- a/tests/freqai/test_freqai_backtesting.py
+++ b/tests/freqai/test_freqai_backtesting.py
@@ -3,8 +3,11 @@ from datetime import datetime, timezone
 from pathlib import Path
 from unittest.mock import PropertyMock
 import pytest
 from freqtrade.commands.optimize_commands import setup_optimize_configuration
 from freqtrade.enums import RunMode
 from freqtrade.exceptions import OperationalException
 from freqtrade.optimize.backtesting import Backtesting
 from tests.conftest import (CURRENT_TEST_STRATEGY, get_args, log_has_re, patch_exchange,
                            patched_configuration_load_config_file)
@@ -51,3 +54,32 @@ def test_freqai_backtest_load_data(freqai_conf, mocker, caplog):
    assert log_has_re('Increasing startup_candle_count for freqai to.*', caplog)
    Backtesting.cleanup()
 def test_freqai_backtest_live_models_model_not_found(freqai_conf, mocker, testdatadir, caplog):
    """
    Building a Backtesting instance with --freqai-backtest-live-models must raise an
    OperationalException whose message says that saved models are required.
    """
    patch_exchange(mocker)

    now = datetime.now(timezone.utc)
    mocker.patch('freqtrade.plugins.pairlistmanager.PairListManager.whitelist',
                 PropertyMock(return_value=['HULUMULU/USDT', 'XRP/USDT']))
    mocker.patch('freqtrade.optimize.backtesting.history.load_data')
    mocker.patch('freqtrade.optimize.backtesting.history.get_timerange', return_value=(now, now))
    # Empty timerange: no --timerange is passed alongside --freqai-backtest-live-models below.
    freqai_conf["timerange"] = ""
    patched_configuration_load_config_file(mocker, freqai_conf)

    args = [
        'backtesting',
        '--config', 'config.json',
        '--datadir', str(testdatadir),
        '--strategy-path', str(Path(__file__).parents[1] / 'strategy/strats'),
        '--timeframe', '5m',
        '--freqai-backtest-live-models'
    ]
    args = get_args(args)
    bt_config = setup_optimize_configuration(args, RunMode.BACKTEST)

    with pytest.raises(OperationalException,
                       match=r".* Saved models are required to run backtest .*"):
        Backtesting(bt_config)

    Backtesting.cleanup()
--- a/tests/freqai/test_freqai_datakitchen.py
+++ b/tests/freqai/test_freqai_datakitchen.py
@@ -1,13 +1,18 @@
 import shutil
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from unittest.mock import MagicMock
 import pytest
 from freqtrade.configuration import TimeRange
 from freqtrade.data.dataprovider import DataProvider
 from freqtrade.exceptions import OperationalException
-from tests.conftest import log_has_re
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from tests.freqai.conftest import (get_patched_data_kitchen, make_data_dictionary,
+from freqtrade.freqai.utils import get_timerange_backtest_live_models
-                                   make_unfiltered_dataframe)
+from tests.conftest import get_patched_exchange, log_has_re
 from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy,
                                   make_data_dictionary, make_unfiltered_dataframe)
@pytest.mark.parametrize(
@@ -159,3 +164,98 @@ def test_make_train_test_datasets(mocker, freqai_conf):
    assert data_dictionary
    assert len(data_dictionary) == 7
    assert len(data_dictionary['train_features'].index) == 1916
 def test_get_pairs_timestamp_validation(mocker, freqai_conf):
    """
    Scanning the model path built for the identifier "invalid_id" must raise an
    OperationalException mentioning the freqai-backtest-live-models option.
    """
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
    strategy.freqai_info = freqai_conf.get("freqai", {})
    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)
    # presumably no model directory exists for this identifier - verify against fixtures
    freqai_conf['freqai'].update({"identifier": "invalid_id"})
    model_path = freqai.dk.get_full_models_path(freqai_conf)
    with pytest.raises(
            OperationalException,
            match=r'.*required to run backtest with the freqai-backtest-live-models.*'
            ):
        freqai.dk.get_assets_timestamps_training_from_ready_models(model_path)
@pytest.mark.parametrize('model', [
    'LightGBMRegressor'
    ])
 def test_get_timerange_from_ready_models(mocker, freqai_conf, model):
    freqai_conf.update({"freqaimodel": model})
    freqai_conf.update({"timerange": "20180110-20180130"})
    freqai_conf.update({"strategy": "freqai_test_strat"})
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
    strategy.freqai_info = freqai_conf.get("freqai", {})
    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)
    timerange = TimeRange.parse_timerange("20180101-20180130")
    freqai.dd.load_all_pair_histories(timerange, freqai.dk)
    freqai.dd.pair_dict = MagicMock()
    data_load_timerange = TimeRange.parse_timerange("20180101-20180130")
    # 1516233600 (2018-01-18 00:00) - Start Training 1
    # 1516406400 (2018-01-20 00:00) - End Training 1 (Backtest slice 1)
    # 1516579200 (2018-01-22 00:00) - End Training 2 (Backtest slice 2)
    # 1516838400 (2018-01-25 00:00) - End Timerange
    new_timerange = TimeRange("date", "date", 1516233600, 1516406400)
    freqai.extract_data_and_train_model(
        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
    new_timerange = TimeRange("date", "date", 1516406400, 1516579200)
    freqai.extract_data_and_train_model(
        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
    model_path = freqai.dk.get_full_models_path(freqai_conf)
    (backtesting_timerange,
     pairs_end_dates) = freqai.dk.get_timerange_and_assets_end_dates_from_ready_models(
                        models_path=model_path)
    assert len(pairs_end_dates["ADA"]) == 2
    assert backtesting_timerange.startts == 1516406400
    assert backtesting_timerange.stopts == 1516838400
    backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf)
    assert backtesting_string_timerange == '20180120-20180125'
@pytest.mark.parametrize('model', [
    'LightGBMRegressor'
    ])
 def test_get_full_model_path(mocker, freqai_conf, model):
    freqai_conf.update({"freqaimodel": model})
    freqai_conf.update({"timerange": "20180110-20180130"})
    freqai_conf.update({"strategy": "freqai_test_strat"})
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
    strategy.freqai_info = freqai_conf.get("freqai", {})
    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)
    timerange = TimeRange.parse_timerange("20180110-20180130")
    freqai.dd.load_all_pair_histories(timerange, freqai.dk)
    freqai.dd.pair_dict = MagicMock()
    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")
    freqai.extract_data_and_train_model(
        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
    model_path = freqai.dk.get_full_models_path(freqai_conf)
    assert model_path.is_dir() is True
--- a/tests/test_configuration.py
+++ b/tests/test_configuration.py
@@ -1538,3 +1538,85 @@ def test_flat_vars_to_nested_dict(caplog):
    assert log_has("Loading variable 'FREQTRADE__EXCHANGE__SOME_SETTING'", caplog)
    assert not log_has("Loading variable 'NOT_RELEVANT'", caplog)
 def test_setup_hyperopt_freqai(mocker, default_conf, caplog) -> None:
    """
    Config validation must reject a hyperopt run that combines an enabled
    freqai section with the --analyze-per-epoch argument.
    """
    patched_configuration_load_config_file(mocker, default_conf)
    # Replace the directory-creation helpers with pass-through stand-ins.
    mocker.patch('freqtrade.configuration.configuration.create_datadir', new=lambda c, x: x)
    mocker.patch(
        'freqtrade.configuration.configuration.create_userdata_dir',
        new=lambda x, *args, **kwargs: Path(x),
    )

    cli_args = [
        'hyperopt', '--config', 'config.json', '--strategy', CURRENT_TEST_STRATEGY,
        '--timerange', '20220801-20220805', "--freqaimodel", "LightGBMRegressorMultiTarget",
        "--analyze-per-epoch",
    ]
    parsed_args = Arguments(cli_args).get_parsed_arg()
    config = Configuration(parsed_args).get_config()
    config['freqai'] = {"enabled": True}

    expected_message = r".*analyze-per-epoch parameter is not supported.*"
    with pytest.raises(OperationalException, match=expected_message):
        validate_config_consistency(config)
 def test_setup_freqai_backtesting(mocker, default_conf, caplog) -> None:
    """
    Walk validate_config_consistency through three rejected backtesting
    configurations that involve --freqai-backtest-live-models and
    --timerange, checking the error message raised at each stage.
    """
    patched_configuration_load_config_file(mocker, default_conf)
    # Replace the directory-creation helpers with pass-through stand-ins.
    mocker.patch('freqtrade.configuration.configuration.create_datadir', new=lambda c, x: x)
    mocker.patch(
        'freqtrade.configuration.configuration.create_userdata_dir',
        new=lambda x, *args, **kwargs: Path(x),
    )

    cli_args = [
        'backtesting', '--config', 'config.json', '--strategy', CURRENT_TEST_STRATEGY,
        '--timerange', '20220801-20220805', "--freqaimodel", "LightGBMRegressorMultiTarget",
        "--freqai-backtest-live-models",
    ]
    parsed_args = Arguments(cli_args).get_parsed_arg()
    config = Configuration(parsed_args).get_config()
    config['runmode'] = RunMode.BACKTEST

    # Stage 1: the parsed config as-is, before the test adds a freqai section.
    live_models_only = r".*--freqai-backtest-live-models parameter is only.*"
    with pytest.raises(OperationalException, match=live_models_only):
        validate_config_consistency(config)

    # Stage 2: freqai enabled, with both the timerange and the live-models flag from the CLI.
    conf = deepcopy(config)
    conf['freqai'] = {"enabled": True}
    timerange_unsupported = r".* timerange parameter is not supported with .*"
    with pytest.raises(OperationalException, match=timerange_unsupported):
        validate_config_consistency(conf)

    # Stage 3: freqai enabled, live-models flag off and no timerange given.
    conf['timerange'] = None
    conf['freqai_backtest_live_models'] = False
    timerange_required = r".* pass --timerange if you intend to use FreqAI .*"
    with pytest.raises(OperationalException, match=timerange_required):
        validate_config_consistency(conf)