From 6606a0113f11570a15f61a3597a2619b5169d7c5 Mon Sep 17 00:00:00 2001 From: Wagner Costa Santos Date: Thu, 20 Oct 2022 14:53:25 -0300 Subject: [PATCH] refactoring - remove unnecessary config file --- freqtrade/freqai/data_kitchen.py | 114 +++++++++++++++++++--- freqtrade/freqai/freqai_util.py | 122 ------------------------ freqtrade/freqai/utils.py | 16 ++++ freqtrade/optimize/backtesting.py | 4 +- tests/freqai/test_freqai_datakitchen.py | 106 +++++++++++++++++++- tests/freqai/test_freqai_util.py | 112 ---------------------- 6 files changed, 224 insertions(+), 250 deletions(-) delete mode 100644 freqtrade/freqai/freqai_util.py delete mode 100644 tests/freqai/test_freqai_util.py diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index d2dc1fc63..ec0503f0b 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1,7 +1,7 @@ import copy import logging import shutil -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone from math import cos, sin from pathlib import Path from typing import Any, Dict, List, Tuple @@ -21,7 +21,6 @@ from freqtrade.configuration import TimeRange from freqtrade.constants import Config from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds -from freqtrade.freqai import freqai_util from freqtrade.strategy.interface import IStrategy @@ -84,16 +83,17 @@ class FreqaiDataKitchen: self.backtest_live_models = config.get("freqai_backtest_live_models", False) if not self.live: - self.full_path = freqai_util.get_full_models_path(self.config) - self.full_timerange = self.create_fulltimerange( - self.config["timerange"], self.freqai_config.get("train_period_days", 0) - ) + self.full_path = self.get_full_models_path(self.config) if self.backtest_live_models: - self.set_timerange_from_ready_models() - (self.training_timeranges, - self.backtesting_timeranges) = self.split_timerange_live_models() + if self.pair: + self.set_timerange_from_ready_models() + (self.training_timeranges, + self.backtesting_timeranges) = self.split_timerange_live_models() else: + self.full_timerange = self.create_fulltimerange( + self.config["timerange"], self.freqai_config.get("train_period_days", 0) + ) (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange( self.full_timerange, config["freqai"]["train_period_days"], @@ -117,7 +117,7 @@ class FreqaiDataKitchen: :param metadata: dict = strategy furnished pair metadata :param trained_timestamp: int = timestamp of most recent training """ - self.full_path = freqai_util.get_full_models_path(self.config) + self.full_path = self.get_full_models_path(self.config) self.data_path = Path( self.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp}" @@ -1300,10 +1300,102 @@ class FreqaiDataKitchen: def set_timerange_from_ready_models(self): backtesting_timerange, \ assets_end_dates = ( - freqai_util.get_timerange_and_assets_end_dates_from_ready_models(self.full_path)) + self.get_timerange_and_assets_end_dates_from_ready_models(self.full_path)) self.backtest_live_models_data = { "backtesting_timerange": backtesting_timerange, "assets_end_dates": assets_end_dates } return + + def get_full_models_path(self, config: Config) -> Path: + """ + Returns default FreqAI model path + :param config: Configuration dictionary + """ + freqai_config: Dict[str, Any] = config["freqai"] + return Path( + config["user_data_dir"] / "models" / str(freqai_config.get("identifier")) + ) + + def get_timerange_and_assets_end_dates_from_ready_models( + self, models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]: + """ + Returns timerange information based on a FreqAI model directory + :param models_path: FreqAI model path + + :return: a Tuple with (Timerange calculated from directory and + a Dict with pair and model end training dates info) + """ + all_models_end_dates = [] + assets_end_dates: Dict[str, Any] = self.get_assets_timestamps_training_from_ready_models( + models_path) + for key in assets_end_dates: + for model_end_date in assets_end_dates[key]: + if model_end_date not in all_models_end_dates: + all_models_end_dates.append(model_end_date) + + if len(all_models_end_dates) == 0: + raise OperationalException( + 'At least 1 saved model is required to ' + 'run backtest with the freqai-backtest-live-models option' + ) + + if len(all_models_end_dates) == 1: + logger.warning( + "Only 1 model was found. Backtesting will run with the " + "timerange from the end of the training date to the current date" + ) + + finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp()) + if len(all_models_end_dates) > 1: + # After last model end date, use the same period from previous model + # to finish the backtest + all_models_end_dates.sort(reverse=True) + finish_timestamp = all_models_end_dates[0] + \ + (all_models_end_dates[0] - all_models_end_dates[1]) + + all_models_end_dates.append(finish_timestamp) + all_models_end_dates.sort() + start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates), + timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) + end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates), + timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) + + # add 1 day to string timerange to ensure BT module will load all dataframe data + end_date = end_date + timedelta(days=1) + backtesting_timerange = TimeRange( + 'date', 'date', int(start_date.timestamp()), int(end_date.timestamp()) + ) + return backtesting_timerange, assets_end_dates + + def get_assets_timestamps_training_from_ready_models( + self, models_path: Path) -> Dict[str, Any]: + """ + Scan the models path and returns all assets end training dates (timestamp) + :param models_path: FreqAI model path + + :return: a Dict with asset and model end training dates info + """ + assets_end_dates: Dict[str, Any] = {} + if not models_path.is_dir(): + raise OperationalException( + 'Model folders not found. Saved models are required ' + 'to run backtest with the freqai-backtest-live-models option' + ) + for model_dir in models_path.iterdir(): + if str(model_dir.name).startswith("sub-train"): + model_end_date = int(model_dir.name.split("_")[1]) + asset = model_dir.name.split("_")[0].replace("sub-train-", "") + model_file_name = ( + f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" + "_model.joblib" + ) + + model_path_file = Path(model_dir / model_file_name) + if model_path_file.is_file(): + if asset not in assets_end_dates: + assets_end_dates[asset] = [] + assets_end_dates[asset].append(model_end_date) + + return assets_end_dates diff --git a/freqtrade/freqai/freqai_util.py b/freqtrade/freqai/freqai_util.py deleted file mode 100644 index 23df61335..000000000 --- a/freqtrade/freqai/freqai_util.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -FreqAI generic functions -""" -import logging -from datetime import datetime, timedelta, timezone -from pathlib import Path -from typing import Any, Dict, Tuple - -from freqtrade.configuration import TimeRange -from freqtrade.constants import Config -from freqtrade.exceptions import OperationalException - - -logger = logging.getLogger(__name__) - - -def get_full_models_path(config: Config) -> Path: - """ - Returns default FreqAI model path - :param config: Configuration dictionary - """ - freqai_config: Dict[str, Any] = config["freqai"] - return Path( - config["user_data_dir"] / "models" / str(freqai_config.get("identifier")) - ) - - -def get_timerange_and_assets_end_dates_from_ready_models( - models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]: - """ - Returns timerange information based on a FreqAI model directory - :param models_path: FreqAI model path - - :return: a Tuple with (Timerange calculated from directory and - a Dict with pair and model end training dates info) - """ - all_models_end_dates = [] - assets_end_dates: Dict[str, Any] = get_assets_timestamps_training_from_ready_models(models_path) - for key in assets_end_dates: - for model_end_date in assets_end_dates[key]: - if model_end_date not in all_models_end_dates: - all_models_end_dates.append(model_end_date) - - if len(all_models_end_dates) == 0: - raise OperationalException( - 'At least 1 saved model is required to ' - 'run backtest with the freqai-backtest-live-models option' - ) - - if len(all_models_end_dates) == 1: - logger.warning( - "Only 1 model was found. Backtesting will run with the " - "timerange from the end of the training date to the current date" - ) - - finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp()) - if len(all_models_end_dates) > 1: - # After last model end date, use the same period from previous model - # to finish the backtest - all_models_end_dates.sort(reverse=True) - finish_timestamp = all_models_end_dates[0] + \ - (all_models_end_dates[0] - all_models_end_dates[1]) - - all_models_end_dates.append(finish_timestamp) - all_models_end_dates.sort() - start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates), - timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) - end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates), - timezone.utc).timetuple()[:3], tzinfo=timezone.utc)) - - # add 1 day to string timerange to ensure BT module will load all dataframe data - end_date = end_date + timedelta(days=1) - backtesting_timerange = TimeRange( - 'date', 'date', int(start_date.timestamp()), int(end_date.timestamp()) - ) - return backtesting_timerange, assets_end_dates - - -def get_assets_timestamps_training_from_ready_models(models_path: Path) -> Dict[str, Any]: - """ - Scan the models path and returns all assets end training dates (timestamp) - :param models_path: FreqAI model path - - :return: a Dict with asset and model end training dates info - """ - assets_end_dates: Dict[str, Any] = {} - if not models_path.is_dir(): - raise OperationalException( - 'Model folders not found. Saved models are required ' - 'to run backtest with the freqai-backtest-live-models option' - ) - for model_dir in models_path.iterdir(): - if str(model_dir.name).startswith("sub-train"): - model_end_date = int(model_dir.name.split("_")[1]) - asset = model_dir.name.split("_")[0].replace("sub-train-", "") - model_file_name = ( - f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" - "_model.joblib" - ) - - model_path_file = Path(model_dir / model_file_name) - if model_path_file.is_file(): - if asset not in assets_end_dates: - assets_end_dates[asset] = [] - assets_end_dates[asset].append(model_end_date) - - return assets_end_dates - - -def get_timerange_backtest_live_models(config: Config): - """ - Returns a formated timerange for backtest live/ready models - :param config: Configuration dictionary - - :return: a string timerange (format example: '20220801-20220822') - """ - models_path = get_full_models_path(config) - timerange, _ = get_timerange_and_assets_end_dates_from_ready_models(models_path) - start_date = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) - end_date = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) - tr = f"{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}" - return tr diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index 22bc1e06e..d3ba09592 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -191,3 +191,19 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen, fig.update_layout(title_text=f"Best and worst features by importance {pair}") label = label.replace('&', '').replace('%', '') # escape two FreqAI specific characters store_plot_file(fig, f"{dk.model_filename}-{label}.html", dk.data_path) + + +def get_timerange_backtest_live_models(config: Config): + """ + Returns a formated timerange for backtest live/ready models + :param config: Configuration dictionary + + :return: a string timerange (format example: '20220801-20220822') + """ + dk = FreqaiDataKitchen(config) + models_path = dk.get_full_models_path(config) + timerange, _ = dk.get_timerange_and_assets_end_dates_from_ready_models(models_path) + start_date = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) + end_date = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) + tr = f"{start_date.strftime('%Y%m%d')}-{end_date.strftime('%Y%m%d')}" + return tr diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index e789ece79..91d60847e 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -135,8 +135,8 @@ class Backtesting: self.precision_mode = self.exchange.precisionMode if self.config.get('freqai_backtest_live_models', False): - from freqtrade.freqai import freqai_util - self.config['timerange'] = freqai_util.get_timerange_backtest_live_models(self.config) + from freqtrade.freqai import utils + self.config['timerange'] = utils.get_timerange_backtest_live_models(self.config) self.timerange = TimeRange.parse_timerange( None if self.config.get('timerange') is None else str(self.config.get('timerange'))) diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index f60b29bf1..43acae7c3 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -1,13 +1,18 @@ import shutil from datetime import datetime, timedelta, timezone from pathlib import Path +from unittest.mock import MagicMock import pytest +from freqtrade.configuration import TimeRange +from freqtrade.data.dataprovider import DataProvider from freqtrade.exceptions import OperationalException -from tests.conftest import log_has_re -from tests.freqai.conftest import (get_patched_data_kitchen, make_data_dictionary, - make_unfiltered_dataframe) +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.utils import get_timerange_backtest_live_models +from tests.conftest import get_patched_exchange, log_has_re +from tests.freqai.conftest import (get_patched_data_kitchen, get_patched_freqai_strategy, + make_data_dictionary, make_unfiltered_dataframe) @pytest.mark.parametrize( @@ -158,3 +163,98 @@ def test_make_train_test_datasets(mocker, freqai_conf): assert data_dictionary assert len(data_dictionary) == 7 assert len(data_dictionary['train_features'].index) == 1916 + + +def test_get_pairs_timestamp_validation(mocker, freqai_conf): + exchange = get_patched_exchange(mocker, freqai_conf) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + freqai_conf['freqai'].update({"identifier": "invalid_id"}) + model_path = freqai.dk.get_full_models_path(freqai_conf) + with pytest.raises( + OperationalException, + match=r'.*required to run backtest with the freqai-backtest-live-models.*' + ): + freqai.dk.get_assets_timestamps_training_from_ready_models(model_path) + + +@pytest.mark.parametrize('model', [ + 'LightGBMRegressor' + ]) +def test_get_timerange_from_ready_models(mocker, freqai_conf, model): + freqai_conf.update({"freqaimodel": model}) + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"strategy": "freqai_test_strat"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180101-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180101-20180130") + + # 1516233600 (2018-01-18 00:00) - Start Training 1 + # 1516406400 (2018-01-20 00:00) - End Training 1 (Backtest slice 1) + # 1516579200 (2018-01-22 00:00) - End Training 2 (Backtest slice 2) + # 1516838400 (2018-01-25 00:00) - End Timerange + + new_timerange = TimeRange("date", "date", 1516233600, 1516406400) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + new_timerange = TimeRange("date", "date", 1516406400, 1516579200) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + model_path = freqai.dk.get_full_models_path(freqai_conf) + (backtesting_timerange, + pairs_end_dates) = freqai.dk.get_timerange_and_assets_end_dates_from_ready_models( + models_path=model_path) + + assert len(pairs_end_dates["ADA"]) == 2 + assert backtesting_timerange.startts == 1516406400 + assert backtesting_timerange.stopts == 1516838400 + + backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf) + assert backtesting_string_timerange == '20180120-20180125' + + +@pytest.mark.parametrize('model', [ + 'LightGBMRegressor' + ]) +def test_get_full_model_path(mocker, freqai_conf, model): + freqai_conf.update({"freqaimodel": model}) + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"strategy": "freqai_test_strat"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + model_path = freqai.dk.get_full_models_path(freqai_conf) + assert model_path.is_dir() is True diff --git a/tests/freqai/test_freqai_util.py b/tests/freqai/test_freqai_util.py deleted file mode 100644 index c548fd3f6..000000000 --- a/tests/freqai/test_freqai_util.py +++ /dev/null @@ -1,112 +0,0 @@ -import platform -from unittest.mock import MagicMock - -import pytest - -from freqtrade.configuration import TimeRange -from freqtrade.data.dataprovider import DataProvider -from freqtrade.exceptions import OperationalException -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.freqai_util import (get_assets_timestamps_training_from_ready_models, - get_full_models_path, - get_timerange_and_assets_end_dates_from_ready_models, - get_timerange_backtest_live_models) -from tests.conftest import get_patched_exchange -from tests.freqai.conftest import get_patched_freqai_strategy - - -def is_arm() -> bool: - machine = platform.machine() - return "arm" in machine or "aarch64" in machine - - -@pytest.mark.parametrize('model', [ - 'LightGBMRegressor' - ]) -def test_get_full_model_path(mocker, freqai_conf, model): - if is_arm() and model == 'CatboostRegressor': - pytest.skip("CatBoost is not supported on ARM") - - freqai_conf.update({"freqaimodel": model}) - freqai_conf.update({"timerange": "20180110-20180130"}) - freqai_conf.update({"strategy": "freqai_test_strat"}) - - strategy = get_patched_freqai_strategy(mocker, freqai_conf) - exchange = get_patched_exchange(mocker, freqai_conf) - strategy.dp = DataProvider(freqai_conf, exchange) - strategy.freqai_info = freqai_conf.get("freqai", {}) - freqai = strategy.freqai - freqai.live = True - freqai.dk = FreqaiDataKitchen(freqai_conf) - timerange = TimeRange.parse_timerange("20180110-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) - - freqai.dd.pair_dict = MagicMock() - - data_load_timerange = TimeRange.parse_timerange("20180110-20180130") - new_timerange = TimeRange.parse_timerange("20180120-20180130") - - freqai.extract_data_and_train_model( - new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - - model_path = get_full_models_path(freqai_conf) - assert model_path.is_dir() is True - - -def test_get_pairs_timestamp_validation(mocker, freqai_conf): - model_path = get_full_models_path(freqai_conf) - with pytest.raises( - OperationalException, - match=r'.*required to run backtest with the freqai-backtest-live-models.*' - ): - get_assets_timestamps_training_from_ready_models(model_path) - - -@pytest.mark.parametrize('model', [ - 'LightGBMRegressor' - ]) -def test_get_timerange_from_ready_models(mocker, freqai_conf, model): - if is_arm() and model == 'CatboostRegressor': - pytest.skip("CatBoost is not supported on ARM") - - freqai_conf.update({"freqaimodel": model}) - freqai_conf.update({"timerange": "20180110-20180130"}) - freqai_conf.update({"strategy": "freqai_test_strat"}) - - strategy = get_patched_freqai_strategy(mocker, freqai_conf) - exchange = get_patched_exchange(mocker, freqai_conf) - strategy.dp = DataProvider(freqai_conf, exchange) - strategy.freqai_info = freqai_conf.get("freqai", {}) - freqai = strategy.freqai - freqai.live = True - freqai.dk = FreqaiDataKitchen(freqai_conf) - timerange = TimeRange.parse_timerange("20180101-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) - - freqai.dd.pair_dict = MagicMock() - - data_load_timerange = TimeRange.parse_timerange("20180101-20180130") - - # 1516233600 (2018-01-18 00:00) - Start Training 1 - # 1516406400 (2018-01-20 00:00) - End Training 1 (Backtest slice 1) - # 1516579200 (2018-01-22 00:00) - End Training 2 (Backtest slice 2) - # 1516838400 (2018-01-25 00:00) - End Timerange - - new_timerange = TimeRange("date", "date", 1516233600, 1516406400) - freqai.extract_data_and_train_model( - new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - - new_timerange = TimeRange("date", "date", 1516406400, 1516579200) - freqai.extract_data_and_train_model( - new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) - - model_path = get_full_models_path(freqai_conf) - (backtesting_timerange, - pairs_end_dates) = get_timerange_and_assets_end_dates_from_ready_models(models_path=model_path) - - assert len(pairs_end_dates["ADA"]) == 2 - assert backtesting_timerange.startts == 1516406400 - assert backtesting_timerange.stopts == 1516838400 - - backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf) - assert backtesting_string_timerange == '20180120-20180125'