From 16861db653ec8166f73fc8480894f186a137e7bd Mon Sep 17 00:00:00 2001 From: Rokas Kupstys Date: Thu, 6 Jan 2022 11:53:11 +0200 Subject: [PATCH] Implement previous backtest result reuse when config and strategy did not change. --- docs/backtesting.md | 5 ++ freqtrade/commands/arguments.py | 2 +- freqtrade/commands/cli_options.py | 5 ++ freqtrade/configuration/configuration.py | 3 ++ freqtrade/data/btanalysis.py | 67 +++++++++++++++++++++++- freqtrade/misc.py | 33 +++++++++++- freqtrade/optimize/backtesting.py | 51 +++++++++++++++--- freqtrade/optimize/optimize_reports.py | 19 ++++++- tests/optimize/test_optimize_reports.py | 10 ++-- 9 files changed, 179 insertions(+), 16 deletions(-) diff --git a/docs/backtesting.md b/docs/backtesting.md index 001941993..ee930db34 100644 --- a/docs/backtesting.md +++ b/docs/backtesting.md @@ -76,6 +76,7 @@ optional arguments: _today.json` --breakdown {day,week,month} [{day,week,month} ...] Show backtesting breakdown per [day, week, month]. + --no-cache Do not reuse cached backtest results. Common arguments: -v, --verbose Verbose mode (-vv for more, -vvv to get all messages). @@ -457,6 +458,10 @@ freqtrade backtesting --strategy MyAwesomeStrategy --breakdown day month The output will show a table containing the realized absolute Profit (in stake currency) for the given timeperiod, as well as wins, draws and losses that materialized (closed) on this day. +### Backtest result caching + +To save time, backtesting will by default reuse a cached result when the backtested strategy and config match those of a previous backtest. To force a new backtest despite an existing result for an identical run, specify the `--no-cache` parameter. + ### Further backtest-result analysis To further analyze your backtest results, you can [export the trades](#exporting-trades-to-file). 
diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py index 032f7dd51..119a45662 100644 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -24,7 +24,7 @@ ARGS_COMMON_OPTIMIZE = ["timeframe", "timerange", "dataformat_ohlcv", ARGS_BACKTEST = ARGS_COMMON_OPTIMIZE + ["position_stacking", "use_max_market_positions", "enable_protections", "dry_run_wallet", "timeframe_detail", "strategy_list", "export", "exportfilename", - "backtest_breakdown"] + "backtest_breakdown", "no_backtest_cache"] ARGS_HYPEROPT = ARGS_COMMON_OPTIMIZE + ["hyperopt", "hyperopt_path", "position_stacking", "use_max_market_positions", diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py index 6aa4ed363..0fb93f0b8 100644 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -205,6 +205,11 @@ AVAILABLE_CLI_OPTIONS = { nargs='+', choices=constants.BACKTEST_BREAKDOWNS ), + "no_backtest_cache": Arg( + '--no-cache', + help='Do not reuse cached backtest results.', + action='store_true' + ), # Edge "stoploss_range": Arg( '--stoplosses', diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py index f5a674878..066097916 100644 --- a/freqtrade/configuration/configuration.py +++ b/freqtrade/configuration/configuration.py @@ -276,6 +276,9 @@ class Configuration: self._args_to_config(config, argname='backtest_breakdown', logstring='Parameter --breakdown detected ...') + self._args_to_config(config, argname='no_backtest_cache', + logstring='Parameter --no-cache detected ...') + self._args_to_config(config, argname='disableparamexport', logstring='Parameter --disableparamexport detected: {} ...') diff --git a/freqtrade/data/btanalysis.py b/freqtrade/data/btanalysis.py index 10dba8683..27ce8e0ba 100644 --- a/freqtrade/data/btanalysis.py +++ b/freqtrade/data/btanalysis.py @@ -2,6 +2,7 @@ Helpers when analyzing backtest data """ import logging +from copy import copy from 
pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union @@ -10,7 +11,7 @@ import pandas as pd from freqtrade.constants import LAST_BT_RESULT_FN from freqtrade.exceptions import OperationalException -from freqtrade.misc import json_load +from freqtrade.misc import get_backtest_metadata_filename, json_load from freqtrade.persistence import LocalTrade, Trade, init_db @@ -102,6 +103,23 @@ def get_latest_hyperopt_file(directory: Union[Path, str], predef_filename: str = return directory / get_latest_hyperopt_filename(directory) +def load_backtest_metadata(filename: Union[Path, str]) -> Dict[str, Any]: + """ + Read metadata dictionary from backtest results file without reading and deserializing entire + file. + :param filename: path to backtest results file. + :return: metadata dict, or an empty dict if the metadata file is not present. + """ + filename = get_backtest_metadata_filename(filename) + try: + with filename.open() as fp: + return json_load(fp) + except FileNotFoundError: + return {} + except Exception as e: + raise OperationalException('Unexpected error while loading backtest metadata.') from e + + def load_backtest_stats(filename: Union[Path, str]) -> Dict[str, Any]: """ Load backtest statistics file. @@ -118,9 +136,56 @@ def load_backtest_stats(filename: Union[Path, str]) -> Dict[str, Any]: with filename.open() as file: data = json_load(file) + # Legacy list format does not contain metadata. + if isinstance(data, dict): + data['metadata'] = load_backtest_metadata(filename) + return data +def find_existing_backtest_stats(dirname: Union[Path, str], + run_ids: Dict[str, str]) -> Dict[str, Any]: + """ + Find existing backtest stats that match specified run IDs and load them. + :param dirname: pathlib.Path object, or string pointing to the directory to search. + :param run_ids: {strategy_name: id_string} dictionary. + :return: results dict. + """ + # Copy so we can modify this dict without affecting parent scope. 
+ run_ids = copy(run_ids) + dirname = Path(dirname) + results: Dict[str, Any] = { + 'metadata': {}, + 'strategy': {}, + 'strategy_comparison': [], + } + + # Weird glob expression here avoids including .meta.json files. + for filename in reversed(sorted(dirname.glob('backtest-result-*-[0-9][0-9].json'))): + metadata = load_backtest_metadata(filename) + if not metadata: + # Files are sorted from newest to oldest. When file without metadata is encountered it + # is safe to assume older files will also not have any metadata. + break + + for strategy_name, run_id in list(run_ids.items()): + if metadata.get(strategy_name, {}).get('run_id') == run_id: + # TODO: load_backtest_stats() may load an old version of backtest which is + # incompatible with current version. + del run_ids[strategy_name] + bt_data = load_backtest_stats(filename) + for k in ('metadata', 'strategy'): + results[k][strategy_name] = bt_data[k][strategy_name] + comparison = bt_data['strategy_comparison'] + for i in range(len(comparison)): + if comparison[i]['key'] == strategy_name: + results['strategy_comparison'].append(comparison[i]) + break + if len(run_ids) == 0: + break + return results + + def load_backtest_data(filename: Union[Path, str], strategy: Optional[str] = None) -> pd.DataFrame: """ Load backtest data file. 
diff --git a/freqtrade/misc.py b/freqtrade/misc.py index 6f439866b..f09e5ee47 100644 --- a/freqtrade/misc.py +++ b/freqtrade/misc.py @@ -2,11 +2,13 @@ Various tool function for Freqtrade and scripts """ import gzip +import hashlib import logging import re +from copy import deepcopy from datetime import datetime from pathlib import Path -from typing import Any, Iterator, List +from typing import Any, Iterator, List, Union from typing.io import IO from urllib.parse import urlparse @@ -228,3 +230,32 @@ def parse_db_uri_for_logging(uri: str): return uri pwd = parsed_db_uri.netloc.split(':')[1].split('@')[0] return parsed_db_uri.geturl().replace(f':{pwd}@', ':*****@') + + +def get_strategy_run_id(strategy) -> str: + """ + Generate unique identification hash for a backtest run. Identical config and strategy file will + always return an identical hash. + :param strategy: strategy object. + :return: hex string id. + """ + digest = hashlib.sha1() + config = deepcopy(strategy.config) + + # Options that have no impact on results of individual backtest. 
+ not_important_keys = ('strategy_list', 'original_config', 'telegram', 'api_server') + for k in not_important_keys: + if k in config: + del config[k] + + digest.update(rapidjson.dumps(config, default=str, + number_mode=rapidjson.NM_NATIVE).encode('utf-8')) + with open(strategy.__file__, 'rb') as fp: + digest.update(fp.read()) + return digest.hexdigest().lower() + + +def get_backtest_metadata_filename(filename: Union[Path, str]) -> Path: + """Return metadata filename for specified backtest results file.""" + filename = Path(filename) + return filename.parent / Path(f'{filename.stem}.meta{filename.suffix}') diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index 754b46d81..950531637 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -14,12 +14,13 @@ from pandas import DataFrame from freqtrade.configuration import TimeRange, validate_config_consistency from freqtrade.constants import DATETIME_PRINT_FORMAT from freqtrade.data import history -from freqtrade.data.btanalysis import trade_list_to_dataframe +from freqtrade.data.btanalysis import find_existing_backtest_stats, trade_list_to_dataframe from freqtrade.data.converter import trim_dataframe, trim_dataframes from freqtrade.data.dataprovider import DataProvider from freqtrade.enums import BacktestState, SellType from freqtrade.exceptions import DependencyException, OperationalException from freqtrade.exchange import timeframe_to_minutes, timeframe_to_seconds +from freqtrade.misc import get_strategy_run_id from freqtrade.mixins import LoggingMixin from freqtrade.optimize.bt_progress import BTProgress from freqtrade.optimize.optimize_reports import (generate_backtest_stats, show_backtest_results, @@ -60,7 +61,7 @@ class Backtesting: LoggingMixin.show_output = False self.config = config - self.results: Optional[Dict[str, Any]] = None + self.results: Dict[str, Any] = {} config['dry_run'] = True self.strategylist: List[IStrategy] = [] @@ -727,6 
+728,7 @@ class Backtesting: ) backtest_end_time = datetime.now(timezone.utc) results.update({ + 'run_id': get_strategy_run_id(strat), 'backtest_start_time': int(backtest_start_time.timestamp()), 'backtest_end_time': int(backtest_end_time.timestamp()), }) @@ -745,15 +747,50 @@ class Backtesting: self.load_bt_data_detail() logger.info("Dataload complete. Calculating indicators") - for strat in self.strategylist: - min_date, max_date = self.backtest_one_strategy(strat, data, timerange) - if len(self.strategylist) > 0: + run_ids = { + strategy.get_strategy_name(): get_strategy_run_id(strategy) + for strategy in self.strategylist + } - self.results = generate_backtest_stats(data, self.all_results, - min_date=min_date, max_date=max_date) + # Load previous result that will be updated incrementally. + if self.config.get('timerange', '-').endswith('-'): + self.config['no_backtest_cache'] = True + logger.warning('Backtest result caching disabled due to use of open-ended timerange.') + + if not self.config.get('no_backtest_cache', False): + self.results = find_existing_backtest_stats( + self.config['user_data_dir'] / 'backtest_results', run_ids) + + for strat in self.strategylist: + if self.results and strat.get_strategy_name() in self.results['strategy']: + # When previous result hash matches - reuse that result and skip backtesting. + logger.info(f'Reusing result of previous backtest for {strat.get_strategy_name()}') + continue + min_date, max_date = self.backtest_one_strategy(strat, data, timerange) + + # Update old results with new ones. 
+ if len(self.all_results) > 0: + results = generate_backtest_stats( + data, self.all_results, min_date=min_date, max_date=max_date) + if self.results: + self.results['metadata'].update(results['metadata']) + self.results['strategy'].update(results['strategy']) + self.results['strategy_comparison'].extend(results['strategy_comparison']) + else: + self.results = results if self.config.get('export', 'none') == 'trades': store_backtest_stats(self.config['exportfilename'], self.results) + # Results may be mixed up now. Sort them so they follow --strategy-list order. + if 'strategy_list' in self.config and len(self.results) > 0: + self.results['strategy_comparison'] = sorted( + self.results['strategy_comparison'], + key=lambda c: self.config['strategy_list'].index(c['key'])) + self.results['strategy'] = dict( + sorted(self.results['strategy'].items(), + key=lambda kv: self.config['strategy_list'].index(kv[0]))) + + if len(self.strategylist) > 0: # Show backtest results show_backtest_results(self.config, self.results) diff --git a/freqtrade/optimize/optimize_reports.py b/freqtrade/optimize/optimize_reports.py index d0ffe49a9..46930d7b1 100644 --- a/freqtrade/optimize/optimize_reports.py +++ b/freqtrade/optimize/optimize_reports.py @@ -11,7 +11,8 @@ from tabulate import tabulate from freqtrade.constants import DATETIME_PRINT_FORMAT, LAST_BT_RESULT_FN, UNLIMITED_STAKE_AMOUNT from freqtrade.data.btanalysis import (calculate_csum, calculate_market_change, calculate_max_drawdown) -from freqtrade.misc import decimals_per_coin, file_dump_json, round_coin_value +from freqtrade.misc import (decimals_per_coin, file_dump_json, get_backtest_metadata_filename, + round_coin_value) logger = logging.getLogger(__name__) @@ -33,6 +34,11 @@ def store_backtest_stats(recordfilename: Path, stats: Dict[str, DataFrame]) -> N recordfilename.parent, f'{recordfilename.stem}-{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}' ).with_suffix(recordfilename.suffix) + + # Store metadata separately. 
+ file_dump_json(get_backtest_metadata_filename(filename), stats['metadata']) + del stats['metadata'] + file_dump_json(filename, stats) latest_filename = Path.joinpath(filename.parent, LAST_BT_RESULT_FN) @@ -509,16 +515,25 @@ def generate_backtest_stats(btdata: Dict[str, DataFrame], :param max_date: Backtest end date :return: Dictionary containing results per strategy and a strategy summary. """ - result: Dict[str, Any] = {'strategy': {}} + result: Dict[str, Any] = { + 'metadata': {}, + 'strategy': {}, + 'strategy_comparison': [], + } market_change = calculate_market_change(btdata, 'close') + metadata = {} pairlist = list(btdata.keys()) for strategy, content in all_results.items(): strat_stats = generate_strategy_stats(pairlist, strategy, content, min_date, max_date, market_change=market_change) + metadata[strategy] = { + 'run_id': content['run_id'] + } result['strategy'][strategy] = strat_stats strategy_results = generate_strategy_comparison(bt_stats=result['strategy']) + result['metadata'] = metadata result['strategy_comparison'] = strategy_results return result diff --git a/tests/optimize/test_optimize_reports.py b/tests/optimize/test_optimize_reports.py index ed939d6b0..68257f4d8 100644 --- a/tests/optimize/test_optimize_reports.py +++ b/tests/optimize/test_optimize_reports.py @@ -84,6 +84,7 @@ def test_generate_backtest_stats(default_conf, testdatadir, tmpdir): 'rejected_signals': 20, 'backtest_start_time': Arrow.utcnow().int_timestamp, 'backtest_end_time': Arrow.utcnow().int_timestamp, + 'run_id': '123', } } timerange = TimeRange.parse_timerange('1510688220-1510700340') @@ -132,6 +133,7 @@ def test_generate_backtest_stats(default_conf, testdatadir, tmpdir): 'rejected_signals': 20, 'backtest_start_time': Arrow.utcnow().int_timestamp, 'backtest_end_time': Arrow.utcnow().int_timestamp, + 'run_id': '124', } } @@ -178,16 +180,16 @@ def test_store_backtest_stats(testdatadir, mocker): dump_mock = mocker.patch('freqtrade.optimize.optimize_reports.file_dump_json') - 
store_backtest_stats(testdatadir, {}) + store_backtest_stats(testdatadir, {'metadata': {}}) - assert dump_mock.call_count == 2 + assert dump_mock.call_count == 3 assert isinstance(dump_mock.call_args_list[0][0][0], Path) assert str(dump_mock.call_args_list[0][0][0]).startswith(str(testdatadir/'backtest-result')) dump_mock.reset_mock() filename = testdatadir / 'testresult.json' - store_backtest_stats(filename, {}) - assert dump_mock.call_count == 2 + store_backtest_stats(filename, {'metadata': {}}) + assert dump_mock.call_count == 3 assert isinstance(dump_mock.call_args_list[0][0][0], Path) # result will be testdatadir / testresult-.json assert str(dump_mock.call_args_list[0][0][0]).startswith(str(testdatadir / 'testresult'))