Implement previous backtest result reuse when config and strategy did not change.

2022-01-06 11:53:11 +02:00 · 2022-01-06 11:53:11 +02:00 · 16861db653
commit 16861db653
parent 6684bff963
9 changed files with 179 additions and 16 deletions
--- a/docs/backtesting.md
+++ b/docs/backtesting.md
@ -76,6 +76,7 @@ optional arguments:
                        _today.json`
  --breakdown {day,week,month} [{day,week,month} ...]
                        Show backtesting breakdown per [day, week, month].
  --no-cache            Do not reuse cached backtest results.
 Common arguments:
  -v, --verbose         Verbose mode (-vv for more, -vvv to get all messages).
@ -457,6 +458,10 @@ freqtrade backtesting --strategy MyAwesomeStrategy --breakdown day month
 The output will show a table containing the realized absolute Profit (in stake currency) for the given timeperiod, as well as wins, draws and losses that materialized (closed) on this day.
 ### Backtest result caching
 To save time, backtesting reuses a cached result by default when the strategy and config match those of a previous backtest. To force a new backtest even though a result for an identical run already exists, specify the `--no-cache` parameter. Caching is disabled automatically when an open-ended timerange is used.
 ### Further backtest-result analysis
 To further analyze your backtest results, you can [export the trades](#exporting-trades-to-file).
--- a/freqtrade/commands/arguments.py
+++ b/freqtrade/commands/arguments.py
@ -24,7 +24,7 @@ ARGS_COMMON_OPTIMIZE = ["timeframe", "timerange", "dataformat_ohlcv",
 ARGS_BACKTEST = ARGS_COMMON_OPTIMIZE + ["position_stacking", "use_max_market_positions",
                                        "enable_protections", "dry_run_wallet", "timeframe_detail",
                                        "strategy_list", "export", "exportfilename",
-                                        "backtest_breakdown"]
+                                        "backtest_breakdown", "no_backtest_cache"]
 ARGS_HYPEROPT = ARGS_COMMON_OPTIMIZE + ["hyperopt", "hyperopt_path",
                                        "position_stacking", "use_max_market_positions",
--- a/freqtrade/commands/cli_options.py
+++ b/freqtrade/commands/cli_options.py
@ -205,6 +205,11 @@ AVAILABLE_CLI_OPTIONS = {
        nargs='+',
        choices=constants.BACKTEST_BREAKDOWNS
    ),
    "no_backtest_cache": Arg(
        '--no-cache',
        help='Do not reuse cached backtest results.',
        action='store_true'
    ),
    # Edge
    "stoploss_range": Arg(
        '--stoplosses',
--- a/freqtrade/configuration/configuration.py
+++ b/freqtrade/configuration/configuration.py
@ -276,6 +276,9 @@ class Configuration:
        self._args_to_config(config, argname='backtest_breakdown',
                             logstring='Parameter --breakdown detected ...')
        self._args_to_config(config, argname='no_backtest_cache',
                             logstring='Parameter --no-cache detected ...')
        self._args_to_config(config, argname='disableparamexport',
                             logstring='Parameter --disableparamexport detected: {} ...')
--- a/freqtrade/data/btanalysis.py
+++ b/freqtrade/data/btanalysis.py
@ -2,6 +2,7 @@
 Helpers when analyzing backtest data
 """
 import logging
 from copy import copy
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union
@ -10,7 +11,7 @@ import pandas as pd
 from freqtrade.constants import LAST_BT_RESULT_FN
 from freqtrade.exceptions import OperationalException
-from freqtrade.misc import json_load
+from freqtrade.misc import get_backtest_metadata_filename, json_load
 from freqtrade.persistence import LocalTrade, Trade, init_db
@ -102,6 +103,23 @@ def get_latest_hyperopt_file(directory: Union[Path, str], predef_filename: str =
    return directory / get_latest_hyperopt_filename(directory)
 def load_backtest_metadata(filename: Union[Path, str]) -> Dict[str, Any]:
    """
    Load the metadata that accompanies a backtest results file.

    Only the companion metadata file (located via get_backtest_metadata_filename()) is opened,
    so the results file itself is never read or deserialized.
    :param filename: path to backtest results file.
    :return: metadata dict, or an empty dict if the metadata file does not exist.
    :raises OperationalException: on any other error while opening or loading the metadata file.
    """
    meta_path = get_backtest_metadata_filename(filename)
    try:
        with meta_path.open() as handle:
            metadata = json_load(handle)
    except FileNotFoundError:
        # A missing metadata file is an expected situation, not an error.
        return {}
    except Exception as exc:
        raise OperationalException('Unexpected error while loading backtest metadata.') from exc
    return metadata
 def load_backtest_stats(filename: Union[Path, str]) -> Dict[str, Any]:
    """
    Load backtest statistics file.
@ -118,9 +136,56 @@ def load_backtest_stats(filename: Union[Path, str]) -> Dict[str, Any]:
    with filename.open() as file:
        data = json_load(file)
    # Legacy list format does not contain metadata.
    if isinstance(data, dict):
        data['metadata'] = load_backtest_metadata(filename)
    return data
 def find_existing_backtest_stats(dirname: Union[Path, str],
                                 run_ids: Dict[str, str]) -> Dict[str, Any]:
    """
    Find existing backtest stats that match specified run IDs and load them.
    :param dirname: pathlib.Path object, or string pointing to the directory that holds the
        backtest result files.
    :param run_ids: {strategy_name: id_string} dictionary. It is not modified.
    :return: results dict with 'metadata', 'strategy' and 'strategy_comparison' keys, filled
        only for the strategies whose run ID was found in a stored result.
    """
    # Private copy so matched strategies can be dropped without affecting the caller's dict.
    pending = dict(run_ids)
    results: Dict[str, Any] = {
        'metadata': {},
        'strategy': {},
        'strategy_comparison': [],
    }
    # Weird glob expression here avoids including .meta.json files.
    candidates = sorted(Path(dirname).glob('backtest-result-*-[0-9][0-9].json'), reverse=True)
    for filename in candidates:
        if not pending:
            # Everything requested has been matched - no need to read any further file.
            break
        metadata = load_backtest_metadata(filename)
        if not metadata:
            # Files are sorted from newest to oldest. When file without metadata is encountered it
            # is safe to assume older files will also not have any metadata.
            break
        matched = [name for name, run_id in pending.items()
                   if metadata.get(name, {}).get('run_id') == run_id]
        if not matched:
            continue
        # TODO: load_backtest_stats() may load an old version of backtest which is
        #  incompatible with current version.
        # Load the results file once, even when several strategies match it.
        bt_data = load_backtest_stats(filename)
        for strategy_name in matched:
            del pending[strategy_name]
            for k in ('metadata', 'strategy'):
                results[k][strategy_name] = bt_data[k][strategy_name]
            # Carry over the first comparison row of this strategy, if there is one.
            row = next((entry for entry in bt_data['strategy_comparison']
                        if entry['key'] == strategy_name), None)
            if row is not None:
                results['strategy_comparison'].append(row)
    return results
 def load_backtest_data(filename: Union[Path, str], strategy: Optional[str] = None) -> pd.DataFrame:
    """
    Load backtest data file.
--- a/freqtrade/misc.py
+++ b/freqtrade/misc.py
@ -2,11 +2,13 @@
 Various tool function for Freqtrade and scripts
 """
 import gzip
 import hashlib
 import logging
 import re
 from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Iterator, List
+from typing import Any, Iterator, List, Union
 from typing.io import IO
 from urllib.parse import urlparse
@ -228,3 +230,32 @@ def parse_db_uri_for_logging(uri: str):
        return uri
    pwd = parsed_db_uri.netloc.split(':')[1].split('@')[0]
    return parsed_db_uri.geturl().replace(f':{pwd}@', ':*****@')
 def get_strategy_run_id(strategy) -> str:
    """
    Compute the identification hash of a backtest run.

    The hash is a SHA1 digest over the serialized strategy config (minus keys that do not
    influence an individual backtest) followed by the raw bytes of the strategy source file, so
    an identical config and strategy file always yield an identical hash.
    :param strategy: strategy object; its `config` and `__file__` attributes are read.
    :return: hex string id.
    """
    # Options that have no impact on results of individual backtest.
    ignored_keys = ('strategy_list', 'original_config', 'telegram', 'api_server')
    # Deep copy so that dropping keys never touches the strategy's live config.
    relevant_config = deepcopy(strategy.config)
    for ignored in ignored_keys:
        relevant_config.pop(ignored, None)
    serialized = rapidjson.dumps(relevant_config, default=str,
                                 number_mode=rapidjson.NM_NATIVE)
    hasher = hashlib.sha1()
    hasher.update(serialized.encode('utf-8'))
    with open(strategy.__file__, 'rb') as source:
        hasher.update(source.read())
    return hasher.hexdigest().lower()
 def get_backtest_metadata_filename(filename: Union[Path, str]) -> Path:
    """
    Build the path of the metadata file that accompanies a backtest results file.

    The metadata file sits in the same directory and has `.meta` inserted before the final
    suffix, e.g. `result.json` -> `result.meta.json`.
    :param filename: path to backtest results file.
    :return: path of the matching metadata file.
    """
    results_path = Path(filename)
    meta_name = f'{results_path.stem}.meta{results_path.suffix}'
    return results_path.parent.joinpath(meta_name)
--- a/freqtrade/optimize/backtesting.py
+++ b/freqtrade/optimize/backtesting.py
@ -14,12 +14,13 @@ from pandas import DataFrame
 from freqtrade.configuration import TimeRange, validate_config_consistency
 from freqtrade.constants import DATETIME_PRINT_FORMAT
 from freqtrade.data import history
-from freqtrade.data.btanalysis import trade_list_to_dataframe
+from freqtrade.data.btanalysis import find_existing_backtest_stats, trade_list_to_dataframe
 from freqtrade.data.converter import trim_dataframe, trim_dataframes
 from freqtrade.data.dataprovider import DataProvider
 from freqtrade.enums import BacktestState, SellType
 from freqtrade.exceptions import DependencyException, OperationalException
 from freqtrade.exchange import timeframe_to_minutes, timeframe_to_seconds
 from freqtrade.misc import get_strategy_run_id
 from freqtrade.mixins import LoggingMixin
 from freqtrade.optimize.bt_progress import BTProgress
 from freqtrade.optimize.optimize_reports import (generate_backtest_stats, show_backtest_results,
@ -60,7 +61,7 @@ class Backtesting:
        LoggingMixin.show_output = False
        self.config = config
-        self.results: Optional[Dict[str, Any]] = None
+        self.results: Dict[str, Any] = {}
        config['dry_run'] = True
        self.strategylist: List[IStrategy] = []
@ -727,6 +728,7 @@ class Backtesting:
        )
        backtest_end_time = datetime.now(timezone.utc)
        results.update({
            'run_id': get_strategy_run_id(strat),
            'backtest_start_time': int(backtest_start_time.timestamp()),
            'backtest_end_time': int(backtest_end_time.timestamp()),
        })
@ -745,15 +747,50 @@ class Backtesting:
        self.load_bt_data_detail()
        logger.info("Dataload complete. Calculating indicators")
-        for strat in self.strategylist:
+        run_ids = {
-            min_date, max_date = self.backtest_one_strategy(strat, data, timerange)
+            strategy.get_strategy_name(): get_strategy_run_id(strategy)
-        if len(self.strategylist) > 0:
+            for strategy in self.strategylist
        }
-            self.results = generate_backtest_stats(data, self.all_results,
+        # Load previous result that will be updated incrementally.
-                                                   min_date=min_date, max_date=max_date)
+        if self.config.get('timerange', '-').endswith('-'):
            self.config['no_backtest_cache'] = True
            logger.warning('Backtest result caching disabled due to use of open-ended timerange.')
        if not self.config.get('no_backtest_cache', False):
            self.results = find_existing_backtest_stats(
                self.config['user_data_dir'] / 'backtest_results', run_ids)
        for strat in self.strategylist:
            if self.results and strat.get_strategy_name() in self.results['strategy']:
                # When previous result hash matches - reuse that result and skip backtesting.
                logger.info(f'Reusing result of previous backtest for {strat.get_strategy_name()}')
                continue
            min_date, max_date = self.backtest_one_strategy(strat, data, timerange)
        # Update old results with new ones.
        if len(self.all_results) > 0:
            results = generate_backtest_stats(
                data, self.all_results, min_date=min_date, max_date=max_date)
            if self.results:
                self.results['metadata'].update(results['metadata'])
                self.results['strategy'].update(results['strategy'])
                self.results['strategy_comparison'].extend(results['strategy_comparison'])
            else:
                self.results = results
            if self.config.get('export', 'none') == 'trades':
                store_backtest_stats(self.config['exportfilename'], self.results)
        # Results may be mixed up now. Sort them so they follow --strategy-list order.
        if 'strategy_list' in self.config and len(self.results) > 0:
            self.results['strategy_comparison'] = sorted(
                self.results['strategy_comparison'],
                key=lambda c: self.config['strategy_list'].index(c['key']))
            self.results['strategy'] = dict(
                sorted(self.results['strategy'].items(),
                       key=lambda kv: self.config['strategy_list'].index(kv[0])))
        if len(self.strategylist) > 0:
            # Show backtest results
            show_backtest_results(self.config, self.results)
--- a/freqtrade/optimize/optimize_reports.py
+++ b/freqtrade/optimize/optimize_reports.py
@ -11,7 +11,8 @@ from tabulate import tabulate
 from freqtrade.constants import DATETIME_PRINT_FORMAT, LAST_BT_RESULT_FN, UNLIMITED_STAKE_AMOUNT
 from freqtrade.data.btanalysis import (calculate_csum, calculate_market_change,
                                       calculate_max_drawdown)
-from freqtrade.misc import decimals_per_coin, file_dump_json, round_coin_value
+from freqtrade.misc import (decimals_per_coin, file_dump_json, get_backtest_metadata_filename,
                            round_coin_value)
 logger = logging.getLogger(__name__)
@ -33,6 +34,11 @@ def store_backtest_stats(recordfilename: Path, stats: Dict[str, DataFrame]) -> N
            recordfilename.parent,
            f'{recordfilename.stem}-{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}'
        ).with_suffix(recordfilename.suffix)
    # Store metadata separately.
    file_dump_json(get_backtest_metadata_filename(filename), stats['metadata'])
    del stats['metadata']
    file_dump_json(filename, stats)
    latest_filename = Path.joinpath(filename.parent, LAST_BT_RESULT_FN)
@ -509,16 +515,25 @@ def generate_backtest_stats(btdata: Dict[str, DataFrame],
    :param max_date: Backtest end date
    :return: Dictionary containing results per strategy and a strategy summary.
    """
-    result: Dict[str, Any] = {'strategy': {}}
+    result: Dict[str, Any] = {
        'metadata': {},
        'strategy': {},
        'strategy_comparison': [],
    }
    market_change = calculate_market_change(btdata, 'close')
    metadata = {}
    pairlist = list(btdata.keys())
    for strategy, content in all_results.items():
        strat_stats = generate_strategy_stats(pairlist, strategy, content,
                                              min_date, max_date, market_change=market_change)
        metadata[strategy] = {
            'run_id': content['run_id']
        }
        result['strategy'][strategy] = strat_stats
    strategy_results = generate_strategy_comparison(bt_stats=result['strategy'])
    result['metadata'] = metadata
    result['strategy_comparison'] = strategy_results
    return result
--- a/tests/optimize/test_optimize_reports.py
+++ b/tests/optimize/test_optimize_reports.py
@ -84,6 +84,7 @@ def test_generate_backtest_stats(default_conf, testdatadir, tmpdir):
        'rejected_signals': 20,
        'backtest_start_time': Arrow.utcnow().int_timestamp,
        'backtest_end_time': Arrow.utcnow().int_timestamp,
        'run_id': '123',
        }
        }
    timerange = TimeRange.parse_timerange('1510688220-1510700340')
@ -132,6 +133,7 @@ def test_generate_backtest_stats(default_conf, testdatadir, tmpdir):
        'rejected_signals': 20,
        'backtest_start_time': Arrow.utcnow().int_timestamp,
        'backtest_end_time': Arrow.utcnow().int_timestamp,
        'run_id': '124',
        }
    }
@ -178,16 +180,16 @@ def test_store_backtest_stats(testdatadir, mocker):
    dump_mock = mocker.patch('freqtrade.optimize.optimize_reports.file_dump_json')
-    store_backtest_stats(testdatadir, {})
+    store_backtest_stats(testdatadir, {'metadata': {}})
-    assert dump_mock.call_count == 2
+    assert dump_mock.call_count == 3
    assert isinstance(dump_mock.call_args_list[0][0][0], Path)
    assert str(dump_mock.call_args_list[0][0][0]).startswith(str(testdatadir/'backtest-result'))
    dump_mock.reset_mock()
    filename = testdatadir / 'testresult.json'
-    store_backtest_stats(filename, {})
+    store_backtest_stats(filename, {'metadata': {}})
-    assert dump_mock.call_count == 2
+    assert dump_mock.call_count == 3
    assert isinstance(dump_mock.call_args_list[0][0][0], Path)
    # result will be testdatadir / testresult-<timestamp>.json
    assert str(dump_mock.call_args_list[0][0][0]).startswith(str(testdatadir / 'testresult'))