From 16861db653ec8166f73fc8480894f186a137e7bd Mon Sep 17 00:00:00 2001
From: Rokas Kupstys <rokups@zoho.com>
Date: Thu, 6 Jan 2022 11:53:11 +0200
Subject: [PATCH] Implement previous backtest result reuse when config and
 strategy did not change.

---
 docs/backtesting.md                      |  5 ++
 freqtrade/commands/arguments.py          |  2 +-
 freqtrade/commands/cli_options.py        |  5 ++
 freqtrade/configuration/configuration.py |  3 ++
 freqtrade/data/btanalysis.py             | 67 +++++++++++++++++++++++-
 freqtrade/misc.py                        | 33 +++++++++++-
 freqtrade/optimize/backtesting.py        | 51 +++++++++++++++---
 freqtrade/optimize/optimize_reports.py   | 19 ++++++-
 tests/optimize/test_optimize_reports.py  | 10 ++--
 9 files changed, 179 insertions(+), 16 deletions(-)

diff --git a/docs/backtesting.md b/docs/backtesting.md
index 001941993..ee930db34 100644
--- a/docs/backtesting.md
+++ b/docs/backtesting.md
@@ -76,6 +76,7 @@ optional arguments:
                         _today.json`
   --breakdown {day,week,month} [{day,week,month} ...]
                         Show backtesting breakdown per [day, week, month].
+  --no-cache            Do not reuse cached backtest results.
 
 Common arguments:
   -v, --verbose         Verbose mode (-vv for more, -vvv to get all messages).
@@ -457,6 +458,10 @@ freqtrade backtesting --strategy MyAwesomeStrategy --breakdown day month
 
 The output will show a table containing the realized absolute Profit (in stake currency) for the given timeperiod, as well as wins, draws and losses that materialized (closed) on this day.
 
+### Backtest result caching
+
+To save time, backtesting reuses a cached result by default when the strategy and config match those of a previous backtest. To force a new backtest even though a result for an identical run already exists, specify the `--no-cache` parameter. Caching is disabled automatically when the timerange is open-ended or not specified.
+
 ### Further backtest-result analysis
 
 To further analyze your backtest results, you can [export the trades](#exporting-trades-to-file).
diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py
index 032f7dd51..119a45662 100644
--- a/freqtrade/commands/arguments.py
+++ b/freqtrade/commands/arguments.py
@@ -24,7 +24,7 @@ ARGS_COMMON_OPTIMIZE = ["timeframe", "timerange", "dataformat_ohlcv",
 ARGS_BACKTEST = ARGS_COMMON_OPTIMIZE + ["position_stacking", "use_max_market_positions",
                                         "enable_protections", "dry_run_wallet", "timeframe_detail",
                                         "strategy_list", "export", "exportfilename",
-                                        "backtest_breakdown"]
+                                        "backtest_breakdown", "no_backtest_cache"]
 
 ARGS_HYPEROPT = ARGS_COMMON_OPTIMIZE + ["hyperopt", "hyperopt_path",
                                         "position_stacking", "use_max_market_positions",
diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py
index 6aa4ed363..0fb93f0b8 100644
--- a/freqtrade/commands/cli_options.py
+++ b/freqtrade/commands/cli_options.py
@@ -205,6 +205,11 @@ AVAILABLE_CLI_OPTIONS = {
         nargs='+',
         choices=constants.BACKTEST_BREAKDOWNS
     ),
+    "no_backtest_cache": Arg(
+        '--no-cache',
+        help='Do not reuse cached backtest results.',
+        action='store_true'
+    ),
     # Edge
     "stoploss_range": Arg(
         '--stoplosses',
diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py
index f5a674878..066097916 100644
--- a/freqtrade/configuration/configuration.py
+++ b/freqtrade/configuration/configuration.py
@@ -276,6 +276,9 @@ class Configuration:
         self._args_to_config(config, argname='backtest_breakdown',
                              logstring='Parameter --breakdown detected ...')
 
+        self._args_to_config(config, argname='no_backtest_cache',
+                             logstring='Parameter --no-cache detected ...')
+
         self._args_to_config(config, argname='disableparamexport',
                              logstring='Parameter --disableparamexport detected: {} ...')
 
diff --git a/freqtrade/data/btanalysis.py b/freqtrade/data/btanalysis.py
index 10dba8683..27ce8e0ba 100644
--- a/freqtrade/data/btanalysis.py
+++ b/freqtrade/data/btanalysis.py
@@ -2,6 +2,7 @@
 Helpers when analyzing backtest data
 """
 import logging
+from copy import copy
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -10,7 +11,7 @@ import pandas as pd
 
 from freqtrade.constants import LAST_BT_RESULT_FN
 from freqtrade.exceptions import OperationalException
-from freqtrade.misc import json_load
+from freqtrade.misc import get_backtest_metadata_filename, json_load
 from freqtrade.persistence import LocalTrade, Trade, init_db
 
 
@@ -102,6 +103,23 @@ def get_latest_hyperopt_file(directory: Union[Path, str], predef_filename: str =
     return directory / get_latest_hyperopt_filename(directory)
 
 
+def load_backtest_metadata(filename: Union[Path, str]) -> Dict[str, Any]:
+    """
+    Read the metadata dictionary stored in the companion `.meta` file of a backtest results file,
+    so that the results file itself does not have to be read and deserialized.
+    :param filename: path to backtest results file.
+    :return: metadata dict, or an empty dict if the metadata file does not exist.
+    """
+    filename = get_backtest_metadata_filename(filename)
+    try:
+        with filename.open() as fp:
+            return json_load(fp)
+    except FileNotFoundError:
+        return {}
+    except Exception as e:
+        raise OperationalException('Unexpected error while loading backtest metadata.') from e
+
+
 def load_backtest_stats(filename: Union[Path, str]) -> Dict[str, Any]:
     """
     Load backtest statistics file.
@@ -118,9 +136,56 @@ def load_backtest_stats(filename: Union[Path, str]) -> Dict[str, Any]:
     with filename.open() as file:
         data = json_load(file)
 
+    # Legacy list format does not contain metadata.
+    if isinstance(data, dict):
+        data['metadata'] = load_backtest_metadata(filename)
+
     return data
 
 
+def find_existing_backtest_stats(dirname: Union[Path, str],
+                                 run_ids: Dict[str, str]) -> Dict[str, Any]:
+    """
+    Find existing backtest stats that match specified run IDs and load them.
+    :param dirname: pathlib.Path object, or string pointing to the backtest results directory.
+    :param run_ids: {strategy_name: id_string} dictionary.
+    :return: results dict with 'metadata', 'strategy' and 'strategy_comparison' of matched runs.
+    """
+    # Copy so we can modify this dict without affecting parent scope.
+    run_ids = copy(run_ids)
+    dirname = Path(dirname)
+    results: Dict[str, Any] = {
+        'metadata': {},
+        'strategy': {},
+        'strategy_comparison': [],
+    }
+
+    # Requiring the timestamp's two trailing digits right before .json skips .meta.json files.
+    for filename in reversed(sorted(dirname.glob('backtest-result-*-[0-9][0-9].json'))):
+        metadata = load_backtest_metadata(filename)
+        if not metadata:
+            # Files are sorted from newest to oldest. When file without metadata is encountered it
+            # is safe to assume older files will also not have any metadata.
+            break
+
+        for strategy_name, run_id in list(run_ids.items()):
+            if metadata.get(strategy_name, {}).get('run_id') == run_id:
+                # TODO: load_backtest_stats() may load an old version of backtest which is
+                #  incompatible with current version.
+                del run_ids[strategy_name]
+                bt_data = load_backtest_stats(filename)
+                for k in ('metadata', 'strategy'):
+                    results[k][strategy_name] = bt_data[k][strategy_name]
+                comparison = bt_data['strategy_comparison']
+                for i in range(len(comparison)):
+                    if comparison[i]['key'] == strategy_name:
+                        results['strategy_comparison'].append(comparison[i])
+                        break
+        if len(run_ids) == 0:
+            break
+    return results
+
+
 def load_backtest_data(filename: Union[Path, str], strategy: Optional[str] = None) -> pd.DataFrame:
     """
     Load backtest data file.
diff --git a/freqtrade/misc.py b/freqtrade/misc.py
index 6f439866b..f09e5ee47 100644
--- a/freqtrade/misc.py
+++ b/freqtrade/misc.py
@@ -2,11 +2,13 @@
 Various tool function for Freqtrade and scripts
 """
 import gzip
+import hashlib
 import logging
 import re
+from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Iterator, List
+from typing import Any, Iterator, List, Union
 from typing.io import IO
 from urllib.parse import urlparse
 
@@ -228,3 +230,32 @@ def parse_db_uri_for_logging(uri: str):
         return uri
     pwd = parsed_db_uri.netloc.split(':')[1].split('@')[0]
     return parsed_db_uri.geturl().replace(f':{pwd}@', ':*****@')
+
+
+def get_strategy_run_id(strategy) -> str:
+    """
+    Generate unique identification hash for a backtest run. Identical config and strategy file will
+    always return an identical hash.
+    :param strategy: strategy object.
+    :return: hex string id.
+    """
+    digest = hashlib.sha1()
+    config = deepcopy(strategy.config)
+
+    # Options that have no impact on results of individual backtest.
+    not_important_keys = ('strategy_list', 'original_config', 'telegram', 'api_server')
+    for k in not_important_keys:
+        if k in config:
+            del config[k]
+
+    digest.update(rapidjson.dumps(config, default=str,
+                                  number_mode=rapidjson.NM_NATIVE).encode('utf-8'))
+    with open(strategy.__file__, 'rb') as fp:
+        digest.update(fp.read())
+    return digest.hexdigest().lower()
+
+
+def get_backtest_metadata_filename(filename: Union[Path, str]) -> Path:
+    """Return metadata filename for specified backtest results file."""
+    filename = Path(filename)
+    return filename.parent / Path(f'{filename.stem}.meta{filename.suffix}')
diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py
index 754b46d81..950531637 100644
--- a/freqtrade/optimize/backtesting.py
+++ b/freqtrade/optimize/backtesting.py
@@ -14,12 +14,13 @@ from pandas import DataFrame
 from freqtrade.configuration import TimeRange, validate_config_consistency
 from freqtrade.constants import DATETIME_PRINT_FORMAT
 from freqtrade.data import history
-from freqtrade.data.btanalysis import trade_list_to_dataframe
+from freqtrade.data.btanalysis import find_existing_backtest_stats, trade_list_to_dataframe
 from freqtrade.data.converter import trim_dataframe, trim_dataframes
 from freqtrade.data.dataprovider import DataProvider
 from freqtrade.enums import BacktestState, SellType
 from freqtrade.exceptions import DependencyException, OperationalException
 from freqtrade.exchange import timeframe_to_minutes, timeframe_to_seconds
+from freqtrade.misc import get_strategy_run_id
 from freqtrade.mixins import LoggingMixin
 from freqtrade.optimize.bt_progress import BTProgress
 from freqtrade.optimize.optimize_reports import (generate_backtest_stats, show_backtest_results,
@@ -60,7 +61,7 @@ class Backtesting:
 
         LoggingMixin.show_output = False
         self.config = config
-        self.results: Optional[Dict[str, Any]] = None
+        self.results: Dict[str, Any] = {}
 
         config['dry_run'] = True
         self.strategylist: List[IStrategy] = []
@@ -727,6 +728,7 @@ class Backtesting:
         )
         backtest_end_time = datetime.now(timezone.utc)
         results.update({
+            'run_id': get_strategy_run_id(strat),
             'backtest_start_time': int(backtest_start_time.timestamp()),
             'backtest_end_time': int(backtest_end_time.timestamp()),
         })
@@ -745,15 +747,50 @@ class Backtesting:
         self.load_bt_data_detail()
         logger.info("Dataload complete. Calculating indicators")
 
-        for strat in self.strategylist:
-            min_date, max_date = self.backtest_one_strategy(strat, data, timerange)
-        if len(self.strategylist) > 0:
+        run_ids = {
+            strategy.get_strategy_name(): get_strategy_run_id(strategy)
+            for strategy in self.strategylist
+        }
 
-            self.results = generate_backtest_stats(data, self.all_results,
-                                                   min_date=min_date, max_date=max_date)
+        # Load previous result that will be updated incrementally.
+        if self.config.get('timerange', '-').endswith('-'):
+            self.config['no_backtest_cache'] = True
+            logger.warning('Backtest result caching disabled due to use of open-ended timerange.')
+
+        if not self.config.get('no_backtest_cache', False):
+            self.results = find_existing_backtest_stats(
+                self.config['user_data_dir'] / 'backtest_results', run_ids)
+
+        for strat in self.strategylist:
+            if self.results and strat.get_strategy_name() in self.results['strategy']:
+                # When previous result hash matches - reuse that result and skip backtesting.
+                logger.info(f'Reusing result of previous backtest for {strat.get_strategy_name()}')
+                continue
+            min_date, max_date = self.backtest_one_strategy(strat, data, timerange)
+
+        # Update old results with new ones.
+        if len(self.all_results) > 0:
+            results = generate_backtest_stats(
+                data, self.all_results, min_date=min_date, max_date=max_date)
+            if self.results:
+                self.results['metadata'].update(results['metadata'])
+                self.results['strategy'].update(results['strategy'])
+                self.results['strategy_comparison'].extend(results['strategy_comparison'])
+            else:
+                self.results = results
 
             if self.config.get('export', 'none') == 'trades':
                 store_backtest_stats(self.config['exportfilename'], self.results)
 
+        # Results may be mixed up now. Sort them so they follow --strategy-list order.
+        if 'strategy_list' in self.config and len(self.results) > 0:
+            self.results['strategy_comparison'] = sorted(
+                self.results['strategy_comparison'],
+                key=lambda c: self.config['strategy_list'].index(c['key']))
+            self.results['strategy'] = dict(
+                sorted(self.results['strategy'].items(),
+                       key=lambda kv: self.config['strategy_list'].index(kv[0])))
+
+        if len(self.strategylist) > 0:
             # Show backtest results
             show_backtest_results(self.config, self.results)
diff --git a/freqtrade/optimize/optimize_reports.py b/freqtrade/optimize/optimize_reports.py
index d0ffe49a9..46930d7b1 100644
--- a/freqtrade/optimize/optimize_reports.py
+++ b/freqtrade/optimize/optimize_reports.py
@@ -11,7 +11,8 @@ from tabulate import tabulate
 from freqtrade.constants import DATETIME_PRINT_FORMAT, LAST_BT_RESULT_FN, UNLIMITED_STAKE_AMOUNT
 from freqtrade.data.btanalysis import (calculate_csum, calculate_market_change,
                                        calculate_max_drawdown)
-from freqtrade.misc import decimals_per_coin, file_dump_json, round_coin_value
+from freqtrade.misc import (decimals_per_coin, file_dump_json, get_backtest_metadata_filename,
+                            round_coin_value)
 
 
 logger = logging.getLogger(__name__)
@@ -33,6 +34,11 @@ def store_backtest_stats(recordfilename: Path, stats: Dict[str, DataFrame]) -> N
             recordfilename.parent,
             f'{recordfilename.stem}-{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}'
         ).with_suffix(recordfilename.suffix)
+
+    # Store metadata separately.
+    file_dump_json(get_backtest_metadata_filename(filename), stats['metadata'])
+    del stats['metadata']
+
     file_dump_json(filename, stats)
 
     latest_filename = Path.joinpath(filename.parent, LAST_BT_RESULT_FN)
@@ -509,16 +515,25 @@ def generate_backtest_stats(btdata: Dict[str, DataFrame],
     :param max_date: Backtest end date
     :return: Dictionary containing results per strategy and a strategy summary.
     """
-    result: Dict[str, Any] = {'strategy': {}}
+    result: Dict[str, Any] = {
+        'metadata': {},
+        'strategy': {},
+        'strategy_comparison': [],
+    }
     market_change = calculate_market_change(btdata, 'close')
+    metadata = {}
     pairlist = list(btdata.keys())
     for strategy, content in all_results.items():
         strat_stats = generate_strategy_stats(pairlist, strategy, content,
                                               min_date, max_date, market_change=market_change)
+        metadata[strategy] = {
+            'run_id': content['run_id']
+        }
         result['strategy'][strategy] = strat_stats
 
     strategy_results = generate_strategy_comparison(bt_stats=result['strategy'])
 
+    result['metadata'] = metadata
     result['strategy_comparison'] = strategy_results
 
     return result
diff --git a/tests/optimize/test_optimize_reports.py b/tests/optimize/test_optimize_reports.py
index ed939d6b0..68257f4d8 100644
--- a/tests/optimize/test_optimize_reports.py
+++ b/tests/optimize/test_optimize_reports.py
@@ -84,6 +84,7 @@ def test_generate_backtest_stats(default_conf, testdatadir, tmpdir):
         'rejected_signals': 20,
         'backtest_start_time': Arrow.utcnow().int_timestamp,
         'backtest_end_time': Arrow.utcnow().int_timestamp,
+        'run_id': '123',
         }
         }
     timerange = TimeRange.parse_timerange('1510688220-1510700340')
@@ -132,6 +133,7 @@ def test_generate_backtest_stats(default_conf, testdatadir, tmpdir):
         'rejected_signals': 20,
         'backtest_start_time': Arrow.utcnow().int_timestamp,
         'backtest_end_time': Arrow.utcnow().int_timestamp,
+        'run_id': '124',
         }
     }
 
@@ -178,16 +180,16 @@ def test_store_backtest_stats(testdatadir, mocker):
 
     dump_mock = mocker.patch('freqtrade.optimize.optimize_reports.file_dump_json')
 
-    store_backtest_stats(testdatadir, {})
+    store_backtest_stats(testdatadir, {'metadata': {}})
 
-    assert dump_mock.call_count == 2
+    assert dump_mock.call_count == 3
     assert isinstance(dump_mock.call_args_list[0][0][0], Path)
     assert str(dump_mock.call_args_list[0][0][0]).startswith(str(testdatadir/'backtest-result'))
 
     dump_mock.reset_mock()
     filename = testdatadir / 'testresult.json'
-    store_backtest_stats(filename, {})
-    assert dump_mock.call_count == 2
+    store_backtest_stats(filename, {'metadata': {}})
+    assert dump_mock.call_count == 3
     assert isinstance(dump_mock.call_args_list[0][0][0], Path)
     # result will be testdatadir / testresult-<timestamp>.json
     assert str(dump_mock.call_args_list[0][0][0]).startswith(str(testdatadir / 'testresult'))