Merge pull request #5391 from freqtrade/fix/get_analyzed_pair

Fix recently introduced lookahead bias in backtesting
2021-08-09 15:25:12 +02:00
parent 58e4255ae3 895b912c71
commit 8bb42a07ce
2 changed files with 17 additions and 5 deletions
--- a/freqtrade/optimize/backtesting.py
+++ b/freqtrade/optimize/backtesting.py
@@ -466,6 +466,8 @@ class Backtesting:
            for i, pair in enumerate(data):
                row_index = indexes[pair]
                try:
+                    # Row is treated as "current incomplete candle".
+                    # Buy / sell signals are shifted by 1 to compensate for this.
                    row = data[pair][row_index]
                except IndexError:
                    # missing Data for one pair at the end.
@@ -476,8 +478,8 @@ class Backtesting:
                if row[DATE_IDX] > tmp:
                    continue

-                row_index += 1
                self.dataprovider._set_dataframe_max_index(row_index)
+                row_index += 1
                indexes[pair] = row_index

                # without positionstacking, we can only have one open trade per pair.
--- a/tests/optimize/test_backtesting.py
+++ b/tests/optimize/test_backtesting.py
@@ -1,6 +1,7 @@
 # pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument

 import random
+from datetime import timedelta
 from pathlib import Path
 from unittest.mock import MagicMock, PropertyMock

@@ -741,8 +742,13 @@ def test_backtest_alternate_buy_sell(default_conf, fee, mocker, testdatadir):
    # 100 buys signals
    results = result['results']
    assert len(results) == 100
-    # Cached data should be 200 (no change since required_startup is 0)
-    assert len(backtesting.dataprovider.get_analyzed_dataframe('UNITTEST/BTC', '1m')[0]) == 200
+    # Cached data should be 199 (missing 1 candle at the start)
+    analyzed_df = backtesting.dataprovider.get_analyzed_dataframe('UNITTEST/BTC', '1m')[0]
+    assert len(analyzed_df) == 199
+    # Expect last candle to be 1 below end date (as the last candle is assumed as "incomplete"
+    # during backtesting)
+    expected_last_candle_date = backtest_conf['end_date'] - timedelta(minutes=1)
+    assert analyzed_df.iloc[-1]['date'].to_pydatetime() == expected_last_candle_date

    # One trade was force-closed at the end
    assert len(results.loc[results['is_open']]) == 0
@@ -774,6 +780,7 @@ def test_backtest_multi_pair(default_conf, fee, mocker, tres, pair, testdatadir)
    data = trim_dictlist(data, -500)

    # Remove data for one pair from the beginning of the data
+    if tres > 0:
-    data[pair] = data[pair][tres:].reset_index()
+        data[pair] = data[pair][tres:].reset_index()
    default_conf['timeframe'] = '5m'

@@ -800,8 +807,11 @@ def test_backtest_multi_pair(default_conf, fee, mocker, tres, pair, testdatadir)
    assert len(evaluate_result_multi(results['results'], '5m', 3)) == 0

    # Cached data correctly removed amounts
-    removed_candles = len(data[pair]) - 1 - backtesting.strategy.startup_candle_count
+    offset = 2 if tres == 0 else 1
+    removed_candles = len(data[pair]) - offset - backtesting.strategy.startup_candle_count
    assert len(backtesting.dataprovider.get_analyzed_dataframe(pair, '5m')[0]) == removed_candles
+    assert len(backtesting.dataprovider.get_analyzed_dataframe(
+        'NXT/BTC', '5m')[0]) == len(data['NXT/BTC']) - 2 - backtesting.strategy.startup_candle_count

    backtest_conf = {
        'processed': processed,