Merge pull request #1287 from freqtrade/backtest_data_validation
Backtest data validation
This commit is contained in:
@@ -10,8 +10,12 @@ except ImportError:
|
||||
_UJSON = False
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Tuple, Any
|
||||
import operator
|
||||
|
||||
import arrow
|
||||
from pandas import DataFrame
|
||||
|
||||
from freqtrade import misc, constants, OperationalException
|
||||
from freqtrade.exchange import Exchange
|
||||
@@ -59,6 +63,42 @@ def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
|
||||
return tickerlist[start_index:stop_index]
|
||||
|
||||
|
||||
def get_timeframe(data: Dict[str, DataFrame]) -> Tuple[arrow.Arrow, arrow.Arrow]:
    """
    Get the maximum timeframe for the given backtest data

    :param data: dictionary with preprocessed backtesting data
    :return: tuple containing min_date, max_date
    :raises ValueError: if data contains no dataframes
    """
    if not data:
        # min()/max() over an empty list would also raise ValueError, but with a
        # message that does not say what went wrong - fail explicitly instead.
        raise ValueError("Cannot determine timeframe: no backtest data given")

    # One (first date, last date) tuple per pair.
    timeframe = [
        (arrow.get(frame['date'].min()), arrow.get(frame['date'].max()))
        for frame in data.values()
    ]
    # Earliest start and latest end across all pairs.
    return min(timeframe, key=operator.itemgetter(0))[0], \
        max(timeframe, key=operator.itemgetter(1))[1]
|
||||
|
||||
|
||||
def validate_backtest_data(data: Dict[str, DataFrame], min_date: datetime,
                           max_date: datetime, ticker_interval_mins: int) -> bool:
    """
    Validates preprocessed backtesting data for missing frames and logs a warning
    for every pair that has fewer rows than the date range implies.

    :param data: dictionary with preprocessed backtesting data
    :param min_date: start-date of the data
    :param max_date: end-date of the data
    :param ticker_interval_mins: ticker interval in minutes
    :return: True if at least one pair has missing frames, False otherwise
    """
    # Whole minutes covered by the range, floor-divided by the interval length.
    # NOTE(review): a range that includes both end points holds one frame more
    # than this count - confirm that excluding the end candle is intended.
    elapsed_minutes = (max_date - min_date).total_seconds() // 60
    expected_frames = int(elapsed_minutes // ticker_interval_mins)

    incomplete = False
    for pair, frame in data.items():
        available = len(frame)
        if available >= expected_frames:
            continue
        incomplete = True
        logger.warning("%s has missing frames: expected %s, got %s, that's %s missing values",
                       pair, expected_frames, available, expected_frames - available)
    return incomplete
|
||||
|
||||
|
||||
def load_tickerdata_file(
|
||||
datadir: str, pair: str,
|
||||
ticker_interval: str,
|
||||
|
@@ -4,14 +4,12 @@
|
||||
This module contains the backtesting logic
|
||||
"""
|
||||
import logging
|
||||
import operator
|
||||
from argparse import Namespace
|
||||
from copy import deepcopy
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, NamedTuple, Optional, Tuple
|
||||
from typing import Any, Dict, List, NamedTuple, Optional
|
||||
|
||||
import arrow
|
||||
from pandas import DataFrame
|
||||
from tabulate import tabulate
|
||||
|
||||
@@ -88,24 +86,9 @@ class Backtesting(object):
|
||||
"""
|
||||
self.strategy = strategy
|
||||
self.ticker_interval = self.config.get('ticker_interval')
|
||||
self.tickerdata_to_dataframe = strategy.tickerdata_to_dataframe
|
||||
self.advise_buy = strategy.advise_buy
|
||||
self.advise_sell = strategy.advise_sell
|
||||
|
||||
@staticmethod
def get_timeframe(data: Dict[str, DataFrame]) -> Tuple[arrow.Arrow, arrow.Arrow]:
    """
    Determine the overall date range covered by the given backtest data

    :param data: dictionary with preprocessed backtesting data
    :return: tuple containing min_date, max_date
    """
    starts = []
    ends = []
    for frame in data.values():
        dates = frame['date']
        starts.append(arrow.get(dates.min()))
        ends.append(arrow.get(dates.max()))
    # Earliest first date and latest last date across all pairs.
    return min(starts), max(ends)
|
||||
|
||||
def _generate_text_table(self, data: Dict[str, Dict], results: DataFrame,
|
||||
skip_nan: bool = False) -> str:
|
||||
"""
|
||||
@@ -371,10 +354,12 @@ class Backtesting(object):
|
||||
self._set_strategy(strat)
|
||||
|
||||
# need to reprocess data every time to populate signals
|
||||
preprocessed = self.tickerdata_to_dataframe(data)
|
||||
preprocessed = self.strategy.tickerdata_to_dataframe(data)
|
||||
|
||||
# Print timeframe
|
||||
min_date, max_date = self.get_timeframe(preprocessed)
|
||||
min_date, max_date = optimize.get_timeframe(preprocessed)
|
||||
# Validate dataframe for missing values
|
||||
optimize.validate_backtest_data(preprocessed, min_date, max_date,
|
||||
constants.TICKER_INTERVAL_MINUTES[self.ticker_interval])
|
||||
logger.info(
|
||||
'Measuring data from %s up to %s (%s days)..',
|
||||
min_date.isoformat(),
|
||||
|
@@ -352,7 +352,7 @@ class Hyperopt(Backtesting):
|
||||
|
||||
if self.has_space('buy'):
|
||||
self.strategy.advise_indicators = Hyperopt.populate_indicators # type: ignore
|
||||
dump(self.tickerdata_to_dataframe(data), TICKERDATA_PICKLE)
|
||||
dump(self.strategy.tickerdata_to_dataframe(data), TICKERDATA_PICKLE)
|
||||
self.exchange = None # type: ignore
|
||||
self.load_previous_results()
|
||||
|
||||
|
@@ -89,7 +89,7 @@ def simple_backtest(config, contour, num_results, mocker) -> None:
|
||||
backtesting = Backtesting(config)
|
||||
|
||||
data = load_data_test(contour)
|
||||
processed = backtesting.tickerdata_to_dataframe(data)
|
||||
processed = backtesting.strategy.tickerdata_to_dataframe(data)
|
||||
assert isinstance(processed, dict)
|
||||
results = backtesting.backtest(
|
||||
{
|
||||
@@ -119,13 +119,13 @@ def _load_pair_as_ticks(pair, tickfreq):
|
||||
|
||||
# FIX: fixturize this?
|
||||
def _make_backtest_conf(mocker, conf=None, pair='UNITTEST/BTC', record=None):
|
||||
data = optimize.load_data(None, ticker_interval='8m', pairs=[pair])
|
||||
data = optimize.load_data(None, ticker_interval='1m', pairs=[pair])
|
||||
data = trim_dictlist(data, -201)
|
||||
patch_exchange(mocker)
|
||||
backtesting = Backtesting(conf)
|
||||
return {
|
||||
'stake_amount': conf['stake_amount'],
|
||||
'processed': backtesting.tickerdata_to_dataframe(data),
|
||||
'processed': backtesting.strategy.tickerdata_to_dataframe(data),
|
||||
'max_open_trades': 10,
|
||||
'position_stacking': False,
|
||||
'record': record
|
||||
@@ -313,7 +313,7 @@ def test_backtesting_init(mocker, default_conf) -> None:
|
||||
backtesting = Backtesting(default_conf)
|
||||
assert backtesting.config == default_conf
|
||||
assert backtesting.ticker_interval == '5m'
|
||||
assert callable(backtesting.tickerdata_to_dataframe)
|
||||
assert callable(backtesting.strategy.tickerdata_to_dataframe)
|
||||
assert callable(backtesting.advise_buy)
|
||||
assert callable(backtesting.advise_sell)
|
||||
get_fee.assert_called()
|
||||
@@ -327,7 +327,7 @@ def test_tickerdata_to_dataframe(default_conf, mocker) -> None:
|
||||
tickerlist = {'UNITTEST/BTC': tick}
|
||||
|
||||
backtesting = Backtesting(default_conf)
|
||||
data = backtesting.tickerdata_to_dataframe(tickerlist)
|
||||
data = backtesting.strategy.tickerdata_to_dataframe(tickerlist)
|
||||
assert len(data['UNITTEST/BTC']) == 99
|
||||
|
||||
# Load strategy to check that the Backtesting function and the strategy return the same result
|
||||
@@ -336,22 +336,6 @@ def test_tickerdata_to_dataframe(default_conf, mocker) -> None:
|
||||
assert data['UNITTEST/BTC'].equals(data2['UNITTEST/BTC'])
|
||||
|
||||
|
||||
def test_get_timeframe(default_conf, mocker) -> None:
    """Check the min/max dates reported for the 1m UNITTEST/BTC test data."""
    patch_exchange(mocker)
    backtesting = Backtesting(default_conf)

    raw_tickers = optimize.load_data(None, ticker_interval='1m', pairs=['UNITTEST/BTC'])
    data = backtesting.tickerdata_to_dataframe(raw_tickers)

    min_date, max_date = backtesting.get_timeframe(data)
    assert min_date.isoformat() == '2017-11-04T23:02:00+00:00'
    assert max_date.isoformat() == '2017-11-14T22:58:00+00:00'
|
||||
|
||||
|
||||
def test_generate_text_table(default_conf, mocker):
|
||||
patch_exchange(mocker)
|
||||
backtesting = Backtesting(default_conf)
|
||||
@@ -451,21 +435,21 @@ def test_generate_text_table_strategyn(default_conf, mocker):
|
||||
|
||||
|
||||
def test_backtesting_start(default_conf, mocker, caplog) -> None:
|
||||
def get_timeframe(input1, input2):
|
||||
def get_timeframe(input1):
|
||||
return Arrow(2017, 11, 14, 21, 17), Arrow(2017, 11, 14, 22, 59)
|
||||
|
||||
mocker.patch('freqtrade.optimize.load_data', mocked_load_data)
|
||||
mocker.patch('freqtrade.optimize.get_timeframe', get_timeframe)
|
||||
mocker.patch('freqtrade.exchange.Exchange.refresh_tickers', MagicMock())
|
||||
patch_exchange(mocker)
|
||||
mocker.patch.multiple(
|
||||
'freqtrade.optimize.backtesting.Backtesting',
|
||||
backtest=MagicMock(),
|
||||
_generate_text_table=MagicMock(return_value='1'),
|
||||
get_timeframe=get_timeframe,
|
||||
)
|
||||
|
||||
default_conf['exchange']['pair_whitelist'] = ['UNITTEST/BTC']
|
||||
default_conf['ticker_interval'] = 1
|
||||
default_conf['ticker_interval'] = "1m"
|
||||
default_conf['live'] = False
|
||||
default_conf['datadir'] = None
|
||||
default_conf['export'] = None
|
||||
@@ -486,17 +470,17 @@ def test_backtesting_start(default_conf, mocker, caplog) -> None:
|
||||
|
||||
|
||||
def test_backtesting_start_no_data(default_conf, mocker, caplog) -> None:
|
||||
def get_timeframe(input1, input2):
|
||||
def get_timeframe(input1):
|
||||
return Arrow(2017, 11, 14, 21, 17), Arrow(2017, 11, 14, 22, 59)
|
||||
|
||||
mocker.patch('freqtrade.optimize.load_data', MagicMock(return_value={}))
|
||||
mocker.patch('freqtrade.optimize.get_timeframe', get_timeframe)
|
||||
mocker.patch('freqtrade.exchange.Exchange.refresh_tickers', MagicMock())
|
||||
patch_exchange(mocker)
|
||||
mocker.patch.multiple(
|
||||
'freqtrade.optimize.backtesting.Backtesting',
|
||||
backtest=MagicMock(),
|
||||
_generate_text_table=MagicMock(return_value='1'),
|
||||
get_timeframe=get_timeframe,
|
||||
)
|
||||
|
||||
default_conf['exchange']['pair_whitelist'] = ['UNITTEST/BTC']
|
||||
@@ -520,7 +504,7 @@ def test_backtest(default_conf, fee, mocker) -> None:
|
||||
pair = 'UNITTEST/BTC'
|
||||
data = optimize.load_data(None, ticker_interval='5m', pairs=['UNITTEST/BTC'])
|
||||
data = trim_dictlist(data, -200)
|
||||
data_processed = backtesting.tickerdata_to_dataframe(data)
|
||||
data_processed = backtesting.strategy.tickerdata_to_dataframe(data)
|
||||
results = backtesting.backtest(
|
||||
{
|
||||
'stake_amount': default_conf['stake_amount'],
|
||||
@@ -571,7 +555,7 @@ def test_backtest_1min_ticker_interval(default_conf, fee, mocker) -> None:
|
||||
results = backtesting.backtest(
|
||||
{
|
||||
'stake_amount': default_conf['stake_amount'],
|
||||
'processed': backtesting.tickerdata_to_dataframe(data),
|
||||
'processed': backtesting.strategy.tickerdata_to_dataframe(data),
|
||||
'max_open_trades': 1,
|
||||
'position_stacking': False
|
||||
}
|
||||
@@ -585,7 +569,7 @@ def test_processed(default_conf, mocker) -> None:
|
||||
backtesting = Backtesting(default_conf)
|
||||
|
||||
dict_of_tickerrows = load_data_test('raise')
|
||||
dataframes = backtesting.tickerdata_to_dataframe(dict_of_tickerrows)
|
||||
dataframes = backtesting.strategy.tickerdata_to_dataframe(dict_of_tickerrows)
|
||||
dataframe = dataframes['UNITTEST/BTC']
|
||||
cols = dataframe.columns
|
||||
# assert the dataframe got some of the indicator columns
|
||||
|
@@ -194,7 +194,7 @@ def test_start_calls_optimizer(mocker, default_conf, caplog) -> None:
|
||||
default_conf.update({'spaces': 'all'})
|
||||
|
||||
hyperopt = Hyperopt(default_conf)
|
||||
hyperopt.tickerdata_to_dataframe = MagicMock()
|
||||
hyperopt.strategy.tickerdata_to_dataframe = MagicMock()
|
||||
|
||||
hyperopt.start()
|
||||
parallel.assert_called_once()
|
||||
@@ -242,7 +242,7 @@ def test_has_space(hyperopt):
|
||||
def test_populate_indicators(hyperopt) -> None:
|
||||
tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
|
||||
tickerlist = {'UNITTEST/BTC': tick}
|
||||
dataframes = hyperopt.tickerdata_to_dataframe(tickerlist)
|
||||
dataframes = hyperopt.strategy.tickerdata_to_dataframe(tickerlist)
|
||||
dataframe = hyperopt.populate_indicators(dataframes['UNITTEST/BTC'], {'pair': 'UNITTEST/BTC'})
|
||||
|
||||
# Check if some indicators are generated. We will not test all of them
|
||||
@@ -254,7 +254,7 @@ def test_populate_indicators(hyperopt) -> None:
|
||||
def test_buy_strategy_generator(hyperopt) -> None:
|
||||
tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
|
||||
tickerlist = {'UNITTEST/BTC': tick}
|
||||
dataframes = hyperopt.tickerdata_to_dataframe(tickerlist)
|
||||
dataframes = hyperopt.strategy.tickerdata_to_dataframe(tickerlist)
|
||||
dataframe = hyperopt.populate_indicators(dataframes['UNITTEST/BTC'], {'pair': 'UNITTEST/BTC'})
|
||||
|
||||
populate_buy_trend = hyperopt.buy_strategy_generator(
|
||||
|
@@ -7,7 +7,7 @@ from shutil import copyfile
|
||||
|
||||
import arrow
|
||||
|
||||
from freqtrade import optimize
|
||||
from freqtrade import optimize, constants
|
||||
from freqtrade.arguments import TimeRange
|
||||
from freqtrade.misc import file_dump_json
|
||||
from freqtrade.optimize.__init__ import (download_backtesting_testdata,
|
||||
@@ -15,7 +15,8 @@ from freqtrade.optimize.__init__ import (download_backtesting_testdata,
|
||||
load_cached_data_for_updating,
|
||||
load_tickerdata_file,
|
||||
make_testdata_path, trim_tickerlist)
|
||||
from freqtrade.tests.conftest import get_patched_exchange, log_has
|
||||
from freqtrade.strategy.default_strategy import DefaultStrategy
|
||||
from freqtrade.tests.conftest import get_patched_exchange, log_has, patch_exchange
|
||||
|
||||
# Change this if modifying UNITTEST/BTC testdatafile
|
||||
_BTC_UNITTEST_LENGTH = 13681
|
||||
@@ -433,3 +434,61 @@ def test_file_dump_json() -> None:
|
||||
|
||||
# Remove the file
|
||||
_clean_test_file(file)
|
||||
|
||||
|
||||
def test_get_timeframe(default_conf, mocker) -> None:
    """Check the min/max dates optimize.get_timeframe reports for the 1m UNITTEST/BTC data."""
    patch_exchange(mocker)
    strategy = DefaultStrategy(default_conf)

    raw_tickers = optimize.load_data(None, ticker_interval='1m', pairs=['UNITTEST/BTC'])
    data = strategy.tickerdata_to_dataframe(raw_tickers)

    min_date, max_date = optimize.get_timeframe(data)
    assert min_date.isoformat() == '2017-11-04T23:02:00+00:00'
    assert max_date.isoformat() == '2017-11-14T22:58:00+00:00'
|
||||
|
||||
|
||||
def test_validate_backtest_data_warn(default_conf, mocker, caplog) -> None:
    """Validating the 1m UNITTEST/BTC data must return True and log exactly one warning."""
    patch_exchange(mocker)
    strategy = DefaultStrategy(default_conf)

    raw_tickers = optimize.load_data(None, ticker_interval='1m', pairs=['UNITTEST/BTC'])
    data = strategy.tickerdata_to_dataframe(raw_tickers)
    min_date, max_date = optimize.get_timeframe(data)
    interval_mins = constants.TICKER_INTERVAL_MINUTES["1m"]

    # Discard records logged during setup so only the validation output is counted.
    caplog.clear()
    assert optimize.validate_backtest_data(data, min_date, max_date, interval_mins)
    assert len(caplog.record_tuples) == 1
    assert log_has(
        "UNITTEST/BTC has missing frames: expected 14396, got 13680, that's 716 missing values",
        caplog.record_tuples)
|
||||
|
||||
|
||||
def test_validate_backtest_data(default_conf, mocker, caplog) -> None:
    """Validating an index-trimmed slice of the 5m data must return False and log nothing."""
    patch_exchange(mocker)
    strategy = DefaultStrategy(default_conf)

    timerange = TimeRange('index', 'index', 200, 250)
    raw_tickers = optimize.load_data(
        None,
        ticker_interval='5m',
        pairs=['UNITTEST/BTC'],
        timerange=timerange
    )
    data = strategy.tickerdata_to_dataframe(raw_tickers)
    min_date, max_date = optimize.get_timeframe(data)
    interval_mins = constants.TICKER_INTERVAL_MINUTES["5m"]

    # Discard records logged during setup so only the validation output is counted.
    caplog.clear()
    assert not optimize.validate_backtest_data(data, min_date, max_date, interval_mins)
    assert len(caplog.record_tuples) == 0
|
||||
|
Reference in New Issue
Block a user