Add validate_backtest_data function

This commit is contained in:
Matthias 2018-10-18 19:42:54 +02:00
parent d7459bbbf3
commit fb52d32296
3 changed files with 65 additions and 2 deletions

View File

@ -10,6 +10,7 @@ except ImportError:
_UJSON = False _UJSON = False
import logging import logging
import os import os
from datetime import datetime
from typing import Optional, List, Dict, Tuple, Any from typing import Optional, List, Dict, Tuple, Any
import operator import operator
@ -76,6 +77,24 @@ def get_timeframe(data: Dict[str, DataFrame]) -> Tuple[arrow.Arrow, arrow.Arrow]
max(timeframe, key=operator.itemgetter(1))[1] max(timeframe, key=operator.itemgetter(1))[1]
def validate_backtest_data(data: Dict[str, DataFrame], min_date: datetime,
                           max_date: datetime, ticker_interval_mins: int) -> None:
    """
    Checks preprocessed backtesting data for missing frames and logs a warning
    for every pair that holds fewer frames than expected.
    :param data: dictionary with preprocessed backtesting data
    :param min_date: start-date of the data
    :param max_date: end-date of the data
    :param ticker_interval_mins: ticker interval in minutes
    """
    # Whole minutes between start and end, floor-divided by the interval length.
    span_minutes = (max_date - min_date).total_seconds() // 60
    expected_frames = int(span_minutes // ticker_interval_mins)

    for pair, frame in data.items():
        # Pairs with at least the expected number of frames need no warning.
        if len(frame) >= expected_frames:
            continue
        logger.warning('%s has missing frames: expected %s, got %s',
                       pair, expected_frames, len(frame))
def load_tickerdata_file( def load_tickerdata_file(
datadir: str, pair: str, datadir: str, pair: str,
ticker_interval: str, ticker_interval: str,

View File

@ -356,8 +356,10 @@ class Backtesting(object):
# need to reprocess data every time to populate signals # need to reprocess data every time to populate signals
preprocessed = self.strategy.tickerdata_to_dataframe(data) preprocessed = self.strategy.tickerdata_to_dataframe(data)
# Print timeframe
min_date, max_date = optimize.get_timeframe(preprocessed) min_date, max_date = optimize.get_timeframe(preprocessed)
# Validate dataframe for missing values
optimize.validate_backtest_data(preprocessed, min_date, max_date,
constants.TICKER_INTERVAL_MINUTES[self.ticker_interval])
logger.info( logger.info(
'Measuring data from %s up to %s (%s days)..', 'Measuring data from %s up to %s (%s days)..',
min_date.isoformat(), min_date.isoformat(),

View File

@ -7,7 +7,7 @@ from shutil import copyfile
import arrow import arrow
from freqtrade import optimize from freqtrade import optimize, constants
from freqtrade.arguments import TimeRange from freqtrade.arguments import TimeRange
from freqtrade.misc import file_dump_json from freqtrade.misc import file_dump_json
from freqtrade.optimize.__init__ import (download_backtesting_testdata, from freqtrade.optimize.__init__ import (download_backtesting_testdata,
@ -450,3 +450,45 @@ def test_get_timeframe(default_conf, mocker) -> None:
min_date, max_date = optimize.get_timeframe(data) min_date, max_date = optimize.get_timeframe(data)
assert min_date.isoformat() == '2017-11-04T23:02:00+00:00' assert min_date.isoformat() == '2017-11-04T23:02:00+00:00'
assert max_date.isoformat() == '2017-11-14T22:58:00+00:00' assert max_date.isoformat() == '2017-11-14T22:58:00+00:00'
def test_validate_backtest_data_warn(default_conf, mocker, caplog) -> None:
    """
    validate_backtest_data must log exactly one warning for the 1m
    UNITTEST/BTC data, reporting the expected and actual frame counts.
    """
    patch_exchange(mocker)
    strategy = DefaultStrategy(default_conf)

    tickerdata = optimize.load_data(None, ticker_interval='1m', pairs=['UNITTEST/BTC'])
    data = strategy.tickerdata_to_dataframe(tickerdata)
    min_date, max_date = optimize.get_timeframe(data)

    # Drop everything logged during setup so only the validation output is counted.
    caplog.clear()
    interval_mins = constants.TICKER_INTERVAL_MINUTES["1m"]
    optimize.validate_backtest_data(data, min_date, max_date, interval_mins)

    assert len(caplog.record_tuples) == 1
    expected_msg = 'UNITTEST/BTC has missing frames: expected 14396, got 13680'
    assert log_has(expected_msg, caplog.record_tuples)
def test_validate_backtest_data(default_conf, mocker, caplog) -> None:
    """
    validate_backtest_data must log nothing for the 5m UNITTEST/BTC data
    restricted to the index timerange 200-250.
    """
    patch_exchange(mocker)
    strategy = DefaultStrategy(default_conf)

    timerange = TimeRange('index', 'index', 200, 250)
    tickerdata = optimize.load_data(
        None,
        ticker_interval='5m',
        pairs=['UNITTEST/BTC'],
        timerange=timerange,
    )
    data = strategy.tickerdata_to_dataframe(tickerdata)
    min_date, max_date = optimize.get_timeframe(data)

    # Drop everything logged during setup so only the validation output is counted.
    caplog.clear()
    interval_mins = constants.TICKER_INTERVAL_MINUTES["5m"]
    optimize.validate_backtest_data(data, min_date, max_date, interval_mins)

    assert len(caplog.record_tuples) == 0