Merge pull request #1936 from freqtrade/fix/validate_dataframe
Properly warn if data is incomplete
This commit is contained in:
commit
1f8dc7f845
@ -112,7 +112,7 @@ pair = "XLM/BTC" # Make sure to use a pair that exists on that exchange!
|
||||
raw = ct.fetch_ohlcv(pair, timeframe=timeframe)
|
||||
|
||||
# convert to dataframe
|
||||
df1 = parse_ticker_dataframe(raw, timeframe, drop_incomplete=False)
|
||||
df1 = parse_ticker_dataframe(raw, timeframe, pair=pair, drop_incomplete=False)
|
||||
|
||||
print(df1["date"].tail(1))
|
||||
print(datetime.utcnow())
|
||||
|
@ -10,13 +10,14 @@ from pandas import DataFrame, to_datetime
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def parse_ticker_dataframe(ticker: list, ticker_interval: str, *,
|
||||
def parse_ticker_dataframe(ticker: list, ticker_interval: str, pair: str, *,
|
||||
fill_missing: bool = True,
|
||||
drop_incomplete: bool = True) -> DataFrame:
|
||||
"""
|
||||
Converts a ticker-list (format ccxt.fetch_ohlcv) to a Dataframe
|
||||
:param ticker: ticker list, as returned by exchange.async_get_candle_history
|
||||
:param ticker_interval: ticker_interval (e.g. 5m). Used to fill up eventual missing data
|
||||
:param pair: Pair this data is for (used to warn if fillup was necessary)
|
||||
:param fill_missing: fill up missing candles with 0 candles
|
||||
(see ohlcv_fill_up_missing_data for details)
|
||||
:param drop_incomplete: Drop the last candle of the dataframe, assuming it's incomplete
|
||||
@ -51,12 +52,12 @@ def parse_ticker_dataframe(ticker: list, ticker_interval: str, *,
|
||||
logger.debug('Dropping last candle')
|
||||
|
||||
if fill_missing:
|
||||
return ohlcv_fill_up_missing_data(frame, ticker_interval)
|
||||
return ohlcv_fill_up_missing_data(frame, ticker_interval, pair)
|
||||
else:
|
||||
return frame
|
||||
|
||||
|
||||
def ohlcv_fill_up_missing_data(dataframe: DataFrame, ticker_interval: str) -> DataFrame:
|
||||
def ohlcv_fill_up_missing_data(dataframe: DataFrame, ticker_interval: str, pair: str) -> DataFrame:
|
||||
"""
|
||||
Fills up missing data with 0 volume rows,
|
||||
using the previous close as price for "open", "high" "low" and "close", volume is set to 0
|
||||
@ -84,7 +85,10 @@ def ohlcv_fill_up_missing_data(dataframe: DataFrame, ticker_interval: str) -> Da
|
||||
'low': df['close'],
|
||||
})
|
||||
df.reset_index(inplace=True)
|
||||
logger.debug(f"Missing data fillup: before: {len(dataframe)} - after: {len(df)}")
|
||||
len_before = len(dataframe)
|
||||
len_after = len(df)
|
||||
if len_before != len_after:
|
||||
logger.info(f"Missing data fillup for {pair}: before: {len_before} - after: {len_after}")
|
||||
return df
|
||||
|
||||
|
||||
|
@ -116,7 +116,7 @@ def load_pair_history(pair: str,
|
||||
logger.warning('Missing data at end for pair %s, data ends at %s',
|
||||
pair,
|
||||
arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
|
||||
return parse_ticker_dataframe(pairdata, ticker_interval,
|
||||
return parse_ticker_dataframe(pairdata, ticker_interval, pair=pair,
|
||||
fill_missing=fill_up_missing,
|
||||
drop_incomplete=drop_incomplete)
|
||||
else:
|
||||
@ -286,12 +286,13 @@ def get_timeframe(data: Dict[str, DataFrame]) -> Tuple[arrow.Arrow, arrow.Arrow]
|
||||
max(timeframe, key=operator.itemgetter(1))[1]
|
||||
|
||||
|
||||
def validate_backtest_data(data: Dict[str, DataFrame], min_date: datetime,
|
||||
def validate_backtest_data(data: DataFrame, pair: str, min_date: datetime,
|
||||
max_date: datetime, ticker_interval_mins: int) -> bool:
|
||||
"""
|
||||
Validates preprocessed backtesting data for missing values and shows warnings about it that.
|
||||
|
||||
:param data: dictionary with preprocessed backtesting data
|
||||
:param data: preprocessed backtesting data (as DataFrame)
|
||||
:param pair: pair used for log output.
|
||||
:param min_date: start-date of the data
|
||||
:param max_date: end-date of the data
|
||||
:param ticker_interval_mins: ticker interval in minutes
|
||||
@ -299,10 +300,9 @@ def validate_backtest_data(data: Dict[str, DataFrame], min_date: datetime,
|
||||
# total difference in minutes / interval-minutes
|
||||
expected_frames = int((max_date - min_date).total_seconds() // 60 // ticker_interval_mins)
|
||||
found_missing = False
|
||||
for pair, df in data.items():
|
||||
dflen = len(df)
|
||||
if dflen < expected_frames:
|
||||
found_missing = True
|
||||
logger.warning("%s has missing frames: expected %s, got %s, that's %s missing values",
|
||||
pair, expected_frames, dflen, expected_frames - dflen)
|
||||
dflen = len(data)
|
||||
if dflen < expected_frames:
|
||||
found_missing = True
|
||||
logger.warning("%s has missing frames: expected %s, got %s, that's %s missing values",
|
||||
pair, expected_frames, dflen, expected_frames - dflen)
|
||||
return found_missing
|
||||
|
@ -581,7 +581,7 @@ class Exchange(object):
|
||||
self._pairs_last_refresh_time[(pair, ticker_interval)] = ticks[-1][0] // 1000
|
||||
# keeping parsed dataframe in cache
|
||||
self._klines[(pair, ticker_interval)] = parse_ticker_dataframe(
|
||||
ticks, ticker_interval, fill_missing=True,
|
||||
ticks, ticker_interval, pair=pair, fill_missing=True,
|
||||
drop_incomplete=self._ohlcv_partial_candle)
|
||||
return tickers
|
||||
|
||||
|
@ -349,7 +349,7 @@ class Backtesting(object):
|
||||
row = ticker[pair][indexes[pair]]
|
||||
except IndexError:
|
||||
# missing Data for one pair at the end.
|
||||
# Warnings for this are shown by `validate_backtest_data`
|
||||
# Warnings for this are shown during data loading
|
||||
continue
|
||||
|
||||
# Waits until the time-counter reaches the start of the data for this pair.
|
||||
@ -420,20 +420,19 @@ class Backtesting(object):
|
||||
max_open_trades = 0
|
||||
all_results = {}
|
||||
|
||||
min_date, max_date = history.get_timeframe(data)
|
||||
|
||||
logger.info(
|
||||
'Backtesting with data from %s up to %s (%s days)..',
|
||||
min_date.isoformat(),
|
||||
max_date.isoformat(),
|
||||
(max_date - min_date).days
|
||||
)
|
||||
|
||||
for strat in self.strategylist:
|
||||
logger.info("Running backtesting for Strategy %s", strat.get_strategy_name())
|
||||
self._set_strategy(strat)
|
||||
|
||||
min_date, max_date = history.get_timeframe(data)
|
||||
# Validate dataframe for missing values (mainly at start and end, as fillup is called)
|
||||
history.validate_backtest_data(data, min_date, max_date,
|
||||
timeframe_to_minutes(self.ticker_interval))
|
||||
logger.info(
|
||||
'Backtesting with data from %s up to %s (%s days)..',
|
||||
min_date.isoformat(),
|
||||
max_date.isoformat(),
|
||||
(max_date - min_date).days
|
||||
)
|
||||
# need to reprocess data every time to populate signals
|
||||
preprocessed = self.strategy.tickerdata_to_dataframe(data)
|
||||
|
||||
|
@ -19,8 +19,7 @@ from skopt import Optimizer
|
||||
from skopt.space import Dimension
|
||||
|
||||
from freqtrade.arguments import Arguments
|
||||
from freqtrade.data.history import load_data, get_timeframe, validate_backtest_data
|
||||
from freqtrade.exchange import timeframe_to_minutes
|
||||
from freqtrade.data.history import load_data, get_timeframe
|
||||
from freqtrade.optimize.backtesting import Backtesting
|
||||
from freqtrade.resolvers.hyperopt_resolver import HyperOptResolver
|
||||
|
||||
@ -281,9 +280,7 @@ class Hyperopt(Backtesting):
|
||||
return
|
||||
|
||||
min_date, max_date = get_timeframe(data)
|
||||
# Validate dataframe for missing values (mainly at start and end, as fillup is called)
|
||||
validate_backtest_data(data, min_date, max_date,
|
||||
timeframe_to_minutes(self.ticker_interval))
|
||||
|
||||
logger.info(
|
||||
'Hyperopting with data from %s up to %s (%s days)..',
|
||||
min_date.isoformat(),
|
||||
|
@ -674,7 +674,7 @@ def ticker_history_list():
|
||||
|
||||
@pytest.fixture
|
||||
def ticker_history(ticker_history_list):
|
||||
return parse_ticker_dataframe(ticker_history_list, "5m", fill_missing=True)
|
||||
return parse_ticker_dataframe(ticker_history_list, "5m", pair="UNITTEST/BTC", fill_missing=True)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -879,7 +879,8 @@ def tickers():
|
||||
@pytest.fixture
|
||||
def result():
|
||||
with Path('freqtrade/tests/testdata/UNITTEST_BTC-1m.json').open('r') as data_file:
|
||||
return parse_ticker_dataframe(json.load(data_file), '1m', fill_missing=True)
|
||||
return parse_ticker_dataframe(json.load(data_file), '1m', pair="UNITTEST/BTC",
|
||||
fill_missing=True)
|
||||
|
||||
# FIX:
|
||||
# Create an fixture/function
|
||||
|
@ -15,7 +15,8 @@ def test_parse_ticker_dataframe(ticker_history_list, caplog):
|
||||
|
||||
caplog.set_level(logging.DEBUG)
|
||||
# Test file with BV data
|
||||
dataframe = parse_ticker_dataframe(ticker_history_list, '5m', fill_missing=True)
|
||||
dataframe = parse_ticker_dataframe(ticker_history_list, '5m',
|
||||
pair="UNITTEST/BTC", fill_missing=True)
|
||||
assert dataframe.columns.tolist() == columns
|
||||
assert log_has('Parsing tickerlist to dataframe', caplog.record_tuples)
|
||||
|
||||
@ -27,18 +28,19 @@ def test_ohlcv_fill_up_missing_data(caplog):
|
||||
pair='UNITTEST/BTC',
|
||||
fill_up_missing=False)
|
||||
caplog.set_level(logging.DEBUG)
|
||||
data2 = ohlcv_fill_up_missing_data(data, '1m')
|
||||
data2 = ohlcv_fill_up_missing_data(data, '1m', 'UNITTEST/BTC')
|
||||
assert len(data2) > len(data)
|
||||
# Column names should not change
|
||||
assert (data.columns == data2.columns).all()
|
||||
|
||||
assert log_has(f"Missing data fillup: before: {len(data)} - after: {len(data2)}",
|
||||
assert log_has(f"Missing data fillup for UNITTEST/BTC: before: "
|
||||
f"{len(data)} - after: {len(data2)}",
|
||||
caplog.record_tuples)
|
||||
|
||||
# Test fillup actually fixes invalid backtest data
|
||||
min_date, max_date = get_timeframe({'UNITTEST/BTC': data})
|
||||
assert validate_backtest_data({'UNITTEST/BTC': data}, min_date, max_date, 1)
|
||||
assert not validate_backtest_data({'UNITTEST/BTC': data2}, min_date, max_date, 1)
|
||||
assert validate_backtest_data(data, 'UNITTEST/BTC', min_date, max_date, 1)
|
||||
assert not validate_backtest_data(data2, 'UNITTEST/BTC', min_date, max_date, 1)
|
||||
|
||||
|
||||
def test_ohlcv_fill_up_missing_data2(caplog):
|
||||
@ -78,10 +80,10 @@ def test_ohlcv_fill_up_missing_data2(caplog):
|
||||
]
|
||||
|
||||
# Generate test-data without filling missing
|
||||
data = parse_ticker_dataframe(ticks, ticker_interval, fill_missing=False)
|
||||
data = parse_ticker_dataframe(ticks, ticker_interval, pair="UNITTEST/BTC", fill_missing=False)
|
||||
assert len(data) == 3
|
||||
caplog.set_level(logging.DEBUG)
|
||||
data2 = ohlcv_fill_up_missing_data(data, ticker_interval)
|
||||
data2 = ohlcv_fill_up_missing_data(data, ticker_interval, "UNITTEST/BTC")
|
||||
assert len(data2) == 4
|
||||
# 3rd candle has been filled
|
||||
row = data2.loc[2, :]
|
||||
@ -94,7 +96,8 @@ def test_ohlcv_fill_up_missing_data2(caplog):
|
||||
# Column names should not change
|
||||
assert (data.columns == data2.columns).all()
|
||||
|
||||
assert log_has(f"Missing data fillup: before: {len(data)} - after: {len(data2)}",
|
||||
assert log_has(f"Missing data fillup for UNITTEST/BTC: before: "
|
||||
f"{len(data)} - after: {len(data2)}",
|
||||
caplog.record_tuples)
|
||||
|
||||
|
||||
@ -134,12 +137,14 @@ def test_ohlcv_drop_incomplete(caplog):
|
||||
]
|
||||
]
|
||||
caplog.set_level(logging.DEBUG)
|
||||
data = parse_ticker_dataframe(ticks, ticker_interval, fill_missing=False, drop_incomplete=False)
|
||||
data = parse_ticker_dataframe(ticks, ticker_interval, pair="UNITTEST/BTC",
|
||||
fill_missing=False, drop_incomplete=False)
|
||||
assert len(data) == 4
|
||||
assert not log_has("Dropping last candle", caplog.record_tuples)
|
||||
|
||||
# Drop last candle
|
||||
data = parse_ticker_dataframe(ticks, ticker_interval, fill_missing=False, drop_incomplete=True)
|
||||
data = parse_ticker_dataframe(ticks, ticker_interval, pair="UNITTEST/BTC",
|
||||
fill_missing=False, drop_incomplete=True)
|
||||
assert len(data) == 3
|
||||
|
||||
assert log_has("Dropping last candle", caplog.record_tuples)
|
||||
|
@ -555,8 +555,8 @@ def test_validate_backtest_data_warn(default_conf, mocker, caplog) -> None:
|
||||
)
|
||||
min_date, max_date = history.get_timeframe(data)
|
||||
caplog.clear()
|
||||
assert history.validate_backtest_data(data, min_date, max_date,
|
||||
timeframe_to_minutes('1m'))
|
||||
assert history.validate_backtest_data(data['UNITTEST/BTC'], 'UNITTEST/BTC',
|
||||
min_date, max_date, timeframe_to_minutes('1m'))
|
||||
assert len(caplog.record_tuples) == 1
|
||||
assert log_has(
|
||||
"UNITTEST/BTC has missing frames: expected 14396, got 13680, that's 716 missing values",
|
||||
@ -579,6 +579,6 @@ def test_validate_backtest_data(default_conf, mocker, caplog) -> None:
|
||||
|
||||
min_date, max_date = history.get_timeframe(data)
|
||||
caplog.clear()
|
||||
assert not history.validate_backtest_data(data, min_date, max_date,
|
||||
timeframe_to_minutes('5m'))
|
||||
assert not history.validate_backtest_data(data['UNITTEST/BTC'], 'UNITTEST/BTC',
|
||||
min_date, max_date, timeframe_to_minutes('5m'))
|
||||
assert len(caplog.record_tuples) == 0
|
||||
|
@ -263,7 +263,7 @@ def mocked_load_data(datadir, pairs=[], ticker_interval='0m', refresh_pairs=Fals
|
||||
hz = 0.1
|
||||
base = 0.001
|
||||
|
||||
ETHBTC = [
|
||||
NEOBTC = [
|
||||
[
|
||||
ticker_start_time.shift(minutes=(x * ticker_interval_in_minute)).timestamp * 1000,
|
||||
math.sin(x * hz) / 1000 + base,
|
||||
@ -285,8 +285,8 @@ def mocked_load_data(datadir, pairs=[], ticker_interval='0m', refresh_pairs=Fals
|
||||
123.45
|
||||
] for x in range(0, 500)]
|
||||
|
||||
pairdata = {'NEO/BTC': parse_ticker_dataframe(ETHBTC, '1h', fill_missing=True),
|
||||
'LTC/BTC': parse_ticker_dataframe(LTCBTC, '1h', fill_missing=True)}
|
||||
pairdata = {'NEO/BTC': parse_ticker_dataframe(NEOBTC, '1h', pair="NEO/BTC", fill_missing=True),
|
||||
'LTC/BTC': parse_ticker_dataframe(LTCBTC, '1h', pair="LTC/BTC", fill_missing=True)}
|
||||
return pairdata
|
||||
|
||||
|
||||
|
@ -73,7 +73,8 @@ def load_data_test(what):
|
||||
pair[x][5] # Keep old volume
|
||||
] for x in range(0, datalen)
|
||||
]
|
||||
return {'UNITTEST/BTC': parse_ticker_dataframe(data, '1m', fill_missing=True)}
|
||||
return {'UNITTEST/BTC': parse_ticker_dataframe(data, '1m', pair="UNITTEST/BTC",
|
||||
fill_missing=True)}
|
||||
|
||||
|
||||
def simple_backtest(config, contour, num_results, mocker) -> None:
|
||||
@ -102,7 +103,8 @@ def simple_backtest(config, contour, num_results, mocker) -> None:
|
||||
def mocked_load_data(datadir, pairs=[], ticker_interval='0m', refresh_pairs=False,
|
||||
timerange=None, exchange=None, live=False):
|
||||
tickerdata = history.load_tickerdata_file(datadir, 'UNITTEST/BTC', '1m', timerange=timerange)
|
||||
pairdata = {'UNITTEST/BTC': parse_ticker_dataframe(tickerdata, '1m', fill_missing=True)}
|
||||
pairdata = {'UNITTEST/BTC': parse_ticker_dataframe(tickerdata, '1m', pair="UNITTEST/BTC",
|
||||
fill_missing=True)}
|
||||
return pairdata
|
||||
|
||||
|
||||
@ -350,7 +352,8 @@ def test_tickerdata_to_dataframe_bt(default_conf, mocker) -> None:
|
||||
patch_exchange(mocker)
|
||||
timerange = TimeRange(None, 'line', 0, -100)
|
||||
tick = history.load_tickerdata_file(None, 'UNITTEST/BTC', '1m', timerange=timerange)
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)}
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', pair="UNITTEST/BTC",
|
||||
fill_missing=True)}
|
||||
|
||||
backtesting = Backtesting(default_conf)
|
||||
data = backtesting.strategy.tickerdata_to_dataframe(tickerlist)
|
||||
|
@ -427,7 +427,8 @@ def test_has_space(hyperopt):
|
||||
|
||||
def test_populate_indicators(hyperopt) -> None:
|
||||
tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)}
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', pair="UNITTEST/BTC",
|
||||
fill_missing=True)}
|
||||
dataframes = hyperopt.strategy.tickerdata_to_dataframe(tickerlist)
|
||||
dataframe = hyperopt.custom_hyperopt.populate_indicators(dataframes['UNITTEST/BTC'],
|
||||
{'pair': 'UNITTEST/BTC'})
|
||||
@ -440,7 +441,8 @@ def test_populate_indicators(hyperopt) -> None:
|
||||
|
||||
def test_buy_strategy_generator(hyperopt) -> None:
|
||||
tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)}
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', pair="UNITTEST/BTC",
|
||||
fill_missing=True)}
|
||||
dataframes = hyperopt.strategy.tickerdata_to_dataframe(tickerlist)
|
||||
dataframe = hyperopt.custom_hyperopt.populate_indicators(dataframes['UNITTEST/BTC'],
|
||||
{'pair': 'UNITTEST/BTC'})
|
||||
|
@ -10,7 +10,8 @@ from freqtrade.strategy.default_strategy import DefaultStrategy
|
||||
@pytest.fixture
|
||||
def result():
|
||||
with open('freqtrade/tests/testdata/ETH_BTC-1m.json') as data_file:
|
||||
return parse_ticker_dataframe(json.load(data_file), '1m', fill_missing=True)
|
||||
return parse_ticker_dataframe(json.load(data_file), '1m', pair="UNITTEST/BTC",
|
||||
fill_missing=True)
|
||||
|
||||
|
||||
def test_default_strategy_structure():
|
||||
|
@ -111,7 +111,8 @@ def test_tickerdata_to_dataframe(default_conf) -> None:
|
||||
|
||||
timerange = TimeRange(None, 'line', 0, -100)
|
||||
tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m', timerange=timerange)
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)}
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', pair="UNITTEST/BTC",
|
||||
fill_missing=True)}
|
||||
data = strategy.tickerdata_to_dataframe(tickerlist)
|
||||
assert len(data['UNITTEST/BTC']) == 102 # partial candle was removed
|
||||
|
||||
|
@ -17,7 +17,8 @@ def test_shorten_date() -> None:
|
||||
|
||||
|
||||
def test_datesarray_to_datetimearray(ticker_history_list):
|
||||
dataframes = parse_ticker_dataframe(ticker_history_list, "5m", fill_missing=True)
|
||||
dataframes = parse_ticker_dataframe(ticker_history_list, "5m", pair="UNITTEST/BTC",
|
||||
fill_missing=True)
|
||||
dates = datesarray_to_datetimearray(dataframes['date'])
|
||||
|
||||
assert isinstance(dates[0], datetime.datetime)
|
||||
@ -34,7 +35,8 @@ def test_datesarray_to_datetimearray(ticker_history_list):
|
||||
def test_common_datearray(default_conf) -> None:
|
||||
strategy = DefaultStrategy(default_conf)
|
||||
tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, "1m", fill_missing=True)}
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, "1m", pair="UNITTEST/BTC",
|
||||
fill_missing=True)}
|
||||
dataframes = strategy.tickerdata_to_dataframe(tickerlist)
|
||||
|
||||
dates = common_datearray(dataframes)
|
||||
|
Loading…
Reference in New Issue
Block a user