Merge pull request #1454 from freqtrade/feat/interpolate_missing

interpolate missing candles
This commit is contained in:
Misagh 2019-01-04 22:33:53 +01:00 committed by GitHub
commit 26a77e193e
13 changed files with 157 additions and 40 deletions

View File

@@ -5,13 +5,19 @@ import logging
import pandas as pd
from pandas import DataFrame, to_datetime
from freqtrade.constants import TICKER_INTERVAL_MINUTES
logger = logging.getLogger(__name__)
def parse_ticker_dataframe(ticker: list) -> DataFrame:
def parse_ticker_dataframe(ticker: list, ticker_interval: str,
                           fill_missing: bool = True) -> DataFrame:
    """
    Converts a ticker-list (format ccxt.fetch_ohlcv) to a Dataframe
    :param ticker: ticker list, as returned by exchange.async_get_candle_history
    :param ticker_interval: ticker_interval (e.g. 5m). Used to fill up eventual missing data
    :param fill_missing: fill up missing candles with 0 candles
                         (see ohlcv_fill_up_missing_data for details)
    :return: DataFrame
    """
    logger.debug("Parsing tickerlist to dataframe")
@@ -33,7 +39,41 @@ def parse_ticker_dataframe(ticker: list) -> DataFrame:
    })
    frame.drop(frame.tail(1).index, inplace=True)  # eliminate partial candle
    logger.debug('Dropping last candle')
    return frame
    if fill_missing:
        return ohlcv_fill_up_missing_data(frame, ticker_interval)
    else:
        return frame
def ohlcv_fill_up_missing_data(dataframe: DataFrame, ticker_interval: str) -> DataFrame:
    """
    Fills up missing data with 0-volume rows,
    using the previous close as the price for "open", "high", "low" and "close"
    """
    ohlc_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }
    tick_mins = TICKER_INTERVAL_MINUTES[ticker_interval]
    # Resample to create "NaN" values
    df = dataframe.resample(f'{tick_mins}min', on='date').agg(ohlc_dict)
    # Forward-fill close for missing rows
    df['close'] = df['close'].fillna(method='ffill')
    # Use close for "open", "high" and "low"
    df.loc[:, ['open', 'high', 'low']] = df[['open', 'high', 'low']].fillna(
        value={'open': df['close'],
               'high': df['close'],
               'low': df['close'],
               })
    df.reset_index(inplace=True)
    logger.debug(f"Missing data fillup: before: {len(dataframe)} - after: {len(df)}")
    return df
def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:

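The resample trick above is the heart of the change: resampling onto a fixed interval grid materialises missing candles as NaN rows, which are then filled from the previous close. A minimal pandas-only sketch of the same technique, with made-up sample data and names (not freqtrade's helpers):

```python
import pandas as pd

# Hypothetical 5m candles with the 12:05 candle missing
df = pd.DataFrame({
    'date': pd.to_datetime(['2019-01-04 12:00', '2019-01-04 12:10',
                            '2019-01-04 12:15'], utc=True),
    'open': [1.00, 1.20, 1.30],
    'high': [1.10, 1.30, 1.40],
    'low': [0.90, 1.10, 1.20],
    'close': [1.05, 1.25, 1.35],
    'volume': [10, 20, 30],
})

ohlc_dict = {'open': 'first', 'high': 'max', 'low': 'min',
             'close': 'last', 'volume': 'sum'}

# Resampling onto the fixed 5-minute grid surfaces the gap as a NaN row
filled = df.resample('5min', on='date').agg(ohlc_dict)

# Forward-fill close first, then reuse it for open/high/low; missing volume becomes 0
filled['close'] = filled['close'].ffill()
for col in ('open', 'high', 'low'):
    filled[col] = filled[col].fillna(filled['close'])
filled['volume'] = filled['volume'].fillna(0)
filled.reset_index(inplace=True)

print(filled)  # four rows; 12:05 is a 0-volume candle priced at the 12:00 close
```

The order matters: close has to be forward-filled before the other price columns, since their fill values are read from the already-filled close column.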
View File

@@ -82,6 +82,7 @@ def load_pair_history(pair: str,
                      timerange: TimeRange = TimeRange(None, None, 0, 0),
                      refresh_pairs: bool = False,
                      exchange: Optional[Exchange] = None,
                      fill_up_missing: bool = True
                      ) -> DataFrame:
    """
    Loads cached ticker history for the given pair.
@@ -111,7 +112,7 @@ def load_pair_history(pair: str,
        logger.warning('Missing data at end for pair %s, data ends at %s',
                       pair,
                       arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
        return parse_ticker_dataframe(pairdata)
        return parse_ticker_dataframe(pairdata, ticker_interval, fill_up_missing)
    else:
        logger.warning('No data for pair: "%s", Interval: %s. '
                       'Use --refresh-pairs-cached to download the data',
@@ -124,7 +125,8 @@ def load_data(datadir: Optional[Path],
              pairs: List[str],
              refresh_pairs: bool = False,
              exchange: Optional[Exchange] = None,
              timerange: TimeRange = TimeRange(None, None, 0, 0)) -> Dict[str, DataFrame]:
              timerange: TimeRange = TimeRange(None, None, 0, 0),
              fill_up_missing: bool = True) -> Dict[str, DataFrame]:
    """
    Loads ticker history data for a list of pairs using the given parameters
    :return: dict(<pair>:<tickerlist>)
@@ -135,7 +137,8 @@ def load_data(datadir: Optional[Path],
        hist = load_pair_history(pair=pair, ticker_interval=ticker_interval,
                                 datadir=datadir, timerange=timerange,
                                 refresh_pairs=refresh_pairs,
                                 exchange=exchange)
                                 exchange=exchange,
                                 fill_up_missing=fill_up_missing)
        if hist is not None:
            result[pair] = hist
    return result

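Since load_data just forwards the new flag to load_pair_history, callers can now opt out of fill-up when they want to inspect the raw gaps, exactly as the new tests do. A usage sketch mirroring the test suite's calls (UNITTEST/BTC is the pair shipped with the repo's test data; the None-guard is there because load_pair_history can come back empty-handed):

```python
from freqtrade.data.history import load_pair_history

# Raw candles, gaps preserved
raw = load_pair_history(pair='UNITTEST/BTC', ticker_interval='1m',
                        datadir=None, fill_up_missing=False)

# Default behaviour: gaps filled with 0-volume candles
filled = load_pair_history(pair='UNITTEST/BTC', ticker_interval='1m',
                           datadir=None)

if raw is not None and filled is not None:
    print(f"raw: {len(raw)} rows, filled: {len(filled)} rows")
```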
View File

@@ -558,7 +558,7 @@ class Exchange(object):
            if ticks:
                self._pairs_last_refresh_time[pair] = ticks[-1][0] // 1000
            # keeping parsed dataframe in cache
            self._klines[pair] = parse_ticker_dataframe(ticks)
            self._klines[pair] = parse_ticker_dataframe(ticks, tick_interval, fill_missing=True)
        return tickers
@retrier_async

View File

@@ -394,12 +394,9 @@ class Backtesting(object):
            logger.info("Running backtesting for Strategy %s", strat.get_strategy_name())
            self._set_strategy(strat)
            # need to reprocess data every time to populate signals
            preprocessed = self.strategy.tickerdata_to_dataframe(data)
            min_date, max_date = optimize.get_timeframe(preprocessed)
            # Validate dataframe for missing values
            optimize.validate_backtest_data(preprocessed, min_date, max_date,
            min_date, max_date = optimize.get_timeframe(data)
            # Validate dataframe for missing values (mainly at start and end, as fillup is called)
            optimize.validate_backtest_data(data, min_date, max_date,
                                            constants.TICKER_INTERVAL_MINUTES[self.ticker_interval])
            logger.info(
                'Measuring data from %s up to %s (%s days)..',
@@ -407,6 +404,8 @@
                max_date.isoformat(),
                (max_date - min_date).days
            )
            # need to reprocess data every time to populate signals
            preprocessed = self.strategy.tickerdata_to_dataframe(data)
            # Execute backtest and print results
            all_results[self.strategy.get_strategy_name()] = self.backtest(

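The reordering above runs get_timeframe and validate_backtest_data on the raw data before preprocessing, and since interior gaps are already filled by ohlcv_fill_up_missing_data, a shortfall can only come from the start or end of the range (hence the updated comment). Conceptually the validation is a candle-count check per pair; a simplified sketch of that idea, not freqtrade's exact implementation (names and the plain-datetime arguments are illustrative):

```python
from datetime import datetime
from typing import Dict

from pandas import DataFrame


def has_missing_candles(data: Dict[str, DataFrame], min_date: datetime,
                        max_date: datetime, interval_minutes: int) -> bool:
    """Return True if any pair has fewer candles than the interval grid implies."""
    # Intervals between the first and last candle, plus the first candle itself
    expected = int((max_date - min_date).total_seconds() // 60 // interval_minutes) + 1
    return any(len(frame) < expected for frame in data.values())
```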
View File

@@ -542,7 +542,7 @@ def ticker_history_list():
@pytest.fixture
def ticker_history(ticker_history_list):
    return parse_ticker_dataframe(ticker_history_list)
    return parse_ticker_dataframe(ticker_history_list, "5m", True)
@pytest.fixture
@@ -724,7 +724,7 @@ def tickers():
@pytest.fixture
def result():
    with open('freqtrade/tests/testdata/UNITTEST_BTC-1m.json') as data_file:
        return parse_ticker_dataframe(json.load(data_file))
        return parse_ticker_dataframe(json.load(data_file), '1m', True)
# FIX:
# Create a fixture/function

View File

@@ -1,25 +1,99 @@
# pragma pylint: disable=missing-docstring, C0103
import logging
from freqtrade.data.converter import parse_ticker_dataframe
from freqtrade.data.converter import parse_ticker_dataframe, ohlcv_fill_up_missing_data
from freqtrade.data.history import load_pair_history
from freqtrade.optimize import validate_backtest_data, get_timeframe
from freqtrade.tests.conftest import log_has
def test_dataframe_correct_length(result):
    dataframe = parse_ticker_dataframe(result)
    assert len(result.index) - 1 == len(dataframe.index)  # last partial candle removed
def test_dataframe_correct_columns(result):
    assert result.columns.tolist() == \
        ['date', 'open', 'high', 'low', 'close', 'volume']
    assert result.columns.tolist() == ['date', 'open', 'high', 'low', 'close', 'volume']
def test_parse_ticker_dataframe(ticker_history, caplog):
def test_parse_ticker_dataframe(ticker_history_list, caplog):
    columns = ['date', 'open', 'high', 'low', 'close', 'volume']
    caplog.set_level(logging.DEBUG)
    # Test file with BV data
    dataframe = parse_ticker_dataframe(ticker_history)
    dataframe = parse_ticker_dataframe(ticker_history_list, '5m', fill_missing=True)
    assert dataframe.columns.tolist() == columns
    assert log_has('Parsing tickerlist to dataframe', caplog.record_tuples)
def test_ohlcv_fill_up_missing_data(caplog):
    data = load_pair_history(datadir=None,
                             ticker_interval='1m',
                             refresh_pairs=False,
                             pair='UNITTEST/BTC',
                             fill_up_missing=False)
    caplog.set_level(logging.DEBUG)
    data2 = ohlcv_fill_up_missing_data(data, '1m')
    assert len(data2) > len(data)
    # Column names should not change
    assert (data.columns == data2.columns).all()
    assert log_has(f"Missing data fillup: before: {len(data)} - after: {len(data2)}",
                   caplog.record_tuples)
    # Test fillup actually fixes invalid backtest data
    min_date, max_date = get_timeframe({'UNITTEST/BTC': data})
    assert validate_backtest_data({'UNITTEST/BTC': data}, min_date, max_date, 1)
    assert not validate_backtest_data({'UNITTEST/BTC': data2}, min_date, max_date, 1)
def test_ohlcv_fill_up_missing_data2(caplog):
    ticker_interval = '5m'
    ticks = [[
        1511686200000,  # 8:50:00
        8.794e-05,      # open
        8.948e-05,      # high
        8.794e-05,      # low
        8.88e-05,       # close
        2255,           # volume (in quote currency)
    ],
        [
        1511686500000,  # 8:55:00
        8.88e-05,
        8.942e-05,
        8.88e-05,
        8.893e-05,
        9911,
    ],
        [
        1511687100000,  # 9:05:00
        8.891e-05,
        8.893e-05,
        8.875e-05,
        8.877e-05,
        2251
    ],
        [
        1511687400000,  # 9:10:00
        8.877e-05,
        8.883e-05,
        8.895e-05,
        8.817e-05,
        123551
    ]
    ]
    # Generate test data without filling missing candles
    data = parse_ticker_dataframe(ticks, ticker_interval, fill_missing=False)
    assert len(data) == 3
    caplog.set_level(logging.DEBUG)
    data2 = ohlcv_fill_up_missing_data(data, ticker_interval)
    assert len(data2) == 4
    # 3rd candle has been filled
    row = data2.loc[2, :]
    assert row['volume'] == 0
    # close should match close of previous candle
    assert row['close'] == data.loc[1, 'close']
    assert row['open'] == row['close']
    assert row['high'] == row['close']
    assert row['low'] == row['close']
    # Column names should not change
    assert (data.columns == data2.columns).all()
    assert log_has(f"Missing data fillup: before: {len(data)} - after: {len(data2)}",
                   caplog.record_tuples)

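For reference, the arithmetic behind the second test: the four ticks sit at 8:50, 8:55, 9:05 and 9:10, so 9:00 is missing from the 5m grid; parse_ticker_dataframe also drops the last (partial) candle, leaving 3 rows, and the fill-up restores 9:00 for the expected 4. A quick pandas check of the gap (illustrative, not part of the test suite):

```python
import pandas as pd

# The test's tick timestamps, in ms since epoch (8:50, 8:55, 9:05, 9:10 UTC)
stamps = pd.to_datetime([1511686200000, 1511686500000,
                         1511687100000, 1511687400000], unit='ms', utc=True)

# Full 5m grid from first to last tick, minus the observed stamps
grid = pd.date_range(stamps[0], stamps[-1], freq='5min')
print(grid.difference(stamps))  # only 2017-11-26 09:00:00+00:00 is missing
```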
View File

@@ -281,8 +281,8 @@ def mocked_load_data(datadir, pairs=[], ticker_interval='0m', refresh_pairs=False,
        123.45
    ] for x in range(0, 500)]
    pairdata = {'NEO/BTC': parse_ticker_dataframe(ETHBTC),
                'LTC/BTC': parse_ticker_dataframe(LTCBTC)}
    pairdata = {'NEO/BTC': parse_ticker_dataframe(ETHBTC, '1h', fill_missing=True),
                'LTC/BTC': parse_ticker_dataframe(LTCBTC, '1h', fill_missing=True)}
    return pairdata

View File

@@ -75,7 +75,7 @@ def load_data_test(what):
        pair[x][5]  # Keep old volume
    ] for x in range(0, datalen)
    ]
    return {'UNITTEST/BTC': parse_ticker_dataframe(data)}
    return {'UNITTEST/BTC': parse_ticker_dataframe(data, '1m', fill_missing=True)}
def simple_backtest(config, contour, num_results, mocker) -> None:
@@ -104,7 +104,7 @@ def simple_backtest(config, contour, num_results, mocker) -> None:
def mocked_load_data(datadir, pairs=[], ticker_interval='0m', refresh_pairs=False,
                     timerange=None, exchange=None):
    tickerdata = history.load_tickerdata_file(datadir, 'UNITTEST/BTC', '1m', timerange=timerange)
    pairdata = {'UNITTEST/BTC': parse_ticker_dataframe(tickerdata)}
    pairdata = {'UNITTEST/BTC': parse_ticker_dataframe(tickerdata, '1m', fill_missing=True)}
    return pairdata
@@ -323,15 +323,15 @@ def test_backtesting_init(mocker, default_conf) -> None:
    assert backtesting.fee == 0.5
def test_tickerdata_to_dataframe(default_conf, mocker) -> None:
def test_tickerdata_to_dataframe_bt(default_conf, mocker) -> None:
    patch_exchange(mocker)
    timerange = TimeRange(None, 'line', 0, -100)
    tick = history.load_tickerdata_file(None, 'UNITTEST/BTC', '1m', timerange=timerange)
    tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick)}
    tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)}
    backtesting = Backtesting(default_conf)
    data = backtesting.strategy.tickerdata_to_dataframe(tickerlist)
    assert len(data['UNITTEST/BTC']) == 99
    assert len(data['UNITTEST/BTC']) == 102
    # Load strategy to compare the results of the Backtesting function and the strategy
    strategy = DefaultStrategy(default_conf)
@@ -594,7 +594,7 @@ def test_processed(default_conf, mocker) -> None:
def test_backtest_pricecontours(default_conf, fee, mocker) -> None:
    mocker.patch('freqtrade.exchange.Exchange.get_fee', fee)
    tests = [['raise', 18], ['lower', 0], ['sine', 19]]
    tests = [['raise', 19], ['lower', 0], ['sine', 18]]
    # We need to enable sell-signal - otherwise it sells on ROI!!
    default_conf['experimental'] = {"use_sell_signal": True}

View File

@@ -243,7 +243,7 @@ def test_has_space(hyperopt):
def test_populate_indicators(hyperopt) -> None:
    tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
    tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick)}
    tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)}
    dataframes = hyperopt.strategy.tickerdata_to_dataframe(tickerlist)
    dataframe = hyperopt.custom_hyperopt.populate_indicators(dataframes['UNITTEST/BTC'],
                                                             {'pair': 'UNITTEST/BTC'})
@@ -256,7 +256,7 @@ def test_populate_indicators(hyperopt) -> None:
def test_buy_strategy_generator(hyperopt) -> None:
    tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
    tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick)}
    tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)}
    dataframes = hyperopt.strategy.tickerdata_to_dataframe(tickerlist)
    dataframe = hyperopt.custom_hyperopt.populate_indicators(dataframes['UNITTEST/BTC'],
                                                             {'pair': 'UNITTEST/BTC'})

View File

@@ -30,7 +30,8 @@ def test_validate_backtest_data_warn(default_conf, mocker, caplog) -> None:
        history.load_data(
            datadir=None,
            ticker_interval='1m',
            pairs=['UNITTEST/BTC']
            pairs=['UNITTEST/BTC'],
            fill_up_missing=False
        )
    )
    min_date, max_date = optimize.get_timeframe(data)

View File

@@ -10,7 +10,7 @@ from freqtrade.strategy.default_strategy import DefaultStrategy
@pytest.fixture
def result():
    with open('freqtrade/tests/testdata/ETH_BTC-1m.json') as data_file:
        return parse_ticker_dataframe(json.load(data_file))
        return parse_ticker_dataframe(json.load(data_file), '1m', fill_missing=True)
def test_default_strategy_structure():

View File

@@ -111,9 +111,9 @@ def test_tickerdata_to_dataframe(default_conf) -> None:
    timerange = TimeRange(None, 'line', 0, -100)
    tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m', timerange=timerange)
    tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick)}
    tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', True)}
    data = strategy.tickerdata_to_dataframe(tickerlist)
    assert len(data['UNITTEST/BTC']) == 99  # partial candle was removed
    assert len(data['UNITTEST/BTC']) == 102  # partial candle was removed
def test_min_roi_reached(default_conf, fee) -> None:

View File

@@ -17,7 +17,7 @@ def test_shorten_date() -> None:
def test_datesarray_to_datetimearray(ticker_history_list):
    dataframes = parse_ticker_dataframe(ticker_history_list)
    dataframes = parse_ticker_dataframe(ticker_history_list, "5m", fill_missing=True)
    dates = datesarray_to_datetimearray(dataframes['date'])
    assert isinstance(dates[0], datetime.datetime)
@@ -34,7 +34,7 @@ def test_datesarray_to_datetimearray(ticker_history_list):
def test_common_datearray(default_conf) -> None:
    strategy = DefaultStrategy(default_conf)
    tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
    tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick)}
    tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, "1m", fill_missing=True)}
    dataframes = strategy.tickerdata_to_dataframe(tickerlist)
    dates = common_datearray(dataframes)