Merge pull request #1454 from freqtrade/feat/interpolate_missing
interpolate missing candles
This commit is contained in:
commit
26a77e193e
@ -5,13 +5,19 @@ import logging
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, to_datetime
|
||||
|
||||
from freqtrade.constants import TICKER_INTERVAL_MINUTES
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def parse_ticker_dataframe(ticker: list) -> DataFrame:
|
||||
def parse_ticker_dataframe(ticker: list, ticker_interval: str,
|
||||
fill_missing: bool = True) -> DataFrame:
|
||||
"""
|
||||
Converts a ticker-list (format ccxt.fetch_ohlcv) to a Dataframe
|
||||
:param ticker: ticker list, as returned by exchange.async_get_candle_history
|
||||
:param ticker_interval: ticker_interval (e.g. 5m). Used to fill up eventual missing data
|
||||
:param fill_missing: fill up missing candles with 0 candles
|
||||
(see ohlcv_fill_up_missing_data for details)
|
||||
:return: DataFrame
|
||||
"""
|
||||
logger.debug("Parsing tickerlist to dataframe")
|
||||
@ -33,9 +39,43 @@ def parse_ticker_dataframe(ticker: list) -> DataFrame:
|
||||
})
|
||||
frame.drop(frame.tail(1).index, inplace=True) # eliminate partial candle
|
||||
logger.debug('Dropping last candle')
|
||||
|
||||
if fill_missing:
|
||||
return ohlcv_fill_up_missing_data(frame, ticker_interval)
|
||||
else:
|
||||
return frame
|
||||
|
||||
|
||||
def ohlcv_fill_up_missing_data(dataframe: DataFrame, ticker_interval: str) -> DataFrame:
    """
    Fills up missing candles with 0 volume rows.

    The dataframe is resampled on its "date" column at the candle interval, which
    creates a row for every interval that has no candle. These rows use the previous
    close as price for "open", "high", "low" and "close", and volume is set to 0.

    :param dataframe: OHLCV dataframe with the columns
        date, open, high, low, close, volume
    :param ticker_interval: ticker_interval (e.g. 5m), must be a key of
        TICKER_INTERVAL_MINUTES
    :return: new DataFrame with the same columns and a fresh integer index;
        the passed dataframe itself is not modified
    """
    ohlc_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }
    tick_mins = TICKER_INTERVAL_MINUTES[ticker_interval]
    # Resample to create "NAN" values for intervals without a candle
    df = dataframe.resample(f'{tick_mins}min', on='date').agg(ohlc_dict)

    # Forwardfill close for missing candles.
    # Series.ffill() is used instead of fillna(method='ffill'), which is deprecated
    # in recent pandas versions.
    df['close'] = df['close'].ffill()
    # Use the (already filled) close for "open, high, low" of the inserted candles
    df.loc[:, ['open', 'high', 'low']] = df[['open', 'high', 'low']].fillna(
        value={'open': df['close'],
               'high': df['close'],
               'low': df['close'],
               })
    # Move "date" back from the index into a regular column
    df.reset_index(inplace=True)
    logger.debug(f"Missing data fillup: before: {len(dataframe)} - after: {len(df)}")
    return df
|
||||
|
||||
|
||||
def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
|
||||
"""
|
||||
Gets order book list, returns dataframe with below format per suggested by creslin
|
||||
|
@ -82,6 +82,7 @@ def load_pair_history(pair: str,
|
||||
timerange: TimeRange = TimeRange(None, None, 0, 0),
|
||||
refresh_pairs: bool = False,
|
||||
exchange: Optional[Exchange] = None,
|
||||
fill_up_missing: bool = True
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Loads cached ticker history for the given pair.
|
||||
@ -111,7 +112,7 @@ def load_pair_history(pair: str,
|
||||
logger.warning('Missing data at end for pair %s, data ends at %s',
|
||||
pair,
|
||||
arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
|
||||
return parse_ticker_dataframe(pairdata)
|
||||
return parse_ticker_dataframe(pairdata, ticker_interval, fill_up_missing)
|
||||
else:
|
||||
logger.warning('No data for pair: "%s", Interval: %s. '
|
||||
'Use --refresh-pairs-cached to download the data',
|
||||
@ -124,7 +125,8 @@ def load_data(datadir: Optional[Path],
|
||||
pairs: List[str],
|
||||
refresh_pairs: bool = False,
|
||||
exchange: Optional[Exchange] = None,
|
||||
timerange: TimeRange = TimeRange(None, None, 0, 0)) -> Dict[str, DataFrame]:
|
||||
timerange: TimeRange = TimeRange(None, None, 0, 0),
|
||||
fill_up_missing: bool = True) -> Dict[str, DataFrame]:
|
||||
"""
|
||||
Loads ticker history data for a list of pairs the given parameters
|
||||
:return: dict(<pair>:<tickerlist>)
|
||||
@ -135,7 +137,8 @@ def load_data(datadir: Optional[Path],
|
||||
hist = load_pair_history(pair=pair, ticker_interval=ticker_interval,
|
||||
datadir=datadir, timerange=timerange,
|
||||
refresh_pairs=refresh_pairs,
|
||||
exchange=exchange)
|
||||
exchange=exchange,
|
||||
fill_up_missing=fill_up_missing)
|
||||
if hist is not None:
|
||||
result[pair] = hist
|
||||
return result
|
||||
|
@ -558,7 +558,7 @@ class Exchange(object):
|
||||
if ticks:
|
||||
self._pairs_last_refresh_time[pair] = ticks[-1][0] // 1000
|
||||
# keeping parsed dataframe in cache
|
||||
self._klines[pair] = parse_ticker_dataframe(ticks)
|
||||
self._klines[pair] = parse_ticker_dataframe(ticks, tick_interval, fill_missing=True)
|
||||
return tickers
|
||||
|
||||
@retrier_async
|
||||
|
@ -394,12 +394,9 @@ class Backtesting(object):
|
||||
logger.info("Running backtesting for Strategy %s", strat.get_strategy_name())
|
||||
self._set_strategy(strat)
|
||||
|
||||
# need to reprocess data every time to populate signals
|
||||
preprocessed = self.strategy.tickerdata_to_dataframe(data)
|
||||
|
||||
min_date, max_date = optimize.get_timeframe(preprocessed)
|
||||
# Validate dataframe for missing values
|
||||
optimize.validate_backtest_data(preprocessed, min_date, max_date,
|
||||
min_date, max_date = optimize.get_timeframe(data)
|
||||
# Validate dataframe for missing values (mainly at start and end, as fillup is called)
|
||||
optimize.validate_backtest_data(data, min_date, max_date,
|
||||
constants.TICKER_INTERVAL_MINUTES[self.ticker_interval])
|
||||
logger.info(
|
||||
'Measuring data from %s up to %s (%s days)..',
|
||||
@ -407,6 +404,8 @@ class Backtesting(object):
|
||||
max_date.isoformat(),
|
||||
(max_date - min_date).days
|
||||
)
|
||||
# need to reprocess data every time to populate signals
|
||||
preprocessed = self.strategy.tickerdata_to_dataframe(data)
|
||||
|
||||
# Execute backtest and print results
|
||||
all_results[self.strategy.get_strategy_name()] = self.backtest(
|
||||
|
@ -542,7 +542,7 @@ def ticker_history_list():
|
||||
|
||||
@pytest.fixture
|
||||
def ticker_history(ticker_history_list):
|
||||
return parse_ticker_dataframe(ticker_history_list)
|
||||
return parse_ticker_dataframe(ticker_history_list, "5m", True)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -724,7 +724,7 @@ def tickers():
|
||||
@pytest.fixture
|
||||
def result():
|
||||
with open('freqtrade/tests/testdata/UNITTEST_BTC-1m.json') as data_file:
|
||||
return parse_ticker_dataframe(json.load(data_file))
|
||||
return parse_ticker_dataframe(json.load(data_file), '1m', True)
|
||||
|
||||
# FIX:
|
||||
# Create a fixture/function
|
||||
|
@ -1,25 +1,99 @@
|
||||
# pragma pylint: disable=missing-docstring, C0103
|
||||
import logging
|
||||
|
||||
from freqtrade.data.converter import parse_ticker_dataframe
|
||||
from freqtrade.data.converter import parse_ticker_dataframe, ohlcv_fill_up_missing_data
|
||||
from freqtrade.data.history import load_pair_history
|
||||
from freqtrade.optimize import validate_backtest_data, get_timeframe
|
||||
from freqtrade.tests.conftest import log_has
|
||||
|
||||
|
||||
def test_dataframe_correct_length(result):
|
||||
dataframe = parse_ticker_dataframe(result)
|
||||
assert len(result.index) - 1 == len(dataframe.index) # last partial candle removed
|
||||
|
||||
|
||||
def test_dataframe_correct_columns(result):
|
||||
assert result.columns.tolist() == \
|
||||
['date', 'open', 'high', 'low', 'close', 'volume']
|
||||
assert result.columns.tolist() == ['date', 'open', 'high', 'low', 'close', 'volume']
|
||||
|
||||
|
||||
def test_parse_ticker_dataframe(ticker_history, caplog):
|
||||
def test_parse_ticker_dataframe(ticker_history_list, caplog):
|
||||
columns = ['date', 'open', 'high', 'low', 'close', 'volume']
|
||||
|
||||
caplog.set_level(logging.DEBUG)
|
||||
# Test file with BV data
|
||||
dataframe = parse_ticker_dataframe(ticker_history)
|
||||
dataframe = parse_ticker_dataframe(ticker_history_list, '5m', fill_missing=True)
|
||||
assert dataframe.columns.tolist() == columns
|
||||
assert log_has('Parsing tickerlist to dataframe', caplog.record_tuples)
|
||||
|
||||
|
||||
def test_ohlcv_fill_up_missing_data(caplog):
    """
    Fill up the UNITTEST/BTC 1m history and check that rows were added, columns are
    unchanged, the fillup is logged and backtest-data validation no longer complains.
    """
    pair = 'UNITTEST/BTC'
    interval = '1m'
    # Load the raw history without filling, so there is something left to fill up
    raw = load_pair_history(pair=pair,
                            ticker_interval=interval,
                            datadir=None,
                            refresh_pairs=False,
                            fill_up_missing=False)
    caplog.set_level(logging.DEBUG)
    filled = ohlcv_fill_up_missing_data(raw, interval)

    rows_before, rows_after = len(raw), len(filled)
    assert rows_after > rows_before
    # Column names should not change
    assert (raw.columns == filled.columns).all()

    expected_message = f"Missing data fillup: before: {rows_before} - after: {rows_after}"
    assert log_has(expected_message, caplog.record_tuples)

    # Test fillup actually fixes invalid backtest data
    start, end = get_timeframe({pair: raw})
    assert validate_backtest_data({pair: raw}, start, end, 1)
    assert not validate_backtest_data({pair: filled}, start, end, 1)
|
||||
|
||||
|
||||
def test_ohlcv_fill_up_missing_data2(caplog):
    """
    Fill up a hand-written 5m ticker list that has no 9:00:00 candle and check the
    inserted candle: volume 0 and all prices equal to the previous close.
    """
    ticker_interval = '5m'
    # Rows: [timestamp (ms), open, high, low, close, volume (in quote currency)]
    ticks = [
        [1511686200000, 8.794e-05, 8.948e-05, 8.794e-05, 8.88e-05, 2255],  # 8:50:00
        [1511686500000, 8.88e-05, 8.942e-05, 8.88e-05, 8.893e-05, 9911],  # 8:55:00
        [1511687100000, 8.891e-05, 8.893e-05, 8.875e-05, 8.877e-05, 2251],  # 9:05:00
        [1511687400000, 8.877e-05, 8.883e-05, 8.895e-05, 8.817e-05, 123551],  # 9:10:00
    ]

    # Generate test-data without filling missing
    sparse = parse_ticker_dataframe(ticks, ticker_interval, fill_missing=False)
    assert len(sparse) == 3
    caplog.set_level(logging.DEBUG)
    filled = ohlcv_fill_up_missing_data(sparse, ticker_interval)
    assert len(filled) == 4

    # 3rd candle has been filled
    gap_candle = filled.loc[2, :]
    assert gap_candle['volume'] == 0
    # close should match close of previous candle
    previous_close = sparse.loc[1, 'close']
    assert gap_candle['close'] == previous_close
    for price_column in ('open', 'high', 'low'):
        assert gap_candle[price_column] == gap_candle['close']

    # Column names should not change
    assert (sparse.columns == filled.columns).all()

    expected_message = f"Missing data fillup: before: {len(sparse)} - after: {len(filled)}"
    assert log_has(expected_message, caplog.record_tuples)
|
||||
|
@ -281,8 +281,8 @@ def mocked_load_data(datadir, pairs=[], ticker_interval='0m', refresh_pairs=Fals
|
||||
123.45
|
||||
] for x in range(0, 500)]
|
||||
|
||||
pairdata = {'NEO/BTC': parse_ticker_dataframe(ETHBTC),
|
||||
'LTC/BTC': parse_ticker_dataframe(LTCBTC)}
|
||||
pairdata = {'NEO/BTC': parse_ticker_dataframe(ETHBTC, '1h', fill_missing=True),
|
||||
'LTC/BTC': parse_ticker_dataframe(LTCBTC, '1h', fill_missing=True)}
|
||||
return pairdata
|
||||
|
||||
|
||||
|
@ -75,7 +75,7 @@ def load_data_test(what):
|
||||
pair[x][5] # Keep old volume
|
||||
] for x in range(0, datalen)
|
||||
]
|
||||
return {'UNITTEST/BTC': parse_ticker_dataframe(data)}
|
||||
return {'UNITTEST/BTC': parse_ticker_dataframe(data, '1m', fill_missing=True)}
|
||||
|
||||
|
||||
def simple_backtest(config, contour, num_results, mocker) -> None:
|
||||
@ -104,7 +104,7 @@ def simple_backtest(config, contour, num_results, mocker) -> None:
|
||||
def mocked_load_data(datadir, pairs=[], ticker_interval='0m', refresh_pairs=False,
|
||||
timerange=None, exchange=None):
|
||||
tickerdata = history.load_tickerdata_file(datadir, 'UNITTEST/BTC', '1m', timerange=timerange)
|
||||
pairdata = {'UNITTEST/BTC': parse_ticker_dataframe(tickerdata)}
|
||||
pairdata = {'UNITTEST/BTC': parse_ticker_dataframe(tickerdata, '1m', fill_missing=True)}
|
||||
return pairdata
|
||||
|
||||
|
||||
@ -323,15 +323,15 @@ def test_backtesting_init(mocker, default_conf) -> None:
|
||||
assert backtesting.fee == 0.5
|
||||
|
||||
|
||||
def test_tickerdata_to_dataframe(default_conf, mocker) -> None:
|
||||
def test_tickerdata_to_dataframe_bt(default_conf, mocker) -> None:
|
||||
patch_exchange(mocker)
|
||||
timerange = TimeRange(None, 'line', 0, -100)
|
||||
tick = history.load_tickerdata_file(None, 'UNITTEST/BTC', '1m', timerange=timerange)
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick)}
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)}
|
||||
|
||||
backtesting = Backtesting(default_conf)
|
||||
data = backtesting.strategy.tickerdata_to_dataframe(tickerlist)
|
||||
assert len(data['UNITTEST/BTC']) == 99
|
||||
assert len(data['UNITTEST/BTC']) == 102
|
||||
|
||||
# Load strategy to compare the result between Backtesting function and strategy are the same
|
||||
strategy = DefaultStrategy(default_conf)
|
||||
@ -594,7 +594,7 @@ def test_processed(default_conf, mocker) -> None:
|
||||
|
||||
def test_backtest_pricecontours(default_conf, fee, mocker) -> None:
|
||||
mocker.patch('freqtrade.exchange.Exchange.get_fee', fee)
|
||||
tests = [['raise', 18], ['lower', 0], ['sine', 19]]
|
||||
tests = [['raise', 19], ['lower', 0], ['sine', 18]]
|
||||
# We need to enable sell-signal - otherwise it sells on ROI!!
|
||||
default_conf['experimental'] = {"use_sell_signal": True}
|
||||
|
||||
|
@ -243,7 +243,7 @@ def test_has_space(hyperopt):
|
||||
|
||||
def test_populate_indicators(hyperopt) -> None:
|
||||
tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick)}
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)}
|
||||
dataframes = hyperopt.strategy.tickerdata_to_dataframe(tickerlist)
|
||||
dataframe = hyperopt.custom_hyperopt.populate_indicators(dataframes['UNITTEST/BTC'],
|
||||
{'pair': 'UNITTEST/BTC'})
|
||||
@ -256,7 +256,7 @@ def test_populate_indicators(hyperopt) -> None:
|
||||
|
||||
def test_buy_strategy_generator(hyperopt) -> None:
|
||||
tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick)}
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)}
|
||||
dataframes = hyperopt.strategy.tickerdata_to_dataframe(tickerlist)
|
||||
dataframe = hyperopt.custom_hyperopt.populate_indicators(dataframes['UNITTEST/BTC'],
|
||||
{'pair': 'UNITTEST/BTC'})
|
||||
|
@ -30,7 +30,8 @@ def test_validate_backtest_data_warn(default_conf, mocker, caplog) -> None:
|
||||
history.load_data(
|
||||
datadir=None,
|
||||
ticker_interval='1m',
|
||||
pairs=['UNITTEST/BTC']
|
||||
pairs=['UNITTEST/BTC'],
|
||||
fill_up_missing=False
|
||||
)
|
||||
)
|
||||
min_date, max_date = optimize.get_timeframe(data)
|
||||
|
@ -10,7 +10,7 @@ from freqtrade.strategy.default_strategy import DefaultStrategy
|
||||
@pytest.fixture
|
||||
def result():
|
||||
with open('freqtrade/tests/testdata/ETH_BTC-1m.json') as data_file:
|
||||
return parse_ticker_dataframe(json.load(data_file))
|
||||
return parse_ticker_dataframe(json.load(data_file), '1m', fill_missing=True)
|
||||
|
||||
|
||||
def test_default_strategy_structure():
|
||||
|
@ -111,9 +111,9 @@ def test_tickerdata_to_dataframe(default_conf) -> None:
|
||||
|
||||
timerange = TimeRange(None, 'line', 0, -100)
|
||||
tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m', timerange=timerange)
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick)}
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', True)}
|
||||
data = strategy.tickerdata_to_dataframe(tickerlist)
|
||||
assert len(data['UNITTEST/BTC']) == 99 # partial candle was removed
|
||||
assert len(data['UNITTEST/BTC']) == 102 # partial candle was removed
|
||||
|
||||
|
||||
def test_min_roi_reached(default_conf, fee) -> None:
|
||||
|
@ -17,7 +17,7 @@ def test_shorten_date() -> None:
|
||||
|
||||
|
||||
def test_datesarray_to_datetimearray(ticker_history_list):
|
||||
dataframes = parse_ticker_dataframe(ticker_history_list)
|
||||
dataframes = parse_ticker_dataframe(ticker_history_list, "5m", fill_missing=True)
|
||||
dates = datesarray_to_datetimearray(dataframes['date'])
|
||||
|
||||
assert isinstance(dates[0], datetime.datetime)
|
||||
@ -34,7 +34,7 @@ def test_datesarray_to_datetimearray(ticker_history_list):
|
||||
def test_common_datearray(default_conf) -> None:
|
||||
strategy = DefaultStrategy(default_conf)
|
||||
tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m')
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick)}
|
||||
tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, "1m", fill_missing=True)}
|
||||
dataframes = strategy.tickerdata_to_dataframe(tickerlist)
|
||||
|
||||
dates = common_datearray(dataframes)
|
||||
|
Loading…
Reference in New Issue
Block a user