stable/tests/data/test_datahandler.py

535 lines
19 KiB
Python

# pragma pylint: disable=missing-docstring, protected-access, C0103
import re
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import MagicMock
import pytest
from pandas import DataFrame
from freqtrade.configuration import TimeRange
from freqtrade.constants import AVAILABLE_DATAHANDLERS
from freqtrade.data.history.featherdatahandler import FeatherDataHandler
from freqtrade.data.history.hdf5datahandler import HDF5DataHandler
from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler, get_datahandlerclass
from freqtrade.data.history.jsondatahandler import JsonDataHandler, JsonGzDataHandler
from freqtrade.data.history.parquetdatahandler import ParquetDataHandler
from freqtrade.enums import CandleType, TradingMode
from tests.conftest import log_has, log_has_re
def test_datahandler_ohlcv_get_pairs(testdatadir):
pairs = JsonDataHandler.ohlcv_get_pairs(testdatadir, '5m', candle_type=CandleType.SPOT)
# Convert to set to avoid failures due to sorting
assert set(pairs) == {'UNITTEST/BTC', 'XLM/BTC', 'ETH/BTC', 'TRX/BTC', 'LTC/BTC',
'XMR/BTC', 'ZEC/BTC', 'ADA/BTC', 'ETC/BTC', 'NXT/BTC',
'DASH/BTC', 'XRP/ETH'}
pairs = JsonGzDataHandler.ohlcv_get_pairs(testdatadir, '8m', candle_type=CandleType.SPOT)
assert set(pairs) == {'UNITTEST/BTC'}
pairs = HDF5DataHandler.ohlcv_get_pairs(testdatadir, '5m', candle_type=CandleType.SPOT)
assert set(pairs) == {'UNITTEST/BTC'}
pairs = JsonDataHandler.ohlcv_get_pairs(testdatadir, '1h', candle_type=CandleType.MARK)
assert set(pairs) == {'UNITTEST/USDT:USDT', 'XRP/USDT:USDT'}
pairs = JsonGzDataHandler.ohlcv_get_pairs(testdatadir, '1h', candle_type=CandleType.FUTURES)
assert set(pairs) == {'XRP/USDT:USDT'}
pairs = HDF5DataHandler.ohlcv_get_pairs(testdatadir, '1h', candle_type=CandleType.MARK)
assert set(pairs) == {'UNITTEST/USDT:USDT'}
@pytest.mark.parametrize('filename,pair,timeframe,candletype', [
('XMR_BTC-5m.json', 'XMR_BTC', '5m', ''),
('XMR_USDT-1h.h5', 'XMR_USDT', '1h', ''),
('BTC-PERP-1h.h5', 'BTC-PERP', '1h', ''),
('BTC_USDT-2h.jsongz', 'BTC_USDT', '2h', ''),
('BTC_USDT-2h-mark.jsongz', 'BTC_USDT', '2h', 'mark'),
('XMR_USDT-1h-mark.h5', 'XMR_USDT', '1h', 'mark'),
('XMR_USDT-1h-random.h5', 'XMR_USDT', '1h', 'random'),
('BTC-PERP-1h-index.h5', 'BTC-PERP', '1h', 'index'),
('XMR_USDT_USDT-1h-mark.h5', 'XMR_USDT_USDT', '1h', 'mark'),
])
def test_datahandler_ohlcv_regex(filename, pair, timeframe, candletype):
regex = JsonDataHandler._OHLCV_REGEX
match = re.search(regex, filename)
assert len(match.groups()) > 1
assert match[1] == pair
assert match[2] == timeframe
assert match[3] == candletype
@pytest.mark.parametrize('input,expected', [
('XMR_USDT', 'XMR/USDT'),
('BTC_USDT', 'BTC/USDT'),
('USDT_BUSD', 'USDT/BUSD'),
('BTC_USDT_USDT', 'BTC/USDT:USDT'), # Futures
('XRP_USDT_USDT', 'XRP/USDT:USDT'), # futures
('BTC-PERP', 'BTC-PERP'),
('BTC-PERP_USDT', 'BTC-PERP:USDT'),
('UNITTEST_USDT', 'UNITTEST/USDT'),
])
def test_rebuild_pair_from_filename(input, expected):
assert IDataHandler.rebuild_pair_from_filename(input) == expected
def test_datahandler_ohlcv_get_available_data(testdatadir):
paircombs = JsonDataHandler.ohlcv_get_available_data(testdatadir, TradingMode.SPOT)
# Convert to set to avoid failures due to sorting
assert set(paircombs) == {
('UNITTEST/BTC', '5m', CandleType.SPOT),
('ETH/BTC', '5m', CandleType.SPOT),
('XLM/BTC', '5m', CandleType.SPOT),
('TRX/BTC', '5m', CandleType.SPOT),
('LTC/BTC', '5m', CandleType.SPOT),
('XMR/BTC', '5m', CandleType.SPOT),
('ZEC/BTC', '5m', CandleType.SPOT),
('UNITTEST/BTC', '1m', CandleType.SPOT),
('ADA/BTC', '5m', CandleType.SPOT),
('ETC/BTC', '5m', CandleType.SPOT),
('NXT/BTC', '5m', CandleType.SPOT),
('DASH/BTC', '5m', CandleType.SPOT),
('XRP/ETH', '1m', CandleType.SPOT),
('XRP/ETH', '5m', CandleType.SPOT),
('UNITTEST/BTC', '30m', CandleType.SPOT),
('UNITTEST/BTC', '8m', CandleType.SPOT),
('NOPAIR/XXX', '4m', CandleType.SPOT),
}
paircombs = JsonDataHandler.ohlcv_get_available_data(testdatadir, TradingMode.FUTURES)
# Convert to set to avoid failures due to sorting
assert set(paircombs) == {
('UNITTEST/USDT:USDT', '1h', 'mark'),
('XRP/USDT:USDT', '5m', 'futures'),
('XRP/USDT:USDT', '1h', 'futures'),
('XRP/USDT:USDT', '1h', 'mark'),
('XRP/USDT:USDT', '8h', 'mark'),
('XRP/USDT:USDT', '8h', 'funding_rate'),
}
paircombs = JsonGzDataHandler.ohlcv_get_available_data(testdatadir, TradingMode.SPOT)
assert set(paircombs) == {('UNITTEST/BTC', '8m', CandleType.SPOT)}
paircombs = HDF5DataHandler.ohlcv_get_available_data(testdatadir, TradingMode.SPOT)
assert set(paircombs) == {('UNITTEST/BTC', '5m', CandleType.SPOT)}
def test_jsondatahandler_trades_get_pairs(testdatadir):
pairs = JsonGzDataHandler.trades_get_pairs(testdatadir)
# Convert to set to avoid failures due to sorting
assert set(pairs) == {'XRP/ETH', 'XRP/OLD'}
def test_jsondatahandler_ohlcv_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = JsonGzDataHandler(testdatadir)
assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', '')
assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', candle_type='mark')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', '')
assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', candle_type='mark')
assert unlinkmock.call_count == 2
def test_jsondatahandler_ohlcv_load(testdatadir, caplog):
dh = JsonDataHandler(testdatadir)
df = dh.ohlcv_load('XRP/ETH', '5m', 'spot')
assert len(df) == 712
df_mark = dh.ohlcv_load('UNITTEST/USDT:USDT', '1h', candle_type="mark")
assert len(df_mark) == 100
df_no_mark = dh.ohlcv_load('UNITTEST/USDT', '1h', 'spot')
assert len(df_no_mark) == 0
# Failure case (empty array)
df1 = dh.ohlcv_load('NOPAIR/XXX', '4m', 'spot')
assert len(df1) == 0
assert log_has("Could not load data for NOPAIR/XXX.", caplog)
assert df.columns.equals(df1.columns)
def test_datahandler_ohlcv_data_min_max(testdatadir):
dh = JsonDataHandler(testdatadir)
min_max = dh.ohlcv_data_min_max('UNITTEST/BTC', '5m', 'spot')
assert len(min_max) == 2
# Empty pair
min_max = dh.ohlcv_data_min_max('UNITTEST/BTC', '8m', 'spot')
assert len(min_max) == 2
assert min_max[0] == datetime.fromtimestamp(0, tz=timezone.utc)
assert min_max[0] == min_max[1]
# Empty pair2
min_max = dh.ohlcv_data_min_max('NOPAIR/XXX', '4m', 'spot')
assert len(min_max) == 2
assert min_max[0] == datetime.fromtimestamp(0, tz=timezone.utc)
assert min_max[0] == min_max[1]
def test_datahandler__check_empty_df(testdatadir, caplog):
dh = JsonDataHandler(testdatadir)
expected_text = r"Price jump in UNITTEST/USDT, 1h, spot between"
df = DataFrame([
[
1511686200000, # 8:50:00
8.794, # open
8.948, # high
8.794, # low
8.88, # close
2255, # volume (in quote currency)
],
[
1511686500000, # 8:55:00
8.88,
8.942,
8.88,
8.893,
9911,
],
[
1511687100000, # 9:05:00
8.891,
8.893,
8.875,
8.877,
2251
],
[
1511687400000, # 9:10:00
8.877,
8.883,
8.895,
8.817,
123551
]
], columns=['date', 'open', 'high', 'low', 'close', 'volume'])
dh._check_empty_df(df, 'UNITTEST/USDT', '1h', CandleType.SPOT, True, True)
assert not log_has_re(expected_text, caplog)
df = DataFrame([
[
1511686200000, # 8:50:00
8.794, # open
8.948, # high
8.794, # low
8.88, # close
2255, # volume (in quote currency)
],
[
1511686500000, # 8:55:00
8.88,
8.942,
8.88,
8.893,
9911,
],
[
1511687100000, # 9:05:00
889.1, # Price jump by several decimals
889.3,
887.5,
887.7,
2251
],
[
1511687400000, # 9:10:00
8.877,
8.883,
8.895,
8.817,
123551
]
], columns=['date', 'open', 'high', 'low', 'close', 'volume'])
dh._check_empty_df(df, 'UNITTEST/USDT', '1h', CandleType.SPOT, True, True)
assert log_has_re(expected_text, caplog)
@pytest.mark.parametrize('datahandler', ['feather', 'parquet'])
def test_datahandler_trades_not_supported(datahandler, testdatadir, ):
dh = get_datahandler(testdatadir, datahandler)
with pytest.raises(NotImplementedError):
dh.trades_load('UNITTEST/ETH')
with pytest.raises(NotImplementedError):
dh.trades_store('UNITTEST/ETH', MagicMock())
def test_jsondatahandler_trades_load(testdatadir, caplog):
dh = JsonGzDataHandler(testdatadir)
logmsg = "Old trades format detected - converting"
dh.trades_load('XRP/ETH')
assert not log_has(logmsg, caplog)
# Test conversation is happening
dh.trades_load('XRP/OLD')
assert log_has(logmsg, caplog)
def test_jsondatahandler_trades_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = JsonGzDataHandler(testdatadir)
assert not dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 1
@pytest.mark.parametrize('datahandler', AVAILABLE_DATAHANDLERS)
def test_datahandler_ohlcv_append(datahandler, testdatadir, ):
dh = get_datahandler(testdatadir, datahandler)
with pytest.raises(NotImplementedError):
dh.ohlcv_append('UNITTEST/ETH', '5m', DataFrame(), CandleType.SPOT)
with pytest.raises(NotImplementedError):
dh.ohlcv_append('UNITTEST/ETH', '5m', DataFrame(), CandleType.MARK)
@pytest.mark.parametrize('datahandler', AVAILABLE_DATAHANDLERS)
def test_datahandler_trades_append(datahandler, testdatadir):
dh = get_datahandler(testdatadir, datahandler)
with pytest.raises(NotImplementedError):
dh.trades_append('UNITTEST/ETH', [])
def test_hdf5datahandler_trades_get_pairs(testdatadir):
pairs = HDF5DataHandler.trades_get_pairs(testdatadir)
# Convert to set to avoid failures due to sorting
assert set(pairs) == {'XRP/ETH'}
def test_hdf5datahandler_trades_load(testdatadir):
dh = get_datahandler(testdatadir, 'hdf5')
trades = dh.trades_load('XRP/ETH')
assert isinstance(trades, list)
trades1 = dh.trades_load('UNITTEST/NONEXIST')
assert trades1 == []
# data goes from 2019-10-11 - 2019-10-13
timerange = TimeRange.parse_timerange('20191011-20191012')
trades2 = dh._trades_load('XRP/ETH', timerange)
assert len(trades) > len(trades2)
# Check that ID is None (If it's nan, it's wrong)
assert trades2[0][2] is None
# unfiltered load has trades before starttime
assert len([t for t in trades if t[0] < timerange.startts * 1000]) >= 0
# filtered list does not have trades before starttime
assert len([t for t in trades2 if t[0] < timerange.startts * 1000]) == 0
# unfiltered load has trades after endtime
assert len([t for t in trades if t[0] > timerange.stopts * 1000]) > 0
# filtered list does not have trades after endtime
assert len([t for t in trades2 if t[0] > timerange.stopts * 1000]) == 0
def test_hdf5datahandler_trades_store(testdatadir, tmpdir):
tmpdir1 = Path(tmpdir)
dh = get_datahandler(testdatadir, 'hdf5')
trades = dh.trades_load('XRP/ETH')
dh1 = get_datahandler(tmpdir1, 'hdf5')
dh1.trades_store('XRP/NEW', trades)
file = tmpdir1 / 'XRP_NEW-trades.h5'
assert file.is_file()
# Load trades back
trades_new = dh1.trades_load('XRP/NEW')
assert len(trades_new) == len(trades)
assert trades[0][0] == trades_new[0][0]
assert trades[0][1] == trades_new[0][1]
# assert trades[0][2] == trades_new[0][2] # This is nan - so comparison does not make sense
assert trades[0][3] == trades_new[0][3]
assert trades[0][4] == trades_new[0][4]
assert trades[0][5] == trades_new[0][5]
assert trades[0][6] == trades_new[0][6]
assert trades[-1][0] == trades_new[-1][0]
assert trades[-1][1] == trades_new[-1][1]
# assert trades[-1][2] == trades_new[-1][2] # This is nan - so comparison does not make sense
assert trades[-1][3] == trades_new[-1][3]
assert trades[-1][4] == trades_new[-1][4]
assert trades[-1][5] == trades_new[-1][5]
assert trades[-1][6] == trades_new[-1][6]
def test_hdf5datahandler_trades_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = get_datahandler(testdatadir, 'hdf5')
assert not dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 1
@pytest.mark.parametrize('pair,timeframe,candle_type,candle_append,startdt,enddt', [
# Data goes from 2018-01-10 - 2018-01-30
('UNITTEST/BTC', '5m', 'spot', '', '2018-01-15', '2018-01-19'),
# Mark data goes from to 2021-11-15 2021-11-19
('UNITTEST/USDT:USDT', '1h', 'mark', '-mark', '2021-11-16', '2021-11-18'),
])
def test_hdf5datahandler_ohlcv_load_and_resave(
testdatadir,
tmpdir,
pair,
timeframe,
candle_type,
candle_append,
startdt, enddt
):
tmpdir1 = Path(tmpdir)
tmpdir2 = tmpdir1
if candle_type not in ('', 'spot'):
tmpdir2 = tmpdir1 / 'futures'
tmpdir2.mkdir()
dh = get_datahandler(testdatadir, 'hdf5')
ohlcv = dh._ohlcv_load(pair, timeframe, None, candle_type=candle_type)
assert isinstance(ohlcv, DataFrame)
assert len(ohlcv) > 0
file = tmpdir2 / f"UNITTEST_NEW-{timeframe}{candle_append}.h5"
assert not file.is_file()
dh1 = get_datahandler(tmpdir1, 'hdf5')
dh1.ohlcv_store('UNITTEST/NEW', timeframe, ohlcv, candle_type=candle_type)
assert file.is_file()
assert not ohlcv[ohlcv['date'] < startdt].empty
timerange = TimeRange.parse_timerange(f"{startdt.replace('-', '')}-{enddt.replace('-', '')}")
# Call private function to ensure timerange is filtered in hdf5
ohlcv = dh._ohlcv_load(pair, timeframe, timerange, candle_type=candle_type)
ohlcv1 = dh1._ohlcv_load('UNITTEST/NEW', timeframe, timerange, candle_type=candle_type)
assert len(ohlcv) == len(ohlcv1)
assert ohlcv.equals(ohlcv1)
assert ohlcv[ohlcv['date'] < startdt].empty
assert ohlcv[ohlcv['date'] > enddt].empty
# Try loading inexisting file
ohlcv = dh.ohlcv_load('UNITTEST/NONEXIST', timeframe, candle_type=candle_type)
assert ohlcv.empty
@pytest.mark.parametrize('pair,timeframe,candle_type,candle_append,startdt,enddt', [
# Data goes from 2018-01-10 - 2018-01-30
('UNITTEST/BTC', '5m', 'spot', '', '2018-01-15', '2018-01-19'),
# Mark data goes from to 2021-11-15 2021-11-19
('UNITTEST/USDT:USDT', '1h', 'mark', '-mark', '2021-11-16', '2021-11-18'),
])
@pytest.mark.parametrize('datahandler', ['hdf5', 'feather', 'parquet'])
def test_generic_datahandler_ohlcv_load_and_resave(
datahandler,
testdatadir,
tmpdir,
pair,
timeframe,
candle_type,
candle_append,
startdt, enddt
):
tmpdir1 = Path(tmpdir)
tmpdir2 = tmpdir1
if candle_type not in ('', 'spot'):
tmpdir2 = tmpdir1 / 'futures'
tmpdir2.mkdir()
# Load data from one common file
dhbase = get_datahandler(testdatadir, 'json')
ohlcv = dhbase._ohlcv_load(pair, timeframe, None, candle_type=candle_type)
assert isinstance(ohlcv, DataFrame)
assert len(ohlcv) > 0
# Get data to test
dh = get_datahandler(testdatadir, datahandler)
file = tmpdir2 / f"UNITTEST_NEW-{timeframe}{candle_append}.{dh._get_file_extension()}"
assert not file.is_file()
dh1 = get_datahandler(tmpdir1, datahandler)
dh1.ohlcv_store('UNITTEST/NEW', timeframe, ohlcv, candle_type=candle_type)
assert file.is_file()
assert not ohlcv[ohlcv['date'] < startdt].empty
timerange = TimeRange.parse_timerange(f"{startdt.replace('-', '')}-{enddt.replace('-', '')}")
ohlcv = dhbase.ohlcv_load(pair, timeframe, timerange=timerange, candle_type=candle_type)
if datahandler == 'hdf5':
ohlcv1 = dh1._ohlcv_load('UNITTEST/NEW', timeframe, timerange, candle_type=candle_type)
if candle_type == 'mark':
ohlcv1['volume'] = 0.0
else:
ohlcv1 = dh1.ohlcv_load('UNITTEST/NEW', timeframe,
timerange=timerange, candle_type=candle_type)
assert len(ohlcv) == len(ohlcv1)
assert ohlcv.equals(ohlcv1)
assert ohlcv[ohlcv['date'] < startdt].empty
assert ohlcv[ohlcv['date'] > enddt].empty
# Try loading inexisting file
ohlcv = dh.ohlcv_load('UNITTEST/NONEXIST', timeframe, candle_type=candle_type)
assert ohlcv.empty
def test_hdf5datahandler_ohlcv_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = get_datahandler(testdatadir, 'hdf5')
assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', '')
assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', candle_type='mark')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', '')
assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', candle_type='mark')
assert unlinkmock.call_count == 2
def test_gethandlerclass():
cl = get_datahandlerclass('json')
assert cl == JsonDataHandler
assert issubclass(cl, IDataHandler)
cl = get_datahandlerclass('jsongz')
assert cl == JsonGzDataHandler
assert issubclass(cl, IDataHandler)
assert issubclass(cl, JsonDataHandler)
cl = get_datahandlerclass('hdf5')
assert cl == HDF5DataHandler
assert issubclass(cl, IDataHandler)
cl = get_datahandlerclass('feather')
assert cl == FeatherDataHandler
assert issubclass(cl, IDataHandler)
cl = get_datahandlerclass('parquet')
assert cl == ParquetDataHandler
assert issubclass(cl, IDataHandler)
with pytest.raises(ValueError, match=r"No datahandler for .*"):
get_datahandlerclass('DeadBeef')
def test_get_datahandler(testdatadir):
dh = get_datahandler(testdatadir, 'json')
assert type(dh) == JsonDataHandler
dh = get_datahandler(testdatadir, 'jsongz')
assert type(dh) == JsonGzDataHandler
dh1 = get_datahandler(testdatadir, 'jsongz', dh)
assert id(dh1) == id(dh)
dh = get_datahandler(testdatadir, 'hdf5')
assert type(dh) == HDF5DataHandler