stable/tests/data/test_history.py

1001 lines
39 KiB
Python

# pragma pylint: disable=missing-docstring, protected-access, C0103
import json
import re
import uuid
from pathlib import Path
from shutil import copyfile
from unittest.mock import MagicMock, PropertyMock
import arrow
import pytest
from pandas import DataFrame
from pandas.testing import assert_frame_equal
from freqtrade.configuration import TimeRange
from freqtrade.constants import AVAILABLE_DATAHANDLERS
from freqtrade.data.converter import ohlcv_to_dataframe
from freqtrade.data.history.hdf5datahandler import HDF5DataHandler
from freqtrade.data.history.history_utils import (_download_pair_history, _download_trades_history,
_load_cached_data_for_updating,
convert_trades_to_ohlcv, get_timerange, load_data,
load_pair_history, refresh_backtest_ohlcv_data,
refresh_backtest_trades_data, refresh_data,
validate_backtest_data)
from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler, get_datahandlerclass
from freqtrade.data.history.jsondatahandler import JsonDataHandler, JsonGzDataHandler
from freqtrade.enums import CandleType, TradingMode
from freqtrade.exchange import timeframe_to_minutes
from freqtrade.misc import file_dump_json
from freqtrade.resolvers import StrategyResolver
from tests.conftest import (CURRENT_TEST_STRATEGY, get_patched_exchange, log_has, log_has_re,
patch_exchange)
# Change this if modifying UNITTEST/BTC testdatafile
_BTC_UNITTEST_LENGTH = 13681
def _backup_file(file: Path, copy_file: bool = False) -> None:
"""
Backup existing file to avoid deleting the user file
:param file: complete path to the file
:param copy_file: keep file in place too.
:return: None
"""
file_swp = str(file) + '.swp'
if file.is_file():
file.rename(file_swp)
if copy_file:
copyfile(file_swp, file)
def _clean_test_file(file: Path) -> None:
"""
Backup existing file to avoid deleting the user file
:param file: complete path to the file
:return: None
"""
file_swp = Path(str(file) + '.swp')
# 1. Delete file from the test
if file.is_file():
file.unlink()
# 2. Rollback to the initial file
if file_swp.is_file():
file_swp.rename(file)
def test_load_data_30min_timeframe(mocker, caplog, default_conf, testdatadir) -> None:
ld = load_pair_history(pair='UNITTEST/BTC', timeframe='30m', datadir=testdatadir)
assert isinstance(ld, DataFrame)
assert not log_has(
'Download history data for pair: "UNITTEST/BTC", timeframe: 30m '
'and store in None.', caplog
)
def test_load_data_7min_timeframe(mocker, caplog, default_conf, testdatadir) -> None:
ld = load_pair_history(pair='UNITTEST/BTC', timeframe='7m', datadir=testdatadir)
assert isinstance(ld, DataFrame)
assert ld.empty
assert log_has(
'No history for UNITTEST/BTC, spot, 7m found. '
'Use `freqtrade download-data` to download the data', caplog
)
def test_load_data_1min_timeframe(ohlcv_history, mocker, caplog, testdatadir) -> None:
mocker.patch('freqtrade.exchange.Exchange.get_historic_ohlcv', return_value=ohlcv_history)
file = testdatadir / 'UNITTEST_BTC-1m.json'
load_data(datadir=testdatadir, timeframe='1m', pairs=['UNITTEST/BTC'])
assert file.is_file()
assert not log_has(
'Download history data for pair: "UNITTEST/BTC", interval: 1m '
'and store in None.', caplog
)
def test_load_data_mark(ohlcv_history, mocker, caplog, testdatadir) -> None:
mocker.patch('freqtrade.exchange.Exchange.get_historic_ohlcv', return_value=ohlcv_history)
file = testdatadir / 'futures/UNITTEST_USDT-1h-mark.json'
load_data(datadir=testdatadir, timeframe='1h', pairs=['UNITTEST/BTC'], candle_type='mark')
assert file.is_file()
assert not log_has(
'Download history data for pair: "UNITTEST/USDT", interval: 1m '
'and store in None.', caplog
)
def test_load_data_startup_candles(mocker, caplog, default_conf, testdatadir) -> None:
ltfmock = mocker.patch(
'freqtrade.data.history.jsondatahandler.JsonDataHandler._ohlcv_load',
MagicMock(return_value=DataFrame()))
timerange = TimeRange('date', None, 1510639620, 0)
load_pair_history(pair='UNITTEST/BTC', timeframe='1m',
datadir=testdatadir, timerange=timerange,
startup_candles=20,)
assert ltfmock.call_count == 1
assert ltfmock.call_args_list[0][1]['timerange'] != timerange
# startts is 20 minutes earlier
assert ltfmock.call_args_list[0][1]['timerange'].startts == timerange.startts - 20 * 60
@pytest.mark.parametrize('candle_type', ['mark', ''])
def test_load_data_with_new_pair_1min(ohlcv_history_list, mocker, caplog,
default_conf, tmpdir, candle_type) -> None:
"""
Test load_pair_history() with 1 min timeframe
"""
tmpdir1 = Path(tmpdir)
mocker.patch('freqtrade.exchange.Exchange.get_historic_ohlcv', return_value=ohlcv_history_list)
exchange = get_patched_exchange(mocker, default_conf)
file = tmpdir1 / 'MEME_BTC-1m.json'
# do not download a new pair if refresh_pairs isn't set
load_pair_history(datadir=tmpdir1, timeframe='1m', pair='MEME/BTC', candle_type=candle_type)
assert not file.is_file()
assert log_has(
f"No history for MEME/BTC, {candle_type}, 1m found. "
"Use `freqtrade download-data` to download the data", caplog
)
# download a new pair if refresh_pairs is set
refresh_data(datadir=tmpdir1, timeframe='1m', pairs=['MEME/BTC'],
exchange=exchange, candle_type=CandleType.SPOT
)
load_pair_history(datadir=tmpdir1, timeframe='1m', pair='MEME/BTC', candle_type=candle_type)
assert file.is_file()
assert log_has_re(
r'Download history data for pair: "MEME/BTC" \(0/1\), timeframe: 1m, '
r'candle type: spot and store in .*', caplog
)
def test_testdata_path(testdatadir) -> None:
assert str(Path('tests') / 'testdata') in str(testdatadir)
@pytest.mark.parametrize("pair,expected_result,candle_type", [
("ETH/BTC", 'freqtrade/hello/world/ETH_BTC-5m.json', ""),
("Fabric Token/ETH", 'freqtrade/hello/world/Fabric_Token_ETH-5m.json', ""),
("ETHH20", 'freqtrade/hello/world/ETHH20-5m.json', ""),
(".XBTBON2H", 'freqtrade/hello/world/_XBTBON2H-5m.json', ""),
("ETHUSD.d", 'freqtrade/hello/world/ETHUSD_d-5m.json', ""),
("ACC_OLD/BTC", 'freqtrade/hello/world/ACC_OLD_BTC-5m.json', ""),
("ETH/BTC", 'freqtrade/hello/world/futures/ETH_BTC-5m-mark.json', "mark"),
("ACC_OLD/BTC", 'freqtrade/hello/world/futures/ACC_OLD_BTC-5m-index.json', "index"),
])
def test_json_pair_data_filename(pair, expected_result, candle_type):
fn = JsonDataHandler._pair_data_filename(
Path('freqtrade/hello/world'),
pair,
'5m',
CandleType.from_string(candle_type)
)
assert isinstance(fn, Path)
assert fn == Path(expected_result)
fn = JsonGzDataHandler._pair_data_filename(
Path('freqtrade/hello/world'),
pair,
'5m',
candle_type=CandleType.from_string(candle_type)
)
assert isinstance(fn, Path)
assert fn == Path(expected_result + '.gz')
@pytest.mark.parametrize("pair,expected_result", [
("ETH/BTC", 'freqtrade/hello/world/ETH_BTC-trades.json'),
("Fabric Token/ETH", 'freqtrade/hello/world/Fabric_Token_ETH-trades.json'),
("ETHH20", 'freqtrade/hello/world/ETHH20-trades.json'),
(".XBTBON2H", 'freqtrade/hello/world/_XBTBON2H-trades.json'),
("ETHUSD.d", 'freqtrade/hello/world/ETHUSD_d-trades.json'),
("ACC_OLD_BTC", 'freqtrade/hello/world/ACC_OLD_BTC-trades.json'),
])
def test_json_pair_trades_filename(pair, expected_result):
fn = JsonDataHandler._pair_trades_filename(Path('freqtrade/hello/world'), pair)
assert isinstance(fn, Path)
assert fn == Path(expected_result)
fn = JsonGzDataHandler._pair_trades_filename(Path('freqtrade/hello/world'), pair)
assert isinstance(fn, Path)
assert fn == Path(expected_result + '.gz')
def test_load_cached_data_for_updating(mocker, testdatadir) -> None:
data_handler = get_datahandler(testdatadir, 'json')
test_data = None
test_filename = testdatadir.joinpath('UNITTEST_BTC-1m.json')
with open(test_filename, "rt") as file:
test_data = json.load(file)
test_data_df = ohlcv_to_dataframe(test_data, '1m', 'UNITTEST/BTC',
fill_missing=False, drop_incomplete=False)
# now = last cached item + 1 hour
now_ts = test_data[-1][0] / 1000 + 60 * 60
mocker.patch('arrow.utcnow', return_value=arrow.get(now_ts))
# timeframe starts earlier than the cached data
# should fully update data
timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
data, start_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert data.empty
assert start_ts == test_data[0][0] - 1000
# timeframe starts in the center of the cached data
# should return the cached data w/o the last item
timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0)
data, start_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert_frame_equal(data, test_data_df.iloc[:-1])
assert test_data[-2][0] <= start_ts < test_data[-1][0]
# timeframe starts after the cached data
# should return the cached data w/o the last item
timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 100, 0)
data, start_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert_frame_equal(data, test_data_df.iloc[:-1])
assert test_data[-2][0] <= start_ts < test_data[-1][0]
# no datafile exist
# should return timestamp start time
timerange = TimeRange('date', None, now_ts - 10000, 0)
data, start_ts = _load_cached_data_for_updating(
'NONEXIST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert data.empty
assert start_ts == (now_ts - 10000) * 1000
# no datafile exist, no timeframe is set
# should return an empty array and None
data, start_ts = _load_cached_data_for_updating(
'NONEXIST/BTC', '1m', None, data_handler, CandleType.SPOT)
assert data.empty
assert start_ts is None
@pytest.mark.parametrize('candle_type,subdir,file_tail', [
('mark', 'futures/', '-mark'),
('spot', '', ''),
])
def test_download_pair_history(
ohlcv_history_list,
mocker,
default_conf,
tmpdir,
candle_type,
subdir,
file_tail
) -> None:
mocker.patch('freqtrade.exchange.Exchange.get_historic_ohlcv', return_value=ohlcv_history_list)
exchange = get_patched_exchange(mocker, default_conf)
tmpdir1 = Path(tmpdir)
file1_1 = tmpdir1 / f'{subdir}MEME_BTC-1m{file_tail}.json'
file1_5 = tmpdir1 / f'{subdir}MEME_BTC-5m{file_tail}.json'
file2_1 = tmpdir1 / f'{subdir}CFI_BTC-1m{file_tail}.json'
file2_5 = tmpdir1 / f'{subdir}CFI_BTC-5m{file_tail}.json'
assert not file1_1.is_file()
assert not file2_1.is_file()
assert _download_pair_history(datadir=tmpdir1, exchange=exchange,
pair='MEME/BTC',
timeframe='1m',
candle_type=candle_type)
assert _download_pair_history(datadir=tmpdir1, exchange=exchange,
pair='CFI/BTC',
timeframe='1m',
candle_type=candle_type)
assert not exchange._pairs_last_refresh_time
assert file1_1.is_file()
assert file2_1.is_file()
# clean files freshly downloaded
_clean_test_file(file1_1)
_clean_test_file(file2_1)
assert not file1_5.is_file()
assert not file2_5.is_file()
assert _download_pair_history(datadir=tmpdir1, exchange=exchange,
pair='MEME/BTC',
timeframe='5m',
candle_type=candle_type)
assert _download_pair_history(datadir=tmpdir1, exchange=exchange,
pair='CFI/BTC',
timeframe='5m',
candle_type=candle_type)
assert not exchange._pairs_last_refresh_time
assert file1_5.is_file()
assert file2_5.is_file()
def test_download_pair_history2(mocker, default_conf, testdatadir) -> None:
tick = [
[1509836520000, 0.00162008, 0.00162008, 0.00162008, 0.00162008, 108.14853839],
[1509836580000, 0.00161, 0.00161, 0.00161, 0.00161, 82.390199]
]
json_dump_mock = mocker.patch(
'freqtrade.data.history.jsondatahandler.JsonDataHandler.ohlcv_store',
return_value=None)
mocker.patch('freqtrade.exchange.Exchange.get_historic_ohlcv', return_value=tick)
exchange = get_patched_exchange(mocker, default_conf)
_download_pair_history(datadir=testdatadir, exchange=exchange, pair="UNITTEST/BTC",
timeframe='1m', candle_type='spot')
_download_pair_history(datadir=testdatadir, exchange=exchange, pair="UNITTEST/BTC",
timeframe='3m', candle_type='spot')
_download_pair_history(datadir=testdatadir, exchange=exchange, pair="UNITTEST/USDT",
timeframe='1h', candle_type='mark')
assert json_dump_mock.call_count == 3
def test_download_backtesting_data_exception(mocker, caplog, default_conf, tmpdir) -> None:
mocker.patch('freqtrade.exchange.Exchange.get_historic_ohlcv',
side_effect=Exception('File Error'))
tmpdir1 = Path(tmpdir)
exchange = get_patched_exchange(mocker, default_conf)
assert not _download_pair_history(datadir=tmpdir1, exchange=exchange,
pair='MEME/BTC',
timeframe='1m', candle_type='spot')
assert log_has('Failed to download history data for pair: "MEME/BTC", timeframe: 1m.', caplog)
def test_load_partial_missing(testdatadir, caplog) -> None:
# Make sure we start fresh - test missing data at start
start = arrow.get('2018-01-01T00:00:00')
end = arrow.get('2018-01-11T00:00:00')
data = load_data(testdatadir, '5m', ['UNITTEST/BTC'], startup_candles=20,
timerange=TimeRange('date', 'date', start.int_timestamp, end.int_timestamp))
assert log_has(
'Using indicator startup period: 20 ...', caplog
)
# timedifference in 5 minutes
td = ((end - start).total_seconds() // 60 // 5) + 1
assert td != len(data['UNITTEST/BTC'])
start_real = data['UNITTEST/BTC'].iloc[0, 0]
assert log_has(f'UNITTEST/BTC, spot, 5m, '
f'data starts at {start_real.strftime("%Y-%m-%d %H:%M:%S")}',
caplog)
# Make sure we start fresh - test missing data at end
caplog.clear()
start = arrow.get('2018-01-10T00:00:00')
end = arrow.get('2018-02-20T00:00:00')
data = load_data(datadir=testdatadir, timeframe='5m', pairs=['UNITTEST/BTC'],
timerange=TimeRange('date', 'date', start.int_timestamp, end.int_timestamp))
# timedifference in 5 minutes
td = ((end - start).total_seconds() // 60 // 5) + 1
assert td != len(data['UNITTEST/BTC'])
# Shift endtime with +5 - as last candle is dropped (partial candle)
end_real = arrow.get(data['UNITTEST/BTC'].iloc[-1, 0]).shift(minutes=5)
assert log_has(f'UNITTEST/BTC, spot, 5m, '
f'data ends at {end_real.strftime("%Y-%m-%d %H:%M:%S")}',
caplog)
def test_init(default_conf, mocker) -> None:
assert {} == load_data(
datadir=Path(''),
pairs=[],
timeframe=default_conf['timeframe']
)
def test_init_with_refresh(default_conf, mocker) -> None:
exchange = get_patched_exchange(mocker, default_conf)
refresh_data(
datadir=Path(''),
pairs=[],
timeframe=default_conf['timeframe'],
exchange=exchange,
candle_type=CandleType.SPOT
)
assert {} == load_data(
datadir=Path(''),
pairs=[],
timeframe=default_conf['timeframe']
)
def test_file_dump_json_tofile(testdatadir) -> None:
file = testdatadir / 'test_{id}.json'.format(id=str(uuid.uuid4()))
data = {'bar': 'foo'}
# check the file we will create does not exist
assert not file.is_file()
# Create the Json file
file_dump_json(file, data)
# Check the file was create
assert file.is_file()
# Open the Json file created and test the data is in it
with file.open() as data_file:
json_from_file = json.load(data_file)
assert 'bar' in json_from_file
assert json_from_file['bar'] == 'foo'
# Remove the file
_clean_test_file(file)
def test_get_timerange(default_conf, mocker, testdatadir) -> None:
patch_exchange(mocker)
default_conf.update({'strategy': CURRENT_TEST_STRATEGY})
strategy = StrategyResolver.load_strategy(default_conf)
data = strategy.advise_all_indicators(
load_data(
datadir=testdatadir,
timeframe='1m',
pairs=['UNITTEST/BTC']
)
)
min_date, max_date = get_timerange(data)
assert min_date.isoformat() == '2017-11-04T23:02:00+00:00'
assert max_date.isoformat() == '2017-11-14T22:58:00+00:00'
def test_validate_backtest_data_warn(default_conf, mocker, caplog, testdatadir) -> None:
patch_exchange(mocker)
default_conf.update({'strategy': CURRENT_TEST_STRATEGY})
strategy = StrategyResolver.load_strategy(default_conf)
data = strategy.advise_all_indicators(
load_data(
datadir=testdatadir,
timeframe='1m',
pairs=['UNITTEST/BTC'],
fill_up_missing=False
)
)
min_date, max_date = get_timerange(data)
caplog.clear()
assert validate_backtest_data(data['UNITTEST/BTC'], 'UNITTEST/BTC',
min_date, max_date, timeframe_to_minutes('1m'))
assert len(caplog.record_tuples) == 1
assert log_has(
"UNITTEST/BTC has missing frames: expected 14396, got 13680, that's 716 missing values",
caplog)
def test_validate_backtest_data(default_conf, mocker, caplog, testdatadir) -> None:
patch_exchange(mocker)
default_conf.update({'strategy': CURRENT_TEST_STRATEGY})
strategy = StrategyResolver.load_strategy(default_conf)
timerange = TimeRange('index', 'index', 200, 250)
data = strategy.advise_all_indicators(
load_data(
datadir=testdatadir,
timeframe='5m',
pairs=['UNITTEST/BTC'],
timerange=timerange
)
)
min_date, max_date = get_timerange(data)
caplog.clear()
assert not validate_backtest_data(data['UNITTEST/BTC'], 'UNITTEST/BTC',
min_date, max_date, timeframe_to_minutes('5m'))
assert len(caplog.record_tuples) == 0
@pytest.mark.parametrize('trademode,callcount', [
('spot', 4),
('margin', 4),
('futures', 8), # Called 8 times - 4 normal, 2 funding and 2 mark/index calls
])
def test_refresh_backtest_ohlcv_data(
mocker, default_conf, markets, caplog, testdatadir, trademode, callcount):
dl_mock = mocker.patch('freqtrade.data.history.history_utils._download_pair_history',
MagicMock())
mocker.patch(
'freqtrade.exchange.Exchange.markets', PropertyMock(return_value=markets)
)
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
mocker.patch.object(Path, "unlink", MagicMock())
ex = get_patched_exchange(mocker, default_conf)
timerange = TimeRange.parse_timerange("20190101-20190102")
refresh_backtest_ohlcv_data(exchange=ex, pairs=["ETH/BTC", "XRP/BTC"],
timeframes=["1m", "5m"], datadir=testdatadir,
timerange=timerange, erase=True,
trading_mode=trademode
)
assert dl_mock.call_count == callcount
assert dl_mock.call_args[1]['timerange'].starttype == 'date'
assert log_has("Downloading pair ETH/BTC, interval 1m.", caplog)
def test_download_data_no_markets(mocker, default_conf, caplog, testdatadir):
dl_mock = mocker.patch('freqtrade.data.history.history_utils._download_pair_history',
MagicMock())
ex = get_patched_exchange(mocker, default_conf)
mocker.patch(
'freqtrade.exchange.Exchange.markets', PropertyMock(return_value={})
)
timerange = TimeRange.parse_timerange("20190101-20190102")
unav_pairs = refresh_backtest_ohlcv_data(exchange=ex, pairs=["BTT/BTC", "LTC/USDT"],
timeframes=["1m", "5m"],
datadir=testdatadir,
timerange=timerange, erase=False,
trading_mode='spot'
)
assert dl_mock.call_count == 0
assert "BTT/BTC" in unav_pairs
assert "LTC/USDT" in unav_pairs
assert log_has("Skipping pair BTT/BTC...", caplog)
def test_refresh_backtest_trades_data(mocker, default_conf, markets, caplog, testdatadir):
dl_mock = mocker.patch('freqtrade.data.history.history_utils._download_trades_history',
MagicMock())
mocker.patch(
'freqtrade.exchange.Exchange.markets', PropertyMock(return_value=markets)
)
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
mocker.patch.object(Path, "unlink", MagicMock())
ex = get_patched_exchange(mocker, default_conf)
timerange = TimeRange.parse_timerange("20190101-20190102")
unavailable_pairs = refresh_backtest_trades_data(exchange=ex,
pairs=["ETH/BTC", "XRP/BTC", "XRP/ETH"],
datadir=testdatadir,
timerange=timerange, erase=True
)
assert dl_mock.call_count == 2
assert dl_mock.call_args[1]['timerange'].starttype == 'date'
assert log_has("Downloading trades for pair ETH/BTC.", caplog)
assert unavailable_pairs == ["XRP/ETH"]
assert log_has("Skipping pair XRP/ETH...", caplog)
def test_download_trades_history(trades_history, mocker, default_conf, testdatadir, caplog,
tmpdir) -> None:
tmpdir1 = Path(tmpdir)
ght_mock = MagicMock(side_effect=lambda pair, *args, **kwargs: (pair, trades_history))
mocker.patch('freqtrade.exchange.Exchange.get_historic_trades',
ght_mock)
exchange = get_patched_exchange(mocker, default_conf)
file1 = tmpdir1 / 'ETH_BTC-trades.json.gz'
data_handler = get_datahandler(tmpdir1, data_format='jsongz')
assert not file1.is_file()
assert _download_trades_history(data_handler=data_handler, exchange=exchange,
pair='ETH/BTC')
assert log_has("New Amount of trades: 5", caplog)
assert file1.is_file()
ght_mock.reset_mock()
since_time = int(trades_history[-3][0] // 1000)
since_time2 = int(trades_history[-1][0] // 1000)
timerange = TimeRange('date', None, since_time, 0)
assert _download_trades_history(data_handler=data_handler, exchange=exchange,
pair='ETH/BTC', timerange=timerange)
assert ght_mock.call_count == 1
# Check this in seconds - since we had to convert to seconds above too.
assert int(ght_mock.call_args_list[0][1]['since'] // 1000) == since_time2 - 5
assert ght_mock.call_args_list[0][1]['from_id'] is not None
file1.unlink()
mocker.patch('freqtrade.exchange.Exchange.get_historic_trades',
MagicMock(side_effect=ValueError))
assert not _download_trades_history(data_handler=data_handler, exchange=exchange,
pair='ETH/BTC')
assert log_has_re('Failed to download historic trades for pair: "ETH/BTC".*', caplog)
file2 = tmpdir1 / 'XRP_ETH-trades.json.gz'
copyfile(testdatadir / file2.name, file2)
ght_mock.reset_mock()
mocker.patch('freqtrade.exchange.Exchange.get_historic_trades',
ght_mock)
# Since before first start date
since_time = int(trades_history[0][0] // 1000) - 500
timerange = TimeRange('date', None, since_time, 0)
assert _download_trades_history(data_handler=data_handler, exchange=exchange,
pair='XRP/ETH', timerange=timerange)
assert ght_mock.call_count == 1
assert int(ght_mock.call_args_list[0][1]['since'] // 1000) == since_time
assert ght_mock.call_args_list[0][1]['from_id'] is None
assert log_has_re(r'Start earlier than available data. Redownloading trades for.*', caplog)
_clean_test_file(file2)
def test_convert_trades_to_ohlcv(testdatadir, tmpdir, caplog):
tmpdir1 = Path(tmpdir)
pair = 'XRP/ETH'
file1 = tmpdir1 / 'XRP_ETH-1m.json'
file5 = tmpdir1 / 'XRP_ETH-5m.json'
filetrades = tmpdir1 / 'XRP_ETH-trades.json.gz'
copyfile(testdatadir / file1.name, file1)
copyfile(testdatadir / file5.name, file5)
copyfile(testdatadir / filetrades.name, filetrades)
# Compare downloaded dataset with converted dataset
dfbak_1m = load_pair_history(datadir=tmpdir1, timeframe="1m", pair=pair)
dfbak_5m = load_pair_history(datadir=tmpdir1, timeframe="5m", pair=pair)
tr = TimeRange.parse_timerange('20191011-20191012')
convert_trades_to_ohlcv([pair], timeframes=['1m', '5m'],
datadir=tmpdir1, timerange=tr, erase=True)
assert log_has("Deleting existing data for pair XRP/ETH, interval 1m.", caplog)
# Load new data
df_1m = load_pair_history(datadir=tmpdir1, timeframe="1m", pair=pair)
df_5m = load_pair_history(datadir=tmpdir1, timeframe="5m", pair=pair)
assert df_1m.equals(dfbak_1m)
assert df_5m.equals(dfbak_5m)
assert not log_has('Could not convert NoDatapair to OHLCV.', caplog)
convert_trades_to_ohlcv(['NoDatapair'], timeframes=['1m', '5m'],
datadir=tmpdir1, timerange=tr, erase=True)
assert log_has('Could not convert NoDatapair to OHLCV.', caplog)
def test_datahandler_ohlcv_get_pairs(testdatadir):
pairs = JsonDataHandler.ohlcv_get_pairs(testdatadir, '5m', candle_type=CandleType.SPOT)
# Convert to set to avoid failures due to sorting
assert set(pairs) == {'UNITTEST/BTC', 'XLM/BTC', 'ETH/BTC', 'TRX/BTC', 'LTC/BTC',
'XMR/BTC', 'ZEC/BTC', 'ADA/BTC', 'ETC/BTC', 'NXT/BTC',
'DASH/BTC', 'XRP/ETH'}
pairs = JsonGzDataHandler.ohlcv_get_pairs(testdatadir, '8m', candle_type=CandleType.SPOT)
assert set(pairs) == {'UNITTEST/BTC'}
pairs = HDF5DataHandler.ohlcv_get_pairs(testdatadir, '5m', candle_type=CandleType.SPOT)
assert set(pairs) == {'UNITTEST/BTC'}
pairs = JsonDataHandler.ohlcv_get_pairs(testdatadir, '1h', candle_type=CandleType.MARK)
assert set(pairs) == {'UNITTEST/USDT', 'XRP/USDT'}
pairs = JsonGzDataHandler.ohlcv_get_pairs(testdatadir, '1h', candle_type=CandleType.FUTURES)
assert set(pairs) == {'XRP/USDT'}
pairs = HDF5DataHandler.ohlcv_get_pairs(testdatadir, '1h', candle_type=CandleType.MARK)
assert set(pairs) == {'UNITTEST/USDT:USDT'}
@pytest.mark.parametrize('filename,pair,timeframe,candletype', [
('XMR_BTC-5m.json', 'XMR_BTC', '5m', ''),
('XMR_USDT-1h.h5', 'XMR_USDT', '1h', ''),
('BTC-PERP-1h.h5', 'BTC-PERP', '1h', ''),
('BTC_USDT-2h.jsongz', 'BTC_USDT', '2h', ''),
('BTC_USDT-2h-mark.jsongz', 'BTC_USDT', '2h', 'mark'),
('XMR_USDT-1h-mark.h5', 'XMR_USDT', '1h', 'mark'),
('XMR_USDT-1h-random.h5', 'XMR_USDT', '1h', 'random'),
('BTC-PERP-1h-index.h5', 'BTC-PERP', '1h', 'index'),
('XMR_USDT_USDT-1h-mark.h5', 'XMR_USDT_USDT', '1h', 'mark'),
])
def test_datahandler_ohlcv_regex(filename, pair, timeframe, candletype):
regex = JsonDataHandler._OHLCV_REGEX
match = re.search(regex, filename)
assert len(match.groups()) > 1
assert match[1] == pair
assert match[2] == timeframe
assert match[3] == candletype
@pytest.mark.parametrize('input,expected', [
('XMR_USDT', 'XMR/USDT'),
('BTC_USDT', 'BTC/USDT'),
('USDT_BUSD', 'USDT/BUSD'),
('BTC_USDT_USDT', 'BTC/USDT:USDT'), # Futures
('XRP_USDT_USDT', 'XRP/USDT:USDT'), # futures
('BTC-PERP', 'BTC-PERP'),
('BTC-PERP_USDT', 'BTC-PERP:USDT'), # potential FTX case
('UNITTEST_USDT', 'UNITTEST/USDT'),
])
def test_rebuild_pair_from_filename(input, expected):
assert IDataHandler.rebuild_pair_from_filename(input) == expected
def test_datahandler_ohlcv_get_available_data(testdatadir):
paircombs = JsonDataHandler.ohlcv_get_available_data(testdatadir, TradingMode.SPOT)
# Convert to set to avoid failures due to sorting
assert set(paircombs) == {
('UNITTEST/BTC', '5m', CandleType.SPOT),
('ETH/BTC', '5m', CandleType.SPOT),
('XLM/BTC', '5m', CandleType.SPOT),
('TRX/BTC', '5m', CandleType.SPOT),
('LTC/BTC', '5m', CandleType.SPOT),
('XMR/BTC', '5m', CandleType.SPOT),
('ZEC/BTC', '5m', CandleType.SPOT),
('UNITTEST/BTC', '1m', CandleType.SPOT),
('ADA/BTC', '5m', CandleType.SPOT),
('ETC/BTC', '5m', CandleType.SPOT),
('NXT/BTC', '5m', CandleType.SPOT),
('DASH/BTC', '5m', CandleType.SPOT),
('XRP/ETH', '1m', CandleType.SPOT),
('XRP/ETH', '5m', CandleType.SPOT),
('UNITTEST/BTC', '30m', CandleType.SPOT),
('UNITTEST/BTC', '8m', CandleType.SPOT),
('NOPAIR/XXX', '4m', CandleType.SPOT),
}
paircombs = JsonDataHandler.ohlcv_get_available_data(testdatadir, TradingMode.FUTURES)
# Convert to set to avoid failures due to sorting
assert set(paircombs) == {
('UNITTEST/USDT', '1h', 'mark'),
('XRP/USDT', '1h', 'futures'),
('XRP/USDT', '1h', 'mark'),
('XRP/USDT', '8h', 'mark'),
('XRP/USDT', '8h', 'funding_rate'),
}
paircombs = JsonGzDataHandler.ohlcv_get_available_data(testdatadir, TradingMode.SPOT)
assert set(paircombs) == {('UNITTEST/BTC', '8m', CandleType.SPOT)}
paircombs = HDF5DataHandler.ohlcv_get_available_data(testdatadir, TradingMode.SPOT)
assert set(paircombs) == {('UNITTEST/BTC', '5m', CandleType.SPOT)}
def test_jsondatahandler_trades_get_pairs(testdatadir):
pairs = JsonGzDataHandler.trades_get_pairs(testdatadir)
# Convert to set to avoid failures due to sorting
assert set(pairs) == {'XRP/ETH', 'XRP/OLD'}
def test_jsondatahandler_ohlcv_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = JsonGzDataHandler(testdatadir)
assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', '')
assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', candle_type='mark')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', '')
assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', candle_type='mark')
assert unlinkmock.call_count == 2
def test_jsondatahandler_ohlcv_load(testdatadir, caplog):
dh = JsonDataHandler(testdatadir)
df = dh.ohlcv_load('XRP/ETH', '5m', 'spot')
assert len(df) == 711
df_mark = dh.ohlcv_load('UNITTEST/USDT', '1h', candle_type="mark")
assert len(df_mark) == 99
df_no_mark = dh.ohlcv_load('UNITTEST/USDT', '1h', 'spot')
assert len(df_no_mark) == 0
# Failure case (empty array)
df1 = dh.ohlcv_load('NOPAIR/XXX', '4m', 'spot')
assert len(df1) == 0
assert log_has("Could not load data for NOPAIR/XXX.", caplog)
assert df.columns.equals(df1.columns)
def test_jsondatahandler_trades_load(testdatadir, caplog):
dh = JsonGzDataHandler(testdatadir)
logmsg = "Old trades format detected - converting"
dh.trades_load('XRP/ETH')
assert not log_has(logmsg, caplog)
# Test conversation is happening
dh.trades_load('XRP/OLD')
assert log_has(logmsg, caplog)
def test_jsondatahandler_trades_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = JsonGzDataHandler(testdatadir)
assert not dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 1
@pytest.mark.parametrize('datahandler', AVAILABLE_DATAHANDLERS)
def test_datahandler_ohlcv_append(datahandler, testdatadir, ):
dh = get_datahandler(testdatadir, datahandler)
with pytest.raises(NotImplementedError):
dh.ohlcv_append('UNITTEST/ETH', '5m', DataFrame(), CandleType.SPOT)
with pytest.raises(NotImplementedError):
dh.ohlcv_append('UNITTEST/ETH', '5m', DataFrame(), CandleType.MARK)
@pytest.mark.parametrize('datahandler', AVAILABLE_DATAHANDLERS)
def test_datahandler_trades_append(datahandler, testdatadir):
dh = get_datahandler(testdatadir, datahandler)
with pytest.raises(NotImplementedError):
dh.trades_append('UNITTEST/ETH', [])
def test_hdf5datahandler_trades_get_pairs(testdatadir):
pairs = HDF5DataHandler.trades_get_pairs(testdatadir)
# Convert to set to avoid failures due to sorting
assert set(pairs) == {'XRP/ETH'}
def test_hdf5datahandler_trades_load(testdatadir):
dh = HDF5DataHandler(testdatadir)
trades = dh.trades_load('XRP/ETH')
assert isinstance(trades, list)
trades1 = dh.trades_load('UNITTEST/NONEXIST')
assert trades1 == []
# data goes from 2019-10-11 - 2019-10-13
timerange = TimeRange.parse_timerange('20191011-20191012')
trades2 = dh._trades_load('XRP/ETH', timerange)
assert len(trades) > len(trades2)
# Check that ID is None (If it's nan, it's wrong)
assert trades2[0][2] is None
# unfiltered load has trades before starttime
assert len([t for t in trades if t[0] < timerange.startts * 1000]) >= 0
# filtered list does not have trades before starttime
assert len([t for t in trades2 if t[0] < timerange.startts * 1000]) == 0
# unfiltered load has trades after endtime
assert len([t for t in trades if t[0] > timerange.stopts * 1000]) > 0
# filtered list does not have trades after endtime
assert len([t for t in trades2 if t[0] > timerange.stopts * 1000]) == 0
def test_hdf5datahandler_trades_store(testdatadir, tmpdir):
tmpdir1 = Path(tmpdir)
dh = HDF5DataHandler(testdatadir)
trades = dh.trades_load('XRP/ETH')
dh1 = HDF5DataHandler(tmpdir1)
dh1.trades_store('XRP/NEW', trades)
file = tmpdir1 / 'XRP_NEW-trades.h5'
assert file.is_file()
# Load trades back
trades_new = dh1.trades_load('XRP/NEW')
assert len(trades_new) == len(trades)
assert trades[0][0] == trades_new[0][0]
assert trades[0][1] == trades_new[0][1]
# assert trades[0][2] == trades_new[0][2] # This is nan - so comparison does not make sense
assert trades[0][3] == trades_new[0][3]
assert trades[0][4] == trades_new[0][4]
assert trades[0][5] == trades_new[0][5]
assert trades[0][6] == trades_new[0][6]
assert trades[-1][0] == trades_new[-1][0]
assert trades[-1][1] == trades_new[-1][1]
# assert trades[-1][2] == trades_new[-1][2] # This is nan - so comparison does not make sense
assert trades[-1][3] == trades_new[-1][3]
assert trades[-1][4] == trades_new[-1][4]
assert trades[-1][5] == trades_new[-1][5]
assert trades[-1][6] == trades_new[-1][6]
def test_hdf5datahandler_trades_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = HDF5DataHandler(testdatadir)
assert not dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.trades_purge('UNITTEST/NONEXIST')
assert unlinkmock.call_count == 1
@pytest.mark.parametrize('pair,timeframe,candle_type,candle_append,startdt,enddt', [
# Data goes from 2018-01-10 - 2018-01-30
('UNITTEST/BTC', '5m', 'spot', '', '2018-01-15', '2018-01-19'),
# Mark data goes from to 2021-11-15 2021-11-19
('UNITTEST/USDT:USDT', '1h', 'mark', '-mark', '2021-11-16', '2021-11-18'),
])
def test_hdf5datahandler_ohlcv_load_and_resave(
testdatadir,
tmpdir,
pair,
timeframe,
candle_type,
candle_append,
startdt, enddt
):
tmpdir1 = Path(tmpdir)
tmpdir2 = tmpdir1
if candle_type not in ('', 'spot'):
tmpdir2 = tmpdir1 / 'futures'
tmpdir2.mkdir()
dh = HDF5DataHandler(testdatadir)
ohlcv = dh._ohlcv_load(pair, timeframe, None, candle_type=candle_type)
assert isinstance(ohlcv, DataFrame)
assert len(ohlcv) > 0
file = tmpdir2 / f"UNITTEST_NEW-{timeframe}{candle_append}.h5"
assert not file.is_file()
dh1 = HDF5DataHandler(tmpdir1)
dh1.ohlcv_store('UNITTEST/NEW', timeframe, ohlcv, candle_type=candle_type)
assert file.is_file()
assert not ohlcv[ohlcv['date'] < startdt].empty
timerange = TimeRange.parse_timerange(f"{startdt.replace('-', '')}-{enddt.replace('-', '')}")
# Call private function to ensure timerange is filtered in hdf5
ohlcv = dh._ohlcv_load(pair, timeframe, timerange, candle_type=candle_type)
ohlcv1 = dh1._ohlcv_load('UNITTEST/NEW', timeframe, timerange, candle_type=candle_type)
assert len(ohlcv) == len(ohlcv1)
assert ohlcv.equals(ohlcv1)
assert ohlcv[ohlcv['date'] < startdt].empty
assert ohlcv[ohlcv['date'] > enddt].empty
# Try loading inexisting file
ohlcv = dh.ohlcv_load('UNITTEST/NONEXIST', timeframe, candle_type=candle_type)
assert ohlcv.empty
def test_hdf5datahandler_ohlcv_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
dh = HDF5DataHandler(testdatadir)
assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', '')
assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', candle_type='mark')
assert unlinkmock.call_count == 0
mocker.patch.object(Path, "exists", MagicMock(return_value=True))
assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', '')
assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m', candle_type='mark')
assert unlinkmock.call_count == 2
def test_gethandlerclass():
cl = get_datahandlerclass('json')
assert cl == JsonDataHandler
assert issubclass(cl, IDataHandler)
cl = get_datahandlerclass('jsongz')
assert cl == JsonGzDataHandler
assert issubclass(cl, IDataHandler)
assert issubclass(cl, JsonDataHandler)
cl = get_datahandlerclass('hdf5')
assert cl == HDF5DataHandler
assert issubclass(cl, IDataHandler)
with pytest.raises(ValueError, match=r"No datahandler for .*"):
get_datahandlerclass('DeadBeef')
def test_get_datahandler(testdatadir):
dh = get_datahandler(testdatadir, 'json')
assert type(dh) == JsonDataHandler
dh = get_datahandler(testdatadir, 'jsongz')
assert type(dh) == JsonGzDataHandler
dh1 = get_datahandler(testdatadir, 'jsongz', dh)
assert id(dh1) == id(dh)
dh = get_datahandler(testdatadir, 'hdf5')
assert type(dh) == HDF5DataHandler