stable/tests/data/test_converter.py

317 lines
10 KiB
Python
Raw Normal View History

2018-01-28 07:38:41 +00:00
# pragma pylint: disable=missing-docstring, C0103
2018-12-11 18:48:36 +00:00
import logging
2018-02-04 08:28:02 +00:00
import pytest
from freqtrade.configuration.timerange import TimeRange
2020-09-28 17:43:15 +00:00
from freqtrade.data.converter import (convert_ohlcv_format, convert_trades_format,
ohlcv_fill_up_missing_data, ohlcv_to_dataframe,
trades_dict_to_list, trades_remove_duplicates,
trades_to_ohlcv, trim_dataframe)
2020-09-28 17:43:15 +00:00
from freqtrade.data.history import (get_timerange, load_data, load_pair_history,
validate_backtest_data)
2019-09-08 07:54:15 +00:00
from tests.conftest import log_has
2019-12-28 09:45:26 +00:00
from tests.data.test_history import _backup_file, _clean_test_file
2018-02-04 08:28:02 +00:00
2017-11-07 19:12:56 +00:00
def test_dataframe_correct_columns(result):
    """The ``result`` fixture must expose exactly the OHLCV columns, in this order."""
    expected_columns = ['date', 'open', 'high', 'low', 'close', 'volume']
    assert result.columns.tolist() == expected_columns
2017-10-30 23:36:35 +00:00
def test_ohlcv_to_dataframe(ohlcv_history_list, caplog):
    """Convert a raw candle list and check the resulting columns and log message."""
    # The conversion message is looked up in caplog below, so capture from DEBUG upwards.
    caplog.set_level(logging.DEBUG)
    expected_columns = ['date', 'open', 'high', 'low', 'close', 'volume']

    converted = ohlcv_to_dataframe(
        ohlcv_history_list,
        '5m',
        pair="UNITTEST/BTC",
        fill_missing=True,
    )

    assert converted.columns.tolist() == expected_columns
    assert log_has('Converting candle (OHLCV) data to dataframe for pair UNITTEST/BTC.', caplog)
2018-12-31 08:18:22 +00:00
def test_trades_to_ohlcv(ohlcv_history_list, caplog):
    """Check that ``trades_to_ohlcv`` rejects empty input and aggregates trades.

    An empty trade list must raise ``ValueError``. Three trades whose
    timestamps fall within the same minute must collapse into a single
    '1m' candle whose high/low equal the highest/lowest trade price.
    """
    caplog.set_level(logging.DEBUG)

    with pytest.raises(ValueError, match="Trade-list empty."):
        trades_to_ohlcv([], '1m')

    # Rows presumably follow the [timestamp, id, type, side, price, amount, cost]
    # layout asserted in test_trades_dict_to_list -- confirm against the converter.
    trades = [
        [1570752011620, "13519807", None, "sell", 0.00141342, 23.0, 0.03250866],
        [1570752011620, "13519808", None, "sell", 0.00141266, 54.0, 0.07628364],
        [1570752017964, "13519809", None, "sell", 0.00141266, 8.0, 0.01130128],
    ]
    df = trades_to_ohlcv(trades, '1m')

    assert not df.empty
    assert len(df) == 1
    for column in ('open', 'high', 'low', 'close'):
        assert column in df.columns

    # Use explicit positional access: integer keys passed to Series.__getitem__
    # only work by label, or through a deprecated positional fallback.
    assert df['high'].iloc[0] == 0.00141342
    assert df['low'].iloc[0] == 0.00141266
2019-09-07 18:56:03 +00:00
def test_ohlcv_fill_up_missing_data(testdatadir, caplog):
    """Fill gaps in the UNITTEST/BTC 1m history and verify the outcome.

    The filled frame must be longer than the raw one, keep the same columns,
    produce the fill-up log message and no longer be flagged by
    ``validate_backtest_data``.
    """
    pair = 'UNITTEST/BTC'
    raw = load_pair_history(
        datadir=testdatadir,
        timeframe='1m',
        pair=pair,
        fill_up_missing=False,
    )
    caplog.set_level(logging.DEBUG)
    filled = ohlcv_fill_up_missing_data(raw, '1m', pair)

    assert len(filled) > len(raw)
    # Column names should not change
    assert (raw.columns == filled.columns).all()

    expected_message = (f"Missing data fillup for UNITTEST/BTC: before: "
                        f"{len(raw)} - after: {len(filled)}")
    assert log_has(expected_message, caplog)

    # Test fillup actually fixes invalid backtest data:
    # the raw frame is flagged by the validator, the filled frame is not.
    min_date, max_date = get_timerange({pair: raw})
    assert validate_backtest_data(raw, pair, min_date, max_date, 1)
    assert not validate_backtest_data(filled, pair, min_date, max_date, 1)
2018-12-31 18:40:14 +00:00
def test_ohlcv_fill_up_missing_data2(caplog):
    """Fill a single missing 5m candle and check the values of the inserted row.

    The input skips the 9:00 candle. After fill-up that slot must exist with
    zero volume and open/high/low/close all equal to the previous close.
    """
    timeframe = '5m'
    pair = "UNITTEST/BTC"
    # Each candle is [timestamp_ms, open, high, low, close, volume].
    candles = [
        [
            1511686200000,  # 8:50:00
            8.794e-05,  # open
            8.948e-05,  # high
            8.794e-05,  # low
            8.88e-05,  # close
            2255,  # volume
        ],
        [
            1511686500000,  # 8:55:00
            8.88e-05,
            8.942e-05,
            8.88e-05,
            8.893e-05,
            9911,
        ],
        [
            1511687100000,  # 9:05:00 (the 9:00:00 candle is missing on purpose)
            8.891e-05,
            8.893e-05,
            8.875e-05,
            8.877e-05,
            2251,
        ],
        [
            1511687400000,  # 9:10:00
            8.877e-05,
            8.883e-05,
            8.895e-05,
            8.817e-05,
            123551,
        ],
    ]

    # Generate test-data without filling missing
    unfilled = ohlcv_to_dataframe(candles, timeframe, pair=pair, fill_missing=False)
    # Four candles in, three out -- presumably the last one is dropped as
    # incomplete by default; confirm against ohlcv_to_dataframe.
    assert len(unfilled) == 3

    caplog.set_level(logging.DEBUG)
    filled = ohlcv_fill_up_missing_data(unfilled, timeframe, pair)
    assert len(filled) == 4

    # 3rd candle has been filled
    inserted = filled.loc[2, :]
    assert inserted['volume'] == 0
    # close should match close of previous candle
    assert inserted['close'] == unfilled.loc[1, 'close']
    for price_field in ('open', 'high', 'low'):
        assert inserted[price_field] == inserted['close']

    # Column names should not change
    assert (unfilled.columns == filled.columns).all()

    expected_message = (f"Missing data fillup for UNITTEST/BTC: before: "
                        f"{len(unfilled)} - after: {len(filled)}")
    assert log_has(expected_message, caplog)
2019-06-09 12:51:58 +00:00
def test_ohlcv_drop_incomplete(caplog):
    """Check that ``drop_incomplete`` decides whether the last candle is removed.

    With ``drop_incomplete=False`` all four candles are kept and nothing is
    logged; with ``drop_incomplete=True`` only three remain and the
    "Dropping last candle" message is logged.
    """
    timeframe = '1d'
    pair = "UNITTEST/BTC"
    # Four candles spaced exactly one day (86 400 000 ms) apart.
    # Each candle is [timestamp_ms, open, high, low, close, volume].
    candles = [
        [
            1559750400000,  # 2019-06-05 16:00 UTC
            8.794e-05,  # open
            8.948e-05,  # high
            8.794e-05,  # low
            8.88e-05,  # close
            2255,  # volume
        ],
        [
            1559836800000,  # 2019-06-06 16:00 UTC
            8.88e-05,
            8.942e-05,
            8.88e-05,
            8.893e-05,
            9911,
        ],
        [
            1559923200000,  # 2019-06-07 16:00 UTC
            8.891e-05,
            8.893e-05,
            8.875e-05,
            8.877e-05,
            2251,
        ],
        [
            1560009600000,  # 2019-06-08 16:00 UTC
            8.877e-05,
            8.883e-05,
            8.895e-05,
            8.817e-05,
            123551,
        ],
    ]
    caplog.set_level(logging.DEBUG)

    # Keep every candle
    kept = ohlcv_to_dataframe(candles, timeframe, pair=pair,
                              fill_missing=False, drop_incomplete=False)
    assert len(kept) == 4
    assert not log_has("Dropping last candle", caplog)

    # Drop last candle
    trimmed = ohlcv_to_dataframe(candles, timeframe, pair=pair,
                                 fill_missing=False, drop_incomplete=True)
    assert len(trimmed) == 3
    assert log_has("Dropping last candle", caplog)
def test_trim_dataframe(testdatadir) -> None:
    """Check ``trim_dataframe`` against start-only, stop-only and two-sided ranges.

    Uses the UNITTEST/BTC 1m history. The expected row counts assume one row
    per minute around both ends of the data, so an offset of 1800 s
    corresponds to 30 rows and 1500 s to 25 rows.
    """
    data = load_data(
        datadir=testdatadir,
        timeframe='1m',
        pairs=['UNITTEST/BTC']
    )['UNITTEST/BTC']
    min_date = int(data.iloc[0]['date'].timestamp())
    max_date = int(data.iloc[-1]['date'].timestamp())

    # Remove first 30 minutes (1800 s)
    data_modify = data.copy()
    tr = TimeRange('date', None, min_date + 1800, 0)
    data_modify = trim_dataframe(data_modify, tr)
    assert not data_modify.equals(data)
    assert len(data_modify) < len(data)
    assert len(data_modify) == len(data) - 30
    assert all(data_modify.iloc[-1] == data.iloc[-1])
    assert all(data_modify.iloc[0] == data.iloc[30])

    # Remove last 30 minutes (1800 s)
    data_modify = data.copy()
    tr = TimeRange(None, 'date', 0, max_date - 1800)
    data_modify = trim_dataframe(data_modify, tr)
    assert not data_modify.equals(data)
    assert len(data_modify) < len(data)
    assert len(data_modify) == len(data) - 30
    assert all(data_modify.iloc[0] == data.iloc[0])
    assert all(data_modify.iloc[-1] == data.iloc[-31])

    # Remove first 25 minutes (1500 s) and last 30 minutes (1800 s)
    data_modify = data.copy()
    tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800)
    data_modify = trim_dataframe(data_modify, tr)
    assert not data_modify.equals(data)
    assert len(data_modify) < len(data)
    assert len(data_modify) == len(data) - 55
    # first row matches the original row at position 25 (25 rows were dropped)
    assert all(data_modify.iloc[0] == data.iloc[25])
    # last row matches the same original row as in the stop-only case above,
    # since both use the identical stop bound
    assert all(data_modify.iloc[-1] == data.iloc[-31])
2019-12-28 09:45:26 +00:00
2020-04-01 05:23:43 +00:00
def test_trades_remove_duplicates(trades_history):
    """Tripling the trade list and de-duplicating must give back the original list."""
    tripled = trades_history * 3
    assert len(tripled) == len(trades_history) * 3

    deduplicated = trades_remove_duplicates(tripled)
    assert len(deduplicated) == len(trades_history)
    # Lengths are equal at this point, so a pairwise walk covers every entry.
    for actual, expected in zip(deduplicated, trades_history):
        assert actual == expected
def test_trades_dict_to_list(fetch_trades_result):
    """Each trade dict must become a list with its values in a fixed column order."""
    # Position in the converted row -> key of the source trade dict.
    column_order = ('timestamp', 'id', 'type', 'side', 'price', 'amount', 'cost')

    converted = trades_dict_to_list(fetch_trades_result)
    assert isinstance(converted, list)
    assert isinstance(converted[0], list)

    for index, row in enumerate(converted):
        source = fetch_trades_result[index]
        for position, key in enumerate(column_order):
            assert row[position] == source[key]
2019-12-28 09:45:26 +00:00
def test_convert_trades_format(mocker, default_conf, testdatadir):
    """Round-trip trades files jsongz -> json -> jsongz inside the test data dir.

    The first conversion (``erase=False``) must create the ``.json`` files and
    keep the ``.json.gz`` sources. After the sources are removed, converting
    back with ``erase=True`` must recreate the ``.json.gz`` files and remove
    the ``.json`` ones.

    Cleanup runs in a ``finally`` block so a failed assertion does not leave
    backup or converted files behind in the test data directory.
    """
    files = [
        {'old': testdatadir / "XRP_ETH-trades.json.gz",
         'new': testdatadir / "XRP_ETH-trades.json"},
        {'old': testdatadir / "XRP_OLD-trades.json.gz",
         'new': testdatadir / "XRP_OLD-trades.json"},
    ]
    # Take the backups before entering the try block, so cleanup only ever
    # runs for files that were actually backed up.
    for file in files:
        _backup_file(file['old'], copy_file=True)

    try:
        for file in files:
            assert not file['new'].exists()

        default_conf['datadir'] = testdatadir

        convert_trades_format(default_conf, convert_from='jsongz',
                              convert_to='json', erase=False)

        for file in files:
            assert file['new'].exists()
            assert file['old'].exists()

            # Remove original file
            file['old'].unlink()

        # Convert back
        convert_trades_format(default_conf, convert_from='json',
                              convert_to='jsongz', erase=True)

        for file in files:
            assert file['old'].exists()
            assert not file['new'].exists()
    finally:
        for file in files:
            # NOTE(review): relies on _clean_test_file undoing _backup_file
            # (helper lives in tests.data.test_history) -- confirm there.
            _clean_test_file(file['old'])
            if file['new'].exists():
                file['new'].unlink()
2019-12-28 09:54:10 +00:00
def test_convert_ohlcv_format(mocker, default_conf, testdatadir):
    """Round-trip OHLCV files json -> jsongz -> json inside the test data dir.

    The first conversion (``erase=False``) must create the ``.json.gz`` files
    and keep the ``.json`` sources. After the sources are removed, converting
    back with ``erase=True`` must recreate the ``.json`` files and remove the
    ``.json.gz`` ones.

    Cleanup runs in a ``finally`` block so a failed assertion does not leave
    backup or converted files behind in the test data directory.
    """
    file1 = testdatadir / "XRP_ETH-5m.json"
    file1_new = testdatadir / "XRP_ETH-5m.json.gz"
    file2 = testdatadir / "XRP_ETH-1m.json"
    file2_new = testdatadir / "XRP_ETH-1m.json.gz"
    # Take the backups before entering the try block, so cleanup only ever
    # runs for files that were actually backed up.
    _backup_file(file1, copy_file=True)
    _backup_file(file2, copy_file=True)

    try:
        default_conf['datadir'] = testdatadir
        default_conf['pairs'] = ['XRP_ETH']
        default_conf['timeframes'] = ['1m', '5m']

        assert not file1_new.exists()
        assert not file2_new.exists()

        convert_ohlcv_format(default_conf, convert_from='json',
                             convert_to='jsongz', erase=False)

        assert file1_new.exists()
        assert file2_new.exists()
        assert file1.exists()
        assert file2.exists()

        # Remove original files
        file1.unlink()
        file2.unlink()

        # Convert back
        convert_ohlcv_format(default_conf, convert_from='jsongz',
                             convert_to='json', erase=True)

        assert file1.exists()
        assert file2.exists()
        assert not file1_new.exists()
        assert not file2_new.exists()
    finally:
        # NOTE(review): relies on _clean_test_file undoing _backup_file
        # (helper lives in tests.data.test_history) -- confirm there.
        _clean_test_file(file1)
        _clean_test_file(file2)
        for converted in (file1_new, file2_new):
            if converted.exists():
                converted.unlink()