# stable/tests/data/test_converter.py

# pragma pylint: disable=missing-docstring, C0103
import logging

from freqtrade.configuration.timerange import TimeRange
from freqtrade.data.converter import (convert_trades_format,
                                      ohlcv_fill_up_missing_data,
                                      parse_ticker_dataframe, trim_dataframe)
from freqtrade.data.history import (get_timerange, load_data,
                                    load_pair_history, validate_backtest_data)
from tests.conftest import log_has
from tests.data.test_history import _backup_file, _clean_test_file


def test_dataframe_correct_columns(result):
    """The ``result`` fixture must expose exactly the six OHLCV columns, in order."""
    expected_columns = ['date', 'open', 'high', 'low', 'close', 'volume']
    assert result.columns.tolist() == expected_columns


def test_parse_ticker_dataframe(ticker_history_list, caplog):
    """Parsing the ticker-list fixture yields the OHLCV columns and logs the parse step."""
    expected_columns = ['date', 'open', 'high', 'low', 'close', 'volume']

    # Capture DEBUG records so the log assertion below can see the parser's message.
    caplog.set_level(logging.DEBUG)

    # Input comes from the ``ticker_history_list`` fixture, parsed as 5m candles.
    parsed = parse_ticker_dataframe(
        ticker_history_list,
        '5m',
        pair="UNITTEST/BTC",
        fill_missing=True,
    )

    assert parsed.columns.tolist() == expected_columns
    assert log_has('Parsing tickerlist to dataframe', caplog)


def test_ohlcv_fill_up_missing_data(testdatadir, caplog):
    """Fill-up on the 1m UNITTEST/BTC history adds rows, keeps columns and logs the counts.

    Also checks that ``validate_backtest_data`` is truthy for the unfilled frame
    and falsy for the filled one.
    """
    # Load the history without fill-up so the frame still has its gaps.
    unfilled = load_pair_history(
        datadir=testdatadir,
        timeframe='1m',
        pair='UNITTEST/BTC',
        fill_up_missing=False,
    )
    caplog.set_level(logging.DEBUG)
    filled = ohlcv_fill_up_missing_data(unfilled, '1m', 'UNITTEST/BTC')

    # Fill-up must add candles ...
    assert len(filled) > len(unfilled)
    # ... without touching the column layout.
    assert (unfilled.columns == filled.columns).all()

    expected_message = (f"Missing data fillup for UNITTEST/BTC: before: "
                        f"{len(unfilled)} - after: {len(filled)}")
    assert log_has(expected_message, caplog)

    # The date range is taken from the unfilled frame and reused for both validations.
    # NOTE(review): validate_backtest_data presumably returns True when candles are
    # missing - confirm against freqtrade.data.history.
    min_date, max_date = get_timerange({'UNITTEST/BTC': unfilled})
    unfilled_flagged = validate_backtest_data(unfilled, 'UNITTEST/BTC', min_date, max_date, 1)
    assert unfilled_flagged
    filled_flagged = validate_backtest_data(filled, 'UNITTEST/BTC', min_date, max_date, 1)
    assert not filled_flagged


def test_ohlcv_fill_up_missing_data2(caplog):
    """Fill-up on a hand-built 5m series inserts one flat, zero-volume candle.

    The series skips the 9:00:00 candle; after fill-up the row at index 2 must
    carry the previous close in open/high/low/close and a volume of 0.
    """
    timeframe = '5m'
    # Each candle: [timestamp (ms), open, high, low, close, volume (in quote currency)]
    ticks = [
        [1511686200000, 8.794e-05, 8.948e-05, 8.794e-05, 8.88e-05, 2255],  # 8:50:00
        [1511686500000, 8.88e-05, 8.942e-05, 8.88e-05, 8.893e-05, 9911],  # 8:55:00
        # no candle for 9:00:00 - this is the gap the fill-up has to close
        [1511687100000, 8.891e-05, 8.893e-05, 8.875e-05, 8.877e-05, 2251],  # 9:05:00
        [1511687400000, 8.877e-05, 8.883e-05, 8.895e-05, 8.817e-05, 123551],  # 9:10:00
    ]

    # Generate test-data without filling missing
    # NOTE(review): 4 ticks go in and 3 rows come out - presumably the last candle is
    # dropped as incomplete by default; confirm against parse_ticker_dataframe.
    unfilled = parse_ticker_dataframe(ticks, timeframe, pair="UNITTEST/BTC", fill_missing=False)
    assert len(unfilled) == 3

    caplog.set_level(logging.DEBUG)
    filled = ohlcv_fill_up_missing_data(unfilled, timeframe, "UNITTEST/BTC")
    assert len(filled) == 4

    # 3rd candle has been filled
    gap_candle = filled.loc[2, :]
    assert gap_candle['volume'] == 0
    # close should match close of previous candle
    assert gap_candle['close'] == unfilled.loc[1, 'close']
    # the inserted candle is flat: open, high and low all equal its close
    for price_column in ('open', 'high', 'low'):
        assert gap_candle[price_column] == gap_candle['close']

    # Column names should not change
    assert (unfilled.columns == filled.columns).all()

    expected_message = (f"Missing data fillup for UNITTEST/BTC: before: "
                        f"{len(unfilled)} - after: {len(filled)}")
    assert log_has(expected_message, caplog)


def test_ohlcv_drop_incomplete(caplog):
    """``drop_incomplete`` decides whether the last candle is removed (and logged).

    With ``drop_incomplete=False`` all four daily candles survive and nothing is
    logged; with ``drop_incomplete=True`` only three remain and
    "Dropping last candle" shows up in the log.
    """
    timeframe = '1d'
    # Each candle: [timestamp (ms), open, high, low, close, volume (in quote currency)]
    ticks = [
        [1559750400000, 8.794e-05, 8.948e-05, 8.794e-05, 8.88e-05, 2255],  # 2019-06-05 16:00 UTC
        [1559836800000, 8.88e-05, 8.942e-05, 8.88e-05, 8.893e-05, 9911],  # 2019-06-06 16:00 UTC
        [1559923200000, 8.891e-05, 8.893e-05, 8.875e-05, 8.877e-05, 2251],  # 2019-06-07 16:00 UTC
        [1560009600000, 8.877e-05, 8.883e-05, 8.895e-05, 8.817e-05, 123551],  # 2019-06-08 16:00 UTC
    ]
    shared_kwargs = {'pair': "UNITTEST/BTC", 'fill_missing': False}

    caplog.set_level(logging.DEBUG)

    # Keep the last candle: nothing is dropped and nothing is logged about it.
    kept = parse_ticker_dataframe(ticks, timeframe, drop_incomplete=False, **shared_kwargs)
    assert len(kept) == 4
    assert not log_has("Dropping last candle", caplog)

    # Drop last candle
    dropped = parse_ticker_dataframe(ticks, timeframe, drop_incomplete=True, **shared_kwargs)
    assert len(dropped) == 3
    assert log_has("Dropping last candle", caplog)


def test_trim_dataframe(testdatadir) -> None:
    """``trim_dataframe`` cuts rows off the start, the end, or both ends of 1m data.

    Each case trims a fresh copy of the loaded UNITTEST/BTC frame and checks
    the resulting length and boundary rows against the untouched original.
    """
    candles_by_pair = load_data(
        datadir=testdatadir,
        timeframe='1m',
        pairs=['UNITTEST/BTC'],
    )
    full = candles_by_pair['UNITTEST/BTC']
    # Epoch seconds of the first and last candle, used to build the time ranges.
    first_ts = int(full.iloc[0]['date'].timestamp())
    last_ts = int(full.iloc[-1]['date'].timestamp())

    # Case 1: remove the first 30 minutes (1800 s); the end stays untouched.
    head_cut = TimeRange('date', None, first_ts + 1800, 0)
    trimmed = trim_dataframe(full.copy(), head_cut)
    assert not trimmed.equals(full)
    assert len(trimmed) < len(full)
    assert len(trimmed) == len(full) - 30
    assert all(trimmed.iloc[-1] == full.iloc[-1])
    assert all(trimmed.iloc[0] == full.iloc[30])

    # Case 2: remove the last 30 minutes (1800 s); the start stays untouched.
    tail_cut = TimeRange(None, 'date', 0, last_ts - 1800)
    trimmed = trim_dataframe(full.copy(), tail_cut)
    assert not trimmed.equals(full)
    assert len(trimmed) < len(full)
    assert len(trimmed) == len(full) - 30
    assert all(trimmed.iloc[0] == full.iloc[0])
    assert all(trimmed.iloc[-1] == full.iloc[-31])

    # Case 3: remove the first 25 minutes (1500 s) and the last 30 minutes (1800 s).
    both_cut = TimeRange('date', 'date', first_ts + 1500, last_ts - 1800)
    trimmed = trim_dataframe(full.copy(), both_cut)
    assert not trimmed.equals(full)
    assert len(trimmed) < len(full)
    assert len(trimmed) == len(full) - 55
    # first remaining row is the original row at positional index 25
    assert all(trimmed.iloc[0] == full.iloc[25])


def test_convert_trades_format(mocker, default_conf, testdatadir):
    """Round-trip the XRP/ETH trades file: jsongz -> json (keep source) -> jsongz (erase).

    The test works on real files inside ``testdatadir``, so all clean-up runs in
    a ``finally`` block: a failing assertion or conversion must not leave the
    data directory in a state that breaks later runs.

    :param mocker: unused here; kept so the test's fixture signature is unchanged.
    :param default_conf: configuration dict; its ``datadir`` entry is pointed at
        ``testdatadir`` before conversion.
    :param testdatadir: directory holding ``XRP_ETH-trades.json.gz``.
    """
    file = testdatadir / "XRP_ETH-trades.json.gz"
    file_new = testdatadir / "XRP_ETH-trades.json"
    default_conf['datadir'] = testdatadir

    # Check the precondition before anything on disk is touched, so a stale
    # converted file makes the test fail without leaving a backup behind.
    assert not file_new.exists()

    # NOTE(review): _backup_file / _clean_test_file live in tests.data.test_history;
    # presumably they stash and later restore the original file - confirm there.
    _backup_file(file, copy_file=True)
    try:
        # jsongz -> json, keeping the source: both files must exist afterwards.
        convert_trades_format(default_conf, convert_from='jsongz',
                              convert_to='json', erase=False)

        assert file_new.exists()
        assert file.exists()

        # Remove original file
        file.unlink()
        # Convert back; erase=True must remove the json source again.
        convert_trades_format(default_conf, convert_from='json',
                              convert_to='jsongz', erase=True)

        assert file.exists()
        assert not file_new.exists()
    finally:
        # Always undo our changes, even when an assertion above failed.
        _clean_test_file(file)
        if file_new.exists():
            file_new.unlink()
Increase pylint score on test files 2018-01-28 07:38:41 +00:00			`# pragma pylint: disable=missing-docstring, C0103`
Adjust some tests to dataframe passing 2018-12-11 18:48:36 +00:00			`import logging`
Move Analyze to a class 2018-02-04 08:28:02 +00:00
Move trim_dataframe from history to converter 2019-12-25 14:47:04 +00:00			`from freqtrade.configuration.timerange import TimeRange`
Add test for convert_trades_Format 2019-12-28 09:45:26 +00:00			`from freqtrade.data.converter import (convert_trades_format,`
			`ohlcv_fill_up_missing_data,`
Move trim_dataframe from history to converter 2019-12-25 14:47:04 +00:00			`parse_ticker_dataframe, trim_dataframe)`
			`from freqtrade.data.history import (get_timerange, load_data,`
			`load_pair_history, validate_backtest_data)`
Adjust imports in tests to new path 2019-09-08 07:54:15 +00:00			`from tests.conftest import log_has`
Add test for convert_trades_Format 2019-12-28 09:45:26 +00:00			`from tests.data.test_history import _backup_file, _clean_test_file`
Move Analyze to a class 2018-02-04 08:28:02 +00:00

define common fixtures 2017-11-07 19:12:56 +00:00			`def test_dataframe_correct_columns(result):`
ADd test for data_interpolate 2018-12-31 08:18:22 +00:00			`assert result.columns.tolist() == ['date', 'open', 'high', 'low', 'close', 'volume']`
remove Test classes and use pytest fixtures 2017-10-01 08:02:47 +00:00
PEP8 linting 2017-10-30 23:36:35 +00:00
Implement missing_data_fillup to tests and operations 2018-12-31 18:15:05 +00:00			`def test_parse_ticker_dataframe(ticker_history_list, caplog):`
restore one analyze test 2018-07-16 05:59:14 +00:00			`columns = ['date', 'open', 'high', 'low', 'close', 'volume']`
refactor Analyze class methods to base Strategy class 2018-07-16 05:11:17 +00:00
Adjust some tests to dataframe passing 2018-12-11 18:48:36 +00:00			`caplog.set_level(logging.DEBUG)`
restore one analyze test 2018-07-16 05:59:14 +00:00			`# Test file with BV data`
Tests need to pass pair to parse_ticker_dataframe 2019-06-15 11:47:20 +00:00			`dataframe = parse_ticker_dataframe(ticker_history_list, '5m',`
			`pair="UNITTEST/BTC", fill_missing=True)`
restore one analyze test 2018-07-16 05:59:14 +00:00			`assert dataframe.columns.tolist() == columns`
Change log_has for some tests 2019-08-11 18:16:34 +00:00			`assert log_has('Parsing tickerlist to dataframe', caplog)`
ADd test for data_interpolate 2018-12-31 08:18:22 +00:00

Use fixture to determine test_data_dir 2019-09-07 18:56:03 +00:00			`def test_ohlcv_fill_up_missing_data(testdatadir, caplog):`
			`data = load_pair_history(datadir=testdatadir,`
Replace some occurances of ticker_interval with timeframe 2019-11-02 19:19:13 +00:00			`timeframe='1m',`
Implement missing_data_fillup to tests and operations 2018-12-31 18:15:05 +00:00			`pair='UNITTEST/BTC',`
			`fill_up_missing=False)`
ADd test for data_interpolate 2018-12-31 08:18:22 +00:00			`caplog.set_level(logging.DEBUG)`
Tests need to pass pair to parse_ticker_dataframe 2019-06-15 11:47:20 +00:00			`data2 = ohlcv_fill_up_missing_data(data, '1m', 'UNITTEST/BTC')`
ADd test for data_interpolate 2018-12-31 08:18:22 +00:00			`assert len(data2) > len(data)`
			`# Column names should not change`
			`assert (data.columns == data2.columns).all()`

Tests need to pass pair to parse_ticker_dataframe 2019-06-15 11:47:20 +00:00			`assert log_has(f"Missing data fillup for UNITTEST/BTC: before: "`
Change log_has for some tests 2019-08-11 18:16:34 +00:00			`f"{len(data)} - after: {len(data2)}", caplog)`
add test to verify data does not contain missing data afterwards 2018-12-31 08:24:04 +00:00
			`# Test fillup actually fixes invalid backtest data`
Minor improvements in data.history 2019-12-17 22:06:03 +00:00			`min_date, max_date = get_timerange({'UNITTEST/BTC': data})`
Adapt tests for new validate_backtest signature 2019-06-15 11:32:05 +00:00			`assert validate_backtest_data(data, 'UNITTEST/BTC', min_date, max_date, 1)`
			`assert not validate_backtest_data(data2, 'UNITTEST/BTC', min_date, max_date, 1)`
Add explicit test for ohlcv fillup 2018-12-31 18:40:14 +00:00

			`def test_ohlcv_fill_up_missing_data2(caplog):`
Some more places with ticker_interval gone 2019-11-02 19:26:26 +00:00			`timeframe = '5m'`
Add explicit test for ohlcv fillup 2018-12-31 18:40:14 +00:00			`ticks = [[`
			`1511686200000, # 8:50:00`
			`8.794e-05, # open`
			`8.948e-05, # high`
			`8.794e-05, # low`
			`8.88e-05, # close`
			`2255, # volume (in quote currency)`
			`],`
			`[`
			`1511686500000, # 8:55:00`
			`8.88e-05,`
			`8.942e-05,`
			`8.88e-05,`
			`8.893e-05,`
			`9911,`
			`],`
			`[`
			`1511687100000, # 9:05:00`
			`8.891e-05,`
			`8.893e-05,`
			`8.875e-05,`
			`8.877e-05,`
			`2251`
			`],`
			`[`
			`1511687400000, # 9:10:00`
			`8.877e-05,`
			`8.883e-05,`
			`8.895e-05,`
			`8.817e-05,`
			`123551`
			`]`
			`]`

			`# Generate test-data without filling missing`
Some more places with ticker_interval gone 2019-11-02 19:26:26 +00:00			`data = parse_ticker_dataframe(ticks, timeframe, pair="UNITTEST/BTC", fill_missing=False)`
Add explicit test for ohlcv fillup 2018-12-31 18:40:14 +00:00			`assert len(data) == 3`
			`caplog.set_level(logging.DEBUG)`
Some more places with ticker_interval gone 2019-11-02 19:26:26 +00:00			`data2 = ohlcv_fill_up_missing_data(data, timeframe, "UNITTEST/BTC")`
Add explicit test for ohlcv fillup 2018-12-31 18:40:14 +00:00			`assert len(data2) == 4`
			`# 3rd candle has been filled`
			`row = data2.loc[2, :]`
			`assert row['volume'] == 0`
			`# close shoult match close of previous candle`
			`assert row['close'] == data.loc[1, 'close']`
			`assert row['open'] == row['close']`
			`assert row['high'] == row['close']`
			`assert row['low'] == row['close']`
			`# Column names should not change`
			`assert (data.columns == data2.columns).all()`

Fix Line too long error 2019-06-15 14:58:17 +00:00			`assert log_has(f"Missing data fillup for UNITTEST/BTC: before: "`
Change log_has for some tests 2019-08-11 18:16:34 +00:00			`f"{len(data)} - after: {len(data2)}", caplog)`
Add test for drop_incomplete option 2019-06-09 12:51:58 +00:00

			`def test_ohlcv_drop_incomplete(caplog):`
Some more places with ticker_interval gone 2019-11-02 19:26:26 +00:00			`timeframe = '1d'`
Add test for drop_incomplete option 2019-06-09 12:51:58 +00:00			`ticks = [[`
			`1559750400000, # 2019-06-04`
			`8.794e-05, # open`
			`8.948e-05, # high`
			`8.794e-05, # low`
			`8.88e-05, # close`
			`2255, # volume (in quote currency)`
			`],`
			`[`
			`1559836800000, # 2019-06-05`
			`8.88e-05,`
			`8.942e-05,`
			`8.88e-05,`
			`8.893e-05,`
			`9911,`
			`],`
			`[`
			`1559923200000, # 2019-06-06`
			`8.891e-05,`
			`8.893e-05,`
			`8.875e-05,`
			`8.877e-05,`
			`2251`
			`],`
			`[`
			`1560009600000, # 2019-06-07`
			`8.877e-05,`
			`8.883e-05,`
			`8.895e-05,`
			`8.817e-05,`
			`123551`
			`]`
			`]`
			`caplog.set_level(logging.DEBUG)`
Some more places with ticker_interval gone 2019-11-02 19:26:26 +00:00			`data = parse_ticker_dataframe(ticks, timeframe, pair="UNITTEST/BTC",`
Tests need to pass pair to parse_ticker_dataframe 2019-06-15 11:47:20 +00:00			`fill_missing=False, drop_incomplete=False)`
Add test for drop_incomplete option 2019-06-09 12:51:58 +00:00			`assert len(data) == 4`
Change log_has for some tests 2019-08-11 18:16:34 +00:00			`assert not log_has("Dropping last candle", caplog)`
Add test for drop_incomplete option 2019-06-09 12:51:58 +00:00
			`# Drop last candle`
Some more places with ticker_interval gone 2019-11-02 19:26:26 +00:00			`data = parse_ticker_dataframe(ticks, timeframe, pair="UNITTEST/BTC",`
Tests need to pass pair to parse_ticker_dataframe 2019-06-15 11:47:20 +00:00			`fill_missing=False, drop_incomplete=True)`
Add test for drop_incomplete option 2019-06-09 12:51:58 +00:00			`assert len(data) == 3`

Change log_has for some tests 2019-08-11 18:16:34 +00:00			`assert log_has("Dropping last candle", caplog)`
Move trim_dataframe from history to converter 2019-12-25 14:47:04 +00:00

			`def test_trim_dataframe(testdatadir) -> None:`
			`data = load_data(`
			`datadir=testdatadir,`
			`timeframe='1m',`
			`pairs=['UNITTEST/BTC']`
			`)['UNITTEST/BTC']`
			`min_date = int(data.iloc[0]['date'].timestamp())`
			`max_date = int(data.iloc[-1]['date'].timestamp())`
			`data_modify = data.copy()`

			`# Remove first 30 minutes (1800 s)`
			`tr = TimeRange('date', None, min_date + 1800, 0)`
			`data_modify = trim_dataframe(data_modify, tr)`
			`assert not data_modify.equals(data)`
			`assert len(data_modify) < len(data)`
			`assert len(data_modify) == len(data) - 30`
			`assert all(data_modify.iloc[-1] == data.iloc[-1])`
			`assert all(data_modify.iloc[0] == data.iloc[30])`

			`data_modify = data.copy()`
			`# Remove last 30 minutes (1800 s)`
			`tr = TimeRange(None, 'date', 0, max_date - 1800)`
			`data_modify = trim_dataframe(data_modify, tr)`
			`assert not data_modify.equals(data)`
			`assert len(data_modify) < len(data)`
			`assert len(data_modify) == len(data) - 30`
			`assert all(data_modify.iloc[0] == data.iloc[0])`
			`assert all(data_modify.iloc[-1] == data.iloc[-31])`

			`data_modify = data.copy()`
			`# Remove first 25 and last 30 minutes (1800 s)`
			`tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800)`
			`data_modify = trim_dataframe(data_modify, tr)`
			`assert not data_modify.equals(data)`
			`assert len(data_modify) < len(data)`
			`assert len(data_modify) == len(data) - 55`
			`# first row matches 25th original row`
			`assert all(data_modify.iloc[0] == data.iloc[25])`
Add test for convert_trades_Format 2019-12-28 09:45:26 +00:00

			`def test_convert_trades_format(mocker, default_conf, testdatadir):`
			`file = testdatadir / "XRP_ETH-trades.json.gz"`
			`file_new = testdatadir / "XRP_ETH-trades.json"`
			`_backup_file(file, copy_file=True)`
			`default_conf['datadir'] = testdatadir`

			`assert not file_new.exists()`

			`convert_trades_format(default_conf, convert_from='jsongz',`
			`convert_to='json', erase=False)`

			`assert file_new.exists()`
			`assert file.exists()`

			`# Remove original file`
			`file.unlink()`
			`# Convert back`
			`convert_trades_format(default_conf, convert_from='json',`
			`convert_to='jsongz', erase=True)`

			`assert file.exists()`
			`assert not file_new.exists()`

			`_clean_test_file(file)`
			`if file_new.exists():`
			`file_new.unlink()`