Cleanup in data.history

This commit is contained in:
hroff-1902 2019-10-06 18:10:40 +03:00
parent 553a1b90ba
commit 211b9cbe04
2 changed files with 34 additions and 29 deletions

View File

@ -82,10 +82,19 @@ def store_tickerdata_file(datadir: Path, pair: str,
misc.file_dump_json(filename, data, is_zip=is_zip) misc.file_dump_json(filename, data, is_zip=is_zip)
def _validate_pairdata(pair, pairdata, timerange: TimeRange):
if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000:
logger.warning('Missing data at start for pair %s, data starts at %s',
pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000:
logger.warning('Missing data at end for pair %s, data ends at %s',
pair, arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
def load_pair_history(pair: str, def load_pair_history(pair: str,
ticker_interval: str, ticker_interval: str,
datadir: Path, datadir: Path,
timerange: TimeRange = TimeRange(None, None, 0, 0), timerange: Optional[TimeRange] = None,
refresh_pairs: bool = False, refresh_pairs: bool = False,
exchange: Optional[Exchange] = None, exchange: Optional[Exchange] = None,
fill_up_missing: bool = True, fill_up_missing: bool = True,
@ -116,13 +125,8 @@ def load_pair_history(pair: str,
pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange) pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange)
if pairdata: if pairdata:
if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000: if timerange:
logger.warning('Missing data at start for pair %s, data starts at %s', _validate_pairdata(pair, pairdata, timerange)
pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000:
logger.warning('Missing data at end for pair %s, data ends at %s',
pair,
arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
return parse_ticker_dataframe(pairdata, ticker_interval, pair=pair, return parse_ticker_dataframe(pairdata, ticker_interval, pair=pair,
fill_missing=fill_up_missing, fill_missing=fill_up_missing,
drop_incomplete=drop_incomplete) drop_incomplete=drop_incomplete)
@ -139,7 +143,7 @@ def load_data(datadir: Path,
pairs: List[str], pairs: List[str],
refresh_pairs: bool = False, refresh_pairs: bool = False,
exchange: Optional[Exchange] = None, exchange: Optional[Exchange] = None,
timerange: TimeRange = TimeRange(None, None, 0, 0), timerange: Optional[TimeRange] = None,
fill_up_missing: bool = True, fill_up_missing: bool = True,
) -> Dict[str, DataFrame]: ) -> Dict[str, DataFrame]:
""" """
@ -169,13 +173,14 @@ def pair_data_filename(datadir: Path, pair: str, ticker_interval: str) -> Path:
return filename return filename
def load_cached_data_for_updating(datadir: Path, pair: str, ticker_interval: str, def _load_cached_data_for_updating(datadir: Path, pair: str, ticker_interval: str,
timerange: Optional[TimeRange]) -> Tuple[List[Any], timerange: Optional[TimeRange]) -> Tuple[List[Any],
Optional[int]]: Optional[int]]:
""" """
Load cached data to download more data. Load cached data to download more data.
If timerange is passed in, checks wether data from an before the stored data will be downloaded. If timerange is passed in, checks whether data from an before the stored data will be
If that's the case than what's available should be completely overwritten. downloaded.
If that's the case then what's available should be completely overwritten.
Only used by download_pair_history(). Only used by download_pair_history().
""" """
@ -238,7 +243,7 @@ def download_pair_history(datadir: Path,
f'and store in {datadir}.' f'and store in {datadir}.'
) )
data, since_ms = load_cached_data_for_updating(datadir, pair, ticker_interval, timerange) data, since_ms = _load_cached_data_for_updating(datadir, pair, ticker_interval, timerange)
logger.debug("Current Start: %s", misc.format_ms_time(data[1][0]) if data else 'None') logger.debug("Current Start: %s", misc.format_ms_time(data[1][0]) if data else 'None')
logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None') logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')
@ -266,7 +271,7 @@ def download_pair_history(datadir: Path,
def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes: List[str], def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes: List[str],
dl_path: Path, timerange: TimeRange, dl_path: Path, timerange: Optional[TimeRange] = None,
erase=False) -> List[str]: erase=False) -> List[str]:
""" """
Refresh stored ohlcv data for backtesting and hyperopt operations. Refresh stored ohlcv data for backtesting and hyperopt operations.

View File

@ -15,7 +15,7 @@ from freqtrade import OperationalException
from freqtrade.configuration import TimeRange from freqtrade.configuration import TimeRange
from freqtrade.data import history from freqtrade.data import history
from freqtrade.data.history import (download_pair_history, from freqtrade.data.history import (download_pair_history,
load_cached_data_for_updating, _load_cached_data_for_updating,
load_tickerdata_file, load_tickerdata_file,
refresh_backtest_ohlcv_data, refresh_backtest_ohlcv_data,
trim_tickerlist) trim_tickerlist)
@ -151,43 +151,43 @@ def test_load_cached_data_for_updating(mocker) -> None:
# timeframe starts earlier than the cached data # timeframe starts earlier than the cached data
# should fully update data # should fully update data
timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0) timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange) data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == [] assert data == []
assert start_ts == test_data[0][0] - 1000 assert start_ts == test_data[0][0] - 1000
# same with 'line' timeframe # same with 'line' timeframe
num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 120 num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 120
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m',
TimeRange(None, 'line', 0, -num_lines)) TimeRange(None, 'line', 0, -num_lines))
assert data == [] assert data == []
assert start_ts < test_data[0][0] - 1 assert start_ts < test_data[0][0] - 1
# timeframe starts in the center of the cached data # timeframe starts in the center of the cached data
# should return the chached data w/o the last item # should return the chached data w/o the last item
timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0) timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange) data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1] assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0] assert test_data[-2][0] < start_ts < test_data[-1][0]
# same with 'line' timeframe # same with 'line' timeframe
num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 30 num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 30
timerange = TimeRange(None, 'line', 0, -num_lines) timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange) data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1] assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0] assert test_data[-2][0] < start_ts < test_data[-1][0]
# timeframe starts after the chached data # timeframe starts after the chached data
# should return the chached data w/o the last item # should return the chached data w/o the last item
timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 1, 0) timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 1, 0)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange) data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1] assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0] assert test_data[-2][0] < start_ts < test_data[-1][0]
# Try loading last 30 lines. # Try loading last 30 lines.
# Not supported by load_cached_data_for_updating, we always need to get the full data. # Not supported by _load_cached_data_for_updating, we always need to get the full data.
num_lines = 30 num_lines = 30
timerange = TimeRange(None, 'line', 0, -num_lines) timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange) data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1] assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0] assert test_data[-2][0] < start_ts < test_data[-1][0]
@ -195,27 +195,27 @@ def test_load_cached_data_for_updating(mocker) -> None:
# should return the chached data w/o the last item # should return the chached data w/o the last item
num_lines = 30 num_lines = 30
timerange = TimeRange(None, 'line', 0, -num_lines) timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange) data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1] assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0] assert test_data[-2][0] < start_ts < test_data[-1][0]
# no datafile exist # no datafile exist
# should return timestamp start time # should return timestamp start time
timerange = TimeRange('date', None, now_ts - 10000, 0) timerange = TimeRange('date', None, now_ts - 10000, 0)
data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange) data, start_ts = _load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
assert data == [] assert data == []
assert start_ts == (now_ts - 10000) * 1000 assert start_ts == (now_ts - 10000) * 1000
# same with 'line' timeframe # same with 'line' timeframe
num_lines = 30 num_lines = 30
timerange = TimeRange(None, 'line', 0, -num_lines) timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange) data, start_ts = _load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
assert data == [] assert data == []
assert start_ts == (now_ts - num_lines * 60) * 1000 assert start_ts == (now_ts - num_lines * 60) * 1000
# no datafile exist, no timeframe is set # no datafile exist, no timeframe is set
# should return an empty array and None # should return an empty array and None
data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', None) data, start_ts = _load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', None)
assert data == [] assert data == []
assert start_ts is None assert start_ts is None