Cleanup in data.history

This commit is contained in:
hroff-1902 2019-10-06 18:10:40 +03:00
parent 553a1b90ba
commit 211b9cbe04
2 changed files with 34 additions and 29 deletions

View File

@ -82,10 +82,19 @@ def store_tickerdata_file(datadir: Path, pair: str,
misc.file_dump_json(filename, data, is_zip=is_zip)
def _validate_pairdata(pair, pairdata, timerange: TimeRange):
    """
    Warn when the loaded candle data does not cover the requested timerange.

    Only bounds whose type is 'date' are checked; any other bound type is ignored.
    The function never modifies its arguments, it only emits log warnings.

    :param pair: Pair name, used only in the log messages.
    :param pairdata: Sequence of candle rows. The first element of the first and last
                     row is treated as a millisecond timestamp (it is compared against
                     ``startts * 1000`` / ``stopts * 1000``).
    :param timerange: Requested range providing starttype/startts and stoptype/stopts.
    :return: None
    """
    if not pairdata:
        # Nothing to compare against; indexing [0] / [-1] below would raise IndexError.
        return

    date_format = '%Y-%m-%d %H:%M:%S'

    if timerange.starttype == 'date':
        first_ts = pairdata[0][0]
        # timerange bounds are multiplied by 1000 to match the row timestamps.
        if first_ts > timerange.startts * 1000:
            logger.warning('Missing data at start for pair %s, data starts at %s',
                           pair, arrow.get(first_ts // 1000).strftime(date_format))

    if timerange.stoptype == 'date':
        last_ts = pairdata[-1][0]
        if last_ts < timerange.stopts * 1000:
            logger.warning('Missing data at end for pair %s, data ends at %s',
                           pair, arrow.get(last_ts // 1000).strftime(date_format))
def load_pair_history(pair: str,
ticker_interval: str,
datadir: Path,
timerange: TimeRange = TimeRange(None, None, 0, 0),
timerange: Optional[TimeRange] = None,
refresh_pairs: bool = False,
exchange: Optional[Exchange] = None,
fill_up_missing: bool = True,
@ -116,13 +125,8 @@ def load_pair_history(pair: str,
pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange)
if pairdata:
if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000:
logger.warning('Missing data at start for pair %s, data starts at %s',
pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000:
logger.warning('Missing data at end for pair %s, data ends at %s',
pair,
arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
if timerange:
_validate_pairdata(pair, pairdata, timerange)
return parse_ticker_dataframe(pairdata, ticker_interval, pair=pair,
fill_missing=fill_up_missing,
drop_incomplete=drop_incomplete)
@ -139,7 +143,7 @@ def load_data(datadir: Path,
pairs: List[str],
refresh_pairs: bool = False,
exchange: Optional[Exchange] = None,
timerange: TimeRange = TimeRange(None, None, 0, 0),
timerange: Optional[TimeRange] = None,
fill_up_missing: bool = True,
) -> Dict[str, DataFrame]:
"""
@ -169,13 +173,14 @@ def pair_data_filename(datadir: Path, pair: str, ticker_interval: str) -> Path:
return filename
def load_cached_data_for_updating(datadir: Path, pair: str, ticker_interval: str,
def _load_cached_data_for_updating(datadir: Path, pair: str, ticker_interval: str,
timerange: Optional[TimeRange]) -> Tuple[List[Any],
Optional[int]]:
"""
Load cached data to download more data.
If timerange is passed in, checks wether data from an before the stored data will be downloaded.
If that's the case than what's available should be completely overwritten.
If timerange is passed in, checks whether data from before the stored data will be
downloaded.
If that's the case then what's available should be completely overwritten.
Only used by download_pair_history().
"""
@ -238,7 +243,7 @@ def download_pair_history(datadir: Path,
f'and store in {datadir}.'
)
data, since_ms = load_cached_data_for_updating(datadir, pair, ticker_interval, timerange)
data, since_ms = _load_cached_data_for_updating(datadir, pair, ticker_interval, timerange)
logger.debug("Current Start: %s", misc.format_ms_time(data[1][0]) if data else 'None')
logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')
@ -266,7 +271,7 @@ def download_pair_history(datadir: Path,
def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes: List[str],
dl_path: Path, timerange: TimeRange,
dl_path: Path, timerange: Optional[TimeRange] = None,
erase=False) -> List[str]:
"""
Refresh stored ohlcv data for backtesting and hyperopt operations.

View File

@ -15,7 +15,7 @@ from freqtrade import OperationalException
from freqtrade.configuration import TimeRange
from freqtrade.data import history
from freqtrade.data.history import (download_pair_history,
load_cached_data_for_updating,
_load_cached_data_for_updating,
load_tickerdata_file,
refresh_backtest_ohlcv_data,
trim_tickerlist)
@ -151,13 +151,13 @@ def test_load_cached_data_for_updating(mocker) -> None:
# timeframe starts earlier than the cached data
# should fully update data
timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == []
assert start_ts == test_data[0][0] - 1000
# same with 'line' timeframe
num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 120
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m',
data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m',
TimeRange(None, 'line', 0, -num_lines))
assert data == []
assert start_ts < test_data[0][0] - 1
@ -165,29 +165,29 @@ def test_load_cached_data_for_updating(mocker) -> None:
# timeframe starts in the center of the cached data
# should return the cached data w/o the last item
timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0]
# same with 'line' timeframe
num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 30
timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0]
# timeframe starts after the cached data
# should return the cached data w/o the last item
timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 1, 0)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0]
# Try loading last 30 lines.
# Not supported by load_cached_data_for_updating, we always need to get the full data.
# Not supported by _load_cached_data_for_updating, we always need to get the full data.
num_lines = 30
timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0]
@ -195,27 +195,27 @@ def test_load_cached_data_for_updating(mocker) -> None:
# should return the cached data w/o the last item
num_lines = 30
timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
assert data == test_data[:-1]
assert test_data[-2][0] < start_ts < test_data[-1][0]
# no datafile exists
# should return timestamp start time
timerange = TimeRange('date', None, now_ts - 10000, 0)
data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
data, start_ts = _load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
assert data == []
assert start_ts == (now_ts - 10000) * 1000
# same with 'line' timeframe
num_lines = 30
timerange = TimeRange(None, 'line', 0, -num_lines)
data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
data, start_ts = _load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
assert data == []
assert start_ts == (now_ts - num_lines * 60) * 1000
# no datafile exists, no timeframe is set
# should return an empty array and None
data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', None)
data, start_ts = _load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', None)
assert data == []
assert start_ts is None