From 211b9cbe047b9c5aac935929e010031c15146a0b Mon Sep 17 00:00:00 2001
From: hroff-1902
Date: Sun, 6 Oct 2019 18:10:40 +0300
Subject: [PATCH] Cleanup in data.history

---
 freqtrade/data/history.py  | 37 +++++++++++++++++++++----------------
 tests/data/test_history.py | 26 +++++++++++++-------------
 2 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/freqtrade/data/history.py b/freqtrade/data/history.py
index 865289f38..991a639ca 100644
--- a/freqtrade/data/history.py
+++ b/freqtrade/data/history.py
@@ -82,10 +82,19 @@ def store_tickerdata_file(datadir: Path, pair: str,
     misc.file_dump_json(filename, data, is_zip=is_zip)
 
 
+def _validate_pairdata(pair, pairdata, timerange: TimeRange):
+    if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000:
+        logger.warning('Missing data at start for pair %s, data starts at %s',
+                       pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
+    if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000:
+        logger.warning('Missing data at end for pair %s, data ends at %s',
+                       pair, arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
+
+
 def load_pair_history(pair: str,
                       ticker_interval: str,
                       datadir: Path,
-                      timerange: TimeRange = TimeRange(None, None, 0, 0),
+                      timerange: Optional[TimeRange] = None,
                       refresh_pairs: bool = False,
                       exchange: Optional[Exchange] = None,
                       fill_up_missing: bool = True,
@@ -116,13 +125,8 @@ def load_pair_history(pair: str,
     pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange)
 
     if pairdata:
-        if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000:
-            logger.warning('Missing data at start for pair %s, data starts at %s',
-                           pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
-        if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000:
-            logger.warning('Missing data at end for pair %s, data ends at %s',
-                           pair,
-                           arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
+        if timerange:
+            _validate_pairdata(pair, pairdata, timerange)
         return parse_ticker_dataframe(pairdata, ticker_interval, pair=pair,
                                       fill_missing=fill_up_missing,
                                       drop_incomplete=drop_incomplete)
@@ -139,7 +143,7 @@ def load_data(datadir: Path,
               pairs: List[str],
               refresh_pairs: bool = False,
               exchange: Optional[Exchange] = None,
-              timerange: TimeRange = TimeRange(None, None, 0, 0),
+              timerange: Optional[TimeRange] = None,
               fill_up_missing: bool = True,
               ) -> Dict[str, DataFrame]:
     """
@@ -169,13 +173,14 @@ def pair_data_filename(datadir: Path, pair: str, ticker_interval: str) -> Path:
     return filename
 
 
-def load_cached_data_for_updating(datadir: Path, pair: str, ticker_interval: str,
-                                  timerange: Optional[TimeRange]) -> Tuple[List[Any],
-                                                                           Optional[int]]:
+def _load_cached_data_for_updating(datadir: Path, pair: str, ticker_interval: str,
+                                   timerange: Optional[TimeRange]) -> Tuple[List[Any],
+                                                                            Optional[int]]:
     """
     Load cached data to download more data.
-    If timerange is passed in, checks wether data from an before the stored data will be downloaded.
-    If that's the case than what's available should be completely overwritten.
+    If timerange is passed in, checks whether data from before the stored data will be
+    downloaded.
+    If that's the case then what's available should be completely overwritten.
 
     Only used by download_pair_history().
     """
@@ -238,7 +243,7 @@ def download_pair_history(datadir: Path,
             f'and store in {datadir}.'
         )
 
-        data, since_ms = load_cached_data_for_updating(datadir, pair, ticker_interval, timerange)
+        data, since_ms = _load_cached_data_for_updating(datadir, pair, ticker_interval, timerange)
 
         logger.debug("Current Start: %s", misc.format_ms_time(data[1][0]) if data else 'None')
         logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')
@@ -266,7 +271,7 @@ def download_pair_history(datadir: Path,
 
 
 def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes: List[str],
-                                dl_path: Path, timerange: TimeRange,
+                                dl_path: Path, timerange: Optional[TimeRange] = None,
                                 erase=False) -> List[str]:
     """
     Refresh stored ohlcv data for backtesting and hyperopt operations.
diff --git a/tests/data/test_history.py b/tests/data/test_history.py
index e386c3506..f8b630775 100644
--- a/tests/data/test_history.py
+++ b/tests/data/test_history.py
@@ -15,7 +15,7 @@ from freqtrade import OperationalException
 from freqtrade.configuration import TimeRange
 from freqtrade.data import history
 from freqtrade.data.history import (download_pair_history,
-                                    load_cached_data_for_updating,
+                                    _load_cached_data_for_updating,
                                     load_tickerdata_file,
                                     refresh_backtest_ohlcv_data,
                                     trim_tickerlist)
@@ -151,43 +151,43 @@ def test_load_cached_data_for_updating(mocker) -> None:
     # timeframe starts earlier than the cached data
     # should fully update data
     timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
-    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
+    data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == []
     assert start_ts == test_data[0][0] - 1000
 
     # same with 'line' timeframe
     num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 120
-    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m',
-                                                   TimeRange(None, 'line', 0, -num_lines))
+    data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m',
+                                                    TimeRange(None, 'line', 0, -num_lines))
     assert data == []
     assert start_ts < test_data[0][0] - 1
 
     # timeframe starts in the center of the cached data
     # should return the chached data w/o the last item
     timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0)
-    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
+    data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == test_data[:-1]
     assert test_data[-2][0] < start_ts < test_data[-1][0]
 
     # same with 'line' timeframe
     num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 30
     timerange = TimeRange(None, 'line', 0, -num_lines)
-    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
+    data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == test_data[:-1]
     assert test_data[-2][0] < start_ts < test_data[-1][0]
 
     # timeframe starts after the chached data
     # should return the chached data w/o the last item
     timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 1, 0)
-    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
+    data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == test_data[:-1]
     assert test_data[-2][0] < start_ts < test_data[-1][0]
 
     # Try loading last 30 lines.
-    # Not supported by load_cached_data_for_updating, we always need to get the full data.
+    # Not supported by _load_cached_data_for_updating, we always need to get the full data.
     num_lines = 30
     timerange = TimeRange(None, 'line', 0, -num_lines)
-    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
+    data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == test_data[:-1]
     assert test_data[-2][0] < start_ts < test_data[-1][0]
 
@@ -195,27 +195,27 @@ def test_load_cached_data_for_updating(mocker) -> None:
     # should return the chached data w/o the last item
     num_lines = 30
     timerange = TimeRange(None, 'line', 0, -num_lines)
-    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
+    data, start_ts = _load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == test_data[:-1]
     assert test_data[-2][0] < start_ts < test_data[-1][0]
 
     # no datafile exist
     # should return timestamp start time
     timerange = TimeRange('date', None, now_ts - 10000, 0)
-    data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
+    data, start_ts = _load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
     assert data == []
     assert start_ts == (now_ts - 10000) * 1000
 
     # same with 'line' timeframe
     num_lines = 30
     timerange = TimeRange(None, 'line', 0, -num_lines)
-    data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
+    data, start_ts = _load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
     assert data == []
     assert start_ts == (now_ts - num_lines * 60) * 1000
 
     # no datafile exist, no timeframe is set
     # should return an empty array and None
-    data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', None)
+    data, start_ts = _load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', None)
     assert data == []
     assert start_ts is None
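
Reviewer sketch (not part of the patch): the snippet below illustrates the two behavioural points of the
refactoring in a standalone form -- the start/end range check now lives in a single _validate_pairdata
helper, and because timerange defaults to None rather than TimeRange(None, None, 0, 0), callers only run
the check when a range was actually supplied. SimpleTimeRange, load_pair_history_sketch and the sample
candle timestamps are invented for illustration; only the arrow dependency mirrors the real code.

import logging
from typing import List, Optional

import arrow

logging.basicConfig()
logger = logging.getLogger(__name__)


class SimpleTimeRange:
    """Hypothetical stand-in exposing the TimeRange attributes used by the helper."""

    def __init__(self, starttype=None, stoptype=None, startts=0, stopts=0):
        self.starttype = starttype  # 'date' or None
        self.stoptype = stoptype    # 'date' or None
        self.startts = startts      # start timestamp, seconds
        self.stopts = stopts        # stop timestamp, seconds


def _validate_pairdata(pair: str, pairdata: List[List[float]],
                       timerange: SimpleTimeRange) -> None:
    # Same shape as the helper extracted above: warn when the cached candles
    # (millisecond timestamps in column 0) do not cover the requested range.
    if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000:
        logger.warning('Missing data at start for pair %s, data starts at %s',
                       pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
    if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000:
        logger.warning('Missing data at end for pair %s, data ends at %s',
                       pair, arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))


def load_pair_history_sketch(pairdata: List[List[float]],
                             timerange: Optional[SimpleTimeRange] = None) -> List[List[float]]:
    # With the Optional default, validation is simply skipped when no timerange
    # is passed, instead of comparing against a dummy TimeRange(None, None, 0, 0).
    if pairdata and timerange:
        _validate_pairdata('UNITTEST/BTC', pairdata, timerange)
    return pairdata


if __name__ == '__main__':
    candles = [[1570000000000, 1, 1, 1, 1, 1],   # two 1m candles, ms timestamps
               [1570000060000, 1, 1, 1, 1, 1]]
    load_pair_history_sketch(candles)  # no timerange -> check skipped, no warning
    load_pair_history_sketch(candles, SimpleTimeRange('date', 'date', 1570000000, 1571000000))
    # second call warns: cached data ends before the requested stop date

Defaulting to None also avoids building a throwaway TimeRange instance as a default argument and makes
"no range requested" explicit at the call sites; refresh_backtest_ohlcv_data gets the same Optional default.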