diff --git a/docs/backtesting.md b/docs/backtesting.md index 184112f33..b3783a665 100644 --- a/docs/backtesting.md +++ b/docs/backtesting.md @@ -36,7 +36,7 @@ python3 ./freqtrade/main.py backtesting --realistic-simulation python3 ./freqtrade/main.py backtesting --realistic-simulation --ticker-interval 1m ``` -**Reload your testdata files** +**Update cached pairs with the latest data** ```bash python3 ./freqtrade/main.py backtesting --realistic-simulation --refresh-pairs-cached ``` diff --git a/docs/bot-usage.md b/docs/bot-usage.md index 76e693592..ce5bb46a9 100644 --- a/docs/bot-usage.md +++ b/docs/bot-usage.md @@ -124,7 +124,7 @@ optional arguments: world limitations -r, --refresh-pairs-cached refresh the pairs files in tests/testdata with - the latest data from Bittrex. Use it if you want + the latest data from the exchange. Use it if you want to run your backtesting with up-to-date data. ``` diff --git a/freqtrade/arguments.py b/freqtrade/arguments.py index 8b3cbf72d..e82ec05b5 100644 --- a/freqtrade/arguments.py +++ b/freqtrade/arguments.py @@ -118,7 +118,7 @@ class Arguments(object): ) parser.add_argument( '-r', '--refresh-pairs-cached', - help='refresh the pairs files in tests/testdata with the latest data from Bittrex. \ + help='refresh the pairs files in tests/testdata with the latest data from the exchange. 
\ Use it if you want to run your backtesting with up-to-date data.', action='store_true', dest='refresh_pairs', diff --git a/freqtrade/exchange/__init__.py b/freqtrade/exchange/__init__.py index dc8e817f5..2601cb836 100644 --- a/freqtrade/exchange/__init__.py +++ b/freqtrade/exchange/__init__.py @@ -8,7 +8,7 @@ import ccxt import arrow from freqtrade import OperationalException, DependencyException, NetworkException - +from freqtrade.constants import Constants logger = logging.getLogger(__name__) @@ -269,15 +269,21 @@ def get_ticker(pair: str, refresh: Optional[bool] = True) -> dict: @retrier -def get_ticker_history(pair: str, tick_interval: str, since: Optional[int] = None) -> List[Dict]: +def get_ticker_history(pair: str, tick_interval: str, since_ms: Optional[int] = None) -> List[Dict]: try: - # download data until it reaches today now time - # + # last item should be in the time interval [now - tick_interval, now] + till_time_ms = arrow.utcnow().shift( + minutes=-Constants.TICKER_INTERVAL_MINUTES[tick_interval] + ).timestamp * 1000 # it looks as if some exchanges return cached data - # and update it with some delay so 10 mins interval is added + # and they update it once in several minutes, so 10 mins interval + # is necessary to skip downloading of an empty array when all + # cached data was already downloaded + till_time_ms = min(till_time_ms, arrow.utcnow().shift(minutes=-10).timestamp * 1000) + data = [] - while not since or since < arrow.utcnow().shift(minutes=-10).timestamp * 1000: - data_part = _API.fetch_ohlcv(pair, timeframe=tick_interval, since=since) + while not since_ms or since_ms < till_time_ms: + data_part = _API.fetch_ohlcv(pair, timeframe=tick_interval, since=since_ms) if not data_part: break @@ -287,7 +293,7 @@ def get_ticker_history(pair: str, tick_interval: str, since: Optional[int] = Non arrow.get(data_part[-1][0] / 1000).format()) data.extend(data_part) - since = data[-1][0] + 1 + since_ms = data[-1][0] + 1 return data diff --git 
a/freqtrade/optimize/__init__.py b/freqtrade/optimize/__init__.py index 1f32ae2d5..fcbe0dc5b 100644 --- a/freqtrade/optimize/__init__.py +++ b/freqtrade/optimize/__init__.py @@ -103,7 +103,10 @@ def load_data(datadir: str, pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange) if not pairdata: # download the tickerdata from exchange - download_backtesting_testdata(datadir, pair=pair, interval=ticker_interval) + download_backtesting_testdata(datadir, + pair=pair, + tick_interval=ticker_interval, + timerange=timerange) # and retry reading the pair pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange) result[pair] = pairdata @@ -127,7 +130,7 @@ def download_pairs(datadir, pairs: List[str], try: download_backtesting_testdata(datadir, pair=pair, - interval=ticker_interval, + tick_interval=ticker_interval, timerange=timerange) except BaseException: logger.info( @@ -139,70 +142,81 @@ def download_pairs(datadir, pairs: List[str], return True -def get_start_ts_from_timerange(timerange: Tuple[Tuple, int, int], interval: str) -> int: - if not timerange: - return None +def load_cached_data_for_updating(filename: str, + tick_interval: str, + timerange: Optional[Tuple[Tuple, int, int]]) -> Tuple[list, int]: + """ + Load cached data and choose what part of the data should be updated + """ - if timerange[0][0] == 'date': - return timerange[1] * 1000 + since_ms = None - if timerange[0][1] == 'line': - num_minutes = timerange[2] * Constants.TICKER_INTERVAL_MINUTES[interval] - return arrow.utcnow().shift(minutes=num_minutes).timestamp * 1000 + # user sets timerange, so find the start time + if timerange: + if timerange[0][0] == 'date': + since_ms = timerange[1] * 1000 + elif timerange[0][1] == 'line': + num_minutes = timerange[2] * Constants.TICKER_INTERVAL_MINUTES[tick_interval] + since_ms = arrow.utcnow().shift(minutes=num_minutes).timestamp * 1000 - return None + # read the cached file + if os.path.isfile(filename): + 
with open(filename, "rt") as file: + data = json.load(file) + # remove the last item, because we are not sure if it is correct + # it could be fetched when the candle was incomplete + if data: + data.pop() + else: + data = [] + + if data: + if since_ms and since_ms < data[0][0]: + # the data is requested for an earlier period than the cache has + # so fully redownload all the data + data = [] + else: + # a part of the data was already downloaded, so + # download missing data only + since_ms = data[-1][0] + 1 + + return (data, since_ms) # FIX: 20180110, suggest rename interval to tick_interval def download_backtesting_testdata(datadir: str, pair: str, - interval: str = '5m', + tick_interval: str = '5m', timerange: Optional[Tuple[Tuple, int, int]] = None) -> bool: """ Download the latest ticker intervals from the exchange for the pairs passed in parameters + The data is downloaded starting from the last correct ticker interval data that + exists in a cache. If timerange starts earlier than the data in the cache, + the full data will be redownloaded + Based on @Rybolov work: https://github.com/rybolov/freqtrade-data :param pairs: list of pairs to download - :param interval: ticker interval + :param tick_interval: ticker interval :param timerange: range of time to download :return: bool """ path = make_testdata_path(datadir) + filepair = pair.replace("/", "_") + filename = os.path.join(path, f'{filepair}-{tick_interval}.json') + logger.info( 'Download the pair: "%s", Interval: %s', pair, - interval + tick_interval ) - filepair = pair.replace("/", "_") - filename = os.path.join(path, '{pair}-{interval}.json'.format( - pair=filepair, - interval=interval, - )) - - since = get_start_ts_from_timerange(timerange, interval) - - if os.path.isfile(filename): - with open(filename, "rt") as file: - data = json.load(file) - - if since: - if since < data[0][0]: - # fully update the data - data = [] - else: - # download unexist data only - since = max(since, data[-1][0] + 1) - else: - 
# download unexist data only - since = data[-1][0] + 1 - else: - data = [] + data, since_ms = load_cached_data_for_updating(filename, tick_interval, timerange) logger.debug("Current Start: %s", misc.format_ms_time(data[1][0]) if data else 'None') logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None') - new_data = get_ticker_history(pair=pair, tick_interval=interval, since=since) + new_data = get_ticker_history(pair=pair, tick_interval=tick_interval, since_ms=since_ms) data.extend(new_data) logger.debug("New Start: %s", misc.format_ms_time(data[0][0])) diff --git a/freqtrade/tests/optimize/test_optimize.py b/freqtrade/tests/optimize/test_optimize.py index e58f5bb2d..f0c429792 100644 --- a/freqtrade/tests/optimize/test_optimize.py +++ b/freqtrade/tests/optimize/test_optimize.py @@ -10,7 +10,7 @@ from freqtrade import optimize from freqtrade.misc import file_dump_json from freqtrade.optimize.__init__ import make_testdata_path, download_pairs, \ download_backtesting_testdata, load_tickerdata_file, trim_tickerlist, \ - get_start_ts_from_timerange + load_cached_data_for_updating from freqtrade.tests.conftest import log_has # Change this if modifying UNITTEST/BTC testdatafile @@ -147,26 +147,107 @@ def test_download_pairs(ticker_history, mocker) -> None: _clean_test_file(file2_5) -def test_get_start_ts_from_timerange(mocker) -> None: - start = get_start_ts_from_timerange(None, '1m') - assert start is None +def test_load_cached_data_for_updating(mocker) -> None: + datadir = os.path.join(os.path.dirname(__file__), '..', 'testdata') - # check 'date' - start = get_start_ts_from_timerange((('date', 'date'), 1000, 2000), '1m') - assert start == 1000 * 1000 + test_data = None + test_filename = os.path.join(datadir, 'UNITTEST_BTC-1m.json') + with open(test_filename, "rt") as file: + test_data = json.load(file) - start = get_start_ts_from_timerange((('date', 'date'), 1000, 2000), '5m') - assert start == 1000 * 1000 + # change now time to test 'line' 
cases + # now = last cached item + 1 hour + now_ts = test_data[-1][0] / 1000 + 60 * 60 + mocker.patch('arrow.utcnow', return_value=arrow.get(now_ts)) - # check 'line' - mock_now = arrow.get(1367900664) - mocker.patch('arrow.utcnow', return_value=mock_now) + # timeframe starts earlier than the cached data + # should fully update data + timerange = (('date', None), test_data[0][0] / 1000 - 1, None) + data, start_ts = load_cached_data_for_updating(test_filename, + '1m', + timerange) + assert data == [] + assert start_ts == test_data[0][0] - 1000 - start = get_start_ts_from_timerange(((None, 'line'), None, -200), '1m') - assert start == (1367900664 - 200 * 60) * 1000 + # same with 'line' timeframe + num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 120 + data, start_ts = load_cached_data_for_updating(test_filename, + '1m', + ((None, 'line'), None, -num_lines)) + assert data == [] + assert start_ts < test_data[0][0] - 1 - start = get_start_ts_from_timerange(((None, 'line'), None, -200), '5m') - assert start == (1367900664 - 5 * 200 * 60) * 1000 + # timeframe starts in the center of the cached data + # should return the cached data w/o the last item + timerange = (('date', None), test_data[0][0] / 1000 + 1, None) + data, start_ts = load_cached_data_for_updating(test_filename, + '1m', + timerange) + assert data == test_data[:-1] + assert test_data[-2][0] < start_ts < test_data[-1][0] + + # same with 'line' timeframe + num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 30 + timerange = ((None, 'line'), None, -num_lines) + data, start_ts = load_cached_data_for_updating(test_filename, + '1m', + timerange) + assert data == test_data[:-1] + assert test_data[-2][0] < start_ts < test_data[-1][0] + + # timeframe starts after the cached data + # should return the cached data w/o the last item + timerange = (('date', None), test_data[-1][0] / 1000 + 1, None) + data, start_ts = load_cached_data_for_updating(test_filename, + '1m', + timerange) + assert 
data == test_data[:-1] + assert test_data[-2][0] < start_ts < test_data[-1][0] + + # same with 'line' timeframe + num_lines = 30 + timerange = ((None, 'line'), None, -num_lines) + data, start_ts = load_cached_data_for_updating(test_filename, + '1m', + timerange) + assert data == test_data[:-1] + assert test_data[-2][0] < start_ts < test_data[-1][0] + + # no timeframe is set + # should return the cached data w/o the last item + num_lines = 30 + timerange = ((None, 'line'), None, -num_lines) + data, start_ts = load_cached_data_for_updating(test_filename, + '1m', + timerange) + assert data == test_data[:-1] + assert test_data[-2][0] < start_ts < test_data[-1][0] + + # no datafile exists + # should return timestamp start time + timerange = (('date', None), now_ts - 10000, None) + data, start_ts = load_cached_data_for_updating(test_filename + 'unexist', + '1m', + timerange) + assert data == [] + assert start_ts == (now_ts - 10000) * 1000 + + # same with 'line' timeframe + num_lines = 30 + timerange = ((None, 'line'), None, -num_lines) + data, start_ts = load_cached_data_for_updating(test_filename + 'unexist', + '1m', + timerange) + assert data == [] + assert start_ts == (now_ts - num_lines * 60) * 1000 + + # no datafile exists, no timeframe is set + # should return an empty array and None + data, start_ts = load_cached_data_for_updating(test_filename + 'unexist', + '1m', + None) + assert data == [] + assert start_ts is None def test_download_pairs_exception(ticker_history, mocker, caplog) -> None: @@ -192,7 +273,7 @@ def test_download_backtesting_testdata(ticker_history, mocker) -> None: # Download a 1 min ticker file file1 = os.path.join(os.path.dirname(__file__), '..', 'testdata', 'XEL_BTC-1m.json') _backup_file(file1) - download_backtesting_testdata(None, pair="XEL/BTC", interval='1m') + download_backtesting_testdata(None, pair="XEL/BTC", tick_interval='1m') assert os.path.isfile(file1) is True _clean_test_file(file1) @@ -200,7 +281,7 @@ def 
test_download_backtesting_testdata(ticker_history, mocker) -> None: file2 = os.path.join(os.path.dirname(__file__), '..', 'testdata', 'STORJ_BTC-5m.json') _backup_file(file2) - download_backtesting_testdata(None, pair="STORJ/BTC", interval='5m') + download_backtesting_testdata(None, pair="STORJ/BTC", tick_interval='5m') assert os.path.isfile(file2) is True _clean_test_file(file2) @@ -212,8 +293,8 @@ def test_download_backtesting_testdata2(mocker) -> None: ] mocker.patch('freqtrade.misc.file_dump_json', return_value=None) mocker.patch('freqtrade.optimize.__init__.get_ticker_history', return_value=tick) - assert download_backtesting_testdata(None, pair="UNITTEST/BTC", interval='1m') - assert download_backtesting_testdata(None, pair="UNITTEST/BTC", interval='3m') + assert download_backtesting_testdata(None, pair="UNITTEST/BTC", tick_interval='1m') + assert download_backtesting_testdata(None, pair="UNITTEST/BTC", tick_interval='3m') def test_load_tickerdata_file() -> None: diff --git a/scripts/download_backtest_data.py b/scripts/download_backtest_data.py index 472442efb..1c73eae03 100755 --- a/scripts/download_backtest_data.py +++ b/scripts/download_backtest_data.py @@ -43,7 +43,7 @@ for pair in PAIRS: for tick_interval in TICKER_INTERVALS: print(f'downloading pair {pair}, interval {tick_interval}') - data = exchange.get_ticker_history(pair, tick_interval, since=since_time) + data = exchange.get_ticker_history(pair, tick_interval, since_ms=since_time) if not data: print('\tNo data was downloaded') break