Merge pull request #637 from arudov/fix/dl-testdata-period2

Time-range download of backtesting data
This commit is contained in:
Michael Egger 2018-05-07 17:19:54 +02:00 committed by GitHub
commit 1dbdb880e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 352 additions and 76 deletions

View File

@ -36,7 +36,7 @@ python3 ./freqtrade/main.py backtesting --realistic-simulation
python3 ./freqtrade/main.py backtesting --realistic-simulation --ticker-interval 1m
```
**Reload your testdata files**
**Update cached pairs with the latest data**
```bash
python3 ./freqtrade/main.py backtesting --realistic-simulation --refresh-pairs-cached
```
@ -80,12 +80,9 @@ The full timerange specification:
- Use last 123 tickframes of data: `--timerange=-123`
- Use first 123 tickframes of data: `--timerange=123-`
- Use tickframes from line 123 through 456: `--timerange=123-456`
Incoming feature, not implemented yet:
- `--timerange=-20180131`
- `--timerange=20180101-`
- `--timerange=20180101-20181231`
- Use tickframes till 2018/01/31: `--timerange=-20180131`
- Use tickframes since 2018/01/31: `--timerange=20180131-`
- Use tickframes since 2018/01/31 till 2018/03/01: `--timerange=20180131-20180301`
**Update testdata directory**

View File

@ -129,7 +129,7 @@ optional arguments:
world limitations
-r, --refresh-pairs-cached
refresh the pairs files in tests/testdata with
the latest data from Bittrex. Use it if you want
the latest data from the exchange. Use it if you want
to run your backtesting with up-to-date data.
```

View File

@ -6,6 +6,7 @@ import argparse
import logging
import os
import re
import arrow
from typing import List, Tuple, Optional
from freqtrade import __version__, constants
@ -123,7 +124,7 @@ class Arguments(object):
)
parser.add_argument(
'-r', '--refresh-pairs-cached',
help='refresh the pairs files in tests/testdata with the latest data from Bittrex. \
help='refresh the pairs files in tests/testdata with the latest data from the exchange. \
Use it if you want to run your backtesting with up-to-date data.',
action='store_true',
dest='refresh_pairs',
@ -234,12 +235,16 @@ class Arguments(object):
stop = None
if stype[0]:
start = rvals[index]
if stype[0] != 'date':
if stype[0] == 'date':
start = arrow.get(start, 'YYYYMMDD').timestamp
else:
start = int(start)
index += 1
if stype[1]:
stop = rvals[index]
if stype[1] != 'date':
if stype[1] == 'date':
stop = arrow.get(stop, 'YYYYMMDD').timestamp
else:
stop = int(stop)
return stype, start, stop
raise Exception('Incorrect syntax for timerange "%s"' % text)
@ -271,3 +276,17 @@ class Arguments(object):
help='Export files to given dir',
dest='export',
default=None)
self.parser.add_argument(
'--days',
help='Download data for number of days',
dest='days',
type=int,
default=None)
self.parser.add_argument(
'--exchange',
help='Exchange name',
dest='exchange',
type=str,
default='bittrex')

View File

@ -8,8 +8,7 @@ from datetime import datetime
import ccxt
import arrow
from freqtrade import OperationalException, DependencyException, TemporaryError
from freqtrade import constants, OperationalException, DependencyException, TemporaryError
logger = logging.getLogger(__name__)
@ -279,9 +278,33 @@ def get_ticker(pair: str, refresh: Optional[bool] = True) -> dict:
@retrier
def get_ticker_history(pair: str, tick_interval: str) -> List[Dict]:
def get_ticker_history(pair: str, tick_interval: str, since_ms: Optional[int] = None) -> List[Dict]:
try:
return _API.fetch_ohlcv(pair, timeframe=tick_interval)
# last item should be in the time interval [now - tick_interval, now]
till_time_ms = arrow.utcnow().shift(
minutes=-constants.TICKER_INTERVAL_MINUTES[tick_interval]
).timestamp * 1000
# it looks as if some exchanges return cached data
# and update it only once every several minutes, so a 10-minute margin
# is necessary to skip downloading an empty array when all
# cached data was already downloaded
till_time_ms = min(till_time_ms, arrow.utcnow().shift(minutes=-10).timestamp * 1000)
data = []
while not since_ms or since_ms < till_time_ms:
data_part = _API.fetch_ohlcv(pair, timeframe=tick_interval, since=since_ms)
if not data_part:
break
logger.info('Downloaded data for time range [%s, %s]',
arrow.get(data_part[0][0] / 1000).format(),
arrow.get(data_part[-1][0] / 1000).format())
data.extend(data_part)
since_ms = data[-1][0] + 1
return data
except ccxt.NotSupported as e:
raise OperationalException(
'Exchange {} does not support fetching historical candlestick data.'

View File

@ -4,25 +4,46 @@ import gzip
import json
import logging
import os
import arrow
from typing import Optional, List, Dict, Tuple
from freqtrade import misc
from freqtrade import misc, constants
from freqtrade.exchange import get_ticker_history
from user_data.hyperopt_conf import hyperopt_optimize_conf
logger = logging.getLogger(__name__)
def trim_tickerlist(tickerlist: List[Dict], timerange: Tuple[Tuple, int, int]) -> List[Dict]:
    """
    Trim a ticker list to the window described by a parsed timerange.

    :param tickerlist: candles in ascending time order; element 0 of every
        candle is compared against the date bounds as a millisecond timestamp
    :param timerange: tuple (stype, start, stop) where stype is a pair of
        markers for the start and the stop bound:
        - ('line', None): keep the first `start` candles
        - (None, 'line'): `stop` is negative, keep the last `-stop` candles
        - 'index': the bound is a list position
        - 'date': the bound is a timestamp in seconds
        Any other marker leaves that side of the list untouched.
    :return: the trimmed list (the same object when tickerlist is empty)
    :raises ValueError: if the resulting start lies behind the resulting stop
    """
    if not tickerlist:
        return tickerlist

    stype, start, stop = timerange

    total = len(tickerlist)
    start_index = 0
    stop_index = total

    if stype[0] == 'line':
        stop_index = start
    if stype[0] == 'index':
        start_index = start
    elif stype[0] == 'date':
        start_ms = start * 1000
        # bounded scan: a start date behind the last candle yields an empty
        # window instead of running off the end of the list
        while start_index < total and tickerlist[start_index][0] < start_ms:
            start_index += 1

    if stype[1] == 'line':
        # asking for more lines than are available means "everything";
        # without the clamp a negative start would be read as a slice
        # offset from the end and silently drop candles
        start_index = max(0, total + stop)
    if stype[1] == 'index':
        stop_index = stop
    elif stype[1] == 'date':
        stop_ms = stop * 1000
        # bounded scan: a stop date before the first candle yields an empty
        # window instead of wrapping around through negative indices
        while stop_index > 0 and tickerlist[stop_index - 1][0] > stop_ms:
            stop_index -= 1

    if start_index > stop_index:
        raise ValueError(f'The timerange [{start},{stop}] is incorrect')

    return tickerlist[start_index:stop_index]
def load_tickerdata_file(
@ -75,13 +96,16 @@ def load_data(datadir: str,
# If the user force the refresh of pairs
if refresh_pairs:
logger.info('Download data for all pairs and store them in %s', datadir)
download_pairs(datadir, _pairs, ticker_interval)
download_pairs(datadir, _pairs, ticker_interval, timerange=timerange)
for pair in _pairs:
pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange)
if not pairdata:
# download the tickerdata from exchange
download_backtesting_testdata(datadir, pair=pair, interval=ticker_interval)
download_backtesting_testdata(datadir,
pair=pair,
tick_interval=ticker_interval,
timerange=timerange)
# and retry reading the pair
pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange)
result[pair] = pairdata
@ -97,11 +121,16 @@ def make_testdata_path(datadir: str) -> str:
)
def download_pairs(datadir, pairs: List[str], ticker_interval: str) -> bool:
def download_pairs(datadir, pairs: List[str],
ticker_interval: str,
timerange: Optional[Tuple[Tuple, int, int]] = None) -> bool:
"""For each pairs passed in parameters, download the ticker intervals"""
for pair in pairs:
try:
download_backtesting_testdata(datadir, pair=pair, interval=ticker_interval)
download_backtesting_testdata(datadir,
pair=pair,
tick_interval=ticker_interval,
timerange=timerange)
except BaseException:
logger.info(
'Failed to download the pair: "%s", Interval: %s',
@ -112,39 +141,85 @@ def download_pairs(datadir, pairs: List[str], ticker_interval: str) -> bool:
return True
# FIX: 20180110, suggest rename interval to tick_interval
def download_backtesting_testdata(datadir: str, pair: str, interval: str = '5m') -> None:
def load_cached_data_for_updating(filename: str,
                                  tick_interval: str,
                                  timerange: Optional[Tuple[Tuple, int, int]]
                                  ) -> Tuple[list, Optional[int]]:
    """
    Load cached data and choose what part of the data should be updated

    :param filename: path of the cached JSON ticker file
    :param tick_interval: ticker interval, used to convert a 'line' timerange
        into minutes
    :param timerange: optional (stype, start, stop) tuple; only a 'date' start
        and a 'line' stop influence the start time of the download
    :return: tuple (data, since_ms): the cached rows that can be kept and the
        millisecond timestamp the download should start from (None if neither
        the timerange nor the cache determines one)
    """
    since_ms = None

    # user sets timerange, so find the start time
    if timerange:
        if timerange[0][0] == 'date':
            since_ms = timerange[1] * 1000
        elif timerange[0][1] == 'line':
            # the 'line' value is negative, so this shifts into the past
            num_minutes = timerange[2] * constants.TICKER_INTERVAL_MINUTES[tick_interval]
            since_ms = arrow.utcnow().shift(minutes=num_minutes).timestamp * 1000

    # read the cached file
    if os.path.isfile(filename):
        with open(filename, "rt") as file:
            data = json.load(file)
            # remove the last item, because we are not sure if it is correct:
            # it could have been fetched while the candle was still incomplete
            if data:
                data.pop()
    else:
        data = []

    if data:
        if since_ms and since_ms < data[0][0]:
            # the data is requested for an earlier period than the cache has,
            # so fully redownload all the data
            data = []
        else:
            # a part of the data was already downloaded, so
            # download only the missing data
            since_ms = data[-1][0] + 1

    return (data, since_ms)
def download_backtesting_testdata(datadir: str,
                                  pair: str,
                                  tick_interval: str = '5m',
                                  timerange: Optional[Tuple[Tuple, int, int]] = None) -> None:
    """
    Download the latest ticker intervals from the exchange for the pair passed in parameters
    The data is downloaded starting from the last correct ticker interval data that
    exists in a cache. If timerange starts earlier than the data in the cache,
    the full data will be redownloaded

    Based on @Rybolov work: https://github.com/rybolov/freqtrade-data
    :param datadir: directory hint handed to make_testdata_path to resolve
        where the cache file lives
    :param pair: pair to download, e.g. "ETH/BTC" ("/" becomes "_" in the file name)
    :param tick_interval: ticker interval
    :param timerange: range of time to download
    :return: None
    """
    path = make_testdata_path(datadir)
    filepair = pair.replace("/", "_")
    filename = os.path.join(path, f'{filepair}-{tick_interval}.json')

    logger.info(
        'Download the pair: "%s", Interval: %s',
        pair,
        tick_interval
    )

    data, since_ms = load_cached_data_for_updating(filename, tick_interval, timerange)

    # the first cached row is data[0]; indexing data[1] reported the wrong
    # candle and raised IndexError when only one row was cached
    logger.debug("Current Start: %s", misc.format_ms_time(data[0][0]) if data else 'None')
    logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')

    new_data = get_ticker_history(pair=pair, tick_interval=tick_interval, since_ms=since_ms)
    data.extend(new_data)

    # nothing cached and nothing downloaded must not make a debug log crash
    logger.debug("New Start: %s", misc.format_ms_time(data[0][0]) if data else 'None')
    logger.debug("New End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')

    misc.file_dump_json(filename, data)

View File

@ -324,6 +324,15 @@ def test_get_ticker(default_conf, mocker):
get_ticker(pair='ETH/BTC', refresh=True)
def make_fetch_ohlcv_mock(data):
    """
    Build a replacement for the exchange API's fetch_ohlcv that serves `data` once.

    :param data: list of candles; element 0 of each candle is its timestamp
    :return: a function that returns `data` for a request without a start time
        and an empty list for any follow-up request
    """
    def fetch_ohlcv_mock(pair, timeframe, since=None):
        # `since` is optional, like in the real call, so the mock also works
        # for callers that do not pass a start time at all
        if since:
            # a follow-up request has to start behind the last served candle
            assert since > data[-1][0]
            return []
        return data
    return fetch_ohlcv_mock
def test_get_ticker_history(default_conf, mocker):
api_mock = MagicMock()
tick = [
@ -337,7 +346,7 @@ def test_get_ticker_history(default_conf, mocker):
]
]
type(api_mock).has = PropertyMock(return_value={'fetchOHLCV': True})
api_mock.fetch_ohlcv = MagicMock(return_value=tick)
api_mock.fetch_ohlcv = MagicMock(side_effect=make_fetch_ohlcv_mock(tick))
mocker.patch('freqtrade.exchange._API', api_mock)
# retrieve original ticker
@ -360,7 +369,7 @@ def test_get_ticker_history(default_conf, mocker):
10, # volume (in quote currency)
]
]
api_mock.fetch_ohlcv = MagicMock(return_value=new_tick)
api_mock.fetch_ohlcv = MagicMock(side_effect=make_fetch_ohlcv_mock(new_tick))
mocker.patch('freqtrade.exchange._API', api_mock)
ticks = get_ticker_history('ETH/BTC', default_conf['ticker_interval'])

View File

@ -3,12 +3,14 @@
import json
import os
import uuid
import arrow
from shutil import copyfile
from freqtrade import optimize
from freqtrade.misc import file_dump_json
from freqtrade.optimize.__init__ import make_testdata_path, download_pairs, \
download_backtesting_testdata, load_tickerdata_file, trim_tickerlist
download_backtesting_testdata, load_tickerdata_file, trim_tickerlist, \
load_cached_data_for_updating
from freqtrade.tests.conftest import log_has
# Change this if modifying UNITTEST/BTC testdatafile
@ -145,6 +147,109 @@ def test_download_pairs(ticker_history, mocker) -> None:
_clean_test_file(file2_5)
def test_load_cached_data_for_updating(mocker) -> None:
    """
    Check which cached rows and which start timestamp
    load_cached_data_for_updating returns for 'date' and 'line' timeranges,
    for a missing timerange and for a missing cache file.
    """
    datadir = os.path.join(os.path.dirname(__file__), '..', 'testdata')

    test_filename = os.path.join(datadir, 'UNITTEST_BTC-1m.json')
    with open(test_filename, "rt") as file:
        test_data = json.load(file)

    # change now time to test 'line' cases
    # now = last cached item + 1 hour
    now_ts = test_data[-1][0] / 1000 + 60 * 60
    mocker.patch('arrow.utcnow', return_value=arrow.get(now_ts))

    # timeframe starts earlier than the cached data
    # should fully update data
    timerange = (('date', None), test_data[0][0] / 1000 - 1, None)
    data, start_ts = load_cached_data_for_updating(test_filename,
                                                   '1m',
                                                   timerange)
    assert data == []
    assert start_ts == test_data[0][0] - 1000

    # same with 'line' timeframe
    num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 120
    data, start_ts = load_cached_data_for_updating(test_filename,
                                                   '1m',
                                                   ((None, 'line'), None, -num_lines))
    assert data == []
    assert start_ts < test_data[0][0] - 1

    # timeframe starts in the center of the cached data
    # should return the cached data w/o the last item
    timerange = (('date', None), test_data[0][0] / 1000 + 1, None)
    data, start_ts = load_cached_data_for_updating(test_filename,
                                                   '1m',
                                                   timerange)
    assert data == test_data[:-1]
    assert test_data[-2][0] < start_ts < test_data[-1][0]

    # same with 'line' timeframe
    num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 30
    timerange = ((None, 'line'), None, -num_lines)
    data, start_ts = load_cached_data_for_updating(test_filename,
                                                   '1m',
                                                   timerange)
    assert data == test_data[:-1]
    assert test_data[-2][0] < start_ts < test_data[-1][0]

    # timeframe starts after the cached data
    # should return the cached data w/o the last item
    timerange = (('date', None), test_data[-1][0] / 1000 + 1, None)
    data, start_ts = load_cached_data_for_updating(test_filename,
                                                   '1m',
                                                   timerange)
    assert data == test_data[:-1]
    assert test_data[-2][0] < start_ts < test_data[-1][0]

    # same with 'line' timeframe
    num_lines = 30
    timerange = ((None, 'line'), None, -num_lines)
    data, start_ts = load_cached_data_for_updating(test_filename,
                                                   '1m',
                                                   timerange)
    assert data == test_data[:-1]
    assert test_data[-2][0] < start_ts < test_data[-1][0]

    # no timeframe is set
    # should return the cached data w/o the last item
    # (pass None here: a 'line' timerange would only repeat the case above)
    data, start_ts = load_cached_data_for_updating(test_filename,
                                                   '1m',
                                                   None)
    assert data == test_data[:-1]
    assert test_data[-2][0] < start_ts < test_data[-1][0]

    # no datafile exist
    # should return timestamp start time
    timerange = (('date', None), now_ts - 10000, None)
    data, start_ts = load_cached_data_for_updating(test_filename + 'unexist',
                                                   '1m',
                                                   timerange)
    assert data == []
    assert start_ts == (now_ts - 10000) * 1000

    # same with 'line' timeframe
    num_lines = 30
    timerange = ((None, 'line'), None, -num_lines)
    data, start_ts = load_cached_data_for_updating(test_filename + 'unexist',
                                                   '1m',
                                                   timerange)
    assert data == []
    assert start_ts == (now_ts - num_lines * 60) * 1000

    # no datafile exist, no timeframe is set
    # should return an empty array and None
    data, start_ts = load_cached_data_for_updating(test_filename + 'unexist',
                                                   '1m',
                                                   None)
    assert data == []
    assert start_ts is None
def test_download_pairs_exception(ticker_history, mocker, caplog) -> None:
mocker.patch('freqtrade.optimize.__init__.get_ticker_history', return_value=ticker_history)
mocker.patch('freqtrade.optimize.__init__.download_backtesting_testdata',
@ -168,7 +273,7 @@ def test_download_backtesting_testdata(ticker_history, mocker) -> None:
# Download a 1 min ticker file
file1 = os.path.join(os.path.dirname(__file__), '..', 'testdata', 'XEL_BTC-1m.json')
_backup_file(file1)
download_backtesting_testdata(None, pair="XEL/BTC", interval='1m')
download_backtesting_testdata(None, pair="XEL/BTC", tick_interval='1m')
assert os.path.isfile(file1) is True
_clean_test_file(file1)
@ -176,7 +281,7 @@ def test_download_backtesting_testdata(ticker_history, mocker) -> None:
file2 = os.path.join(os.path.dirname(__file__), '..', 'testdata', 'STORJ_BTC-5m.json')
_backup_file(file2)
download_backtesting_testdata(None, pair="STORJ/BTC", interval='5m')
download_backtesting_testdata(None, pair="STORJ/BTC", tick_interval='5m')
assert os.path.isfile(file2) is True
_clean_test_file(file2)
@ -188,8 +293,9 @@ def test_download_backtesting_testdata2(mocker) -> None:
]
json_dump_mock = mocker.patch('freqtrade.misc.file_dump_json', return_value=None)
mocker.patch('freqtrade.optimize.__init__.get_ticker_history', return_value=tick)
download_backtesting_testdata(None, pair="UNITTEST/BTC", interval='1m')
download_backtesting_testdata(None, pair="UNITTEST/BTC", interval='3m')
download_backtesting_testdata(None, pair="UNITTEST/BTC", tick_interval='1m')
download_backtesting_testdata(None, pair="UNITTEST/BTC", tick_interval='3m')
assert json_dump_mock.call_count == 2
@ -222,12 +328,12 @@ def test_trim_tickerlist() -> None:
ticker_list_len = len(ticker_list)
# Test the pattern ^(-\d+)$
# This pattern remove X element from the beginning
timerange = ((None, 'line'), None, 5)
# This pattern uses the latest N elements
timerange = ((None, 'line'), None, -5)
ticker = trim_tickerlist(ticker_list, timerange)
ticker_len = len(ticker)
assert ticker_list_len == ticker_len + 5
assert ticker_len == 5
assert ticker_list[0] is not ticker[0] # The first element should be different
assert ticker_list[-1] is ticker[-1] # The last element must be the same
@ -252,6 +358,37 @@ def test_trim_tickerlist() -> None:
assert ticker_list[5] is ticker[0] # The list starts at the index 5
assert ticker_list[9] is ticker[-1] # The list ends at the index 9 (5 elements)
# Test the pattern ^(\d{8})-(\d{8})$
# This pattern extract a window between the dates
timerange = (('date', 'date'), ticker_list[5][0] / 1000, ticker_list[10][0] / 1000 - 1)
ticker = trim_tickerlist(ticker_list, timerange)
ticker_len = len(ticker)
assert ticker_len == 5
assert ticker_list[0] is not ticker[0] # The first element should be different
assert ticker_list[5] is ticker[0] # The list starts at the index 5
assert ticker_list[9] is ticker[-1] # The list ends at the index 9 (5 elements)
# Test the pattern ^-(\d{8})$
# This pattern extracts elements from the start to the date
timerange = ((None, 'date'), None, ticker_list[10][0] / 1000 - 1)
ticker = trim_tickerlist(ticker_list, timerange)
ticker_len = len(ticker)
assert ticker_len == 10
assert ticker_list[0] is ticker[0] # The start of the list is included
assert ticker_list[9] is ticker[-1] # The element 10 is not included
# Test the pattern ^(\d{8})-$
# This pattern extracts elements from the date to now
timerange = (('date', None), ticker_list[10][0] / 1000 - 1, None)
ticker = trim_tickerlist(ticker_list, timerange)
ticker_len = len(ticker)
assert ticker_len == ticker_list_len - 10
assert ticker_list[10] is ticker[0] # The first element is element #10
assert ticker_list[-1] is ticker[-1] # The last element is the same
# Test a wrong pattern
# This pattern must return the list unchanged
timerange = ((None, None), None, 5)

View File

@ -109,6 +109,13 @@ def test_parse_args_dynamic_whitelist_invalid_values() -> None:
def test_parse_timerange_incorrect() -> None:
assert ((None, 'line'), None, -200) == Arguments.parse_timerange('-200')
assert (('line', None), 200, None) == Arguments.parse_timerange('200-')
assert (('index', 'index'), 200, 500) == Arguments.parse_timerange('200-500')
assert (('date', None), 1274486400, None) == Arguments.parse_timerange('20100522-')
assert ((None, 'date'), None, 1274486400) == Arguments.parse_timerange('-20100522')
timerange = Arguments.parse_timerange('20100522-20150730')
assert timerange == (('date', 'date'), 1274486400, 1438214400)
with pytest.raises(Exception, match=r'Incorrect syntax.*'):
Arguments.parse_timerange('-')

View File

@ -21,9 +21,8 @@ from typing import List, Dict
import gzip
from freqtrade.arguments import Arguments
from freqtrade import misc
from freqtrade import misc, constants
from pandas import DataFrame
from freqtrade.constants import Constants
import dateutil.parser
@ -139,7 +138,7 @@ def convert_main(args: Namespace) -> None:
# default to adding 'm' to end of minutes for new interval name
interval = str(minutes) + 'm'
# but check if there is a mapping between int and string also
for str_interval, minutes_interval in Constants.TICKER_INTERVAL_MINUTES.items():
for str_interval, minutes_interval in constants.TICKER_INTERVAL_MINUTES.items():
if minutes_interval == minutes:
interval = str_interval
break

View File

@ -4,6 +4,7 @@
import json
import sys
import os
import arrow
from freqtrade import (exchange, arguments, misc)
@ -25,17 +26,33 @@ dl_path = DEFAULT_DL_PATH
if args.export and os.path.exists(args.export):
dl_path = args.export
since_time = None
if args.days:
since_time = arrow.utcnow().shift(days=-args.days).timestamp * 1000
print(f'About to download pairs: {PAIRS} to {dl_path}')
# Init Bittrex exchange
# Init exchange
exchange._API = exchange.init_ccxt({'key': '',
'secret': '',
'name': 'bittrex'})
'name': args.exchange})
for pair in PAIRS:
for tick_interval in TICKER_INTERVALS:
print(f'downloading pair {pair}, interval {tick_interval}')
data = exchange.get_ticker_history(pair, tick_interval)
data = exchange.get_ticker_history(pair, tick_interval, since_ms=since_time)
if not data:
print('\tNo data was downloaded')
break
print('\tData was downloaded for period %s - %s' % (
arrow.get(data[0][0] / 1000).format(),
arrow.get(data[-1][0] / 1000).format()))
# save data
pair_print = pair.replace('/', '_')
filename = f'{pair_print}-{tick_interval}.json'
misc.file_dump_json(os.path.join(dl_path, filename), data)

View File

@ -24,21 +24,14 @@ import plotly.graph_objs as go
from freqtrade.arguments import Arguments
from freqtrade.configuration import Configuration
from freqtrade.analyze import Analyze
<<<<<<< HEAD
from freqtrade.constants import Constants
=======
>>>>>>> bddf009a2b6d0e1a19cca558887ce972e99a6238
from freqtrade import constants
import freqtrade.optimize as optimize
import freqtrade.misc as misc
<<<<<<< HEAD
logger = logging.getLogger('freqtrade')
=======
logger = logging.getLogger(__name__)
>>>>>>> bddf009a2b6d0e1a19cca558887ce972e99a6238
# data:: [ pair, profit-%, enter, exit, time, duration]
# data:: ["ETH/BTC", 0.0023975, "1515598200", "1515602100", "2018-01-10 07:30:00+00:00", 65]
@ -198,7 +191,7 @@ def define_index(min_date: int, max_date: int, interval: str) -> int:
"""
Return the index of a specific date
"""
interval_minutes = Constants.TICKER_INTERVAL_MINUTES[interval]
interval_minutes = constants.TICKER_INTERVAL_MINUTES[interval]
return int((max_date - min_date) / (interval_minutes * 60))