stable/freqtrade/optimize/__init__.py

294 lines
10 KiB
Python
Raw Normal View History

# pragma pylint: disable=missing-docstring
2018-03-17 21:44:47 +00:00
import gzip
try:
import ujson as json
_UJSON = True
except ImportError:
# see mypy/issues/1153
import json # type: ignore
_UJSON = False
2018-03-25 19:37:14 +00:00
import logging
import os
2018-10-18 17:42:54 +00:00
from datetime import datetime
2018-05-30 20:38:09 +00:00
from typing import Optional, List, Dict, Tuple, Any
import operator
import arrow
from pandas import DataFrame
2018-01-18 07:10:48 +00:00
from freqtrade import misc, constants, OperationalException
2018-06-17 10:41:33 +00:00
from freqtrade.exchange import Exchange
2018-06-05 21:34:26 +00:00
from freqtrade.arguments import TimeRange
2018-03-25 19:37:14 +00:00
logger = logging.getLogger(__name__)
def json_load(data):
    """
    Decode JSON from the open file-like object ``data``.

    When the module-level ``_UJSON`` flag is set, the ``precise_float=True``
    keyword is passed to ``json.load``; otherwise ``json.load`` is called
    without extra arguments.
    :param data: file-like object to read the JSON document from
    :return: the decoded object
    """
    extra_kwargs = {'precise_float': True} if _UJSON else {}
    return json.load(data, **extra_kwargs)
2018-06-05 21:34:26 +00:00
def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
    """
    Cut ``tickerlist`` down to the window described by ``timerange``.

    Supported ``starttype`` / ``stoptype`` values:
      * ``'line'``  - as start: keep rows up to position ``startts``;
                      as stop: ``stopts`` is added to the list length to get
                      the first kept row (so a negative value keeps the tail)
      * ``'index'`` - ``startts`` / ``stopts`` are used directly as slice bounds
      * ``'date'``  - element ``[0]`` of each row is compared with
                      ``startts * 1000`` / ``stopts * 1000``
                      (presumably millisecond timestamps in ascending order -
                      verify against the data source)
    :param tickerlist: rows to trim; an empty list is returned unchanged
    :param timerange: object providing starttype, stoptype, startts, stopts
    :return: the selected slice of ``tickerlist``
    :raises ValueError: when the computed start lies behind the computed stop
    """
    if not tickerlist:
        return tickerlist

    total = len(tickerlist)
    start_index = 0
    stop_index = total

    if timerange.starttype == 'line':
        stop_index = timerange.startts
    if timerange.starttype == 'index':
        start_index = timerange.startts
    elif timerange.starttype == 'date':
        start_ms = timerange.startts * 1000
        while start_index < total and tickerlist[start_index][0] < start_ms:
            start_index += 1

    if timerange.stoptype == 'line':
        # Clamp at 0: asking for more trailing rows than exist must yield the
        # whole list. An unclamped negative start would be interpreted by the
        # slice below as "count from the end" and silently drop rows.
        start_index = max(0, total + timerange.stopts)
    if timerange.stoptype == 'index':
        stop_index = timerange.stopts
    elif timerange.stoptype == 'date':
        stop_ms = timerange.stopts * 1000
        while stop_index > 0 and tickerlist[stop_index - 1][0] > stop_ms:
            stop_index -= 1

    if start_index > stop_index:
        raise ValueError(f'The timerange [{timerange.startts},{timerange.stopts}] is incorrect')

    return tickerlist[start_index:stop_index]
2018-01-15 21:25:02 +00:00
def get_timeframe(data: Dict[str, DataFrame]) -> Tuple[arrow.Arrow, arrow.Arrow]:
    """
    Determine the overall date span covered by the given backtest data
    :param data: dictionary with preprocessed backtesting data;
                 every frame is read through its 'date' column
    :return: tuple containing min_date, max_date
    """
    first_dates = []
    last_dates = []
    for frame in data.values():
        dates = frame['date']
        first_dates.append(arrow.get(dates.min()))
        last_dates.append(arrow.get(dates.max()))

    # Earliest start over all frames, latest end over all frames
    return min(first_dates), max(last_dates)
2018-10-18 17:42:54 +00:00
def validate_backtest_data(data: Dict[str, DataFrame], min_date: datetime,
                           max_date: datetime, ticker_interval_mins: int) -> bool:
    """
    Checks preprocessed backtesting data for missing frames and logs a warning
    for every pair that has fewer rows than expected.
    :param data: dictionary with preprocessed backtesting data
    :param min_date: start-date of the data
    :param max_date: end-date of the data
    :param ticker_interval_mins: ticker interval in minutes
    :return: True if at least one pair has missing frames, otherwise False
    """
    # whole minutes between both dates, divided by the interval length
    span_minutes = (max_date - min_date).total_seconds() // 60
    expected_frames = int(span_minutes // ticker_interval_mins)

    found_missing = False
    for pair, frame in data.items():
        frame_count = len(frame)
        shortfall = expected_frames - frame_count
        if shortfall <= 0:
            continue
        found_missing = True
        logger.warning("%s has missing frames: expected %s, got %s, that's %s missing values",
                       pair, expected_frames, frame_count, shortfall)
    return found_missing
2018-10-18 17:42:54 +00:00
2018-03-17 21:43:36 +00:00
def load_tickerdata_file(
        datadir: str, pair: str,
        ticker_interval: str,
        timerange: Optional[TimeRange] = None) -> Optional[List[Dict]]:
    """
    Load the stored ticker data of one pair from file.

    The file name is built as ``<pair>-<ticker_interval>.json`` with '/' in
    the pair replaced by '_'. A gzip-compressed ``.json.gz`` sibling is
    preferred over the plain file when it exists.
    :param datadir: data directory, resolved through make_testdata_path
    :param pair: pair to load
    :param ticker_interval: ticker interval used in the file name
    :param timerange: when given (truthy), the loaded data is trimmed with
                      trim_tickerlist
    :return: the decoded ticker data, or None if neither file exists
    """
    path = make_testdata_path(datadir)
    pair_s = pair.replace('/', '_')
    file = os.path.join(path, f'{pair_s}-{ticker_interval}.json')
    gzipfile = file + '.gz'

    # None signals "no file on disk" to the caller.
    # Decoding goes through json_load so this reader parses floats the same
    # way as the cache reader that later rewrites these files.
    if os.path.isfile(gzipfile):
        logger.debug('Loading ticker data from file %s', gzipfile)
        with gzip.open(gzipfile) as tickerdata:
            pairdata = json_load(tickerdata)
    elif os.path.isfile(file):
        logger.debug('Loading ticker data from file %s', file)
        with open(file) as tickerdata:
            pairdata = json_load(tickerdata)
    else:
        return None

    if timerange:
        pairdata = trim_tickerlist(pairdata, timerange)
    return pairdata
def load_data(datadir: str,
              ticker_interval: str,
              pairs: List[str],
              refresh_pairs: Optional[bool] = False,
              exchange: Optional[Exchange] = None,
              timerange: TimeRange = TimeRange(None, None, 0, 0)) -> Dict[str, List]:
    """
    Loads ticker history data for the given parameters
    :param datadir: directory the ticker files are read from / stored in
    :param ticker_interval: ticker interval of the files to load
    :param pairs: pairs to load
    :param refresh_pairs: when truthy, download_pairs is called for all pairs
                          before loading (requires ``exchange``)
    :param exchange: exchange used for the download
    :param timerange: range handed to the download and to the file loader
    :return: dict mapping each pair with data to its ticker list;
             pairs without data are left out
    :raises OperationalException: refresh requested without an exchange
    """
    # If the user forces the refresh of pairs
    if refresh_pairs:
        logger.info('Download data for all pairs and store them in %s', datadir)
        if not exchange:
            raise OperationalException("Exchange needs to be initialized when "
                                       "calling load_data with refresh_pairs=True")
        download_pairs(datadir, exchange, pairs, ticker_interval, timerange=timerange)

    result = {}
    for pair in pairs:
        pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange)
        if not pairdata:
            logger.warning(
                'No data for pair: "%s", Interval: %s. '
                'Use --refresh-pairs-cached to download the data',
                pair,
                ticker_interval
            )
            continue

        # Warn when the stored data does not reach the requested start / end
        if timerange.starttype == 'date':
            if pairdata[0][0] > timerange.startts * 1000:
                first_seen = arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S')
                logger.warning('Missing data at start for pair %s, data starts at %s',
                               pair, first_seen)
        if timerange.stoptype == 'date':
            if pairdata[-1][0] < timerange.stopts * 1000:
                last_seen = arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S')
                logger.warning('Missing data at end for pair %s, data ends at %s',
                               pair, last_seen)
        result[pair] = pairdata

    return result
def make_testdata_path(datadir: str) -> str:
    """
    Return the path where testdata files are stored.

    A truthy ``datadir`` is returned unchanged; otherwise the absolute path of
    ``../tests/testdata`` relative to this module's directory is used.
    """
    if datadir:
        return datadir
    module_dir = os.path.dirname(__file__)
    fallback = os.path.join(module_dir, '..', 'tests', 'testdata')
    return os.path.abspath(fallback)
def download_pairs(datadir, exchange: Exchange, pairs: List[str],
                   ticker_interval: str,
                   timerange: TimeRange = TimeRange(None, None, 0, 0)) -> bool:
    """
    For each pair passed in parameters, download the ticker intervals.

    Processing stops at the first pair whose download raises.
    :param datadir: directory the downloaded data is stored in
    :param exchange: exchange the data is fetched from
    :param pairs: pairs to download
    :param ticker_interval: ticker interval to download
    :param timerange: range of time to download
    :return: True when every pair was downloaded, False after the first failure
    """
    for pair in pairs:
        try:
            download_backtesting_testdata(datadir,
                                          exchange=exchange,
                                          pair=pair,
                                          tick_interval=ticker_interval,
                                          timerange=timerange)
        except Exception:
            # Exception (not BaseException): KeyboardInterrupt and SystemExit
            # must propagate so the user can abort a long-running download.
            logger.info(
                'Failed to download the pair: "%s", Interval: %s',
                pair,
                ticker_interval
            )
            # Keep the cause available for troubleshooting
            logger.debug('Download error details for pair "%s"', pair, exc_info=True)
            return False
    return True
def load_cached_data_for_updating(filename: str,
                                  tick_interval: str,
                                  timerange: Optional[TimeRange]) -> Tuple[List[Any],
                                                                           Optional[int]]:
    """
    Load cached data and choose what part of the data should be updated
    :param filename: path of the cache file
    :param tick_interval: ticker interval, used as key into
                          constants.TICKER_INTERVAL_MINUTES for 'line' ranges
    :param timerange: optional user supplied range that defines the start time
    :return: tuple (data to keep, since_ms to download from); since_ms stays
             None when neither the timerange nor the cache defines a start
    """
    since_ms = None

    # user sets timerange, so find the start time
    if timerange:
        if timerange.starttype == 'date':
            since_ms = timerange.startts * 1000
        elif timerange.stoptype == 'line':
            num_minutes = timerange.stopts * constants.TICKER_INTERVAL_MINUTES[tick_interval]
            since_ms = arrow.utcnow().shift(minutes=num_minutes).timestamp * 1000

    # read the cached file
    if not os.path.isfile(filename):
        cached = []
    else:
        with open(filename, "rt") as cache_file:
            cached = json_load(cache_file)
        # drop the last item: it may have been fetched while the candle
        # was still incomplete, so it cannot be trusted
        if cached:
            cached.pop()

    if not cached:
        return (cached, since_ms)

    if since_ms and since_ms < cached[0][0]:
        # data is requested for an earlier period than the cache covers,
        # so everything has to be downloaded again
        return ([], since_ms)

    # part of the data is already cached: continue right after its last entry
    return (cached, cached[-1][0] + 1)
def download_backtesting_testdata(datadir: str,
                                  exchange: Exchange,
                                  pair: str,
                                  tick_interval: str = '5m',
                                  timerange: Optional[TimeRange] = None) -> None:
    """
    Download the latest ticker intervals from the exchange for the pair passed in parameters
    The data is downloaded starting from the last correct ticker interval data that
    exists in a cache. If timerange starts earlier than the data in the cache,
    the full data will be redownloaded

    Based on @Rybolov work: https://github.com/rybolov/freqtrade-data
    :param datadir: data directory, resolved through make_testdata_path
    :param exchange: exchange whose get_history() provides the new data
    :param pair: pair to download
    :param tick_interval: ticker interval
    :param timerange: range of time to download
    :return: None
    """
    path = make_testdata_path(datadir)
    filepair = pair.replace("/", "_")
    filename = os.path.join(path, f'{filepair}-{tick_interval}.json')

    logger.info(
        'Download the pair: "%s", Interval: %s',
        pair,
        tick_interval
    )

    data, since_ms = load_cached_data_for_updating(filename, tick_interval, timerange)

    # data[0] is the first cached row; indexing data[1] here would report the
    # second row and raise IndexError for a cache holding a single row.
    logger.debug("Current Start: %s", misc.format_ms_time(data[0][0]) if data else 'None')
    logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')

    # Default since_ms to 30 days if nothing is given
    if not since_ms:
        since_ms = int(arrow.utcnow().shift(days=-30).float_timestamp) * 1000
    new_data = exchange.get_history(pair=pair, tick_interval=tick_interval,
                                    since_ms=since_ms)
    data.extend(new_data)

    # Guarded like the logs above: cache and download may both be empty, and
    # a debug message must not turn that into an IndexError.
    logger.debug("New Start: %s", misc.format_ms_time(data[0][0]) if data else 'None')
    logger.debug("New End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')

    misc.file_dump_json(filename, data)