stable/freqtrade/data/history.py

432 lines
17 KiB
Python
Raw Normal View History

2018-12-16 08:58:46 +00:00
"""
Handle historic data (ohlcv).

Includes:
* load data for a pair (or a list of pairs) from disk
* download data from exchange and store to disk
"""
import logging
import operator
from datetime import datetime, timezone
2018-12-15 12:54:35 +00:00
from pathlib import Path
2019-12-27 09:25:17 +00:00
from typing import Dict, List, Optional, Tuple
import arrow
2018-12-15 13:28:37 +00:00
from pandas import DataFrame
from freqtrade import OperationalException, misc
from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS
from freqtrade.data.converter import parse_ticker_dataframe, trades_to_ohlcv
2019-12-25 15:34:27 +00:00
from freqtrade.data.datahandlers import get_datahandler
from freqtrade.data.datahandlers.idatahandler import IDataHandler
2019-12-27 09:25:17 +00:00
from freqtrade.exchange import Exchange
logger = logging.getLogger(__name__)
def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
    """
    Cut a tickerlist down to the entries that lie within the given timerange.

    Only bounds whose type is 'date' are applied. The first field of every entry
    is compared against ``startts`` / ``stopts`` multiplied by 1000 (presumably a
    millisecond timestamp versus a timestamp in seconds); both bounds are inclusive.
    NOTE(review): the scans stop at the first in-range entry found from either end,
    so the list is assumed to be sorted ascending - confirm against callers.
    :param tickerlist: list of ticker entries; an empty list is returned unchanged
    :param timerange: timerange providing starttype/startts and stoptype/stopts
    :return: the slice of tickerlist between the two bounds
    :raises ValueError: if the start bound ends up behind the stop bound
    """
    if not tickerlist:
        return tickerlist

    total = len(tickerlist)
    first = 0
    last = total

    if timerange.starttype == 'date':
        lower_ms = timerange.startts * 1000
        # Index of the first entry that is not before the lower bound
        first = next(
            (idx for idx, entry in enumerate(tickerlist) if not entry[0] < lower_ms),
            total,
        )

    if timerange.stoptype == 'date':
        upper_ms = timerange.stopts * 1000
        # One past the index of the last entry that is not after the upper bound
        last = next(
            (idx + 1 for idx in range(total - 1, -1, -1)
             if not tickerlist[idx][0] > upper_ms),
            0,
        )

    if first > last:
        raise ValueError(f'The timerange [{timerange.startts},{timerange.stopts}] is incorrect')

    return tickerlist[first:last]
def load_tickerdata_file(datadir: Path, pair: str, timeframe: str,
                         timerange: Optional[TimeRange] = None) -> List[Dict]:
    """
    Read the stored ticker data of one pair / timeframe from disk.

    The file location is determined by pair_data_filename() and its content is
    read through misc.file_load_json().
    :param datadir: Path to the data storage location
    :param pair: Pair to load data for
    :param timeframe: Ticker timeframe (e.g. "5m")
    :param timerange: If given, the loaded data is trimmed to this timerange
    :return: tickerlist, or an empty list if nothing could be loaded
    """
    tickers = misc.file_load_json(pair_data_filename(datadir, pair, timeframe))
    if not tickers:
        return []
    return trim_tickerlist(tickers, timerange) if timerange else tickers
def load_pair_history(pair: str,
                      timeframe: str,
                      datadir: Path, *,
                      timerange: Optional[TimeRange] = None,
                      fill_up_missing: bool = True,
                      drop_incomplete: bool = True,
                      startup_candles: int = 0,
                      data_format: str = None,
                      data_handler: IDataHandler = None,
                      ) -> DataFrame:
    """
    Load the stored ohlcv history of a single pair.

    The actual loading is delegated to the data handler obtained from
    get_datahandler(datadir, data_format, data_handler).
    :param pair: Pair to load data for
    :param timeframe: Ticker timeframe (e.g. "5m")
    :param datadir: Path to the data storage location.
    :param timerange: Limit data to be loaded to this timerange
    :param fill_up_missing: Fill missing values with "No action"-candles
    :param drop_incomplete: Drop last candle assuming it may be incomplete.
    :param startup_candles: Additional candles to load at the start of the period
    :param data_format: Format of the data. Ignored if data_handler is set.
    :param data_handler: Initialized data-handler to use.
                         Will be initialized from data_format if not set
    :return: DataFrame with ohlcv data, or empty DataFrame
    """
    handler = get_datahandler(datadir, data_format, data_handler)
    # The handler names the fill option "fill_missing", not "fill_up_missing"
    load_options = {
        'timerange': timerange,
        'fill_missing': fill_up_missing,
        'drop_incomplete': drop_incomplete,
        'startup_candles': startup_candles,
    }
    return handler.ohlcv_load(pair=pair, timeframe=timeframe, **load_options)
def load_data(datadir: Path,
              timeframe: str,
              pairs: List[str], *,
              timerange: Optional[TimeRange] = None,
              fill_up_missing: bool = True,
              startup_candles: int = 0,
              fail_without_data: bool = False,
              data_format: str = 'json',
              ) -> Dict[str, DataFrame]:
    """
    Load the stored ohlcv history for several pairs at once.

    Pairs for which no data could be loaded are left out of the result.
    :param datadir: Path to the data storage location.
    :param timeframe: Ticker Timeframe (e.g. "5m")
    :param pairs: List of pairs to load
    :param timerange: Limit data to be loaded to this timerange
    :param fill_up_missing: Fill missing values with "No action"-candles
    :param startup_candles: Additional candles to load at the start of the period
    :param fail_without_data: Raise OperationalException if no data is found.
    :param data_format: Data format used to initialize the data-handler.
    :return: dict(<pair>:<Dataframe>)
    :raises OperationalException: if fail_without_data is set and nothing was loaded
    """
    if startup_candles > 0 and timerange:
        logger.info(f'Using indicator startup period: {startup_candles} ...')

    # One handler instance is shared between all pairs
    handler = get_datahandler(datadir, data_format)

    loaded: Dict[str, DataFrame] = {}
    for pair in pairs:
        frame = load_pair_history(pair=pair,
                                  timeframe=timeframe,
                                  datadir=datadir,
                                  timerange=timerange,
                                  fill_up_missing=fill_up_missing,
                                  startup_candles=startup_candles,
                                  data_handler=handler)
        if frame.empty:
            continue
        loaded[pair] = frame

    if not loaded and fail_without_data:
        raise OperationalException("No data found. Terminating.")
    return loaded
def refresh_data(datadir: Path,
                 timeframe: str,
                 pairs: List[str],
                 exchange: Exchange,
                 data_format: str = None,
                 timerange: Optional[TimeRange] = None,
                 ) -> None:
    """
    Download fresh ohlcv data for every pair in the list.

    Each pair is handed to _download_pair_history() in turn; the success state
    returned by that call is not evaluated here.
    :param datadir: Path to the data storage location.
    :param timeframe: Ticker Timeframe (e.g. "5m")
    :param pairs: List of pairs to load
    :param exchange: Exchange object
    :param data_format: Data format used to initialize the data-handler.
    :param timerange: Limit data to be loaded to this timerange
    """
    handler = get_datahandler(datadir, data_format)
    for current_pair in pairs:
        _download_pair_history(
            pair=current_pair,
            timeframe=timeframe,
            datadir=datadir,
            timerange=timerange,
            exchange=exchange,
            data_handler=handler,
        )
def pair_data_filename(datadir: Path, pair: str, timeframe: str) -> Path:
    """
    Build the path of the json data file for a pair / timeframe combination.

    Every "/" in the pair name is replaced by "_", so "ETH/BTC" with timeframe
    "5m" results in "<datadir>/ETH_BTC-5m.json".
    :param datadir: Path to the data storage location
    :param pair: Pair name (e.g. "ETH/BTC")
    :param timeframe: Ticker timeframe (e.g. "5m")
    :return: Path to the data file (the file does not need to exist)
    """
    safe_pair = "_".join(pair.split("/"))
    return datadir.joinpath(f'{safe_pair}-{timeframe}.json')
2019-12-27 09:12:56 +00:00
def _load_cached_data_for_updating(pair: str, timeframe: str, timerange: Optional[TimeRange],
                                   data_handler: IDataHandler) -> Tuple[DataFrame, Optional[int]]:
    """
    Load the cached data of a pair and determine from where to download more data.

    If a timerange with a start date is passed in and that date lies before the
    first stored candle, the cached data is discarded (an empty DataFrame is
    returned) so everything gets downloaded again from the requested start.
    Otherwise the download resumes at the last stored candle to avoid data gaps.
    Without any cached data, the requested start date (if any) is used.
    :param pair: Pair to load data for
    :param timeframe: Ticker timeframe (e.g. "5m")
    :param timerange: Optional timerange; only a start of type 'date' is considered
    :param data_handler: Initialized data-handler used to load the cached data
    :return: tuple of (cached data, download start in milliseconds or None)
    """
    requested_start = None
    if timerange and timerange.starttype == 'date':
        requested_start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)

    # Intentionally don't pass timerange in - since we need to load the full dataset.
    cached = data_handler.ohlcv_load(pair, timeframe=timeframe,
                                     timerange=None, fill_missing=False,
                                     drop_incomplete=True, warn_no_data=False)

    if cached.empty:
        resume_from = requested_start
    elif requested_start and requested_start < cached.iloc[0]['date']:
        # Earlier data than existing data requested, redownload all
        cached = DataFrame(columns=DEFAULT_DATAFRAME_COLUMNS)
        resume_from = requested_start
    else:
        resume_from = cached.iloc[-1]['date']

    since_ms = int(resume_from.timestamp() * 1000) if resume_from else None
    return cached, since_ms
def _download_pair_history(datadir: Path,
                           exchange: Exchange,
                           pair: str, *,
                           timeframe: str = '5m',
                           timerange: Optional[TimeRange] = None,
                           data_handler: IDataHandler = None) -> bool:
    """
    Download latest candles from the exchange for the pair and timeframe passed in parameters
    The data is downloaded starting from the last correct data that
    exists in a cache. If timerange starts earlier than the data in the cache,
    the full data will be redownloaded

    Based on @Rybolov work: https://github.com/rybolov/freqtrade-data

    Any exception raised while loading, downloading or storing is logged and
    turned into a ``False`` return value instead of being propagated.

    :param datadir: Path to the data storage location
    :param exchange: Exchange object used for the download
    :param pair: pair to download
    :param timeframe: Ticker Timeframe (e.g 5m)
    :param timerange: range of time to download
    :param data_handler: Initialized data-handler used to load and store the data.
                         A handler for datadir is created if not set.
    :return: bool with success state
    """
    # Honour a handler passed in by the caller (it carries the selected data
    # format); only fall back to a freshly created handler when none was given.
    if data_handler is None:
        data_handler = get_datahandler(datadir)

    try:
        logger.info(
            f'Download history data for pair: "{pair}", timeframe: {timeframe} '
            f'and store in {datadir}.'
        )

        data, since_ms = _load_cached_data_for_updating(pair, timeframe, timerange,
                                                        data_handler=data_handler)

        logger.debug("Current Start: %s",
                     f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')
        logger.debug("Current End: %s",
                     f"{data.iloc[-1]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')

        # Default since_ms to 30 days if nothing is given
        new_data = exchange.get_historic_ohlcv(pair=pair,
                                               timeframe=timeframe,
                                               since_ms=since_ms if since_ms else
                                               int(arrow.utcnow().shift(
                                                   days=-30).float_timestamp) * 1000
                                               )
        # TODO: Maybe move parsing to exchange class (?)
        new_dataframe = parse_ticker_dataframe(new_data, timeframe, pair,
                                               fill_missing=False, drop_incomplete=True)
        if data.empty:
            data = new_dataframe
        else:
            data = data.append(new_dataframe)

        logger.debug("New Start: %s",
                     f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')
        logger.debug("New End: %s",
                     f"{data.iloc[-1]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')

        data_handler.ohlcv_store(pair, timeframe, data=data)
        return True

    except Exception as e:
        # Deliberate best-effort: a failing pair must not abort the whole refresh.
        logger.error(
            f'Failed to download history data for pair: "{pair}", timeframe: {timeframe}. '
            f'Error: {e}'
        )
        return False
def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes: List[str],
                                datadir: Path, timerange: Optional[TimeRange] = None,
                                erase=False, data_format: str = None) -> List[str]:
    """
    Refresh stored ohlcv data for backtesting and hyperopt operations.
    Used by freqtrade download-data subcommand.
    :param exchange: Exchange object; pairs missing from exchange.markets are skipped
    :param pairs: List of pairs to download
    :param timeframes: List of timeframes to download for every pair
    :param datadir: Path to the data storage location
    :param timerange: Limit data to be downloaded to this timerange
    :param erase: Remove previously stored data of a pair / timeframe before downloading
    :param data_format: Data format used to initialize the data-handler
    :return: List of pairs that are not available.
    """
    pairs_not_available = []
    data_handler = get_datahandler(datadir, data_format)
    for pair in pairs:
        if pair not in exchange.markets:
            pairs_not_available.append(pair)
            logger.info(f"Skipping pair {pair}...")
            continue
        for timeframe in timeframes:

            # Purge through the data handler so the file matching the selected
            # data format is removed, not only the plain ".json" file.
            if erase and data_handler.ohlcv_purge(pair, timeframe):
                logger.info(
                    f'Deleting existing data for pair {pair}, interval {timeframe}.')

            logger.info(f'Downloading pair {pair}, interval {timeframe}.')
            _download_pair_history(datadir=datadir, exchange=exchange,
                                   pair=pair, timeframe=str(timeframe),
                                   timerange=timerange, data_handler=data_handler)
    return pairs_not_available
2019-12-25 15:34:27 +00:00
def _download_trades_history(exchange: Exchange,
                             pair: str, *,
                             timerange: Optional[TimeRange] = None,
                             data_handler: IDataHandler
                             ) -> bool:
    """
    Download trade history from the exchange.
    Appends to previously downloaded trades data.

    Any exception raised while loading, downloading or storing is logged and
    turned into a ``False`` return value instead of being propagated.

    :param exchange: Exchange object used for the download
    :param pair: Pair to download trades for
    :param timerange: Optional timerange; a start of type 'date' is used as download start
    :param data_handler: Initialized data-handler used to load and store the trades
    :return: bool with success state
    """
    try:

        since = timerange.startts * 1000 if timerange and timerange.starttype == 'date' else None

        trades = data_handler.trades_load(pair)

        # Continue after the last stored trade, if there is one
        from_id = trades[-1]['id'] if trades else None

        logger.debug("Current Start: %s", trades[0]['datetime'] if trades else 'None')
        logger.debug("Current End: %s", trades[-1]['datetime'] if trades else 'None')

        # Default since to 30 days if nothing is given
        new_trades = exchange.get_historic_trades(pair=pair,
                                                  since=since if since else
                                                  int(arrow.utcnow().shift(
                                                      days=-30).float_timestamp) * 1000,
                                                  from_id=from_id,
                                                  )
        # The second element of the exchange result is used as the list of trades
        trades.extend(new_trades[1])
        data_handler.trades_store(pair, data=trades)

        # Guard against an empty result - indexing an empty list here would raise
        # and report a failure although the data has been stored successfully.
        logger.debug("New Start: %s", trades[0]['datetime'] if trades else 'None')
        logger.debug("New End: %s", trades[-1]['datetime'] if trades else 'None')
        logger.info(f"New Amount of trades: {len(trades)}")
        return True

    except Exception as e:
        # Deliberate best-effort: a failing pair must not abort the whole refresh.
        logger.error(
            f'Failed to download historic trades for pair: "{pair}". '
            f'Error: {e}'
        )
        return False
def refresh_backtest_trades_data(exchange: Exchange, pairs: List[str], datadir: Path,
                                 timerange: TimeRange, erase=False,
                                 data_format: str = 'jsongz') -> List[str]:
    """
    Download trades data for all given pairs and store it for later use.
    Used by freqtrade download-data subcommand.
    :param exchange: Exchange object; pairs missing from exchange.markets are skipped
    :param pairs: List of pairs to download trades for
    :param datadir: Path to the data storage location
    :param timerange: Timerange passed on to the trades download
    :param erase: Purge previously stored trades of a pair before downloading
    :param data_format: Data format used to initialize the data-handler
    :return: List of pairs that are not available.
    """
    handler = get_datahandler(datadir, data_format=data_format)
    unavailable = []

    for pair in pairs:
        if pair not in exchange.markets:
            unavailable.append(pair)
            logger.info(f"Skipping pair {pair}...")
            continue

        # trades_purge() is only called when erasing was requested
        if erase and handler.trades_purge(pair):
            logger.info(f'Deleting existing data for pair {pair}.')

        logger.info(f'Downloading trades for pair {pair}.')
        _download_trades_history(
            exchange=exchange,
            pair=pair,
            timerange=timerange,
            data_handler=handler,
        )

    return unavailable
2019-10-13 17:21:27 +00:00
def convert_trades_to_ohlcv(pairs: List[str], timeframes: List[str],
                            datadir: Path, timerange: TimeRange, erase=False,
                            data_format_ohlcv: str = 'json',
                            data_format_trades: str = 'jsongz') -> None:
    """
    Convert stored trades data to ohlcv data
    :param pairs: List of pairs to convert
    :param timeframes: List of timeframes to create ohlcv data for
    :param datadir: Path to the data storage location
    :param timerange: Currently unused; kept for interface compatibility
    :param erase: Purge existing ohlcv data of a pair / timeframe before storing
    :param data_format_ohlcv: Data format used to store the resulting ohlcv data
    :param data_format_trades: Data format the stored trades are read from
    """
    data_handler_trades = get_datahandler(datadir, data_format=data_format_trades)
    data_handler_ohlcv = get_datahandler(datadir, data_format=data_format_ohlcv)

    for pair in pairs:
        # Trades are loaded once per pair and reused for every timeframe
        trades = data_handler_trades.trades_load(pair)
        for timeframe in timeframes:
            if erase and data_handler_ohlcv.ohlcv_purge(pair, timeframe):
                logger.info(f'Deleting existing data for pair {pair}, interval {timeframe}.')
            ohlcv = trades_to_ohlcv(trades, timeframe)
            # Store ohlcv
            data_handler_ohlcv.ohlcv_store(pair, timeframe, data=ohlcv)
2019-12-17 22:06:03 +00:00
def get_timerange(data: Dict[str, DataFrame]) -> Tuple[arrow.Arrow, arrow.Arrow]:
    """
    Get the overall timerange covered by the given backtest data.

    The result spans from the earliest first date to the latest last date found
    in the 'date' column of any of the dataframes.
    :param data: dictionary with preprocessed backtesting data
    :return: tuple containing min_date, max_date
    """
    first_dates = []
    last_dates = []
    for frame in data.values():
        dates = frame['date']
        first_dates.append(arrow.get(dates.min()))
        last_dates.append(arrow.get(dates.max()))

    return min(first_dates), max(last_dates)
def validate_backtest_data(data: DataFrame, pair: str, min_date: datetime,
                           max_date: datetime, timeframe_min: int) -> bool:
    """
    Check preprocessed backtesting data for missing candles and log a warning if
    fewer candles than expected are present.
    :param data: preprocessed backtesting data (as DataFrame)
    :param pair: pair used for log output.
    :param min_date: start-date of the data
    :param max_date: end-date of the data
    :param timeframe_min: ticker Timeframe in minutes
    :return: True if candles are missing, False otherwise
    """
    # Whole minutes between both dates, divided into whole timeframes
    span_minutes = (max_date - min_date).total_seconds() // 60
    expected_frames = int(span_minutes // timeframe_min)
    actual_frames = len(data)

    if actual_frames >= expected_frames:
        return False

    logger.warning("%s has missing frames: expected %s, got %s, that's %s missing values",
                   pair, expected_frames, actual_frames, expected_frames - actual_frames)
    return True