stable/freqtrade/data/history/history_utils.py

481 lines
21 KiB
Python
Raw Normal View History

import logging
import operator
from datetime import datetime, timezone
2018-12-15 12:54:35 +00:00
from pathlib import Path
2019-12-27 09:25:17 +00:00
from typing import Dict, List, Optional, Tuple
import arrow
2022-02-01 18:11:51 +00:00
from pandas import DataFrame, concat
from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS
2020-09-28 17:39:41 +00:00
from freqtrade.data.converter import (clean_ohlcv_dataframe, ohlcv_to_dataframe,
trades_remove_duplicates, trades_to_ohlcv)
from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler
2021-12-03 13:11:24 +00:00
from freqtrade.enums import CandleType
from freqtrade.exceptions import OperationalException
2019-12-27 09:25:17 +00:00
from freqtrade.exchange import Exchange
2020-04-01 18:31:21 +00:00
from freqtrade.misc import format_ms_time
2020-09-28 17:39:41 +00:00
logger = logging.getLogger(__name__)
def load_pair_history(pair: str,
                      timeframe: str,
                      datadir: Path, *,
                      timerange: Optional[TimeRange] = None,
                      fill_up_missing: bool = True,
                      drop_incomplete: bool = True,
                      startup_candles: int = 0,
                      data_format: str = None,
                      data_handler: IDataHandler = None,
                      candle_type: CandleType = CandleType.SPOT
                      ) -> DataFrame:
    """
    Load the stored ohlcv history of one pair through a data handler.

    :param pair: Pair to load data for
    :param timeframe: Timeframe (e.g. "5m")
    :param datadir: Path to the data storage location.
    :param timerange: Limit data to be loaded to this timerange
    :param fill_up_missing: Fill missing values with "No action"-candles
    :param drop_incomplete: Drop last candle assuming it may be incomplete.
    :param startup_candles: Additional candles to load at the start of the period
    :param data_format: Format of the data. Ignored if data_handler is set.
    :param data_handler: Initialized data-handler to use.
                         Will be initialized from data_format if not set
    :param candle_type: Any of the enum CandleType (must match trading mode!)
    :return: DataFrame with ohlcv data, or empty DataFrame
    """
    # Resolve the handler: an explicitly passed handler is forwarded together
    # with data_format and datadir to get_datahandler().
    handler = get_datahandler(datadir, data_format, data_handler)

    # Note the keyword rename: fill_up_missing (here) -> fill_missing (handler).
    load_kwargs = {
        'pair': pair,
        'timeframe': timeframe,
        'timerange': timerange,
        'fill_missing': fill_up_missing,
        'drop_incomplete': drop_incomplete,
        'startup_candles': startup_candles,
        'candle_type': candle_type,
    }
    return handler.ohlcv_load(**load_kwargs)
def load_data(datadir: Path,
              timeframe: str,
              pairs: List[str], *,
              timerange: Optional[TimeRange] = None,
              fill_up_missing: bool = True,
              startup_candles: int = 0,
              fail_without_data: bool = False,
              data_format: str = 'json',
              candle_type: CandleType = CandleType.SPOT,
              user_futures_funding_rate: Optional[int] = None,
              ) -> Dict[str, DataFrame]:
    """
    Load ohlcv history data for a list of pairs.

    :param datadir: Path to the data storage location.
    :param timeframe: Timeframe (e.g. "5m")
    :param pairs: List of pairs to load
    :param timerange: Limit data to be loaded to this timerange
    :param fill_up_missing: Fill missing values with "No action"-candles
    :param startup_candles: Additional candles to load at the start of the period
    :param fail_without_data: Raise OperationalException if no data is found.
    :param data_format: Data format which should be used. Defaults to json
    :param candle_type: Any of the enum CandleType (must match trading mode!)
    :param user_futures_funding_rate: User specified funding rate. When set and
        candle_type is FUNDING_RATE, a pair without stored data only produces a
        warning and is left out of the result.
    :return: dict(<pair>:<Dataframe>)
    :raises OperationalException: if fail_without_data is set and the result is empty.
    """
    result: Dict[str, DataFrame] = {}
    if startup_candles > 0 and timerange:
        logger.info(f'Using indicator startup period: {startup_candles} ...')

    # One handler is created up front and shared by all pairs.
    data_handler = get_datahandler(datadir, data_format)

    for pair in pairs:
        hist = load_pair_history(pair=pair, timeframe=timeframe,
                                 datadir=datadir, timerange=timerange,
                                 fill_up_missing=fill_up_missing,
                                 startup_candles=startup_candles,
                                 data_handler=data_handler,
                                 candle_type=candle_type,
                                 )
        if not hist.empty:
            result[pair] = hist
        elif candle_type is CandleType.FUNDING_RATE and user_futures_funding_rate is not None:
            # logger.warn is a deprecated alias - use logger.warning.
            logger.warning(f"{pair} using user specified [{user_futures_funding_rate}]")
        elif candle_type not in (CandleType.SPOT, CandleType.FUTURES):
            # Auxiliary candle types get an empty placeholder frame, so the pair
            # is still present in the result (and counts as "data" for the
            # fail_without_data check below).
            result[pair] = DataFrame(columns=["date", "open", "close", "high", "low", "volume"])

    if fail_without_data and not result:
        raise OperationalException("No data found. Terminating.")
    return result
def refresh_data(*, datadir: Path,
                 timeframe: str,
                 pairs: List[str],
                 exchange: Exchange,
                 data_format: Optional[str] = None,
                 timerange: Optional[TimeRange] = None,
                 candle_type: CandleType,
                 ) -> None:
    """
    Refresh ohlcv history data for a list of pairs.

    :param datadir: Path to the data storage location.
    :param timeframe: Timeframe (e.g. "5m")
    :param pairs: List of pairs to load
    :param exchange: Exchange object
    :param data_format: dataformat to use
    :param timerange: Limit data to be loaded to this timerange
    :param candle_type: Any of the enum CandleType (must match trading mode!)
    """
    data_handler = get_datahandler(datadir, data_format)
    total = len(pairs)
    # Count from 1 so the progress label reads "1/N" .. "N/N" (previously
    # "0/N" .. "N-1/N"), matching refresh_backtest_ohlcv_data().
    for idx, pair in enumerate(pairs, start=1):
        process = f'{idx}/{total}'
        # The bool result of the download is intentionally not evaluated here;
        # failures are logged inside _download_pair_history().
        _download_pair_history(pair=pair, process=process,
                               timeframe=timeframe, datadir=datadir,
                               timerange=timerange, exchange=exchange,
                               data_handler=data_handler,
                               candle_type=candle_type)
def _load_cached_data_for_updating(
    pair: str,
    timeframe: str,
    timerange: Optional[TimeRange],
    data_handler: IDataHandler,
    candle_type: CandleType,
    prepend: bool = False,
) -> Tuple[DataFrame, Optional[int], Optional[int]]:
    """
    Load the stored candles and work out which window still has to be downloaded.

    If a timerange is passed in and it starts before the stored data (and prepend
    is not set), the stored data is discarded so everything gets downloaded again.
    With prepend set, the download window ends at the first stored candle.
    Otherwise the download starts at the last stored candle to avoid data gaps.

    :param pair: Pair to load data for
    :param timeframe: Timeframe (e.g. "5m")
    :param timerange: Requested timerange; only 'date' typed bounds are used.
    :param data_handler: Data handler used to read the stored candles.
    :param candle_type: Any of the enum CandleType
    :param prepend: Download data preceding the stored data instead of appending.
    :return: Tuple of (stored data, start in ms or None, end in ms or None)
    """
    def _as_ms(moment) -> Optional[int]:
        # Falsy (i.e. None) bounds stay None, everything else becomes epoch-ms.
        if not moment:
            return None
        return int(moment.timestamp() * 1000)

    window_start = None
    window_end = None
    if timerange:
        if timerange.starttype == 'date':
            window_start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
        if timerange.stoptype == 'date':
            window_end = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)

    # The timerange is deliberately not forwarded - the full dataset is needed.
    cached = data_handler.ohlcv_load(pair, timeframe=timeframe,
                                     timerange=None, fill_missing=False,
                                     drop_incomplete=True, warn_no_data=False,
                                     candle_type=candle_type)

    if not cached.empty:
        if prepend:
            # Download up to where the stored data begins.
            window_end = cached.iloc[0]['date']
        elif window_start and window_start < cached.iloc[0]['date']:
            # Earlier data than stored data requested - drop the cache, redownload all.
            cached = DataFrame(columns=DEFAULT_DATAFRAME_COLUMNS)
        else:
            # Continue from the last stored candle.
            window_start = cached.iloc[-1]['date']

    return cached, _as_ms(window_start), _as_ms(window_end)
def _download_pair_history(pair: str, *,
                           datadir: Path,
                           exchange: Exchange,
                           timeframe: str = '5m',
                           process: str = '',
                           new_pairs_days: int = 30,
                           data_handler: IDataHandler = None,
                           timerange: Optional[TimeRange] = None,
                           candle_type: CandleType,
                           erase: bool = False,
                           prepend: bool = False,
                           ) -> bool:
    """
    Download candles for one pair / timeframe from the exchange and store them.

    The download continues from the data already stored. If the timerange starts
    earlier than the stored data, the full data will be redownloaded.
    Any exception raised along the way is logged and reported as a failure.

    :param pair: pair to download
    :param datadir: Path to the data storage location.
    :param exchange: Exchange object used for the download
    :param timeframe: Timeframe (e.g "5m")
    :param process: Progress label, only used in the log output
    :param new_pairs_days: Days to go back when no start point is known
    :param data_handler: Initialized data-handler to use.
    :param timerange: range of time to download
    :param candle_type: Any of the enum CandleType (must match trading mode!)
    :param erase: Erase existing data
    :param prepend: Download data preceding the stored data
    :return: bool with success state
    """
    def _edge_date(frame: DataFrame, position: int) -> str:
        # Formatted date of the candle at `position`, or 'None' for an empty frame.
        if frame.empty:
            return 'None'
        return f"{frame.iloc[position]['date']:%Y-%m-%d %H:%M:%S}"

    # Resolved outside the try block: handler errors are not swallowed.
    handler = get_datahandler(datadir, data_handler=data_handler)

    try:
        if erase and handler.ohlcv_purge(pair, timeframe, candle_type=candle_type):
            logger.info(f'Deleting existing data for pair {pair}, {timeframe}, {candle_type}.')

        cached, since_ms, until_ms = _load_cached_data_for_updating(
            pair, timeframe, timerange,
            data_handler=handler,
            candle_type=candle_type,
            prepend=prepend)

        from_label = format_ms_time(since_ms) if since_ms else "start"
        to_label = format_ms_time(until_ms) if until_ms else "now"
        logger.info(f'({process}) - Download history data for "{pair}", {timeframe}, '
                    f'{candle_type} and store in {datadir}. '
                    f'From {from_label} to {to_label}'
                    )

        logger.debug("Current Start: %s", _edge_date(cached, 0))
        logger.debug("Current End: %s", _edge_date(cached, -1))

        # Without a known start point, go back new_pairs_days from now.
        download_from_ms = since_ms
        if not download_from_ms:
            download_from_ms = arrow.utcnow().shift(days=-new_pairs_days).int_timestamp * 1000

        new_data = exchange.get_historic_ohlcv(pair=pair,
                                               timeframe=timeframe,
                                               since_ms=download_from_ms,
                                               is_new_pair=cached.empty,
                                               candle_type=candle_type,
                                               until_ms=until_ms or None,
                                               )
        # TODO: Maybe move parsing to exchange class (?)
        fresh = ohlcv_to_dataframe(new_data, timeframe, pair,
                                   fill_missing=False, drop_incomplete=True)

        if cached.empty:
            merged = fresh
        else:
            # Clean once more so candles present in both the stored and the
            # downloaded data do not end up duplicated.
            merged = clean_ohlcv_dataframe(concat([cached, fresh], axis=0), timeframe, pair,
                                           fill_missing=False, drop_incomplete=False)

        logger.debug("New Start: %s", _edge_date(merged, 0))
        logger.debug("New End: %s", _edge_date(merged, -1))

        handler.ohlcv_store(pair, timeframe, data=merged, candle_type=candle_type)
        return True

    except Exception:
        logger.exception(
            f'Failed to download history data for pair: "{pair}", timeframe: {timeframe}.'
        )
        return False
def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes: List[str],
                                datadir: Path, trading_mode: str,
                                timerange: Optional[TimeRange] = None,
                                new_pairs_days: int = 30, erase: bool = False,
                                data_format: str = None,
                                prepend: bool = False,
                                ) -> List[str]:
    """
    Refresh stored ohlcv data for backtesting and hyperopt operations.
    Used by freqtrade download-data subcommand.

    :param exchange: Exchange object
    :param pairs: List of pairs to download
    :param timeframes: List of timeframes to download for every pair
    :param datadir: Path to the data storage location.
    :param trading_mode: Trading mode; 'futures' triggers the additional downloads
    :param timerange: Limit data to be downloaded to this timerange
    :param new_pairs_days: Days to go back when no start point is known
    :param erase: Erase existing data
    :param data_format: dataformat to use
    :param prepend: Download data preceding the stored data
    :return: List of pairs that are not available.
    """
    unavailable = []
    handler = get_datahandler(datadir, data_format)
    candle_type = CandleType.get_default(trading_mode)
    pair_count = len(pairs)

    # Arguments identical for every download issued below.
    shared_args = {
        'datadir': datadir,
        'exchange': exchange,
        'timerange': timerange,
        'data_handler': handler,
        'new_pairs_days': new_pairs_days,
        'erase': erase,
        'prepend': prepend,
    }

    process = ''
    for position, pair in enumerate(pairs, start=1):
        if pair not in exchange.markets:
            unavailable.append(pair)
            logger.info(f"Skipping pair {pair}...")
            continue

        for timeframe in timeframes:
            logger.info(f'Downloading pair {pair}, interval {timeframe}.')
            # NOTE: the label is only refreshed in here - with an empty
            # timeframes list the futures downloads below reuse the old value.
            process = f'{position}/{pair_count}'
            _download_pair_history(pair=pair, process=process,
                                   timeframe=str(timeframe),
                                   candle_type=candle_type,
                                   **shared_args)

        if trading_mode != 'futures':
            continue

        # Predefined candletype (and timeframe) depending on exchange
        # Downloads what is necessary to backtest based on futures data.
        tf_mark = exchange.get_option('mark_ohlcv_timeframe')
        fr_candle_type = CandleType.from_string(exchange.get_option('mark_ohlcv_price'))
        # All exchanges need FundingRate for futures trading.
        # The timeframe is aligned to the mark-price timeframe.
        for funding_candle_type in (CandleType.FUNDING_RATE, fr_candle_type):
            _download_pair_history(pair=pair, process=process,
                                   timeframe=str(tf_mark),
                                   candle_type=funding_candle_type,
                                   **shared_args)

    return unavailable
2019-12-25 15:34:27 +00:00
def _download_trades_history(exchange: Exchange,
                             pair: str, *,
                             new_pairs_days: int = 30,
                             timerange: Optional[TimeRange] = None,
                             data_handler: IDataHandler
                             ) -> bool:
    """
    Download trade history from the exchange.
    Appends to previously downloaded trades data.

    Any exception raised along the way is logged and reported as a failure.

    :param exchange: Exchange object used for the download
    :param pair: Pair to download trades for
    :param new_pairs_days: Days to go back when the timerange has no start date
    :param timerange: Range of time to download; only 'date' typed bounds are used
    :param data_handler: Data handler used to load and store the trades
    :return: bool with success state
    """
    try:
        since = None
        until = None
        if timerange:
            if timerange.starttype == 'date':
                since = timerange.startts * 1000
            if timerange.stoptype == 'date':
                until = timerange.stopts * 1000
        if since is None:
            # No start date available (no timerange at all, or a timerange
            # that only carries a stop date): go back new_pairs_days from now.
            # Previously the stop-date-only case left `since` unbound and the
            # download always failed.
            since = arrow.utcnow().shift(days=-new_pairs_days).int_timestamp * 1000

        trades = data_handler.trades_load(pair)

        # TradesList columns are defined in constants.DEFAULT_TRADES_COLUMNS
        # DEFAULT_TRADES_COLUMNS: 0 -> timestamp
        # DEFAULT_TRADES_COLUMNS: 1 -> id

        if trades and since < trades[0][0]:
            # since is before the first trade
            logger.info(f"Start earlier than available data. Redownloading trades for {pair}...")
            trades = []

        from_id = trades[-1][1] if trades else None
        if trades and since < trades[-1][0]:
            # Reset since to the last available point
            # - 5 seconds (to ensure we're getting all trades)
            since = trades[-1][0] - (5 * 1000)
            logger.info(f"Using last trade date -5s - Downloading trades for {pair} "
                        f"since: {format_ms_time(since)}.")

        logger.debug(f"Current Start: {format_ms_time(trades[0][0]) if trades else 'None'}")
        logger.debug(f"Current End: {format_ms_time(trades[-1][0]) if trades else 'None'}")
        logger.info(f"Current Amount of trades: {len(trades)}")

        new_trades = exchange.get_historic_trades(pair=pair,
                                                  since=since,
                                                  until=until,
                                                  from_id=from_id,
                                                  )
        # Element 1 of the result holds the trades list
        # (presumably element 0 is the pair - verify against the exchange class).
        trades.extend(new_trades[1])
        # Remove duplicates to make sure we're not storing data we don't need
        trades = trades_remove_duplicates(trades)
        data_handler.trades_store(pair, data=trades)

        # Guarded like the "Current" logs above: an empty result must not raise
        # IndexError after the data has already been stored.
        logger.debug(f"New Start: {format_ms_time(trades[0][0]) if trades else 'None'}")
        logger.debug(f"New End: {format_ms_time(trades[-1][0]) if trades else 'None'}")
        logger.info(f"New Amount of trades: {len(trades)}")
        return True

    except Exception:
        logger.exception(
            f'Failed to download historic trades for pair: "{pair}". '
        )
        return False
def refresh_backtest_trades_data(exchange: Exchange, pairs: List[str], datadir: Path,
                                 timerange: TimeRange, new_pairs_days: int = 30,
                                 erase: bool = False, data_format: str = 'jsongz') -> List[str]:
    """
    Refresh stored trades data for backtesting and hyperopt operations.
    Used by freqtrade download-data subcommand.

    :param exchange: Exchange object
    :param pairs: List of pairs to download trades for
    :param datadir: Path to the data storage location.
    :param timerange: Limit data to be downloaded to this timerange
    :param new_pairs_days: Days to go back when no start point is known
    :param erase: Erase existing trades data before downloading
    :param data_format: dataformat to use for the trades data
    :return: List of pairs that are not available.
    """
    unavailable = []
    handler = get_datahandler(datadir, data_format=data_format)

    for pair in pairs:
        if pair not in exchange.markets:
            unavailable.append(pair)
            logger.info(f"Skipping pair {pair}...")
            continue

        # Only log the deletion if the purge reports that it removed something.
        if erase and handler.trades_purge(pair):
            logger.info(f'Deleting existing data for pair {pair}.')

        logger.info(f'Downloading trades for pair {pair}.')
        # The success flag of the download is not evaluated here.
        _download_trades_history(exchange=exchange,
                                 pair=pair,
                                 new_pairs_days=new_pairs_days,
                                 timerange=timerange,
                                 data_handler=handler)

    return unavailable
def convert_trades_to_ohlcv(
    pairs: List[str],
    timeframes: List[str],
    datadir: Path,
    timerange: TimeRange,
    erase: bool = False,
    data_format_ohlcv: str = 'json',
    data_format_trades: str = 'jsongz',
    candle_type: CandleType = CandleType.SPOT
) -> None:
    """
    Convert stored trades data to ohlcv data

    :param pairs: List of pairs to convert
    :param timeframes: Timeframes to generate for every pair
    :param datadir: Path to the data storage location.
    :param timerange: Not used by this function body.
    :param erase: Erase existing ohlcv data before storing the converted data
    :param data_format_ohlcv: dataformat to use for the ohlcv output
    :param data_format_trades: dataformat the trades are stored in
    :param candle_type: Any of the enum CandleType
    """
    trades_handler = get_datahandler(datadir, data_format=data_format_trades)
    ohlcv_handler = get_datahandler(datadir, data_format=data_format_ohlcv)

    for pair in pairs:
        # Trades are loaded once per pair and reused for every timeframe.
        pair_trades = trades_handler.trades_load(pair)

        for timeframe in timeframes:
            if erase and ohlcv_handler.ohlcv_purge(pair, timeframe, candle_type=candle_type):
                logger.info(f'Deleting existing data for pair {pair}, interval {timeframe}.')

            try:
                candles = trades_to_ohlcv(pair_trades, timeframe)
                # Store ohlcv
                ohlcv_handler.ohlcv_store(pair, timeframe, data=candles,
                                          candle_type=candle_type)
            except ValueError:
                # A failing conversion is logged; the remaining timeframes and
                # pairs are still processed.
                logger.exception(f'Could not convert {pair} to OHLCV.')
def get_timerange(data: Dict[str, DataFrame]) -> Tuple[datetime, datetime]:
    """
    Get the maximum common timerange for the given backtest data.

    :param data: dictionary with preprocessed backtesting data
    :return: tuple containing min_date, max_date
    """
    first_dates = []
    last_dates = []
    for frame in data.values():
        dates = frame['date']
        first_dates.append(dates.min().to_pydatetime())
        last_dates.append(dates.max().to_pydatetime())

    # Earliest start and latest end over all frames.
    return min(first_dates), max(last_dates)
def validate_backtest_data(data: DataFrame, pair: str, min_date: datetime,
                           max_date: datetime, timeframe_min: int) -> bool:
    """
    Validates preprocessed backtesting data for missing values and logs a warning
    if candles are missing.

    :param data: preprocessed backtesting data (as DataFrame)
    :param pair: pair used for log output.
    :param min_date: start-date of the data
    :param max_date: end-date of the data
    :param timeframe_min: Timeframe in minutes
    :return: True if fewer candles than expected were found, False otherwise
    """
    # total difference in minutes / timeframe-minutes
    span_minutes = (max_date - min_date).total_seconds() // 60
    expected_frames = int(span_minutes // timeframe_min)
    actual_frames = len(data)

    if actual_frames >= expected_frames:
        return False

    logger.warning("%s has missing frames: expected %s, got %s, that's %s missing values",
                   pair, expected_frames, actual_frames, expected_frames - actual_frames)
    return True