2019-12-23 13:56:48 +00:00
|
|
|
"""
|
|
|
|
Abstract datahandler interface.
|
|
|
|
Its subclasses handle loading and storing data from disk.
|
|
|
|
|
|
|
|
"""
|
2019-12-25 10:09:29 +00:00
|
|
|
import logging
|
2021-11-28 14:03:55 +00:00
|
|
|
import re
|
2019-12-25 10:09:59 +00:00
|
|
|
from abc import ABC, abstractclassmethod, abstractmethod
|
|
|
|
from copy import deepcopy
|
2019-12-25 14:13:17 +00:00
|
|
|
from datetime import datetime, timezone
|
2019-12-28 08:59:47 +00:00
|
|
|
from pathlib import Path
|
2020-03-31 18:20:10 +00:00
|
|
|
from typing import List, Optional, Type
|
2019-12-28 08:59:47 +00:00
|
|
|
|
2019-12-23 13:56:48 +00:00
|
|
|
from pandas import DataFrame
|
|
|
|
|
|
|
|
from freqtrade.configuration import TimeRange
|
2020-11-21 09:52:15 +00:00
|
|
|
from freqtrade.constants import ListPairsWithTimeframes, TradeList
|
2020-09-28 17:39:41 +00:00
|
|
|
from freqtrade.data.converter import clean_ohlcv_dataframe, trades_remove_duplicates, trim_dataframe
|
2019-12-25 10:09:29 +00:00
|
|
|
from freqtrade.exchange import timeframe_to_seconds
|
|
|
|
|
2020-09-28 17:39:41 +00:00
|
|
|
|
2019-12-25 10:09:29 +00:00
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
2019-12-23 13:56:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
class IDataHandler(ABC):
    """
    Abstract interface for loading, storing and purging OHLCV and trades data.

    Concrete subclasses implement the abstract methods for one storage format.
    This base class adds the shared post-processing on top of them
    (`ohlcv_load`, `trades_load`) and helpers for validating loaded data.
    """

    # Captures three groups from a data filename: the pair, the timeframe (digits followed
    # by one non-whitespace character) and an optional candle type. The match must be
    # followed by a "." (start of the file extension).
    # NOTE(review): the pair group only allows letters, "_" and "-", so filenames of pairs
    # containing digits will not match - confirm whether that is intended.
    _OHLCV_REGEX = r'^([a-zA-Z_-]+)\-(\d+\S)\-?([a-zA-Z_]*)?(?=\.)'

    def __init__(self, datadir: Path) -> None:
        """
        :param datadir: Directory this handler operates on
        """
        self._datadir = datadir

    # `abc.abstractclassmethod` is deprecated since Python 3.3 - stacking `classmethod`
    # on top of `abstractmethod` is the documented replacement.
    @classmethod
    @abstractmethod
    def ohlcv_get_available_data(cls, datadir: Path) -> ListPairsWithTimeframes:
        """
        Returns a list of all pairs with ohlcv data available in this datadir
        :param datadir: Directory to search for ohlcv files
        :return: List of Tuples of (pair, timeframe)
        """

    @classmethod
    @abstractmethod
    def ohlcv_get_pairs(cls, datadir: Path, timeframe: str, candle_type: str = '') -> List[str]:
        """
        Returns a list of all pairs with ohlcv data available in this datadir
        for the specified timeframe
        :param datadir: Directory to search for ohlcv files
        :param timeframe: Timeframe to search pairs for
        :param candle_type: '', mark, index, premiumIndex, or funding_rate
        :return: List of Pairs
        """

    @abstractmethod
    def ohlcv_store(
        self,
        pair: str,
        timeframe: str,
        data: DataFrame,
        candle_type: str = ''
    ) -> None:
        """
        Store ohlcv data.
        :param pair: Pair - used to generate filename
        :param timeframe: Timeframe - used to generate filename
        :param data: Dataframe containing OHLCV data
        :param candle_type: '', mark, index, premiumIndex, or funding_rate
        :return: None
        """

    @abstractmethod
    def _ohlcv_load(self, pair: str, timeframe: str,
                    timerange: Optional[TimeRange] = None,
                    candle_type: str = ''
                    ) -> DataFrame:
        """
        Internal method used to load data for one pair from disk.
        Implements the loading and conversion to a Pandas dataframe.
        Timerange trimming and dataframe validation happens outside of this method.
        :param pair: Pair to load data
        :param timeframe: Timeframe (e.g. "5m")
        :param timerange: Limit data to be loaded to this timerange.
                        Optionally implemented by subclasses to avoid loading
                        all data where possible.
        :param candle_type: '', mark, index, premiumIndex, or funding_rate
        :return: DataFrame with ohlcv data, or empty DataFrame
        """

    @abstractmethod
    def ohlcv_purge(self, pair: str, timeframe: str, candle_type: str = '') -> bool:
        """
        Remove data for this pair
        :param pair: Delete data for this pair.
        :param timeframe: Timeframe (e.g. "5m")
        :param candle_type: '', mark, index, premiumIndex, or funding_rate
        :return: True when deleted, false if file did not exist.
        """

    @abstractmethod
    def ohlcv_append(
        self,
        pair: str,
        timeframe: str,
        data: DataFrame,
        candle_type: str = ''
    ) -> None:
        """
        Append data to existing data structures
        :param pair: Pair
        :param timeframe: Timeframe this ohlcv data is for
        :param data: Data to append.
        :param candle_type: '', mark, index, premiumIndex, or funding_rate
        """

    @classmethod
    @abstractmethod
    def trades_get_pairs(cls, datadir: Path) -> List[str]:
        """
        Returns a list of all pairs for which trade data is available in this datadir
        :param datadir: Directory to search for trade data files
        :return: List of Pairs
        """

    @abstractmethod
    def trades_store(self, pair: str, data: TradeList) -> None:
        """
        Store trades data to file
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """

    @abstractmethod
    def trades_append(self, pair: str, data: TradeList) -> None:
        """
        Append data to existing files
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """

    @abstractmethod
    def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
        """
        Load trades for one pair from disk (storage format depends on the subclass).
        Duplicate removal happens outside of this method, in `trades_load`.
        :param pair: Load trades for this pair
        :param timerange: Timerange to load trades for - currently not implemented
        :return: List of trades
        """

    @abstractmethod
    def trades_purge(self, pair: str) -> bool:
        """
        Remove data for this pair
        :param pair: Delete data for this pair.
        :return: True when deleted, false if file did not exist.
        """

    def trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
        """
        Load trades for one pair via `_trades_load`.
        Removes duplicates in the process.
        :param pair: Load trades for this pair
        :param timerange: Timerange to load trades for - currently not implemented
        :return: List of trades
        """
        return trades_remove_duplicates(self._trades_load(pair, timerange=timerange))

    @staticmethod
    def rebuild_pair_from_filename(pair: str) -> str:
        """
        Rebuild pair name from filename.
        The first "_" becomes "/" when it directly follows a leading run of 1-10 letters
        (or 1-6 letters/dashes). After that, the first remaining "_" becomes ":".
        This supports names such as BTC-PERP and BTC-PERP:USD.
        :param pair: Pair as encoded in the filename (e.g. "BTC_USDT")
        :return: Rebuilt pair name (e.g. "BTC/USDT")
        """
        # `count` is passed by keyword - positional use is deprecated as of Python 3.13.
        res = re.sub(r'^(([A-Za-z]{1,10})|^([A-Za-z\-]{1,6}))(_)', r'\g<1>/', pair, count=1)
        res = re.sub('_', ':', res, count=1)
        return res

    def ohlcv_load(self, pair: str, timeframe: str,
                   timerange: Optional[TimeRange] = None,
                   fill_missing: bool = True,
                   drop_incomplete: bool = True,
                   startup_candles: int = 0,
                   warn_no_data: bool = True,
                   candle_type: str = ''
                   ) -> DataFrame:
        """
        Load cached candle (OHLCV) data for the given pair.

        :param pair: Pair to load data for
        :param timeframe: Timeframe (e.g. "5m")
        :param timerange: Limit data to be loaded to this timerange
        :param fill_missing: Fill missing values with "No action"-candles
        :param drop_incomplete: Drop last candle assuming it may be incomplete.
        :param startup_candles: Additional candles to load at the start of the period
        :param warn_no_data: Log a warning message when no data is found
        :param candle_type: '', mark, index, premiumIndex, or funding_rate
        :return: DataFrame with ohlcv data, or empty DataFrame
        """
        # Fix startup period - work on a copy so the caller's timerange is not modified.
        timerange_startup = deepcopy(timerange)
        if startup_candles > 0 and timerange_startup:
            timerange_startup.subtract_start(timeframe_to_seconds(timeframe) * startup_candles)

        pairdf = self._ohlcv_load(
            pair,
            timeframe,
            timerange=timerange_startup,
            candle_type=candle_type
        )
        if self._check_empty_df(pairdf, pair, timeframe, warn_no_data):
            return pairdf

        # Remember the last candle before trimming to detect whether the end was cut off.
        enddate = pairdf.iloc[-1]['date']

        if timerange_startup:
            self._validate_pairdata(pair, pairdf, timerange_startup)
            pairdf = trim_dataframe(pairdf, timerange_startup)
            if self._check_empty_df(pairdf, pair, timeframe, warn_no_data):
                return pairdf

        # incomplete candles should only be dropped if we didn't trim the end beforehand.
        pairdf = clean_ohlcv_dataframe(pairdf, timeframe,
                                       pair=pair,
                                       fill_missing=fill_missing,
                                       drop_incomplete=(drop_incomplete and
                                                        enddate == pairdf.iloc[-1]['date']))
        # Only warns (if enabled) - the possibly empty dataframe is returned either way.
        self._check_empty_df(pairdf, pair, timeframe, warn_no_data)
        return pairdf

    def _check_empty_df(self, pairdf: DataFrame, pair: str, timeframe: str,
                        warn_no_data: bool) -> bool:
        """
        Check for an empty dataframe and optionally warn about it.
        :param pairdf: Dataframe to check
        :param pair: Pair name - used in the warning message
        :param timeframe: Timeframe - used in the warning message
        :param warn_no_data: Log a warning message when the dataframe is empty
        :return: True if the dataframe is empty, False otherwise
        """
        if pairdf.empty:
            if warn_no_data:
                logger.warning(
                    f'No history data for pair: "{pair}", timeframe: {timeframe}. '
                    'Use `freqtrade download-data` to download the data'
                )
            return True
        return False

    def _validate_pairdata(self, pair: str, pairdata: DataFrame, timerange: TimeRange) -> None:
        """
        Validates pairdata for missing data at start and end and logs warnings.
        Reads the first and last row, so pairdata must not be empty.
        :param pair: Pair name - used in the warning messages
        :param pairdata: Dataframe to validate
        :param timerange: Timerange specified for start and end dates
        """

        if timerange.starttype == 'date':
            start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
            if pairdata.iloc[0]['date'] > start:
                logger.warning(f"Missing data at start for pair {pair}, "
                               f"data starts at {pairdata.iloc[0]['date']:%Y-%m-%d %H:%M:%S}")
        if timerange.stoptype == 'date':
            stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
            if pairdata.iloc[-1]['date'] < stop:
                logger.warning(f"Missing data at end for pair {pair}, "
                               f"data ends at {pairdata.iloc[-1]['date']:%Y-%m-%d %H:%M:%S}")
|
2019-12-28 08:59:47 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_datahandlerclass(datatype: str) -> Type[IDataHandler]:
    """
    Look up the datahandler class for a data format.
    Resolvers could do this as well, but they are rather expensive and this
    function may be called often - so the lookup is done directly here.
    :param datatype: datatype to use ('json', 'jsongz' or 'hdf5').
    :return: Datahandler class
    :raises ValueError: if no datahandler exists for the given datatype
    """
    # Each handler module is imported only when its format is requested.
    if datatype == 'json':
        from .jsondatahandler import JsonDataHandler
        return JsonDataHandler

    if datatype == 'jsongz':
        from .jsondatahandler import JsonGzDataHandler
        return JsonGzDataHandler

    if datatype == 'hdf5':
        from .hdf5datahandler import HDF5DataHandler
        return HDF5DataHandler

    raise ValueError(f"No datahandler for datatype {datatype} available.")
|
|
|
|
|
|
|
|
|
|
|
|
def get_datahandler(datadir: Path, data_format: Optional[str] = None,
                    data_handler: Optional[IDataHandler] = None) -> IDataHandler:
    """
    Return the given datahandler, or create a new one for datadir.
    :param datadir: Folder to save data
    :param data_format: dataformat to use - falls back to 'json' if not set
    :param data_handler: returns this datahandler if it exists or initializes a new one
    :return: The passed-in data_handler, or a new handler instance for data_format
    :raises ValueError: if a new handler is needed and data_format is not supported
    """

    if not data_handler:
        HandlerClass = get_datahandlerclass(data_format or 'json')
        data_handler = HandlerClass(datadir)
    return data_handler
|