# stable/freqtrade/data/history/hdf5datahandler.py
# (web code-view metadata at capture time: 163 lines, 6.0 KiB, Python)

import logging
# (page residue, not code) 2022-09-18 14:57:03 +00:00
from typing import Optional
import numpy as np
import pandas as pd
from freqtrade.configuration import TimeRange
# (page residue, not code) 2022-08-19 11:44:39 +00:00
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS, TradeList
from freqtrade.enums import CandleType
from .idatahandler import IDataHandler
# (page residue, not code) 2020-09-28 17:39:41 +00:00
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# (page residue, not code) 2020-07-24 17:23:37 +00:00
class HDF5DataHandler(IDataHandler):
    """
    Data handler that reads and writes OHLCV candles and trades as HDF5 tables
    through pandas (``DataFrame.to_hdf`` / ``pd.read_hdf``).

    Appending is not supported: ``ohlcv_append`` and ``trades_append`` both
    raise NotImplementedError.
    """

    # Column layout written for OHLCV data and required when loading it back.
    _columns = DEFAULT_DATAFRAME_COLUMNS

    def ohlcv_store(
            self, pair: str, timeframe: str, data: pd.DataFrame, candle_type: CandleType) -> None:
        """
        Store data in hdf5 file.
        :param pair: Pair - used to generate filename
        :param timeframe: Timeframe - used to generate filename
        :param data: Dataframe containing OHLCV data
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :return: None
        """
        key = self._pair_ohlcv_key(pair, timeframe)

        filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type)
        self.create_dir_if_needed(filename)

        # Selecting the columns with .loc already produces a new frame, so the caller's
        # dataframe is left untouched without an explicit copy.
        # `key` is passed by keyword: pandas deprecated positional arguments for to_hdf.
        data.loc[:, self._columns].to_hdf(
            filename, key=key, mode='a', complevel=9, complib='blosc',
            format='table', data_columns=['date']
        )

    def _ohlcv_load(self, pair: str, timeframe: str,
                    timerange: Optional[TimeRange], candle_type: CandleType
                    ) -> pd.DataFrame:
        """
        Internal method used to load data for one pair from disk.
        Implements the loading and conversion to a Pandas dataframe.
        Timerange trimming and dataframe validation happens outside of this method.
        :param pair: Pair to load data
        :param timeframe: Timeframe (e.g. "5m")
        :param timerange: Limit data to be loaded to this timerange.
                        Optionally implemented by subclasses to avoid loading
                        all data where possible.
                        Here, 'date' type bounds are passed to read_hdf as a where-filter.
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :return: DataFrame with ohlcv data, or empty DataFrame
        :raises ValueError: If the stored columns differ from the expected columns.
        """
        key = self._pair_ohlcv_key(pair, timeframe)
        filename = self._pair_data_filename(
            self._datadir,
            pair,
            timeframe,
            candle_type=candle_type
        )

        if not filename.exists():
            # Fallback mode for 1M files
            filename = self._pair_data_filename(
                self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)
            if not filename.exists():
                return pd.DataFrame(columns=self._columns)

        # Build the on-disk filter on the indexed 'date' column.
        # NOTE(review): the 1e9 factor presumably converts second-resolution
        # timestamps to nanoseconds - confirm against TimeRange.
        where = []
        if timerange:
            if timerange.starttype == 'date':
                where.append(f"date >= Timestamp({timerange.startts * 1e9})")
            if timerange.stoptype == 'date':
                where.append(f"date <= Timestamp({timerange.stopts * 1e9})")

        pairdata = pd.read_hdf(filename, key=key, mode="r", where=where)

        if list(pairdata.columns) != self._columns:
            raise ValueError("Wrong dataframe format")
        pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                          'low': 'float', 'close': 'float', 'volume': 'float'})
        # Discard the stored index in favour of a fresh 0..n-1 index.
        pairdata = pairdata.reset_index(drop=True)
        return pairdata

    def ohlcv_append(
        self,
        pair: str,
        timeframe: str,
        data: pd.DataFrame,
        candle_type: CandleType
    ) -> None:
        """
        Append data to existing data structures
        :param pair: Pair
        :param timeframe: Timeframe this ohlcv data is for
        :param data: Data to append.
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :raises NotImplementedError: Always - appending is not implemented for this handler.
        """
        raise NotImplementedError()

    def trades_store(self, pair: str, data: TradeList) -> None:
        """
        Store trades data (list of Dicts) to file
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """
        key = self._pair_trades_key(pair)

        # `key` is passed by keyword: pandas deprecated positional arguments for to_hdf.
        pd.DataFrame(data, columns=DEFAULT_TRADES_COLUMNS).to_hdf(
            self._pair_trades_filename(self._datadir, pair), key=key,
            mode='a', complevel=9, complib='blosc',
            format='table', data_columns=['timestamp']
        )

    def trades_append(self, pair: str, data: TradeList) -> None:
        """
        Append data to existing files
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        :raises NotImplementedError: Always - appending is not implemented for this handler.
        """
        raise NotImplementedError()

    def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
        """
        Load a pair from h5 file.
        :param pair: Load trades for this pair
        :param timerange: Timerange to load trades for. 'date' type bounds are applied
                          as a where-filter on the 'timestamp' column
                          (start inclusive, stop exclusive).
        :return: List of trades, or an empty list if the file does not exist
        """
        key = self._pair_trades_key(pair)
        filename = self._pair_trades_filename(self._datadir, pair)

        if not filename.exists():
            return []

        # NOTE(review): the 1e3 factor presumably converts second-resolution
        # timestamps to milliseconds - confirm against TimeRange.
        where = []
        if timerange:
            if timerange.starttype == 'date':
                where.append(f"timestamp >= {timerange.startts * 1e3}")
            if timerange.stoptype == 'date':
                where.append(f"timestamp < {timerange.stopts * 1e3}")

        trades: pd.DataFrame = pd.read_hdf(filename, key=key, mode="r", where=where)
        # Missing ids / types come back as NaN; hand them out as None instead.
        trades[['id', 'type']] = trades[['id', 'type']].replace({np.nan: None})
        return trades.values.tolist()

    @classmethod
    def _get_file_extension(cls) -> str:
        """Return the file extension (without dot) used by this handler."""
        return "h5"

    @classmethod
    def _pair_ohlcv_key(cls, pair: str, timeframe: str) -> str:
        """Return the HDF5 key under which OHLCV data for pair / timeframe is stored."""
        # Escape futures pairs to avoid warnings
        pair_esc = pair.replace(':', '_')
        return f"{pair_esc}/ohlcv/tf_{timeframe}"

    @classmethod
    def _pair_trades_key(cls, pair: str) -> str:
        """Return the HDF5 key under which trades for pair are stored."""
        # NOTE(review): unlike _pair_ohlcv_key, ':' is not escaped here. Left as-is
        # on purpose - changing it would alter the key used for already stored data.
        return f"{pair}/trades"