diff --git a/freqtrade/data/history/hdf5datahandler.py b/freqtrade/data/history/hdf5datahandler.py new file mode 100644 index 000000000..21f521cc3 --- /dev/null +++ b/freqtrade/data/history/hdf5datahandler.py @@ -0,0 +1,184 @@ +import logging +import re +from pathlib import Path +from typing import List, Optional + +import pandas as pd + +from freqtrade import misc +from freqtrade.configuration import TimeRange +from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS + +from .idatahandler import IDataHandler, TradeList + +logger = logging.getLogger(__name__) + + +class HDF5Handler(IDataHandler): + + _columns = DEFAULT_DATAFRAME_COLUMNS + + @classmethod + def ohlcv_get_pairs(cls, datadir: Path, timeframe: str) -> List[str]: + """ + Returns a list of all pairs with ohlcv data available in this datadir + for the specified timeframe + :param datadir: Directory to search for ohlcv files + :param timeframe: Timeframe to search pairs for + :return: List of Pairs + """ + + _tmp = [re.search(r'^(\S+)(?=\-' + timeframe + '.h5)', p.name) + for p in datadir.glob(f"*{timeframe}.{cls._get_file_extension()}")] + # Check if regex found something and only return these results + return [match[0].replace('_', '/') for match in _tmp if match] + + def ohlcv_store(self, pair: str, timeframe: str, data: pd.DataFrame) -> None: + """ + Store data in hdf5 file. + :param pair: Pair - used to generate filename + :timeframe: Timeframe - used to generate filename + :data: Dataframe containing OHLCV data + :return: None + """ + key = self._pair_ohlcv_key(pair, timeframe) + _data = data.copy() + # Convert date to int + # _data['date'] = _data['date'].astype(np.int64) // 1000 // 1000 + + filename = self._pair_data_filename(self._datadir, pair, timeframe) + ds = pd.HDFStore(filename, mode='a', complevel=9) + ds.put(key, _data.loc[:, self._columns], format='table', data_columns=['date']) + + ds.close() + + def _ohlcv_load(self, pair: str, timeframe: str, + timerange: Optional[TimeRange] = None) -> pd.DataFrame: + """ + Internal method used to load data for one pair from disk. + Implements the loading and conversion to a Pandas dataframe. + Timerange trimming and dataframe validation happens outside of this method. + :param pair: Pair to load data + :param timeframe: Timeframe (e.g. "5m") + :param timerange: Limit data to be loaded to this timerange. + Optionally implemented by subclasses to avoid loading + all data where possible. + :return: DataFrame with ohlcv data, or empty DataFrame + """ + key = self._pair_ohlcv_key(pair, timeframe) + filename = self._pair_data_filename(self._datadir, pair, timeframe) + + if not filename.exists(): + return pd.DataFrame(columns=self._columns) + where = [] + if timerange: + if timerange.starttype == 'date': + where.append(f"date >= Timestamp({timerange.startts * 1e9})") + if timerange.stoptype == 'date': + where.append(f"date < Timestamp({timerange.stopts * 1e9})") + + pairdata = pd.read_hdf(filename, key=key, mode="r", where=where) + + if list(pairdata.columns) != self._columns: + raise ValueError("Wrong dataframe format") + pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float', + 'low': 'float', 'close': 'float', 'volume': 'float'}) + return pairdata + + def ohlcv_purge(self, pair: str, timeframe: str) -> bool: + """ + Remove data for this pair + :param pair: Delete data for this pair. + :param timeframe: Timeframe (e.g. "5m") + :return: True when deleted, false if file did not exist. + """ + filename = self._pair_data_filename(self._datadir, pair, timeframe) + if filename.exists(): + filename.unlink() + return True + return False + + def ohlcv_append(self, pair: str, timeframe: str, data: pd.DataFrame) -> None: + """ + Append data to existing data structures + :param pair: Pair + :param timeframe: Timeframe this ohlcv data is for + :param data: Data to append. + """ + raise NotImplementedError() + + @classmethod + def trades_get_pairs(cls, datadir: Path) -> List[str]: + """ + Returns a list of all pairs for which trade data is available in this + :param datadir: Directory to search for ohlcv files + :return: List of Pairs + """ + _tmp = [re.search(r'^(\S+)(?=\-trades.h5)', p.name) + for p in datadir.glob(f"*trades.{cls._get_file_extension()}")] + # Check if regex found something and only return these results to avoid exceptions. + return [match[0].replace('_', '/') for match in _tmp if match] + + def trades_store(self, pair: str, data: TradeList) -> None: + """ + Store trades data (list of Dicts) to file + :param pair: Pair - used for filename + :param data: List of Lists containing trade data, + column sequence as in DEFAULT_TRADES_COLUMNS + """ + key = self._pair_trades_key(pair) + ds = pd.HDFStore(self.filename_trades, mode='a', complevel=9) + ds.put(key, pd.DataFrame(data, columns=DEFAULT_TRADES_COLUMNS), + format='table', data_columns=['timestamp']) + ds.close() + + def trades_append(self, pair: str, data: TradeList): + """ + Append data to existing files + :param pair: Pair - used for filename + :param data: List of Lists containing trade data, + column sequence as in DEFAULT_TRADES_COLUMNS + """ + raise NotImplementedError() + + def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList: + """ + Load a pair from file, either .json.gz or .json + # TODO: respect timerange ... + :param pair: Load trades for this pair + :param timerange: Timerange to load trades for - currently not implemented + :return: List of trades + """ + raise NotImplementedError() + + def trades_purge(self, pair: str) -> bool: + """ + Remove data for this pair + :param pair: Delete data for this pair. + :return: True when deleted, false if file did not exist. + """ + filename = self._pair_trades_filename(self._datadir, pair) + if filename.exists(): + filename.unlink() + return True + return False + + @classmethod + def _pair_ohlcv_key(cls, pair: str, timeframe: str) -> Path: + return f"{pair}/ohlcv/tf_{timeframe}" + + @classmethod + def _pair_trades_key(cls, pair: str) -> Path: + return f"{pair}/trades" + + @classmethod + def _pair_data_filename(cls, datadir: Path, pair: str, timeframe: str) -> Path: + pair_s = misc.pair_to_filename(pair) + filename = datadir.joinpath(f'{pair_s}-{timeframe}.h5') + return filename + + @classmethod + def _pair_trades_filename(cls, datadir: Path, pair: str) -> Path: + pair_s = misc.pair_to_filename(pair) + filename = datadir.joinpath(f'{pair_s}-trades.h5') + return filename diff --git a/freqtrade/data/history/idatahandler.py b/freqtrade/data/history/idatahandler.py index 96d288e01..be3e34e04 100644 --- a/freqtrade/data/history/idatahandler.py +++ b/freqtrade/data/history/idatahandler.py @@ -50,9 +50,7 @@ class IDataHandler(ABC): @abstractmethod def ohlcv_store(self, pair: str, timeframe: str, data: DataFrame) -> None: """ - Store data in json format "values". - format looks as follows: - [[,,,,]] + Store ohlcv data. :param pair: Pair - used to generate filename :timeframe: Timeframe - used to generate filename :data: Dataframe containing OHLCV data @@ -239,6 +237,9 @@ def get_datahandlerclass(datatype: str) -> Type[IDataHandler]: elif datatype == 'jsongz': from .jsondatahandler import JsonGzDataHandler return JsonGzDataHandler + elif datatype == 'hdf5': + from .hdf5datahandler import HDF5Handler + return HDF5Handler else: raise ValueError(f"No datahandler for datatype {datatype} available.")