stable/freqtrade/data/history/hdf5datahandler.py

import logging
import re
from pathlib import Path
from typing import List, Optional

import numpy as np
import pandas as pd

from freqtrade.configuration import TimeRange
from freqtrade.constants import (DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS,
                                 ListPairsWithTimeframes, TradeList)
from freqtrade.enums import CandleType, TradingMode

from .idatahandler import IDataHandler


logger = logging.getLogger(__name__)


class HDF5DataHandler(IDataHandler):
    """
    Data handler storing OHLCV candles and trades in HDF5 (``.h5``) files.

    Data is written through pandas' ``to_hdf`` in ``table`` format with the time column
    (``date`` for candles, ``timestamp`` for trades) declared as data column, which is what
    allows the ``where`` filters used by the load methods.
    Appending to existing data is not supported by this handler.
    """

    _columns = DEFAULT_DATAFRAME_COLUMNS

    @classmethod
    def ohlcv_get_available_data(
            cls, datadir: Path, trading_mode: TradingMode) -> ListPairsWithTimeframes:
        """
        Returns a list of all pairs with ohlcv data available in this datadir
        :param datadir: Directory to search for ohlcv files
        :param trading_mode: trading-mode to be used
            (futures data is looked up in the "futures" subdirectory)
        :return: List of Tuples of (pair, timeframe, candle_type)
        """
        if trading_mode == TradingMode.FUTURES:
            datadir = datadir.joinpath('futures')
        _tmp = [
            re.search(
                cls._OHLCV_REGEX, p.name
            ) for p in datadir.glob("*.h5")
        ]
        # Groups 1-3 are read below, so only accept matches providing all three of them.
        return [
            (
                cls.rebuild_pair_from_filename(match[1]),
                cls.rebuild_timeframe_from_filename(match[2]),
                CandleType.from_string(match[3])
            ) for match in _tmp if match and len(match.groups()) >= 3]

    @classmethod
    def ohlcv_get_pairs(cls, datadir: Path, timeframe: str, candle_type: CandleType) -> List[str]:
        """
        Returns a list of all pairs with ohlcv data available in this datadir
        for the specified timeframe
        :param datadir: Directory to search for ohlcv files
        :param timeframe: Timeframe to search pairs for
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :return: List of Pairs
        """
        candle = ""
        if candle_type != CandleType.SPOT:
            # Non-spot candles live in the "futures" subdirectory and carry
            # the candle type as filename suffix.
            datadir = datadir.joinpath('futures')
            candle = f"-{candle_type}"

        # Escape the dynamic parts as well as the extension dot, so they are matched
        # literally instead of being interpreted as regex syntax.
        pattern = re.compile(r'^(\S+)(?=\-' + re.escape(timeframe + candle) + r'\.h5)')
        _tmp = [pattern.search(p.name) for p in datadir.glob(f"*{timeframe}{candle}.h5")]
        # Check if regex found something and only return these results
        return [cls.rebuild_pair_from_filename(match[0]) for match in _tmp if match]

    def ohlcv_store(
            self, pair: str, timeframe: str, data: pd.DataFrame, candle_type: CandleType) -> None:
        """
        Store data in hdf5 file.
        :param pair: Pair - used to generate filename
        :param timeframe: Timeframe - used to generate filename
        :param data: Dataframe containing OHLCV data
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :return: None
        """
        key = self._pair_ohlcv_key(pair, timeframe)

        filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type)
        self.create_dir_if_needed(filename)

        # Selecting columns via .loc with a list already yields a new dataframe,
        # so no explicit copy of the input is necessary.
        # "key" is passed by keyword - newer pandas versions no longer accept it positionally.
        data.loc[:, self._columns].to_hdf(
            filename, key=key, mode='a', complevel=9, complib='blosc',
            format='table', data_columns=['date']
        )

    def _ohlcv_load(self, pair: str, timeframe: str,
                    timerange: Optional[TimeRange], candle_type: CandleType
                    ) -> pd.DataFrame:
        """
        Internal method used to load data for one pair from disk.
        Implements the loading and conversion to a Pandas dataframe.
        Timerange trimming and dataframe validation happens outside of this method.
        :param pair: Pair to load data
        :param timeframe: Timeframe (e.g. "5m")
        :param timerange: Limit data to be loaded to this timerange.
                        Applied as filter on the "date" column while reading
                        (both start and stop date are inclusive).
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :return: DataFrame with ohlcv data, or empty DataFrame
        :raises ValueError: If the stored columns differ from the expected columns.
        """
        key = self._pair_ohlcv_key(pair, timeframe)
        filename = self._pair_data_filename(
            self._datadir,
            pair,
            timeframe,
            candle_type=candle_type
        )

        if not filename.exists():
            # Fallback mode for 1M files
            filename = self._pair_data_filename(
                self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)
            if not filename.exists():
                return pd.DataFrame(columns=self._columns)
        where = []
        if timerange:
            # startts / stopts are multiplied by 1e9 to build the Timestamp used in the query.
            if timerange.starttype == 'date':
                where.append(f"date >= Timestamp({timerange.startts * 1e9})")
            if timerange.stoptype == 'date':
                where.append(f"date <= Timestamp({timerange.stopts * 1e9})")

        pairdata = pd.read_hdf(filename, key=key, mode="r", where=where)

        if list(pairdata.columns) != self._columns:
            raise ValueError("Wrong dataframe format")
        pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                          'low': 'float', 'close': 'float', 'volume': 'float'})
        return pairdata

    def ohlcv_append(
        self,
        pair: str,
        timeframe: str,
        data: pd.DataFrame,
        candle_type: CandleType
    ) -> None:
        """
        Append data to existing data structures
        Not implemented for the hdf5 handler.
        :param pair: Pair
        :param timeframe: Timeframe this ohlcv data is for
        :param data: Data to append.
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :raises NotImplementedError: Always.
        """
        raise NotImplementedError()

    @classmethod
    def trades_get_pairs(cls, datadir: Path) -> List[str]:
        """
        Returns a list of all pairs for which trade data is available in this datadir
        :param datadir: Directory to search for trades files
        :return: List of Pairs
        """
        # The extension dot is escaped to match it literally.
        _tmp = [re.search(r'^(\S+)(?=\-trades\.h5)', p.name)
                for p in datadir.glob("*trades.h5")]
        # Check if regex found something and only return these results to avoid exceptions.
        return [cls.rebuild_pair_from_filename(match[0]) for match in _tmp if match]

    def trades_store(self, pair: str, data: TradeList) -> None:
        """
        Store trades data (list of Lists) to file
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """
        key = self._pair_trades_key(pair)

        # "key" is passed by keyword - newer pandas versions no longer accept it positionally.
        pd.DataFrame(data, columns=DEFAULT_TRADES_COLUMNS).to_hdf(
            self._pair_trades_filename(self._datadir, pair), key=key,
            mode='a', complevel=9, complib='blosc',
            format='table', data_columns=['timestamp']
        )

    def trades_append(self, pair: str, data: TradeList) -> None:
        """
        Append data to existing files
        Not implemented for the hdf5 handler.
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        :raises NotImplementedError: Always.
        """
        raise NotImplementedError()

    def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
        """
        Load a pair from h5 file.
        :param pair: Load trades for this pair
        :param timerange: Timerange to load trades for.
                          Applied as filter on the "timestamp" column while reading
                          (start inclusive, stop exclusive).
        :return: List of trades, empty list if no file exists for this pair
        """
        key = self._pair_trades_key(pair)
        filename = self._pair_trades_filename(self._datadir, pair)

        if not filename.exists():
            return []
        where = []
        if timerange:
            # startts / stopts are multiplied by 1e3 before comparing to "timestamp".
            if timerange.starttype == 'date':
                where.append(f"timestamp >= {timerange.startts * 1e3}")
            if timerange.stoptype == 'date':
                where.append(f"timestamp < {timerange.stopts * 1e3}")

        trades: pd.DataFrame = pd.read_hdf(filename, key=key, mode="r", where=where)
        # Convert NaN to None in the id / type columns
        trades[['id', 'type']] = trades[['id', 'type']].replace({np.nan: None})
        return trades.values.tolist()

    @classmethod
    def _get_file_extension(cls) -> str:
        """
        :return: File extension (without leading dot) used by this handler
        """
        return "h5"

    @classmethod
    def _pair_ohlcv_key(cls, pair: str, timeframe: str) -> str:
        """
        Build the hdf5 key under which ohlcv data for pair / timeframe is stored.
        """
        # Escape futures pairs to avoid warnings
        pair_esc = pair.replace(':', '_')
        return f"{pair_esc}/ohlcv/tf_{timeframe}"

    @classmethod
    def _pair_trades_key(cls, pair: str) -> str:
        """
        Build the hdf5 key under which trades for pair are stored.
        The pair is used as is (no escaping).
        """
        return f"{pair}/trades"
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`import logging`
			`import re`
			`from pathlib import Path`
			`from typing import List, Optional`

Convert np to None when loading hdf5 trades to allow duplicate detection 2020-11-19 06:30:28 +00:00			`import numpy as np`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`import pandas as pd`

			`from freqtrade.configuration import TimeRange`
Apply isort to freqtrade codebase 2020-09-28 17:39:41 +00:00			`from freqtrade.constants import (DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS,`
Fix #3967, move TradeList type to constants 2020-11-21 09:52:15 +00:00			`ListPairsWithTimeframes, TradeList)`
Use parsed TradingMode from config 2022-03-03 06:06:13 +00:00			`from freqtrade.enums import CandleType, TradingMode`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
Fix #3967, move TradeList type to constants 2020-11-21 09:52:15 +00:00			`from .idatahandler import IDataHandler`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
Apply isort to freqtrade codebase 2020-09-28 17:39:41 +00:00
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`logger = logging.getLogger(__name__)`


Rename hdf5handler to hdf5DataHandler 2020-07-24 17:23:37 +00:00			`class HDF5DataHandler(IDataHandler):`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
			`_columns = DEFAULT_DATAFRAME_COLUMNS`

Implement get_available_data 2020-07-24 15:30:16 +00:00			`@classmethod`
Use parsed TradingMode from config 2022-03-03 06:06:13 +00:00			`def ohlcv_get_available_data(`
			`cls, datadir: Path, trading_mode: TradingMode) -> ListPairsWithTimeframes:`
Implement get_available_data 2020-07-24 15:30:16 +00:00			`"""`
			`Returns a list of all pairs with ohlcv data available in this datadir`
			`:param datadir: Directory to search for ohlcv files`
Futures candles should go into a subdirectory 2021-12-03 06:04:53 +00:00			`:param trading_mode: trading-mode to be used`
Implement get_available_data 2020-07-24 15:30:16 +00:00			`:return: List of Tuples of (pair, timeframe)`
			`"""`
Use parsed TradingMode from config 2022-03-03 06:06:13 +00:00			`if trading_mode == TradingMode.FUTURES:`
Futures candles should go into a subdirectory 2021-12-03 06:04:53 +00:00			`datadir = datadir.joinpath('futures')`
Added candle type to ohlcv_get_available_data 2021-11-21 04:46:47 +00:00			`_tmp = [`
			`re.search(`
Add explicit tests for ohlcv regex 2021-11-28 13:33:46 +00:00			`cls._OHLCV_REGEX, p.name`
Added candle type to ohlcv_get_available_data 2021-11-21 04:46:47 +00:00			`) for p in datadir.glob("*.h5")`
			`]`
More candletype changes 2021-12-03 12:04:31 +00:00			`return [`
			`(`
			`cls.rebuild_pair_from_filename(match[1]),`
Store monthly candles as "Mo" 2022-05-01 15:00:00 +00:00			`cls.rebuild_timeframe_from_filename(match[2]),`
More candletype changes 2021-12-03 12:04:31 +00:00			`CandleType.from_string(match[3])`
			`) for match in _tmp if match and len(match.groups()) > 1]`
Implement get_available_data 2020-07-24 15:30:16 +00:00
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`@classmethod`
Remove unnecessary default parameters 2021-12-07 19:30:58 +00:00			`def ohlcv_get_pairs(cls, datadir: Path, timeframe: str, candle_type: CandleType) -> List[str]:`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`"""`
			`Returns a list of all pairs with ohlcv data available in this datadir`
			`for the specified timeframe`
			`:param datadir: Directory to search for ohlcv files`
			`:param timeframe: Timeframe to search pairs for`
Candle_type to enum 2021-12-03 11:23:35 +00:00			`:param candle_type: Any of the enum CandleType (must match trading mode!)`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`:return: List of Pairs`
			`"""`
Add candleType enum 2021-12-03 11:12:33 +00:00			`candle = ""`
Remove SPOT_ candletype 2021-12-08 13:35:15 +00:00			`if candle_type != CandleType.SPOT:`
Align tests to have futures data in futures/ directory 2021-12-03 06:20:00 +00:00			`datadir = datadir.joinpath('futures')`
Add candleType enum 2021-12-03 11:12:33 +00:00			`candle = f"-{candle_type}"`
updated historic data filenames to include the candle type 2021-11-07 06:35:27 +00:00
Add candleType enum 2021-12-03 11:12:33 +00:00			`_tmp = [re.search(r'^(\S+)(?=\-' + timeframe + candle + '.h5)', p.name)`
			`for p in datadir.glob(f"*{timeframe}{candle}.h5")]`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`# Check if regex found something and only return these results`
Use pair-reconstruction method wherever possible 2021-12-07 19:12:44 +00:00			`return [cls.rebuild_pair_from_filename(match[0]) for match in _tmp if match]`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
updated historic data filenames to include the candle type 2021-11-07 06:35:27 +00:00			`def ohlcv_store(`
Remove unnecessary default parameters 2021-12-07 19:30:58 +00:00			`self, pair: str, timeframe: str, data: pd.DataFrame, candle_type: CandleType) -> None:`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`"""`
			`Store data in hdf5 file.`
			`:param pair: Pair - used to generate filename`
unexpected docstring params 2021-06-25 17:13:31 +00:00			`:param timeframe: Timeframe - used to generate filename`
			`:param data: Dataframe containing OHLCV data`
Candle_type to enum 2021-12-03 11:23:35 +00:00			`:param candle_type: Any of the enum CandleType (must match trading mode!)`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`:return: None`
			`"""`
			`key = self._pair_ohlcv_key(pair, timeframe)`
			`_data = data.copy()`

Simplify timeframe-transition 2022-05-16 17:53:01 +00:00			`filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type)`
hdf5 datahandler should also create directory 2022-03-02 18:41:14 +00:00			`self.create_dir_if_needed(filename)`
Document hdf5 dataformat 2020-07-25 15:06:58 +00:00
use to_hdf instead of HDFStore 2021-12-01 19:32:23 +00:00			`_data.loc[:, self._columns].to_hdf(`
			`filename, key, mode='a', complevel=9, complib='blosc',`
			`format='table', data_columns=['date']`
			`)`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
			`def _ohlcv_load(self, pair: str, timeframe: str,`
Remove unnecessary default parameters 2021-12-07 19:30:58 +00:00			`timerange: Optional[TimeRange], candle_type: CandleType`
Candle_type to enum 2021-12-03 11:23:35 +00:00			`) -> pd.DataFrame:`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`"""`
			`Internal method used to load data for one pair from disk.`
			`Implements the loading and conversion to a Pandas dataframe.`
			`Timerange trimming and dataframe validation happens outside of this method.`
			`:param pair: Pair to load data`
			`:param timeframe: Timeframe (e.g. "5m")`
			`:param timerange: Limit data to be loaded to this timerange.`
			`Optionally implemented by subclasses to avoid loading`
			`all data where possible.`
Candle_type to enum 2021-12-03 11:23:35 +00:00			`:param candle_type: Any of the enum CandleType (must match trading mode!)`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`:return: DataFrame with ohlcv data, or empty DataFrame`
			`"""`
			`key = self._pair_ohlcv_key(pair, timeframe)`
updated historic data filenames to include the candle type 2021-11-07 06:35:27 +00:00			`filename = self._pair_data_filename(`
			`self._datadir,`
			`pair,`
Simplify timeframe-transition 2022-05-16 17:53:01 +00:00			`timeframe,`
updated historic data filenames to include the candle type 2021-11-07 06:35:27 +00:00			`candle_type=candle_type`
			`)`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
			`if not filename.exists():`
Add fallback to load 1M files as well as 1Mo files 2022-05-01 17:51:25 +00:00			`# Fallback mode for 1M files`
			`filename = self._pair_data_filename(`
Simplify timeframe-transition 2022-05-16 17:53:01 +00:00			`self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)`
Add fallback to load 1M files as well as 1Mo files 2022-05-01 17:51:25 +00:00			`if not filename.exists():`
			`return pd.DataFrame(columns=self._columns)`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`where = []`
			`if timerange:`
			`if timerange.starttype == 'date':`
			`where.append(f"date >= Timestamp({timerange.startts * 1e9})")`
			`if timerange.stoptype == 'date':`
hdf5 handler should include the end-date 2021-04-24 18:26:37 +00:00			`where.append(f"date <= Timestamp({timerange.stopts * 1e9})")`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
			`pairdata = pd.read_hdf(filename, key=key, mode="r", where=where)`

			`if list(pairdata.columns) != self._columns:`
			`raise ValueError("Wrong dataframe format")`
			`pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',`
			`'low': 'float', 'close': 'float', 'volume': 'float'})`
			`return pairdata`

updated historic data filenames to include the candle type 2021-11-07 06:35:27 +00:00			`def ohlcv_append(`
			`self,`
			`pair: str,`
			`timeframe: str,`
			`data: pd.DataFrame,`
Candle_type to enum 2021-12-03 11:23:35 +00:00			`candle_type: CandleType`
updated historic data filenames to include the candle type 2021-11-07 06:35:27 +00:00			`) -> None:`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`"""`
			`Append data to existing data structures`
			`:param pair: Pair`
			`:param timeframe: Timeframe this ohlcv data is for`
			`:param data: Data to append.`
Candle_type to enum 2021-12-03 11:23:35 +00:00			`:param candle_type: Any of the enum CandleType (must match trading mode!)`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`"""`
			`raise NotImplementedError()`

			`@classmethod`
			`def trades_get_pairs(cls, datadir: Path) -> List[str]:`
			`"""`
			`Returns a list of all pairs for which trade data is available in this`
			`:param datadir: Directory to search for ohlcv files`
			`:return: List of Pairs`
			`"""`
			`_tmp = [re.search(r'^(\S+)(?=\-trades.h5)', p.name)`
Add trades_load method 2020-07-12 18:41:25 +00:00			`for p in datadir.glob("*trades.h5")]`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`# Check if regex found something and only return these results to avoid exceptions.`
Use pair-reconstruction method wherever possible 2021-12-07 19:12:44 +00:00			`return [cls.rebuild_pair_from_filename(match[0]) for match in _tmp if match]`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
			`def trades_store(self, pair: str, data: TradeList) -> None:`
			`"""`
			`Store trades data (list of Dicts) to file`
			`:param pair: Pair - used for filename`
			`:param data: List of Lists containing trade data,`
			`column sequence as in DEFAULT_TRADES_COLUMNS`
			`"""`
			`key = self._pair_trades_key(pair)`
Document hdf5 dataformat 2020-07-25 15:06:58 +00:00
use to_hdf instead of HDFStore 2021-12-01 19:32:23 +00:00			`pd.DataFrame(data, columns=DEFAULT_TRADES_COLUMNS).to_hdf(`
			`self._pair_trades_filename(self._datadir, pair), key,`
			`mode='a', complevel=9, complib='blosc',`
			`format='table', data_columns=['timestamp']`
			`)`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
			`def trades_append(self, pair: str, data: TradeList):`
			`"""`
			`Append data to existing files`
			`:param pair: Pair - used for filename`
			`:param data: List of Lists containing trade data,`
			`column sequence as in DEFAULT_TRADES_COLUMNS`
			`"""`
			`raise NotImplementedError()`

			`def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:`
			`"""`
Add trades_load method 2020-07-12 18:41:25 +00:00			`Load a pair from h5 file.`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`:param pair: Load trades for this pair`
			`:param timerange: Timerange to load trades for - currently not implemented`
			`:return: List of trades`
			`"""`
Add trades_load method 2020-07-12 18:41:25 +00:00			`key = self._pair_trades_key(pair)`
			`filename = self._pair_trades_filename(self._datadir, pair)`

			`if not filename.exists():`
			`return []`
			`where = []`
			`if timerange:`
			`if timerange.starttype == 'date':`
			`where.append(f"timestamp >= {timerange.startts * 1e3}")`
			`if timerange.stoptype == 'date':`
			`where.append(f"timestamp < {timerange.stopts * 1e3}")`

Convert np to None when loading hdf5 trades to allow duplicate detection 2020-11-19 06:30:28 +00:00			`trades: pd.DataFrame = pd.read_hdf(filename, key=key, mode="r", where=where)`
			`trades[['id', 'type']] = trades[['id', 'type']].replace({np.nan: None})`
Add trades_load method 2020-07-12 18:41:25 +00:00			`return trades.values.tolist()`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
Simplify datahandler classes by exploiting commonalities 2021-12-02 19:19:22 +00:00			`@classmethod`
			`def _get_file_extension(cls):`
			`return "h5"`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
			`@classmethod`
Fix mypy error 2020-07-25 15:19:41 +00:00			`def _pair_ohlcv_key(cls, pair: str, timeframe: str) -> str:`
Update hdf5 to not raise naturalNaming warnings 2022-03-27 14:38:12 +00:00			`# Escape futures pairs to avoid warnings`
			`pair_esc = pair.replace(':', '_')`
			`return f"{pair_esc}/ohlcv/tf_{timeframe}"`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00
			`@classmethod`
Fix mypy error 2020-07-25 15:19:41 +00:00			`def _pair_trades_key(cls, pair: str) -> str:`
First version of hdf5handler - no proper support for trades yet 2020-07-12 18:17:21 +00:00			`return f"{pair}/trades"`