Merge pull request #3626 from freqtrade/feat/hdf5

Introduce HDF5 Datahandler
2020-08-31 16:10:24 +02:00
parent 8b664644c0 24df8d6bf5
commit 38c52c7eee
12 changed files with 450 additions and 62 deletions
@@ -15,61 +15,91 @@ Otherwise `--exchange` becomes mandatory.
 ### Usage
 ```
-usage: freqtrade download-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH] [--userdir PATH] [-p PAIRS [PAIRS ...]]
+usage: freqtrade download-data [-h] [-v] [--logfile FILE] [-V] [-c PATH]
-                               [--pairs-file FILE] [--days INT] [--dl-trades] [--exchange EXCHANGE]
+                               [-d PATH] [--userdir PATH]
                               [-p PAIRS [PAIRS ...]] [--pairs-file FILE]
                               [--days INT] [--dl-trades]
                               [--exchange EXCHANGE]
                               [-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} ...]]
-                               [--erase] [--data-format-ohlcv {json,jsongz}] [--data-format-trades {json,jsongz}]
+                               [--erase]
                               [--data-format-ohlcv {json,jsongz,hdf5}]
                               [--data-format-trades {json,jsongz,hdf5}]
 optional arguments:
  -h, --help            show this help message and exit
  -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
-                        Show profits for only these pairs. Pairs are space-separated.
+                        Show profits for only these pairs. Pairs are space-
                        separated.
  --pairs-file FILE     File containing a list of pairs to download.
  --days INT            Download data for given number of days.
-  --dl-trades           Download trades instead of OHLCV data. The bot will resample trades to the desired timeframe as specified as
+  --dl-trades           Download trades instead of OHLCV data. The bot will
-                        --timeframes/-t.
+                        resample trades to the desired timeframe as specified
-  --exchange EXCHANGE   Exchange name (default: `bittrex`). Only valid if no config is provided.
+                        as --timeframes/-t.
  --exchange EXCHANGE   Exchange name (default: `bittrex`). Only valid if no
                        config is provided.
  -t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} ...], --timeframes {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} ...]
-                        Specify which tickers to download. Space-separated list. Default: `1m 5m`.
+                        Specify which tickers to download. Space-separated
-  --erase               Clean all existing data for the selected exchange/pairs/timeframes.
+                        list. Default: `1m 5m`.
-  --data-format-ohlcv {json,jsongz}
+  --erase               Clean all existing data for the selected
-                        Storage format for downloaded candle (OHLCV) data. (default: `json`).
+                        exchange/pairs/timeframes.
-  --data-format-trades {json,jsongz}
+  --data-format-ohlcv {json,jsongz,hdf5}
-                        Storage format for downloaded trades data. (default: `jsongz`).
+                        Storage format for downloaded candle (OHLCV) data.
                        (default: `json`).
  --data-format-trades {json,jsongz,hdf5}
                        Storage format for downloaded trades data. (default:
                        `jsongz`).
 Common arguments:
  -v, --verbose         Verbose mode (-vv for more, -vvv to get all messages).
-  --logfile FILE        Log to the file specified. Special values are: 'syslog', 'journald'. See the documentation for more details.
+  --logfile FILE        Log to the file specified. Special values are:
                        'syslog', 'journald'. See the documentation for more
                        details.
  -V, --version         show program's version number and exit
  -c PATH, --config PATH
-                        Specify configuration file (default: `config.json`). Multiple --config options may be used. Can be set to `-`
+                        Specify configuration file (default:
-                        to read config from stdin.
+                        `userdir/config.json` or `config.json` whichever
                        exists). Multiple --config options may be used. Can be
                        set to `-` to read config from stdin.
  -d PATH, --datadir PATH
                        Path to directory with historical backtesting data.
  --userdir PATH, --user-data-dir PATH
                        Path to userdata directory.
 ```
 ### Data format
-Freqtrade currently supports 2 dataformats, `json` (plain "text" json files) and `jsongz` (a gzipped version of json files).
+Freqtrade currently supports 3 data-formats for both OHLCV and trades data:
 * `json` (plain "text" json files)
 * `jsongz` (a gzip-zipped version of json files)
 * `hdf5` (a high performance datastore)
 By default, OHLCV data is stored as `json` data, while trades data is stored as `jsongz` data.
-This can be changed via the `--data-format-ohlcv` and `--data-format-trades` parameters respectivly.
+This can be changed via the `--data-format-ohlcv` and `--data-format-trades` command line arguments respectively.
 To persist this change, you should also add the following snippet to your configuration, so you don't have to insert the above arguments each time:
-If the default dataformat has been changed during download, then the keys `dataformat_ohlcv` and `dataformat_trades` in the configuration file need to be adjusted to the selected dataformat as well.
+``` jsonc
    // ...
    "dataformat_ohlcv": "hdf5",
    "dataformat_trades": "hdf5",
    // ...
 ```
 If the default data-format has been changed during download, then the keys `dataformat_ohlcv` and `dataformat_trades` in the configuration file need to be adjusted to the selected dataformat as well.
 !!! Note
-    You can convert between data-formats using the [convert-data](#subcommand-convert-data) and [convert-trade-data](#subcommand-convert-trade-data) methods.
+    You can convert between data-formats using the [convert-data](#sub-command-convert-data) and [convert-trade-data](#sub-command-convert-trade-data) methods.
-#### Subcommand convert data
+#### Sub-command convert data
 ```
 usage: freqtrade convert-data [-h] [-v] [--logfile FILE] [-V] [-c PATH]
                              [-d PATH] [--userdir PATH]
                              [-p PAIRS [PAIRS ...]] --format-from
-                              {json,jsongz} --format-to {json,jsongz}
+                              {json,jsongz,hdf5} --format-to
-                              [--erase]
+                              {json,jsongz,hdf5} [--erase]
                              [-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} ...]]
 optional arguments:
@@ -77,9 +107,9 @@ optional arguments:
  -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
                        Show profits for only these pairs. Pairs are space-
                        separated.
-  --format-from {json,jsongz}
+  --format-from {json,jsongz,hdf5}
                        Source format for data conversion.
-  --format-to {json,jsongz}
+  --format-to {json,jsongz,hdf5}
                        Destination format for data conversion.
  --erase               Clean all existing data for the selected
                        exchange/pairs/timeframes.
@@ -94,9 +124,10 @@ Common arguments:
                        details.
  -V, --version         show program's version number and exit
  -c PATH, --config PATH
-                        Specify configuration file (default: `config.json`).
+                        Specify configuration file (default:
-                        Multiple --config options may be used. Can be set to
+                        `userdir/config.json` or `config.json` whichever
-                        `-` to read config from stdin.
+                        exists). Multiple --config options may be used. Can be
                        set to `-` to read config from stdin.
  -d PATH, --datadir PATH
                        Path to directory with historical backtesting data.
  --userdir PATH, --user-data-dir PATH
@@ -112,23 +143,23 @@ It'll also remove original json data files (`--erase` parameter).
 freqtrade convert-data --format-from json --format-to jsongz --datadir ~/.freqtrade/data/binance -t 5m 15m --erase
 ```
-#### Subcommand convert-trade data
+#### Sub-command convert trade data
 ```
 usage: freqtrade convert-trade-data [-h] [-v] [--logfile FILE] [-V] [-c PATH]
                                    [-d PATH] [--userdir PATH]
                                    [-p PAIRS [PAIRS ...]] --format-from
-                                    {json,jsongz} --format-to {json,jsongz}
+                                    {json,jsongz,hdf5} --format-to
-                                    [--erase]
+                                    {json,jsongz,hdf5} [--erase]
 optional arguments:
  -h, --help            show this help message and exit
  -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
                        Show profits for only these pairs. Pairs are space-
                        separated.
-  --format-from {json,jsongz}
+  --format-from {json,jsongz,hdf5}
                        Source format for data conversion.
-  --format-to {json,jsongz}
+  --format-to {json,jsongz,hdf5}
                        Destination format for data conversion.
  --erase               Clean all existing data for the selected
                        exchange/pairs/timeframes.
@@ -140,13 +171,15 @@ Common arguments:
                        details.
  -V, --version         show program's version number and exit
  -c PATH, --config PATH
-                        Specify configuration file (default: `config.json`).
+                        Specify configuration file (default:
-                        Multiple --config options may be used. Can be set to
+                        `userdir/config.json` or `config.json` whichever
-                        `-` to read config from stdin.
+                        exists). Multiple --config options may be used. Can be
                        set to `-` to read config from stdin.
  -d PATH, --datadir PATH
                        Path to directory with historical backtesting data.
  --userdir PATH, --user-data-dir PATH
                        Path to userdata directory.
 ```
 ##### Example converting trades
@@ -158,21 +191,21 @@ It'll also remove original jsongz data files (`--erase` parameter).
 freqtrade convert-trade-data --format-from jsongz --format-to json --datadir ~/.freqtrade/data/kraken --erase
 ```
-### Subcommand list-data
+### Sub-command list-data
-You can get a list of downloaded data using the `list-data` subcommand.
+You can get a list of downloaded data using the `list-data` sub-command.
 ```
 usage: freqtrade list-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH]
                           [--userdir PATH] [--exchange EXCHANGE]
-                           [--data-format-ohlcv {json,jsongz}]
+                           [--data-format-ohlcv {json,jsongz,hdf5}]
                           [-p PAIRS [PAIRS ...]]
 optional arguments:
  -h, --help            show this help message and exit
  --exchange EXCHANGE   Exchange name (default: `bittrex`). Only valid if no
                        config is provided.
-  --data-format-ohlcv {json,jsongz}
+  --data-format-ohlcv {json,jsongz,hdf5}
                        Storage format for downloaded candle (OHLCV) data.
                        (default: `json`).
  -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
@@ -194,6 +227,7 @@ Common arguments:
                        Path to directory with historical backtesting data.
  --userdir PATH, --user-data-dir PATH
                        Path to userdata directory.
 ```
 #### Example list-data
@@ -249,7 +283,7 @@ This will download historical candle (OHLCV) data for all the currency pairs you
 ### Other Notes
 - To use a different directory than the exchange specific default, use `--datadir user_data/data/some_directory`.
- To change the exchange used to download the historical data from, please use a different configuration file (you'll probably need to adjust ratelimits etc.)
+- To change the exchange used to download the historical data from, please use a different configuration file (you'll probably need to adjust rate limits etc.)
 - To use `pairs.json` from some other directory, use `--pairs-file some_other_dir/pairs.json`.
 - To download historical candle (OHLCV) data for only 10 days, use `--days 10` (defaults to 30 days).
 - Use `--timeframes` to specify which timeframes to download the historical candle (OHLCV) data for. Default is `--timeframes 1m 5m` which will download 1-minute and 5-minute data.
@@ -257,7 +291,7 @@ This will download historical candle (OHLCV) data for all the currency pairs you
 ### Trades (tick) data
-By default, `download-data` subcommand downloads Candles (OHLCV) data. Some exchanges also provide historic trade-data via their API.
+By default, `download-data` sub-command downloads Candles (OHLCV) data. Some exchanges also provide historic trade-data via their API.
 This data can be useful if you need many different timeframes, since it is only downloaded once, and then resampled locally to the desired timeframes.
 Since this data is large, the files are gzip-compressed by default. They are stored in your data-directory with the naming convention of `<pair>-trades.json.gz` (`ETH_BTC-trades.json.gz`). Incremental mode is also supported, as for historic OHLCV data, so downloading the data once per week with `--days 8` will create an incremental data-repository.
@@ -15,7 +15,7 @@ ARGS_STRATEGY = ["strategy", "strategy_path"]
 ARGS_TRADE = ["db_url", "sd_notify", "dry_run"]
-ARGS_COMMON_OPTIMIZE = ["timeframe", "timerange",
+ARGS_COMMON_OPTIMIZE = ["timeframe", "timerange", "dataformat_ohlcv",
                        "max_open_trades", "stake_amount", "fee"]
 ARGS_BACKTEST = ARGS_COMMON_OPTIMIZE + ["position_stacking", "use_max_market_positions",
@@ -24,7 +24,7 @@ ORDERTIF_POSSIBILITIES = ['gtc', 'fok', 'ioc']
 AVAILABLE_PAIRLISTS = ['StaticPairList', 'VolumePairList',
                       'AgeFilter', 'PrecisionFilter', 'PriceFilter',
                       'ShuffleFilter', 'SpreadFilter']
-AVAILABLE_DATAHANDLERS = ['json', 'jsongz']
+AVAILABLE_DATAHANDLERS = ['json', 'jsongz', 'hdf5']
 DRY_RUN_WALLET = 1000
 DATETIME_PRINT_FORMAT = '%Y-%m-%d %H:%M:%S'
 MATH_CLOSE_PREC = 1e-14  # Precision used for float comparisons
@@ -255,7 +255,8 @@ def convert_ohlcv_format(config: Dict[str, Any], convert_from: str, convert_to:
                                  drop_incomplete=False,
                                  startup_candles=0)
            logger.info(f"Converting {len(data)} candles for {pair}")
-            trg.ohlcv_store(pair=pair, timeframe=timeframe, data=data)
+            if len(data) > 0:
-            if erase and convert_from != convert_to:
+                trg.ohlcv_store(pair=pair, timeframe=timeframe, data=data)
-                logger.info(f"Deleting source data for {pair} / {timeframe}")
+                if erase and convert_from != convert_to:
-                src.ohlcv_purge(pair=pair, timeframe=timeframe)
+                    logger.info(f"Deleting source data for {pair} / {timeframe}")
                    src.ohlcv_purge(pair=pair, timeframe=timeframe)
@@ -0,0 +1,211 @@
 import logging
 import re
 from pathlib import Path
 from typing import List, Optional
 import pandas as pd
 from freqtrade import misc
 from freqtrade.configuration import TimeRange
 from freqtrade.constants import (DEFAULT_DATAFRAME_COLUMNS,
                                 DEFAULT_TRADES_COLUMNS,
                                 ListPairsWithTimeframes)
 from .idatahandler import IDataHandler, TradeList
 logger = logging.getLogger(__name__)
 class HDF5DataHandler(IDataHandler):
    """
    Datahandler persisting OHLCV and trades data in HDF5 files.

    OHLCV data is written to `<pair>-<timeframe>.h5`, trades data to
    `<pair>-trades.h5` (see `_pair_data_filename` / `_pair_trades_filename`).
    Within each file, the data lives below a pair-specific key
    (see `_pair_ohlcv_key` / `_pair_trades_key`).
    """

    # Columns (and column order) expected in every stored / loaded OHLCV dataframe
    _columns = DEFAULT_DATAFRAME_COLUMNS

    @classmethod
    def ohlcv_get_available_data(cls, datadir: Path) -> ListPairsWithTimeframes:
        """
        Returns a list of all pairs with ohlcv data available in this datadir
        :param datadir: Directory to search for ohlcv files
        :return: List of Tuples of (pair, timeframe)
        """
        # The dot of the file extension is escaped so it only matches a literal '.'
        _tmp = [re.search(r'^([a-zA-Z_]+)\-(\d+\S+)(?=\.h5)', p.name)
                for p in datadir.glob("*.h5")]
        # A successful match always carries both groups (pair, timeframe),
        # files not following the naming scheme (e.g. trades files) are skipped.
        return [(match[1].replace('_', '/'), match[2]) for match in _tmp if match]

    @classmethod
    def ohlcv_get_pairs(cls, datadir: Path, timeframe: str) -> List[str]:
        """
        Returns a list of all pairs with ohlcv data available in this datadir
        for the specified timeframe
        :param datadir: Directory to search for ohlcv files
        :param timeframe: Timeframe to search pairs for
        :return: List of Pairs
        """
        # Escape the timeframe and the extension dot so both are matched literally
        pattern = r'^(\S+)(?=\-' + re.escape(timeframe) + r'\.h5)'
        _tmp = [re.search(pattern, p.name)
                for p in datadir.glob(f"*{timeframe}.h5")]
        # Check if regex found something and only return these results
        return [match[0].replace('_', '/') for match in _tmp if match]

    def ohlcv_store(self, pair: str, timeframe: str, data: pd.DataFrame) -> None:
        """
        Store data in hdf5 file.
        :param pair: Pair - used to generate filename
        :param timeframe: Timeframe - used to generate filename
        :param data: Dataframe containing OHLCV data
        :return: None
        """
        key = self._pair_ohlcv_key(pair, timeframe)
        filename = self._pair_data_filename(self._datadir, pair, timeframe)
        # Context manager guarantees the store is closed, even if put() raises.
        with pd.HDFStore(filename, mode='a', complevel=9, complib='blosc') as ds:
            # Selecting a list of columns via .loc yields a new dataframe,
            # so the caller's dataframe is left untouched.
            ds.put(key, data.loc[:, self._columns], format='table', data_columns=['date'])

    def _ohlcv_load(self, pair: str, timeframe: str,
                    timerange: Optional[TimeRange] = None) -> pd.DataFrame:
        """
        Internal method used to load data for one pair from disk.
        Implements the loading and conversion to a Pandas dataframe.
        Timerange trimming and dataframe validation happens outside of this method.
        :param pair: Pair to load data
        :param timeframe: Timeframe (e.g. "5m")
        :param timerange: Limit data to be loaded to this timerange.
                        Optionally implemented by subclasses to avoid loading
                        all data where possible.
        :return: DataFrame with ohlcv data, or empty DataFrame
        :raises ValueError: If the stored columns differ from the expected columns.
        """
        key = self._pair_ohlcv_key(pair, timeframe)
        filename = self._pair_data_filename(self._datadir, pair, timeframe)

        if not filename.exists():
            return pd.DataFrame(columns=self._columns)
        # Filter on the indexed 'date' column while reading instead of loading everything.
        # startts / stopts are scaled by 1e9 - presumably epoch seconds to nanoseconds.
        where = []
        if timerange:
            if timerange.starttype == 'date':
                where.append(f"date >= Timestamp({timerange.startts * 1e9})")
            if timerange.stoptype == 'date':
                where.append(f"date < Timestamp({timerange.stopts * 1e9})")

        pairdata = pd.read_hdf(filename, key=key, mode="r", where=where)

        if list(pairdata.columns) != self._columns:
            raise ValueError("Wrong dataframe format")
        # Ensure price and volume columns are floats
        pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                          'low': 'float', 'close': 'float', 'volume': 'float'})
        return pairdata

    def ohlcv_purge(self, pair: str, timeframe: str) -> bool:
        """
        Remove data for this pair
        :param pair: Delete data for this pair.
        :param timeframe: Timeframe (e.g. "5m")
        :return: True when deleted, false if file did not exist.
        """
        filename = self._pair_data_filename(self._datadir, pair, timeframe)
        if filename.exists():
            filename.unlink()
            return True
        return False

    def ohlcv_append(self, pair: str, timeframe: str, data: pd.DataFrame) -> None:
        """
        Append data to existing data structures
        :param pair: Pair
        :param timeframe: Timeframe this ohlcv data is for
        :param data: Data to append.
        :raises NotImplementedError: Always - appending is not implemented.
        """
        raise NotImplementedError()

    @classmethod
    def trades_get_pairs(cls, datadir: Path) -> List[str]:
        """
        Returns a list of all pairs for which trade data is available in this datadir
        :param datadir: Directory to search for trades files
        :return: List of Pairs
        """
        # The dot of the file extension is escaped so it only matches a literal '.'
        _tmp = [re.search(r'^(\S+)(?=\-trades\.h5)', p.name)
                for p in datadir.glob("*trades.h5")]
        # Check if regex found something and only return these results to avoid exceptions.
        return [match[0].replace('_', '/') for match in _tmp if match]

    def trades_store(self, pair: str, data: TradeList) -> None:
        """
        Store trades data (list of Dicts) to file
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """
        key = self._pair_trades_key(pair)
        # Context manager guarantees the store is closed, even if put() raises.
        with pd.HDFStore(self._pair_trades_filename(self._datadir, pair),
                         mode='a', complevel=9, complib='blosc') as ds:
            ds.put(key, pd.DataFrame(data, columns=DEFAULT_TRADES_COLUMNS),
                   format='table', data_columns=['timestamp'])

    def trades_append(self, pair: str, data: TradeList):
        """
        Append data to existing files
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        :raises NotImplementedError: Always - appending is not implemented.
        """
        raise NotImplementedError()

    def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
        """
        Load a pair from h5 file.
        :param pair: Load trades for this pair
        :param timerange: Timerange to load trades for. Date-type start / stop
                          values are applied as filter on the 'timestamp' column.
        :return: List of trades, empty list if the file does not exist
        """
        key = self._pair_trades_key(pair)
        filename = self._pair_trades_filename(self._datadir, pair)

        if not filename.exists():
            return []
        # startts / stopts are scaled by 1e3 - presumably epoch seconds to milliseconds.
        where = []
        if timerange:
            if timerange.starttype == 'date':
                where.append(f"timestamp >= {timerange.startts * 1e3}")
            if timerange.stoptype == 'date':
                where.append(f"timestamp < {timerange.stopts * 1e3}")

        trades = pd.read_hdf(filename, key=key, mode="r", where=where)
        return trades.values.tolist()

    def trades_purge(self, pair: str) -> bool:
        """
        Remove data for this pair
        :param pair: Delete data for this pair.
        :return: True when deleted, false if file did not exist.
        """
        filename = self._pair_trades_filename(self._datadir, pair)
        if filename.exists():
            filename.unlink()
            return True
        return False

    @classmethod
    def _pair_ohlcv_key(cls, pair: str, timeframe: str) -> str:
        """Build the key under which OHLCV data for pair / timeframe is stored in the file."""
        return f"{pair}/ohlcv/tf_{timeframe}"

    @classmethod
    def _pair_trades_key(cls, pair: str) -> str:
        """Build the key under which trades data for pair is stored in the file."""
        return f"{pair}/trades"

    @classmethod
    def _pair_data_filename(cls, datadir: Path, pair: str, timeframe: str) -> Path:
        """Build the path of the OHLCV file for pair / timeframe below datadir."""
        pair_s = misc.pair_to_filename(pair)
        filename = datadir.joinpath(f'{pair_s}-{timeframe}.h5')
        return filename

    @classmethod
    def _pair_trades_filename(cls, datadir: Path, pair: str) -> Path:
        """Build the path of the trades file for pair below datadir."""
        pair_s = misc.pair_to_filename(pair)
        filename = datadir.joinpath(f'{pair_s}-trades.h5')
        return filename
@@ -9,7 +9,8 @@ from pandas import DataFrame
 from freqtrade.configuration import TimeRange
 from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS
-from freqtrade.data.converter import (ohlcv_to_dataframe,
+from freqtrade.data.converter import (clean_ohlcv_dataframe,
                                      ohlcv_to_dataframe,
                                      trades_remove_duplicates,
                                      trades_to_ohlcv)
 from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler
@@ -202,7 +203,10 @@ def _download_pair_history(datadir: Path,
        if data.empty:
            data = new_dataframe
        else:
-            data = data.append(new_dataframe)
+            # Run cleaning again to ensure there were no duplicate candles
            # Especially between existing and new data.
            data = clean_ohlcv_dataframe(data.append(new_dataframe), timeframe, pair,
                                         fill_missing=False, drop_incomplete=False)
        logger.debug("New  Start: %s",
                     f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')
@@ -50,9 +50,7 @@ class IDataHandler(ABC):
    @abstractmethod
    def ohlcv_store(self, pair: str, timeframe: str, data: DataFrame) -> None:
        """
-        Store data in json format "values".
+        Store ohlcv data.
            format looks as follows:
            [[<date>,<open>,<high>,<low>,<close>]]
        :param pair: Pair - used to generate filename
        :timeframe: Timeframe - used to generate filename
        :data: Dataframe containing OHLCV data
@@ -239,6 +237,9 @@ def get_datahandlerclass(datatype: str) -> Type[IDataHandler]:
    elif datatype == 'jsongz':
        from .jsondatahandler import JsonGzDataHandler
        return JsonGzDataHandler
    elif datatype == 'hdf5':
        from .hdf5datahandler import HDF5DataHandler
        return HDF5DataHandler
    else:
        raise ValueError(f"No datahandler for datatype {datatype} available.")
@@ -13,6 +13,8 @@ TA-Lib==0.4.18
 tabulate==0.8.7
 pycoingecko==1.3.0
 jinja2==2.11.2
 tables==3.6.1
 blosc==1.9.1
 # find first, C search in arrays
 py_find_1st==1.1.4
@@ -85,6 +85,8 @@ setup(name='freqtrade',
          # from requirements.txt
          'numpy',
          'pandas',
          'tables',
          'blosc',
      ],
      extras_require={
          'api': api,
@@ -12,7 +12,9 @@ from pandas import DataFrame
 from pandas.testing import assert_frame_equal
 from freqtrade.configuration import TimeRange
 from freqtrade.constants import AVAILABLE_DATAHANDLERS
 from freqtrade.data.converter import ohlcv_to_dataframe
 from freqtrade.data.history.hdf5datahandler import HDF5DataHandler
 from freqtrade.data.history.history_utils import (
    _download_pair_history, _download_trades_history,
    _load_cached_data_for_updating, convert_trades_to_ohlcv, get_timerange,
@@ -620,7 +622,7 @@ def test_convert_trades_to_ohlcv(mocker, default_conf, testdatadir, caplog):
    _clean_test_file(file5)
-def test_jsondatahandler_ohlcv_get_pairs(testdatadir):
+def test_datahandler_ohlcv_get_pairs(testdatadir):
    pairs = JsonDataHandler.ohlcv_get_pairs(testdatadir, '5m')
    # Convert to set to avoid failures due to sorting
    assert set(pairs) == {'UNITTEST/BTC', 'XLM/BTC', 'ETH/BTC', 'TRX/BTC', 'LTC/BTC',
@@ -630,8 +632,11 @@ def test_jsondatahandler_ohlcv_get_pairs(testdatadir):
    pairs = JsonGzDataHandler.ohlcv_get_pairs(testdatadir, '8m')
    assert set(pairs) == {'UNITTEST/BTC'}
    pairs = HDF5DataHandler.ohlcv_get_pairs(testdatadir, '5m')
    assert set(pairs) == {'UNITTEST/BTC'}
-def test_jsondatahandler_ohlcv_get_available_data(testdatadir):
+
 def test_datahandler_ohlcv_get_available_data(testdatadir):
    paircombs = JsonDataHandler.ohlcv_get_available_data(testdatadir)
    # Convert to set to avoid failures due to sorting
    assert set(paircombs) == {('UNITTEST/BTC', '5m'), ('ETH/BTC', '5m'), ('XLM/BTC', '5m'),
@@ -643,6 +648,8 @@ def test_jsondatahandler_ohlcv_get_available_data(testdatadir):
    paircombs = JsonGzDataHandler.ohlcv_get_available_data(testdatadir)
    assert set(paircombs) == {('UNITTEST/BTC', '8m')}
    paircombs = HDF5DataHandler.ohlcv_get_available_data(testdatadir)
    assert set(paircombs) == {('UNITTEST/BTC', '5m')}
 def test_jsondatahandler_trades_get_pairs(testdatadir):
@@ -653,15 +660,17 @@ def test_jsondatahandler_trades_get_pairs(testdatadir):
 def test_jsondatahandler_ohlcv_purge(mocker, testdatadir):
    mocker.patch.object(Path, "exists", MagicMock(return_value=False))
-    mocker.patch.object(Path, "unlink", MagicMock())
+    unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
    dh = JsonGzDataHandler(testdatadir)
    assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m')
    assert unlinkmock.call_count == 0
    mocker.patch.object(Path, "exists", MagicMock(return_value=True))
    assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m')
    assert unlinkmock.call_count == 1
-def test_jsondatahandler_trades_load(mocker, testdatadir, caplog):
+def test_jsondatahandler_trades_load(testdatadir, caplog):
    dh = JsonGzDataHandler(testdatadir)
    logmsg = "Old trades format detected - converting"
    dh.trades_load('XRP/ETH')
@@ -674,26 +683,144 @@ def test_jsondatahandler_trades_load(mocker, testdatadir, caplog):
 def test_jsondatahandler_trades_purge(mocker, testdatadir):
    mocker.patch.object(Path, "exists", MagicMock(return_value=False))
-    mocker.patch.object(Path, "unlink", MagicMock())
+    unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
    dh = JsonGzDataHandler(testdatadir)
    assert not dh.trades_purge('UNITTEST/NONEXIST')
    assert unlinkmock.call_count == 0
    mocker.patch.object(Path, "exists", MagicMock(return_value=True))
    assert dh.trades_purge('UNITTEST/NONEXIST')
    assert unlinkmock.call_count == 1
-def test_jsondatahandler_ohlcv_append(testdatadir):
+@pytest.mark.parametrize('datahandler', AVAILABLE_DATAHANDLERS)
-    dh = JsonGzDataHandler(testdatadir)
+def test_datahandler_ohlcv_append(datahandler, testdatadir, ):
    dh = get_datahandler(testdatadir, datahandler)
    with pytest.raises(NotImplementedError):
        dh.ohlcv_append('UNITTEST/ETH', '5m', DataFrame())
-def test_jsondatahandler_trades_append(testdatadir):
+@pytest.mark.parametrize('datahandler', AVAILABLE_DATAHANDLERS)
-    dh = JsonGzDataHandler(testdatadir)
+def test_datahandler_trades_append(datahandler, testdatadir):
    dh = get_datahandler(testdatadir, datahandler)
    with pytest.raises(NotImplementedError):
        dh.trades_append('UNITTEST/ETH', [])
 def test_hdf5datahandler_trades_get_pairs(testdatadir):
    """The hdf5 handler must report exactly the XRP/ETH trades file of the test datadir."""
    # Compare as a set - the order of the returned list is irrelevant for this check
    found = set(HDF5DataHandler.trades_get_pairs(testdatadir))
    expected = {'XRP/ETH'}
    assert found == expected
 def test_hdf5datahandler_trades_load(testdatadir):
    """Trades load as a list, missing pairs give [], and a timerange limits the result."""
    handler = HDF5DataHandler(testdatadir)
    all_trades = handler.trades_load('XRP/ETH')
    assert isinstance(all_trades, list)

    missing = handler.trades_load('UNITTEST/NONEXIST')
    assert missing == []
    # data goes from 2019-10-11 - 2019-10-13
    timerange = TimeRange.parse_timerange('20191011-20191012')
    # Trade timestamps (column 0) are compared against the timerange bounds * 1000
    start_ms = timerange.startts * 1000
    stop_ms = timerange.stopts * 1000

    ranged_trades = handler._trades_load('XRP/ETH', timerange)
    assert len(all_trades) > len(ranged_trades)
    # unfiltered load has trades before starttime
    # NOTE(review): ">= 0" can never fail - confirm whether "> 0" was intended.
    assert sum(1 for t in all_trades if t[0] < start_ms) >= 0
    # filtered list does not have trades before starttime
    assert not any(t[0] < start_ms for t in ranged_trades)
    # unfiltered load has trades after endtime
    assert any(t[0] > stop_ms for t in all_trades)
    # filtered list does not have trades after endtime
    assert not any(t[0] > stop_ms for t in ranged_trades)
 def test_hdf5datahandler_trades_store(testdatadir):
    """Trades stored under a new pair must load back identical to the source trades."""
    handler = HDF5DataHandler(testdatadir)
    original = handler.trades_load('XRP/ETH')

    handler.trades_store('XRP/NEW', original)
    target = testdatadir / 'XRP_NEW-trades.h5'
    assert target.is_file()

    # Load trades back
    reloaded = handler.trades_load('XRP/NEW')
    assert len(reloaded) == len(original)

    # Compare first and last trade column by column.
    # Column 2 is skipped: it is nan - so comparison does not make sense
    for row in (0, -1):
        for col in (0, 1, 3, 4, 5, 6):
            assert original[row][col] == reloaded[row][col]

    _clean_test_file(target)
 def test_hdf5datahandler_trades_purge(mocker, testdatadir):
    """trades_purge only unlinks (and returns True) when the trades file exists."""
    # First scenario: pretend the file does not exist
    mocker.patch.object(Path, "exists", MagicMock(return_value=False))
    unlink_mock = mocker.patch.object(Path, "unlink", MagicMock())
    handler = HDF5DataHandler(testdatadir)
    purged = handler.trades_purge('UNITTEST/NONEXIST')
    assert not purged
    assert unlink_mock.call_count == 0

    # Second scenario: pretend the file exists
    mocker.patch.object(Path, "exists", MagicMock(return_value=True))
    purged = handler.trades_purge('UNITTEST/NONEXIST')
    assert purged
    assert unlink_mock.call_count == 1
 def test_hdf5datahandler_ohlcv_load_and_resave(testdatadir):
    """OHLCV data re-saved under a new pair must load identically, also when filtered."""
    handler = HDF5DataHandler(testdatadir)
    full = handler.ohlcv_load('UNITTEST/BTC', '5m')
    assert isinstance(full, DataFrame)
    assert len(full) > 0

    resaved_file = testdatadir / 'UNITTEST_NEW-5m.h5'
    assert not resaved_file.is_file()

    handler.ohlcv_store('UNITTEST/NEW', '5m', full)
    assert resaved_file.is_file()

    # The unfiltered data contains candles before the timerange used below
    assert not full[full['date'] < '2018-01-15'].empty

    # Data goes from 2018-01-10 - 2018-01-30
    timerange = TimeRange.parse_timerange('20180115-20180119')

    # Call private function to ensure timerange is filtered in hdf5
    source_part = handler._ohlcv_load('UNITTEST/BTC', '5m', timerange)
    resaved_part = handler._ohlcv_load('UNITTEST/NEW', '5m', timerange)
    assert len(source_part) == len(resaved_part)
    assert source_part.equals(resaved_part)
    assert source_part[source_part['date'] < '2018-01-15'].empty
    assert source_part[source_part['date'] > '2018-01-19'].empty

    _clean_test_file(resaved_file)

    # Try loading a non-existing file
    missing = handler.ohlcv_load('UNITTEST/NONEXIST', '5m')
    assert missing.empty
 def test_hdf5datahandler_ohlcv_purge(mocker, testdatadir):
    """ohlcv_purge only unlinks (and returns True) when the ohlcv file exists."""
    # First scenario: pretend the file does not exist
    mocker.patch.object(Path, "exists", MagicMock(return_value=False))
    unlink_mock = mocker.patch.object(Path, "unlink", MagicMock())
    handler = HDF5DataHandler(testdatadir)
    purged = handler.ohlcv_purge('UNITTEST/NONEXIST', '5m')
    assert not purged
    assert unlink_mock.call_count == 0

    # Second scenario: pretend the file exists
    mocker.patch.object(Path, "exists", MagicMock(return_value=True))
    purged = handler.ohlcv_purge('UNITTEST/NONEXIST', '5m')
    assert purged
    assert unlink_mock.call_count == 1
 def test_gethandlerclass():
    cl = get_datahandlerclass('json')
    assert cl == JsonDataHandler
@@ -702,6 +829,9 @@ def test_gethandlerclass():
    assert cl == JsonGzDataHandler
    assert issubclass(cl, IDataHandler)
    assert issubclass(cl, JsonDataHandler)
    cl = get_datahandlerclass('hdf5')
    assert cl == HDF5DataHandler
    assert issubclass(cl, IDataHandler)
    with pytest.raises(ValueError, match=r"No datahandler for .*"):
        get_datahandlerclass('DeadBeef')
@@ -713,3 +843,6 @@ def test_get_datahandler(testdatadir):
    assert type(dh) == JsonGzDataHandler
    dh1 = get_datahandler(testdatadir, 'jsongz', dh)
    assert id(dh1) == id(dh)
    dh = get_datahandler(testdatadir, 'hdf5')
    assert type(dh) == HDF5DataHandler