diff --git a/docs/data-download.md b/docs/data-download.md index a2bbec837..0b22ec9ce 100644 --- a/docs/data-download.md +++ b/docs/data-download.md @@ -15,61 +15,91 @@ Otherwise `--exchange` becomes mandatory. ### Usage ``` -usage: freqtrade download-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH] [--userdir PATH] [-p PAIRS [PAIRS ...]] - [--pairs-file FILE] [--days INT] [--dl-trades] [--exchange EXCHANGE] +usage: freqtrade download-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] + [-d PATH] [--userdir PATH] + [-p PAIRS [PAIRS ...]] [--pairs-file FILE] + [--days INT] [--dl-trades] + [--exchange EXCHANGE] [-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} ...]] - [--erase] [--data-format-ohlcv {json,jsongz}] [--data-format-trades {json,jsongz}] + [--erase] + [--data-format-ohlcv {json,jsongz,hdf5}] + [--data-format-trades {json,jsongz,hdf5}] optional arguments: -h, --help show this help message and exit -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...] - Show profits for only these pairs. Pairs are space-separated. + Show profits for only these pairs. Pairs are space- + separated. --pairs-file FILE File containing a list of pairs to download. --days INT Download data for given number of days. - --dl-trades Download trades instead of OHLCV data. The bot will resample trades to the desired timeframe as specified as - --timeframes/-t. - --exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no config is provided. + --dl-trades Download trades instead of OHLCV data. The bot will + resample trades to the desired timeframe as specified + as --timeframes/-t. + --exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no + config is provided. -t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} ...], --timeframes {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} ...] - Specify which tickers to download. Space-separated list. Default: `1m 5m`. - --erase Clean all existing data for the selected exchange/pairs/timeframes. - --data-format-ohlcv {json,jsongz} - Storage format for downloaded candle (OHLCV) data. (default: `json`). - --data-format-trades {json,jsongz} - Storage format for downloaded trades data. (default: `jsongz`). + Specify which tickers to download. Space-separated + list. Default: `1m 5m`. + --erase Clean all existing data for the selected + exchange/pairs/timeframes. + --data-format-ohlcv {json,jsongz,hdf5} + Storage format for downloaded candle (OHLCV) data. + (default: `json`). + --data-format-trades {json,jsongz,hdf5} + Storage format for downloaded trades data. (default: + `jsongz`). Common arguments: -v, --verbose Verbose mode (-vv for more, -vvv to get all messages). - --logfile FILE Log to the file specified. Special values are: 'syslog', 'journald'. See the documentation for more details. + --logfile FILE Log to the file specified. Special values are: + 'syslog', 'journald'. See the documentation for more + details. -V, --version show program's version number and exit -c PATH, --config PATH - Specify configuration file (default: `config.json`). Multiple --config options may be used. Can be set to `-` - to read config from stdin. + Specify configuration file (default: + `userdir/config.json` or `config.json` whichever + exists). Multiple --config options may be used. Can be + set to `-` to read config from stdin. -d PATH, --datadir PATH Path to directory with historical backtesting data. --userdir PATH, --user-data-dir PATH Path to userdata directory. + ``` ### Data format -Freqtrade currently supports 2 dataformats, `json` (plain "text" json files) and `jsongz` (a gzipped version of json files). +Freqtrade currently supports 3 data-formats for both OHLCV and trades data: + +* `json` (plain "text" json files) +* `jsongz` (a gzip-zipped version of json files) +* `hdf5` (a high performance datastore) + By default, OHLCV data is stored as `json` data, while trades data is stored as `jsongz` data. -This can be changed via the `--data-format-ohlcv` and `--data-format-trades` parameters respectivly. +This can be changed via the `--data-format-ohlcv` and `--data-format-trades` command line arguments respectively. +To persist this change, you can should also add the following snippet to your configuration, so you don't have to insert the above arguments each time: -If the default dataformat has been changed during download, then the keys `dataformat_ohlcv` and `dataformat_trades` in the configuration file need to be adjusted to the selected dataformat as well. +``` jsonc + // ... + "dataformat_ohlcv": "hdf5", + "dataformat_trades": "hdf5", + // ... +``` + +If the default data-format has been changed during download, then the keys `dataformat_ohlcv` and `dataformat_trades` in the configuration file need to be adjusted to the selected dataformat as well. !!! Note - You can convert between data-formats using the [convert-data](#subcommand-convert-data) and [convert-trade-data](#subcommand-convert-trade-data) methods. + You can convert between data-formats using the [convert-data](#sub-command-convert-data) and [convert-trade-data](#sub-command-convert-trade-data) methods. -#### Subcommand convert data +#### Sub-command convert data ``` usage: freqtrade convert-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH] [--userdir PATH] [-p PAIRS [PAIRS ...]] --format-from - {json,jsongz} --format-to {json,jsongz} - [--erase] + {json,jsongz,hdf5} --format-to + {json,jsongz,hdf5} [--erase] [-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w} ...]] optional arguments: @@ -77,9 +107,9 @@ optional arguments: -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...] Show profits for only these pairs. Pairs are space- separated. - --format-from {json,jsongz} + --format-from {json,jsongz,hdf5} Source format for data conversion. - --format-to {json,jsongz} + --format-to {json,jsongz,hdf5} Destination format for data conversion. --erase Clean all existing data for the selected exchange/pairs/timeframes. @@ -94,9 +124,10 @@ Common arguments: details. -V, --version show program's version number and exit -c PATH, --config PATH - Specify configuration file (default: `config.json`). - Multiple --config options may be used. Can be set to - `-` to read config from stdin. + Specify configuration file (default: + `userdir/config.json` or `config.json` whichever + exists). Multiple --config options may be used. Can be + set to `-` to read config from stdin. -d PATH, --datadir PATH Path to directory with historical backtesting data. --userdir PATH, --user-data-dir PATH @@ -112,23 +143,23 @@ It'll also remove original json data files (`--erase` parameter). freqtrade convert-data --format-from json --format-to jsongz --datadir ~/.freqtrade/data/binance -t 5m 15m --erase ``` -#### Subcommand convert-trade data +#### Sub-command convert trade data ``` usage: freqtrade convert-trade-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH] [--userdir PATH] [-p PAIRS [PAIRS ...]] --format-from - {json,jsongz} --format-to {json,jsongz} - [--erase] + {json,jsongz,hdf5} --format-to + {json,jsongz,hdf5} [--erase] optional arguments: -h, --help show this help message and exit -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...] Show profits for only these pairs. Pairs are space- separated. - --format-from {json,jsongz} + --format-from {json,jsongz,hdf5} Source format for data conversion. - --format-to {json,jsongz} + --format-to {json,jsongz,hdf5} Destination format for data conversion. --erase Clean all existing data for the selected exchange/pairs/timeframes. @@ -140,13 +171,15 @@ Common arguments: details. -V, --version show program's version number and exit -c PATH, --config PATH - Specify configuration file (default: `config.json`). - Multiple --config options may be used. Can be set to - `-` to read config from stdin. + Specify configuration file (default: + `userdir/config.json` or `config.json` whichever + exists). Multiple --config options may be used. Can be + set to `-` to read config from stdin. -d PATH, --datadir PATH Path to directory with historical backtesting data. --userdir PATH, --user-data-dir PATH Path to userdata directory. + ``` ##### Example converting trades @@ -158,21 +191,21 @@ It'll also remove original jsongz data files (`--erase` parameter). freqtrade convert-trade-data --format-from jsongz --format-to json --datadir ~/.freqtrade/data/kraken --erase ``` -### Subcommand list-data +### Sub-command list-data -You can get a list of downloaded data using the `list-data` subcommand. +You can get a list of downloaded data using the `list-data` sub-command. ``` usage: freqtrade list-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH] [--userdir PATH] [--exchange EXCHANGE] - [--data-format-ohlcv {json,jsongz}] + [--data-format-ohlcv {json,jsongz,hdf5}] [-p PAIRS [PAIRS ...]] optional arguments: -h, --help show this help message and exit --exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no config is provided. - --data-format-ohlcv {json,jsongz} + --data-format-ohlcv {json,jsongz,hdf5} Storage format for downloaded candle (OHLCV) data. (default: `json`). -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...] @@ -194,6 +227,7 @@ Common arguments: Path to directory with historical backtesting data. --userdir PATH, --user-data-dir PATH Path to userdata directory. + ``` #### Example list-data @@ -249,7 +283,7 @@ This will download historical candle (OHLCV) data for all the currency pairs you ### Other Notes - To use a different directory than the exchange specific default, use `--datadir user_data/data/some_directory`. -- To change the exchange used to download the historical data from, please use a different configuration file (you'll probably need to adjust ratelimits etc.) +- To change the exchange used to download the historical data from, please use a different configuration file (you'll probably need to adjust rate limits etc.) - To use `pairs.json` from some other directory, use `--pairs-file some_other_dir/pairs.json`. - To download historical candle (OHLCV) data for only 10 days, use `--days 10` (defaults to 30 days). - Use `--timeframes` to specify what timeframe download the historical candle (OHLCV) data for. Default is `--timeframes 1m 5m` which will download 1-minute and 5-minute data. @@ -257,7 +291,7 @@ This will download historical candle (OHLCV) data for all the currency pairs you ### Trades (tick) data -By default, `download-data` subcommand downloads Candles (OHLCV) data. Some exchanges also provide historic trade-data via their API. +By default, `download-data` sub-command downloads Candles (OHLCV) data. Some exchanges also provide historic trade-data via their API. This data can be useful if you need many different timeframes, since it is only downloaded once, and then resampled locally to the desired timeframes. Since this data is large by default, the files use gzip by default. They are stored in your data-directory with the naming convention of `-trades.json.gz` (`ETH_BTC-trades.json.gz`). Incremental mode is also supported, as for historic OHLCV data, so downloading the data once per week with `--days 8` will create an incremental data-repository. diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py index cc93fc590..7268c3c8f 100644 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -15,7 +15,7 @@ ARGS_STRATEGY = ["strategy", "strategy_path"] ARGS_TRADE = ["db_url", "sd_notify", "dry_run"] -ARGS_COMMON_OPTIMIZE = ["timeframe", "timerange", +ARGS_COMMON_OPTIMIZE = ["timeframe", "timerange", "dataformat_ohlcv", "max_open_trades", "stake_amount", "fee"] ARGS_BACKTEST = ARGS_COMMON_OPTIMIZE + ["position_stacking", "use_max_market_positions", diff --git a/freqtrade/constants.py b/freqtrade/constants.py index 1f8cebd0d..79f8c17c5 100644 --- a/freqtrade/constants.py +++ b/freqtrade/constants.py @@ -24,7 +24,7 @@ ORDERTIF_POSSIBILITIES = ['gtc', 'fok', 'ioc'] AVAILABLE_PAIRLISTS = ['StaticPairList', 'VolumePairList', 'AgeFilter', 'PrecisionFilter', 'PriceFilter', 'ShuffleFilter', 'SpreadFilter'] -AVAILABLE_DATAHANDLERS = ['json', 'jsongz'] +AVAILABLE_DATAHANDLERS = ['json', 'jsongz', 'hdf5'] DRY_RUN_WALLET = 1000 DATETIME_PRINT_FORMAT = '%Y-%m-%d %H:%M:%S' MATH_CLOSE_PREC = 1e-14 # Precision used for float comparisons diff --git a/freqtrade/data/converter.py b/freqtrade/data/converter.py index 46b653eb0..100a578a2 100644 --- a/freqtrade/data/converter.py +++ b/freqtrade/data/converter.py @@ -255,7 +255,8 @@ def convert_ohlcv_format(config: Dict[str, Any], convert_from: str, convert_to: drop_incomplete=False, startup_candles=0) logger.info(f"Converting {len(data)} candles for {pair}") - trg.ohlcv_store(pair=pair, timeframe=timeframe, data=data) - if erase and convert_from != convert_to: - logger.info(f"Deleting source data for {pair} / {timeframe}") - src.ohlcv_purge(pair=pair, timeframe=timeframe) + if len(data) > 0: + trg.ohlcv_store(pair=pair, timeframe=timeframe, data=data) + if erase and convert_from != convert_to: + logger.info(f"Deleting source data for {pair} / {timeframe}") + src.ohlcv_purge(pair=pair, timeframe=timeframe) diff --git a/freqtrade/data/history/hdf5datahandler.py b/freqtrade/data/history/hdf5datahandler.py new file mode 100644 index 000000000..594a1598a --- /dev/null +++ b/freqtrade/data/history/hdf5datahandler.py @@ -0,0 +1,211 @@ +import logging +import re +from pathlib import Path +from typing import List, Optional + +import pandas as pd + +from freqtrade import misc +from freqtrade.configuration import TimeRange +from freqtrade.constants import (DEFAULT_DATAFRAME_COLUMNS, + DEFAULT_TRADES_COLUMNS, + ListPairsWithTimeframes) + +from .idatahandler import IDataHandler, TradeList + +logger = logging.getLogger(__name__) + + +class HDF5DataHandler(IDataHandler): + + _columns = DEFAULT_DATAFRAME_COLUMNS + + @classmethod + def ohlcv_get_available_data(cls, datadir: Path) -> ListPairsWithTimeframes: + """ + Returns a list of all pairs with ohlcv data available in this datadir + :param datadir: Directory to search for ohlcv files + :return: List of Tuples of (pair, timeframe) + """ + _tmp = [re.search(r'^([a-zA-Z_]+)\-(\d+\S+)(?=.h5)', p.name) + for p in datadir.glob("*.h5")] + return [(match[1].replace('_', '/'), match[2]) for match in _tmp + if match and len(match.groups()) > 1] + + @classmethod + def ohlcv_get_pairs(cls, datadir: Path, timeframe: str) -> List[str]: + """ + Returns a list of all pairs with ohlcv data available in this datadir + for the specified timeframe + :param datadir: Directory to search for ohlcv files + :param timeframe: Timeframe to search pairs for + :return: List of Pairs + """ + + _tmp = [re.search(r'^(\S+)(?=\-' + timeframe + '.h5)', p.name) + for p in datadir.glob(f"*{timeframe}.h5")] + # Check if regex found something and only return these results + return [match[0].replace('_', '/') for match in _tmp if match] + + def ohlcv_store(self, pair: str, timeframe: str, data: pd.DataFrame) -> None: + """ + Store data in hdf5 file. + :param pair: Pair - used to generate filename + :timeframe: Timeframe - used to generate filename + :data: Dataframe containing OHLCV data + :return: None + """ + key = self._pair_ohlcv_key(pair, timeframe) + _data = data.copy() + + filename = self._pair_data_filename(self._datadir, pair, timeframe) + + ds = pd.HDFStore(filename, mode='a', complevel=9, complib='blosc') + ds.put(key, _data.loc[:, self._columns], format='table', data_columns=['date']) + + ds.close() + + def _ohlcv_load(self, pair: str, timeframe: str, + timerange: Optional[TimeRange] = None) -> pd.DataFrame: + """ + Internal method used to load data for one pair from disk. + Implements the loading and conversion to a Pandas dataframe. + Timerange trimming and dataframe validation happens outside of this method. + :param pair: Pair to load data + :param timeframe: Timeframe (e.g. "5m") + :param timerange: Limit data to be loaded to this timerange. + Optionally implemented by subclasses to avoid loading + all data where possible. + :return: DataFrame with ohlcv data, or empty DataFrame + """ + key = self._pair_ohlcv_key(pair, timeframe) + filename = self._pair_data_filename(self._datadir, pair, timeframe) + + if not filename.exists(): + return pd.DataFrame(columns=self._columns) + where = [] + if timerange: + if timerange.starttype == 'date': + where.append(f"date >= Timestamp({timerange.startts * 1e9})") + if timerange.stoptype == 'date': + where.append(f"date < Timestamp({timerange.stopts * 1e9})") + + pairdata = pd.read_hdf(filename, key=key, mode="r", where=where) + + if list(pairdata.columns) != self._columns: + raise ValueError("Wrong dataframe format") + pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float', + 'low': 'float', 'close': 'float', 'volume': 'float'}) + return pairdata + + def ohlcv_purge(self, pair: str, timeframe: str) -> bool: + """ + Remove data for this pair + :param pair: Delete data for this pair. + :param timeframe: Timeframe (e.g. "5m") + :return: True when deleted, false if file did not exist. + """ + filename = self._pair_data_filename(self._datadir, pair, timeframe) + if filename.exists(): + filename.unlink() + return True + return False + + def ohlcv_append(self, pair: str, timeframe: str, data: pd.DataFrame) -> None: + """ + Append data to existing data structures + :param pair: Pair + :param timeframe: Timeframe this ohlcv data is for + :param data: Data to append. + """ + raise NotImplementedError() + + @classmethod + def trades_get_pairs(cls, datadir: Path) -> List[str]: + """ + Returns a list of all pairs for which trade data is available in this + :param datadir: Directory to search for ohlcv files + :return: List of Pairs + """ + _tmp = [re.search(r'^(\S+)(?=\-trades.h5)', p.name) + for p in datadir.glob("*trades.h5")] + # Check if regex found something and only return these results to avoid exceptions. + return [match[0].replace('_', '/') for match in _tmp if match] + + def trades_store(self, pair: str, data: TradeList) -> None: + """ + Store trades data (list of Dicts) to file + :param pair: Pair - used for filename + :param data: List of Lists containing trade data, + column sequence as in DEFAULT_TRADES_COLUMNS + """ + key = self._pair_trades_key(pair) + + ds = pd.HDFStore(self._pair_trades_filename(self._datadir, pair), + mode='a', complevel=9, complib='blosc') + ds.put(key, pd.DataFrame(data, columns=DEFAULT_TRADES_COLUMNS), + format='table', data_columns=['timestamp']) + ds.close() + + def trades_append(self, pair: str, data: TradeList): + """ + Append data to existing files + :param pair: Pair - used for filename + :param data: List of Lists containing trade data, + column sequence as in DEFAULT_TRADES_COLUMNS + """ + raise NotImplementedError() + + def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList: + """ + Load a pair from h5 file. + :param pair: Load trades for this pair + :param timerange: Timerange to load trades for - currently not implemented + :return: List of trades + """ + key = self._pair_trades_key(pair) + filename = self._pair_trades_filename(self._datadir, pair) + + if not filename.exists(): + return [] + where = [] + if timerange: + if timerange.starttype == 'date': + where.append(f"timestamp >= {timerange.startts * 1e3}") + if timerange.stoptype == 'date': + where.append(f"timestamp < {timerange.stopts * 1e3}") + + trades = pd.read_hdf(filename, key=key, mode="r", where=where) + return trades.values.tolist() + + def trades_purge(self, pair: str) -> bool: + """ + Remove data for this pair + :param pair: Delete data for this pair. + :return: True when deleted, false if file did not exist. + """ + filename = self._pair_trades_filename(self._datadir, pair) + if filename.exists(): + filename.unlink() + return True + return False + + @classmethod + def _pair_ohlcv_key(cls, pair: str, timeframe: str) -> str: + return f"{pair}/ohlcv/tf_{timeframe}" + + @classmethod + def _pair_trades_key(cls, pair: str) -> str: + return f"{pair}/trades" + + @classmethod + def _pair_data_filename(cls, datadir: Path, pair: str, timeframe: str) -> Path: + pair_s = misc.pair_to_filename(pair) + filename = datadir.joinpath(f'{pair_s}-{timeframe}.h5') + return filename + + @classmethod + def _pair_trades_filename(cls, datadir: Path, pair: str) -> Path: + pair_s = misc.pair_to_filename(pair) + filename = datadir.joinpath(f'{pair_s}-trades.h5') + return filename diff --git a/freqtrade/data/history/history_utils.py b/freqtrade/data/history/history_utils.py index 58bd752ea..dd09c4c05 100644 --- a/freqtrade/data/history/history_utils.py +++ b/freqtrade/data/history/history_utils.py @@ -9,7 +9,8 @@ from pandas import DataFrame from freqtrade.configuration import TimeRange from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS -from freqtrade.data.converter import (ohlcv_to_dataframe, +from freqtrade.data.converter import (clean_ohlcv_dataframe, + ohlcv_to_dataframe, trades_remove_duplicates, trades_to_ohlcv) from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler @@ -202,7 +203,10 @@ def _download_pair_history(datadir: Path, if data.empty: data = new_dataframe else: - data = data.append(new_dataframe) + # Run cleaning again to ensure there were no duplicate candles + # Especially between existing and new data. + data = clean_ohlcv_dataframe(data.append(new_dataframe), timeframe, pair, + fill_missing=False, drop_incomplete=False) logger.debug("New Start: %s", f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None') diff --git a/freqtrade/data/history/idatahandler.py b/freqtrade/data/history/idatahandler.py index 96d288e01..01b14f501 100644 --- a/freqtrade/data/history/idatahandler.py +++ b/freqtrade/data/history/idatahandler.py @@ -50,9 +50,7 @@ class IDataHandler(ABC): @abstractmethod def ohlcv_store(self, pair: str, timeframe: str, data: DataFrame) -> None: """ - Store data in json format "values". - format looks as follows: - [[,,,,]] + Store ohlcv data. :param pair: Pair - used to generate filename :timeframe: Timeframe - used to generate filename :data: Dataframe containing OHLCV data @@ -239,6 +237,9 @@ def get_datahandlerclass(datatype: str) -> Type[IDataHandler]: elif datatype == 'jsongz': from .jsondatahandler import JsonGzDataHandler return JsonGzDataHandler + elif datatype == 'hdf5': + from .hdf5datahandler import HDF5DataHandler + return HDF5DataHandler else: raise ValueError(f"No datahandler for datatype {datatype} available.") diff --git a/requirements-common.txt b/requirements-common.txt index be966c687..f305d8793 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -13,6 +13,8 @@ TA-Lib==0.4.18 tabulate==0.8.7 pycoingecko==1.3.0 jinja2==2.11.2 +tables==3.6.1 +blosc==1.9.1 # find first, C search in arrays py_find_1st==1.1.4 diff --git a/setup.py b/setup.py index 6d832e3f5..7213d3092 100644 --- a/setup.py +++ b/setup.py @@ -85,6 +85,8 @@ setup(name='freqtrade', # from requirements.txt 'numpy', 'pandas', + 'tables', + 'blosc', ], extras_require={ 'api': api, diff --git a/tests/data/test_history.py b/tests/data/test_history.py index 7a3d5e5af..787f62a75 100644 --- a/tests/data/test_history.py +++ b/tests/data/test_history.py @@ -12,7 +12,9 @@ from pandas import DataFrame from pandas.testing import assert_frame_equal from freqtrade.configuration import TimeRange +from freqtrade.constants import AVAILABLE_DATAHANDLERS from freqtrade.data.converter import ohlcv_to_dataframe +from freqtrade.data.history.hdf5datahandler import HDF5DataHandler from freqtrade.data.history.history_utils import ( _download_pair_history, _download_trades_history, _load_cached_data_for_updating, convert_trades_to_ohlcv, get_timerange, @@ -620,7 +622,7 @@ def test_convert_trades_to_ohlcv(mocker, default_conf, testdatadir, caplog): _clean_test_file(file5) -def test_jsondatahandler_ohlcv_get_pairs(testdatadir): +def test_datahandler_ohlcv_get_pairs(testdatadir): pairs = JsonDataHandler.ohlcv_get_pairs(testdatadir, '5m') # Convert to set to avoid failures due to sorting assert set(pairs) == {'UNITTEST/BTC', 'XLM/BTC', 'ETH/BTC', 'TRX/BTC', 'LTC/BTC', @@ -630,8 +632,11 @@ def test_jsondatahandler_ohlcv_get_pairs(testdatadir): pairs = JsonGzDataHandler.ohlcv_get_pairs(testdatadir, '8m') assert set(pairs) == {'UNITTEST/BTC'} + pairs = HDF5DataHandler.ohlcv_get_pairs(testdatadir, '5m') + assert set(pairs) == {'UNITTEST/BTC'} -def test_jsondatahandler_ohlcv_get_available_data(testdatadir): + +def test_datahandler_ohlcv_get_available_data(testdatadir): paircombs = JsonDataHandler.ohlcv_get_available_data(testdatadir) # Convert to set to avoid failures due to sorting assert set(paircombs) == {('UNITTEST/BTC', '5m'), ('ETH/BTC', '5m'), ('XLM/BTC', '5m'), @@ -643,6 +648,8 @@ def test_jsondatahandler_ohlcv_get_available_data(testdatadir): paircombs = JsonGzDataHandler.ohlcv_get_available_data(testdatadir) assert set(paircombs) == {('UNITTEST/BTC', '8m')} + paircombs = HDF5DataHandler.ohlcv_get_available_data(testdatadir) + assert set(paircombs) == {('UNITTEST/BTC', '5m')} def test_jsondatahandler_trades_get_pairs(testdatadir): @@ -653,15 +660,17 @@ def test_jsondatahandler_trades_get_pairs(testdatadir): def test_jsondatahandler_ohlcv_purge(mocker, testdatadir): mocker.patch.object(Path, "exists", MagicMock(return_value=False)) - mocker.patch.object(Path, "unlink", MagicMock()) + unlinkmock = mocker.patch.object(Path, "unlink", MagicMock()) dh = JsonGzDataHandler(testdatadir) assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m') + assert unlinkmock.call_count == 0 mocker.patch.object(Path, "exists", MagicMock(return_value=True)) assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m') + assert unlinkmock.call_count == 1 -def test_jsondatahandler_trades_load(mocker, testdatadir, caplog): +def test_jsondatahandler_trades_load(testdatadir, caplog): dh = JsonGzDataHandler(testdatadir) logmsg = "Old trades format detected - converting" dh.trades_load('XRP/ETH') @@ -674,26 +683,144 @@ def test_jsondatahandler_trades_load(mocker, testdatadir, caplog): def test_jsondatahandler_trades_purge(mocker, testdatadir): mocker.patch.object(Path, "exists", MagicMock(return_value=False)) - mocker.patch.object(Path, "unlink", MagicMock()) + unlinkmock = mocker.patch.object(Path, "unlink", MagicMock()) dh = JsonGzDataHandler(testdatadir) assert not dh.trades_purge('UNITTEST/NONEXIST') + assert unlinkmock.call_count == 0 mocker.patch.object(Path, "exists", MagicMock(return_value=True)) assert dh.trades_purge('UNITTEST/NONEXIST') + assert unlinkmock.call_count == 1 -def test_jsondatahandler_ohlcv_append(testdatadir): - dh = JsonGzDataHandler(testdatadir) +@pytest.mark.parametrize('datahandler', AVAILABLE_DATAHANDLERS) +def test_datahandler_ohlcv_append(datahandler, testdatadir, ): + dh = get_datahandler(testdatadir, datahandler) with pytest.raises(NotImplementedError): dh.ohlcv_append('UNITTEST/ETH', '5m', DataFrame()) -def test_jsondatahandler_trades_append(testdatadir): - dh = JsonGzDataHandler(testdatadir) +@pytest.mark.parametrize('datahandler', AVAILABLE_DATAHANDLERS) +def test_datahandler_trades_append(datahandler, testdatadir): + dh = get_datahandler(testdatadir, datahandler) with pytest.raises(NotImplementedError): dh.trades_append('UNITTEST/ETH', []) +def test_hdf5datahandler_trades_get_pairs(testdatadir): + pairs = HDF5DataHandler.trades_get_pairs(testdatadir) + # Convert to set to avoid failures due to sorting + assert set(pairs) == {'XRP/ETH'} + + +def test_hdf5datahandler_trades_load(testdatadir): + dh = HDF5DataHandler(testdatadir) + trades = dh.trades_load('XRP/ETH') + assert isinstance(trades, list) + + trades1 = dh.trades_load('UNITTEST/NONEXIST') + assert trades1 == [] + # data goes from 2019-10-11 - 2019-10-13 + timerange = TimeRange.parse_timerange('20191011-20191012') + + trades2 = dh._trades_load('XRP/ETH', timerange) + assert len(trades) > len(trades2) + + # unfiltered load has trades before starttime + assert len([t for t in trades if t[0] < timerange.startts * 1000]) >= 0 + # filtered list does not have trades before starttime + assert len([t for t in trades2 if t[0] < timerange.startts * 1000]) == 0 + # unfiltered load has trades after endtime + assert len([t for t in trades if t[0] > timerange.stopts * 1000]) > 0 + # filtered list does not have trades after endtime + assert len([t for t in trades2 if t[0] > timerange.stopts * 1000]) == 0 + + +def test_hdf5datahandler_trades_store(testdatadir): + dh = HDF5DataHandler(testdatadir) + trades = dh.trades_load('XRP/ETH') + + dh.trades_store('XRP/NEW', trades) + file = testdatadir / 'XRP_NEW-trades.h5' + assert file.is_file() + # Load trades back + trades_new = dh.trades_load('XRP/NEW') + + assert len(trades_new) == len(trades) + assert trades[0][0] == trades_new[0][0] + assert trades[0][1] == trades_new[0][1] + # assert trades[0][2] == trades_new[0][2] # This is nan - so comparison does not make sense + assert trades[0][3] == trades_new[0][3] + assert trades[0][4] == trades_new[0][4] + assert trades[0][5] == trades_new[0][5] + assert trades[0][6] == trades_new[0][6] + assert trades[-1][0] == trades_new[-1][0] + assert trades[-1][1] == trades_new[-1][1] + # assert trades[-1][2] == trades_new[-1][2] # This is nan - so comparison does not make sense + assert trades[-1][3] == trades_new[-1][3] + assert trades[-1][4] == trades_new[-1][4] + assert trades[-1][5] == trades_new[-1][5] + assert trades[-1][6] == trades_new[-1][6] + + _clean_test_file(file) + + +def test_hdf5datahandler_trades_purge(mocker, testdatadir): + mocker.patch.object(Path, "exists", MagicMock(return_value=False)) + unlinkmock = mocker.patch.object(Path, "unlink", MagicMock()) + dh = HDF5DataHandler(testdatadir) + assert not dh.trades_purge('UNITTEST/NONEXIST') + assert unlinkmock.call_count == 0 + + mocker.patch.object(Path, "exists", MagicMock(return_value=True)) + assert dh.trades_purge('UNITTEST/NONEXIST') + assert unlinkmock.call_count == 1 + + +def test_hdf5datahandler_ohlcv_load_and_resave(testdatadir): + dh = HDF5DataHandler(testdatadir) + ohlcv = dh.ohlcv_load('UNITTEST/BTC', '5m') + assert isinstance(ohlcv, DataFrame) + assert len(ohlcv) > 0 + + file = testdatadir / 'UNITTEST_NEW-5m.h5' + assert not file.is_file() + + dh.ohlcv_store('UNITTEST/NEW', '5m', ohlcv) + assert file.is_file() + + assert not ohlcv[ohlcv['date'] < '2018-01-15'].empty + + # Data gores from 2018-01-10 - 2018-01-30 + timerange = TimeRange.parse_timerange('20180115-20180119') + + # Call private function to ensure timerange is filtered in hdf5 + ohlcv = dh._ohlcv_load('UNITTEST/BTC', '5m', timerange) + ohlcv1 = dh._ohlcv_load('UNITTEST/NEW', '5m', timerange) + assert len(ohlcv) == len(ohlcv1) + assert ohlcv.equals(ohlcv1) + assert ohlcv[ohlcv['date'] < '2018-01-15'].empty + assert ohlcv[ohlcv['date'] > '2018-01-19'].empty + + _clean_test_file(file) + + # Try loading inexisting file + ohlcv = dh.ohlcv_load('UNITTEST/NONEXIST', '5m') + assert ohlcv.empty + + +def test_hdf5datahandler_ohlcv_purge(mocker, testdatadir): + mocker.patch.object(Path, "exists", MagicMock(return_value=False)) + unlinkmock = mocker.patch.object(Path, "unlink", MagicMock()) + dh = HDF5DataHandler(testdatadir) + assert not dh.ohlcv_purge('UNITTEST/NONEXIST', '5m') + assert unlinkmock.call_count == 0 + + mocker.patch.object(Path, "exists", MagicMock(return_value=True)) + assert dh.ohlcv_purge('UNITTEST/NONEXIST', '5m') + assert unlinkmock.call_count == 1 + + def test_gethandlerclass(): cl = get_datahandlerclass('json') assert cl == JsonDataHandler @@ -702,6 +829,9 @@ def test_gethandlerclass(): assert cl == JsonGzDataHandler assert issubclass(cl, IDataHandler) assert issubclass(cl, JsonDataHandler) + cl = get_datahandlerclass('hdf5') + assert cl == HDF5DataHandler + assert issubclass(cl, IDataHandler) with pytest.raises(ValueError, match=r"No datahandler for .*"): get_datahandlerclass('DeadBeef') @@ -713,3 +843,6 @@ def test_get_datahandler(testdatadir): assert type(dh) == JsonGzDataHandler dh1 = get_datahandler(testdatadir, 'jsongz', dh) assert id(dh1) == id(dh) + + dh = get_datahandler(testdatadir, 'hdf5') + assert type(dh) == HDF5DataHandler diff --git a/tests/testdata/UNITTEST_BTC-5m.h5 b/tests/testdata/UNITTEST_BTC-5m.h5 new file mode 100644 index 000000000..52232af9e Binary files /dev/null and b/tests/testdata/UNITTEST_BTC-5m.h5 differ diff --git a/tests/testdata/XRP_ETH-trades.h5 b/tests/testdata/XRP_ETH-trades.h5 new file mode 100644 index 000000000..c13789e2a Binary files /dev/null and b/tests/testdata/XRP_ETH-trades.h5 differ