diff --git a/docs/data-download.md b/docs/data-download.md index 2b76d4f74..700ca04f4 100644 --- a/docs/data-download.md +++ b/docs/data-download.md @@ -26,7 +26,7 @@ usage: freqtrade download-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [--timerange TIMERANGE] [--dl-trades] [--exchange EXCHANGE] [-t TIMEFRAMES [TIMEFRAMES ...]] [--erase] - [--data-format-ohlcv {json,jsongz,hdf5}] + [--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}] [--data-format-trades {json,jsongz,hdf5}] [--trading-mode {spot,margin,futures}] [--prepend] @@ -55,7 +55,7 @@ optional arguments: list. Default: `1m 5m`. --erase Clean all existing data for the selected exchange/pairs/timeframes. - --data-format-ohlcv {json,jsongz,hdf5} + --data-format-ohlcv {json,jsongz,hdf5,feather,parquet} Storage format for downloaded candle (OHLCV) data. (default: `json`). --data-format-trades {json,jsongz,hdf5} @@ -76,7 +76,7 @@ Common arguments: `userdir/config.json` or `config.json` whichever exists). Multiple --config options may be used. Can be set to `-` to read config from stdin. - -d PATH, --datadir PATH + -d PATH, --datadir PATH, --data-dir PATH Path to directory with historical backtesting data. --userdir PATH, --user-data-dir PATH Path to userdata directory. @@ -179,9 +179,11 @@ freqtrade download-data --exchange binance --pairs ETH/USDT XRP/USDT BTC/USDT -- Freqtrade currently supports 3 data-formats for both OHLCV and trades data: -* `json` (plain "text" json files) -* `jsongz` (a gzip-zipped version of json files) -* `hdf5` (a high performance datastore) +* `json` - plain "text" json files +* `jsongz` - a gzip-zipped version of json files +* `hdf5` - a high performance datastore +* `feather` - a dataformat based on Apache Arrow +* `parquet` - columnar datastore By default, OHLCV data is stored as `json` data, while trades data is stored as `jsongz` data. @@ -200,38 +202,74 @@ If the default data-format has been changed during download, then the keys `data !!! 
Note You can convert between data-formats using the [convert-data](#sub-command-convert-data) and [convert-trade-data](#sub-command-convert-trade-data) methods. +#### Dataformat comparison + +The comparisons below were made with the following data, using the Linux `time` command. + +``` +Found 6 pair / timeframe combinations. ++----------+-------------+--------+---------------------+---------------------+ +| Pair | Timeframe | Type | From | To | +|----------+-------------+--------+---------------------+---------------------| +| BTC/USDT | 5m | spot | 2017-08-17 04:00:00 | 2022-09-13 19:25:00 | +| ETH/USDT | 1m | spot | 2017-08-17 04:00:00 | 2022-09-13 19:26:00 | +| BTC/USDT | 1m | spot | 2017-08-17 04:00:00 | 2022-09-13 19:30:00 | +| XRP/USDT | 5m | spot | 2018-05-04 08:10:00 | 2022-09-13 19:15:00 | +| XRP/USDT | 1m | spot | 2018-05-04 08:11:00 | 2022-09-13 19:22:00 | +| ETH/USDT | 5m | spot | 2017-08-17 04:00:00 | 2022-09-13 19:20:00 | ++----------+-------------+--------+---------------------+---------------------+ +``` + +Timings have been taken in a not very scientific way with the following command, which forces reading the data into memory. + +``` bash +time freqtrade list-data --show-timerange --data-format-ohlcv <dataformat> +``` + +| Format | Size | Timing | +|------------|-------------|-------------| +| `json` | 149MB | 25.6s | +| `jsongz` | 39MB | 27s | +| `hdf5` | 145MB | 3.9s | +| `feather` | 72MB | 3.5s | +| `parquet` | 83MB | 3.8s | + +Size has been taken from the BTC/USDT 1m spot combination for the timerange specified above. + +For the best performance/size mix, we recommend using either feather or parquet. 
+ #### Sub-command convert data ``` usage: freqtrade convert-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH] [--userdir PATH] [-p PAIRS [PAIRS ...]] --format-from - {json,jsongz,hdf5} --format-to - {json,jsongz,hdf5} [--erase] - [-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...]] + {json,jsongz,hdf5,feather,parquet} --format-to + {json,jsongz,hdf5,feather,parquet} [--erase] [--exchange EXCHANGE] + [-t TIMEFRAMES [TIMEFRAMES ...]] [--trading-mode {spot,margin,futures}] - [--candle-types {spot,,futures,mark,index,premiumIndex,funding_rate} [{spot,,futures,mark,index,premiumIndex,funding_rate} ...]] + [--candle-types {spot,futures,mark,index,premiumIndex,funding_rate} [{spot,futures,mark,index,premiumIndex,funding_rate} ...]] optional arguments: -h, --help show this help message and exit -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...] Limit command to these pairs. Pairs are space- separated. - --format-from {json,jsongz,hdf5} + --format-from {json,jsongz,hdf5,feather,parquet} Source format for data conversion. - --format-to {json,jsongz,hdf5} + --format-to {json,jsongz,hdf5,feather,parquet} Destination format for data conversion. --erase Clean all existing data for the selected exchange/pairs/timeframes. - -t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...], --timeframes {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...] - Specify which tickers to download. Space-separated - list. Default: `1m 5m`. --exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no config is provided. - --trading-mode {spot,margin,futures} + -t TIMEFRAMES [TIMEFRAMES ...], --timeframes TIMEFRAMES [TIMEFRAMES ...] + Specify which tickers to download. Space-separated + list. Default: `1m 5m`. 
+ --trading-mode {spot,margin,futures}, --tradingmode {spot,margin,futures} Select Trading mode - --candle-types {spot,,futures,mark,index,premiumIndex,funding_rate} [{spot,,futures,mark,index,premiumIndex,funding_rate} ...] + --candle-types {spot,futures,mark,index,premiumIndex,funding_rate} [{spot,futures,mark,index,premiumIndex,funding_rate} ...] Select candle type to use Common arguments: @@ -245,7 +283,7 @@ Common arguments: `userdir/config.json` or `config.json` whichever exists). Multiple --config options may be used. Can be set to `-` to read config from stdin. - -d PATH, --datadir PATH + -d PATH, --datadir PATH, --data-dir PATH Path to directory with historical backtesting data. --userdir PATH, --user-data-dir PATH Path to userdata directory. @@ -267,20 +305,24 @@ freqtrade convert-data --format-from json --format-to jsongz --datadir ~/.freqtr usage: freqtrade convert-trade-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH] [--userdir PATH] [-p PAIRS [PAIRS ...]] --format-from - {json,jsongz,hdf5} --format-to - {json,jsongz,hdf5} [--erase] + {json,jsongz,hdf5,feather,parquet} + --format-to + {json,jsongz,hdf5,feather,parquet} + [--erase] [--exchange EXCHANGE] optional arguments: -h, --help show this help message and exit -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...] - Show profits for only these pairs. Pairs are space- + Limit command to these pairs. Pairs are space- separated. - --format-from {json,jsongz,hdf5} + --format-from {json,jsongz,hdf5,feather,parquet} Source format for data conversion. - --format-to {json,jsongz,hdf5} + --format-to {json,jsongz,hdf5,feather,parquet} Destination format for data conversion. --erase Clean all existing data for the selected exchange/pairs/timeframes. + --exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no + config is provided. Common arguments: -v, --verbose Verbose mode (-vv for more, -vvv to get all messages). 
@@ -293,7 +335,7 @@ Common arguments: `userdir/config.json` or `config.json` whichever exists). Multiple --config options may be used. Can be set to `-` to read config from stdin. - -d PATH, --datadir PATH + -d PATH, --datadir PATH, --data-dir PATH Path to directory with historical backtesting data. --userdir PATH, --user-data-dir PATH Path to userdata directory. @@ -318,9 +360,9 @@ This command will allow you to repeat this last step for additional timeframes w usage: freqtrade trades-to-ohlcv [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH] [--userdir PATH] [-p PAIRS [PAIRS ...]] - [-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...]] + [-t TIMEFRAMES [TIMEFRAMES ...]] [--exchange EXCHANGE] - [--data-format-ohlcv {json,jsongz,hdf5}] + [--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}] [--data-format-trades {json,jsongz,hdf5}] optional arguments: @@ -328,12 +370,12 @@ optional arguments: -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...] Limit command to these pairs. Pairs are space- separated. - -t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...], --timeframes {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...] + -t TIMEFRAMES [TIMEFRAMES ...], --timeframes TIMEFRAMES [TIMEFRAMES ...] Specify which tickers to download. Space-separated list. Default: `1m 5m`. --exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no config is provided. - --data-format-ohlcv {json,jsongz,hdf5} + --data-format-ohlcv {json,jsongz,hdf5,feather,parquet} Storage format for downloaded candle (OHLCV) data. (default: `json`). --data-format-trades {json,jsongz,hdf5} @@ -351,7 +393,7 @@ Common arguments: `userdir/config.json` or `config.json` whichever exists). Multiple --config options may be used. Can be set to `-` to read config from stdin. 
- -d PATH, --datadir PATH + -d PATH, --datadir PATH, --data-dir PATH Path to directory with historical backtesting data. --userdir PATH, --user-data-dir PATH Path to userdata directory. @@ -371,7 +413,7 @@ You can get a list of downloaded data using the `list-data` sub-command. ``` usage: freqtrade list-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH] [--userdir PATH] [--exchange EXCHANGE] - [--data-format-ohlcv {json,jsongz,hdf5}] + [--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}] [-p PAIRS [PAIRS ...]] [--trading-mode {spot,margin,futures}] [--show-timerange] @@ -380,13 +422,13 @@ optional arguments: -h, --help show this help message and exit --exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no config is provided. - --data-format-ohlcv {json,jsongz,hdf5} + --data-format-ohlcv {json,jsongz,hdf5,feather,parquet} Storage format for downloaded candle (OHLCV) data. (default: `json`). -p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...] Limit command to these pairs. Pairs are space- separated. - --trading-mode {spot,margin,futures} + --trading-mode {spot,margin,futures}, --tradingmode {spot,margin,futures} Select Trading mode --show-timerange Show timerange available for available data. (May take a while to calculate). @@ -402,7 +444,7 @@ Common arguments: `userdir/config.json` or `config.json` whichever exists). Multiple --config options may be used. Can be set to `-` to read config from stdin. - -d PATH, --datadir PATH + -d PATH, --datadir PATH, --data-dir PATH Path to directory with historical backtesting data. --userdir PATH, --user-data-dir PATH Path to userdata directory. 
diff --git a/environment.yml b/environment.yml index d6d85de9d..5298b2baa 100644 --- a/environment.yml +++ b/environment.yml @@ -34,6 +34,7 @@ dependencies: - schedule - python-dateutil - joblib + - pyarrow # ============================ diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py index f383f0768..e50fb86d8 100644 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -440,7 +440,7 @@ AVAILABLE_CLI_OPTIONS = { "dataformat_trades": Arg( '--data-format-trades', help='Storage format for downloaded trades data. (default: `jsongz`).', - choices=constants.AVAILABLE_DATAHANDLERS, + choices=constants.AVAILABLE_DATAHANDLERS_TRADES, ), "show_timerange": Arg( '--show-timerange', diff --git a/freqtrade/constants.py b/freqtrade/constants.py index fe17b40bc..4c2bd6e18 100644 --- a/freqtrade/constants.py +++ b/freqtrade/constants.py @@ -36,7 +36,8 @@ AVAILABLE_PAIRLISTS = ['StaticPairList', 'VolumePairList', 'PrecisionFilter', 'PriceFilter', 'RangeStabilityFilter', 'ShuffleFilter', 'SpreadFilter', 'VolatilityFilter'] AVAILABLE_PROTECTIONS = ['CooldownPeriod', 'LowProfitPairs', 'MaxDrawdown', 'StoplossGuard'] -AVAILABLE_DATAHANDLERS = ['json', 'jsongz', 'hdf5'] +AVAILABLE_DATAHANDLERS_TRADES = ['json', 'jsongz', 'hdf5'] +AVAILABLE_DATAHANDLERS = AVAILABLE_DATAHANDLERS_TRADES + ['feather', 'parquet'] BACKTEST_BREAKDOWNS = ['day', 'week', 'month'] BACKTEST_CACHE_AGE = ['none', 'day', 'week', 'month'] BACKTEST_CACHE_DEFAULT = 'day' @@ -434,7 +435,7 @@ CONF_SCHEMA = { }, 'dataformat_trades': { 'type': 'string', - 'enum': AVAILABLE_DATAHANDLERS, + 'enum': AVAILABLE_DATAHANDLERS_TRADES, 'default': 'jsongz' }, 'position_adjustment_enable': {'type': 'boolean'}, diff --git a/freqtrade/data/history/featherdatahandler.py b/freqtrade/data/history/featherdatahandler.py new file mode 100644 index 000000000..22a6805e7 --- /dev/null +++ b/freqtrade/data/history/featherdatahandler.py @@ -0,0 +1,130 @@ +import logging +from typing 
import Optional + +from pandas import DataFrame, read_feather, to_datetime + +from freqtrade.configuration import TimeRange +from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, TradeList +from freqtrade.enums import CandleType + +from .idatahandler import IDataHandler + + +logger = logging.getLogger(__name__) + + +class FeatherDataHandler(IDataHandler): + + _columns = DEFAULT_DATAFRAME_COLUMNS + + def ohlcv_store( + self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType) -> None: + """ + Store OHLCV data in feather format (lz4 compressed). + The dataframe index is dropped and only the columns + listed in `_columns` are written. + :param pair: Pair - used to generate filename + :param timeframe: Timeframe - used to generate filename + :param data: Dataframe containing OHLCV data + :param candle_type: Any of the enum CandleType (must match trading mode!) 
+ :return: None + """ + filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type) + self.create_dir_if_needed(filename) + + data.reset_index(drop=True).loc[:, self._columns].to_feather( + filename, compression_level=9, compression='lz4') + + def _ohlcv_load(self, pair: str, timeframe: str, + timerange: Optional[TimeRange], candle_type: CandleType + ) -> DataFrame: + """ + Internal method used to load data for one pair from disk. + Implements the loading and conversion to a Pandas dataframe. + Timerange trimming and dataframe validation happens outside of this method. + :param pair: Pair to load data + :param timeframe: Timeframe (e.g. "5m") + :param timerange: Not used by this implementation - the complete file is read. + Trimming to the requested timerange happens outside + of this method. + :param candle_type: Any of the enum CandleType (must match trading mode!) + :return: DataFrame with ohlcv data, or empty DataFrame + """ + filename = self._pair_data_filename( + self._datadir, pair, timeframe, candle_type=candle_type) + if not filename.exists(): + # Fallback mode for 1M files + filename = self._pair_data_filename( + self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True) + if not filename.exists(): + return DataFrame(columns=self._columns) + + pairdata = read_feather(filename) + pairdata.columns = self._columns + pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float', + 'low': 'float', 'close': 'float', 'volume': 'float'}) + pairdata['date'] = to_datetime(pairdata['date'], + unit='ms', + utc=True, + infer_datetime_format=True) + return pairdata + + def ohlcv_append( + self, + pair: str, + timeframe: str, + data: DataFrame, + candle_type: CandleType + ) -> None: + """ + Append data to existing data - not implemented, raises NotImplementedError + :param pair: Pair + :param timeframe: Timeframe this ohlcv data is for + :param data: Data to append. + :param candle_type: Any of the enum CandleType (must match trading mode!) 
+ """ + raise NotImplementedError() + + def trades_store(self, pair: str, data: TradeList) -> None: + """ + Store trades data to file - not implemented, raises NotImplementedError + :param pair: Pair - used for filename + :param data: List of Lists containing trade data, + column sequence as in DEFAULT_TRADES_COLUMNS + """ + # filename = self._pair_trades_filename(self._datadir, pair) + + raise NotImplementedError() + # array = pa.array(data) + # array + # feather.write_feather(data, filename) + + def trades_append(self, pair: str, data: TradeList): + """ + Append trades data to existing files - not implemented, raises NotImplementedError + :param pair: Pair - used for filename + :param data: List of Lists containing trade data, + column sequence as in DEFAULT_TRADES_COLUMNS + """ + raise NotImplementedError() + + def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList: + """ + Load trades for a pair - not implemented for the feather format. + The method body only raises; the commented-out code below is never executed. + :param pair: Load trades for this pair + :param timerange: Timerange to load trades for - currently not implemented + :raises NotImplementedError: Always + """ + raise NotImplementedError() + # filename = self._pair_trades_filename(self._datadir, pair) + # tradesdata = misc.file_load_json(filename) + + # if not tradesdata: + # return [] + + # return tradesdata + + @classmethod + def _get_file_extension(cls): + return "feather" diff --git a/freqtrade/data/history/hdf5datahandler.py b/freqtrade/data/history/hdf5datahandler.py index 01b7af7e7..fd46115de 100644 --- a/freqtrade/data/history/hdf5datahandler.py +++ b/freqtrade/data/history/hdf5datahandler.py @@ -81,6 +81,7 @@ class HDF5DataHandler(IDataHandler): raise ValueError("Wrong dataframe format") pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float', 'low': 'float', 'close': 'float', 'volume': 'float'}) + pairdata = pairdata.reset_index(drop=True) return pairdata def ohlcv_append( diff --git a/freqtrade/data/history/idatahandler.py b/freqtrade/data/history/idatahandler.py index 8c1823c00..eb5ad3621 100644 --- 
a/freqtrade/data/history/idatahandler.py +++ b/freqtrade/data/history/idatahandler.py @@ -375,6 +375,12 @@ def get_datahandlerclass(datatype: str) -> Type[IDataHandler]: elif datatype == 'hdf5': from .hdf5datahandler import HDF5DataHandler return HDF5DataHandler + elif datatype == 'feather': + from .featherdatahandler import FeatherDataHandler + return FeatherDataHandler + elif datatype == 'parquet': + from .parquetdatahandler import ParquetDataHandler + return ParquetDataHandler else: raise ValueError(f"No datahandler for datatype {datatype} available.") diff --git a/freqtrade/data/history/parquetdatahandler.py b/freqtrade/data/history/parquetdatahandler.py new file mode 100644 index 000000000..57581861d --- /dev/null +++ b/freqtrade/data/history/parquetdatahandler.py @@ -0,0 +1,129 @@ +import logging +from typing import Optional + +from pandas import DataFrame, read_parquet, to_datetime + +from freqtrade.configuration import TimeRange +from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, TradeList +from freqtrade.enums import CandleType + +from .idatahandler import IDataHandler + + +logger = logging.getLogger(__name__) + + +class ParquetDataHandler(IDataHandler): + + _columns = DEFAULT_DATAFRAME_COLUMNS + + def ohlcv_store( + self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType) -> None: + """ + Store OHLCV data in parquet format. + The dataframe index is dropped and only the columns + listed in `_columns` are written. + :param pair: Pair - used to generate filename + :param timeframe: Timeframe - used to generate filename + :param data: Dataframe containing OHLCV data + :param candle_type: Any of the enum CandleType (must match trading mode!) 
+ :return: None + """ + filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type) + self.create_dir_if_needed(filename) + + data.reset_index(drop=True).loc[:, self._columns].to_parquet(filename) + + def _ohlcv_load(self, pair: str, timeframe: str, + timerange: Optional[TimeRange], candle_type: CandleType + ) -> DataFrame: + """ + Internal method used to load data for one pair from disk. + Implements the loading and conversion to a Pandas dataframe. + Timerange trimming and dataframe validation happens outside of this method. + :param pair: Pair to load data + :param timeframe: Timeframe (e.g. "5m") + :param timerange: Not used by this implementation - the complete file is read. + Trimming to the requested timerange happens outside + of this method. + :param candle_type: Any of the enum CandleType (must match trading mode!) + :return: DataFrame with ohlcv data, or empty DataFrame + """ + filename = self._pair_data_filename( + self._datadir, pair, timeframe, candle_type=candle_type) + if not filename.exists(): + # Fallback mode for 1M files + filename = self._pair_data_filename( + self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True) + if not filename.exists(): + return DataFrame(columns=self._columns) + + pairdata = read_parquet(filename) + pairdata.columns = self._columns + pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float', + 'low': 'float', 'close': 'float', 'volume': 'float'}) + pairdata['date'] = to_datetime(pairdata['date'], + unit='ms', + utc=True, + infer_datetime_format=True) + return pairdata + + def ohlcv_append( + self, + pair: str, + timeframe: str, + data: DataFrame, + candle_type: CandleType + ) -> None: + """ + Append data to existing data - not implemented, raises NotImplementedError + :param pair: Pair + :param timeframe: Timeframe this ohlcv data is for + :param data: Data to append. + :param candle_type: Any of the enum CandleType (must match trading mode!) 
+ """ + raise NotImplementedError() + + def trades_store(self, pair: str, data: TradeList) -> None: + """ + Store trades data to file - not implemented, raises NotImplementedError + :param pair: Pair - used for filename + :param data: List of Lists containing trade data, + column sequence as in DEFAULT_TRADES_COLUMNS + """ + # filename = self._pair_trades_filename(self._datadir, pair) + + raise NotImplementedError() + # array = pa.array(data) + # array + # feather.write_feather(data, filename) + + def trades_append(self, pair: str, data: TradeList): + """ + Append trades data to existing files - not implemented, raises NotImplementedError + :param pair: Pair - used for filename + :param data: List of Lists containing trade data, + column sequence as in DEFAULT_TRADES_COLUMNS + """ + raise NotImplementedError() + + def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList: + """ + Load trades for a pair - not implemented for the parquet format. + The method body only raises; the commented-out code below is never executed. + :param pair: Load trades for this pair + :param timerange: Timerange to load trades for - currently not implemented + :raises NotImplementedError: Always + """ + raise NotImplementedError() + # filename = self._pair_trades_filename(self._datadir, pair) + # tradesdata = misc.file_load_json(filename) + + # if not tradesdata: + # return [] + + # return tradesdata + + @classmethod + def _get_file_extension(cls): + return "parquet" diff --git a/requirements.txt b/requirements.txt index 690e33a09..c12d3fb08 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,7 @@ jinja2==3.1.2 tables==3.7.0 blosc==1.10.6 joblib==1.2.0 +pyarrow==9.0.0 # find first, C search in arrays py_find_1st==1.1.5 @@ -54,3 +55,4 @@ schedule==1.1.0 #WS Messages websockets==10.3 janus==1.0.0 + diff --git a/setup.py b/setup.py index 2e6e354b0..1547b7974 100644 --- a/setup.py +++ b/setup.py @@ -8,13 +8,11 @@ hyperopt = [ 'scikit-learn', 'scikit-optimize>=0.7.0', 'filelock', - 'joblib', 'progressbar2', ] freqai = [ 'scikit-learn', - 'joblib', 'catboost; platform_machine != "aarch64"', 'lightgbm', ] @@ 
-74,6 +72,8 @@ setup( 'pandas', 'tables', 'blosc', + 'joblib', + 'pyarrow', 'fastapi', 'uvicorn', 'psutil', diff --git a/tests/data/test_datahandler.py b/tests/data/test_datahandler.py index 6d9f72a81..8e1b0050a 100644 --- a/tests/data/test_datahandler.py +++ b/tests/data/test_datahandler.py @@ -9,9 +9,11 @@ from pandas import DataFrame from freqtrade.configuration import TimeRange from freqtrade.constants import AVAILABLE_DATAHANDLERS +from freqtrade.data.history.featherdatahandler import FeatherDataHandler from freqtrade.data.history.hdf5datahandler import HDF5DataHandler from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler, get_datahandlerclass from freqtrade.data.history.jsondatahandler import JsonDataHandler, JsonGzDataHandler +from freqtrade.data.history.parquetdatahandler import ParquetDataHandler from freqtrade.enums import CandleType, TradingMode from tests.conftest import log_has @@ -152,6 +154,15 @@ def test_jsondatahandler_ohlcv_load(testdatadir, caplog): assert df.columns.equals(df1.columns) +@pytest.mark.parametrize('datahandler', ['feather', 'parquet']) +def test_datahandler_trades_not_supported(datahandler, testdatadir, ): + dh = get_datahandler(testdatadir, datahandler) + with pytest.raises(NotImplementedError): + dh.trades_load('UNITTEST/ETH') + with pytest.raises(NotImplementedError): + dh.trades_store('UNITTEST/ETH', MagicMock()) + + def test_jsondatahandler_trades_load(testdatadir, caplog): dh = JsonGzDataHandler(testdatadir) logmsg = "Old trades format detected - converting" @@ -312,6 +323,67 @@ def test_hdf5datahandler_ohlcv_load_and_resave( assert ohlcv.empty +@pytest.mark.parametrize('pair,timeframe,candle_type,candle_append,startdt,enddt', [ + # Data goes from 2018-01-10 - 2018-01-30 + ('UNITTEST/BTC', '5m', 'spot', '', '2018-01-15', '2018-01-19'), + # Mark data goes from 2021-11-15 to 2021-11-19 + ('UNITTEST/USDT', '1h', 'mark', '-mark', '2021-11-16', '2021-11-18'), +]) +@pytest.mark.parametrize('datahandler', 
['hdf5', 'feather', 'parquet']) +def test_generic_datahandler_ohlcv_load_and_resave( + datahandler, + testdatadir, + tmpdir, + pair, + timeframe, + candle_type, + candle_append, + startdt, enddt +): + tmpdir1 = Path(tmpdir) + tmpdir2 = tmpdir1 + if candle_type not in ('', 'spot'): + tmpdir2 = tmpdir1 / 'futures' + tmpdir2.mkdir() + # Load data from one common file + dhbase = get_datahandler(testdatadir, 'json') + ohlcv = dhbase._ohlcv_load(pair, timeframe, None, candle_type=candle_type) + assert isinstance(ohlcv, DataFrame) + assert len(ohlcv) > 0 + + # Get data to test + dh = get_datahandler(testdatadir, datahandler) + + file = tmpdir2 / f"UNITTEST_NEW-{timeframe}{candle_append}.{dh._get_file_extension()}" + assert not file.is_file() + + dh1 = get_datahandler(tmpdir1, datahandler) + dh1.ohlcv_store('UNITTEST/NEW', timeframe, ohlcv, candle_type=candle_type) + assert file.is_file() + + assert not ohlcv[ohlcv['date'] < startdt].empty + + timerange = TimeRange.parse_timerange(f"{startdt.replace('-', '')}-{enddt.replace('-', '')}") + + ohlcv = dhbase.ohlcv_load(pair, timeframe, timerange=timerange, candle_type=candle_type) + if datahandler == 'hdf5': + ohlcv1 = dh1._ohlcv_load('UNITTEST/NEW', timeframe, timerange, candle_type=candle_type) + if candle_type == 'mark': + ohlcv1['volume'] = 0.0 + else: + ohlcv1 = dh1.ohlcv_load('UNITTEST/NEW', timeframe, + timerange=timerange, candle_type=candle_type) + + assert len(ohlcv) == len(ohlcv1) + assert ohlcv.equals(ohlcv1) + assert ohlcv[ohlcv['date'] < startdt].empty + assert ohlcv[ohlcv['date'] > enddt].empty + + # Try loading a nonexistent file + ohlcv = dh.ohlcv_load('UNITTEST/NONEXIST', timeframe, candle_type=candle_type) + assert ohlcv.empty + + def test_hdf5datahandler_ohlcv_purge(mocker, testdatadir): mocker.patch.object(Path, "exists", MagicMock(return_value=False)) unlinkmock = mocker.patch.object(Path, "unlink", MagicMock()) @@ -330,13 +402,24 @@ def test_gethandlerclass(): cl = get_datahandlerclass('json') assert cl == 
JsonDataHandler assert issubclass(cl, IDataHandler) + cl = get_datahandlerclass('jsongz') assert cl == JsonGzDataHandler assert issubclass(cl, IDataHandler) assert issubclass(cl, JsonDataHandler) + cl = get_datahandlerclass('hdf5') assert cl == HDF5DataHandler assert issubclass(cl, IDataHandler) + + cl = get_datahandlerclass('feather') + assert cl == FeatherDataHandler + assert issubclass(cl, IDataHandler) + + cl = get_datahandlerclass('parquet') + assert cl == ParquetDataHandler + assert issubclass(cl, IDataHandler) + with pytest.raises(ValueError, match=r"No datahandler for .*"): get_datahandlerclass('DeadBeef')