Merge pull request #7458 from xmatthias/dataformat/feather
new Dataformats feather and Parquet
This commit is contained in:
commit f4fac53a13
@@ -26,7 +26,7 @@ usage: freqtrade download-data [-h] [-v] [--logfile FILE] [-V] [-c PATH]
[--timerange TIMERANGE] [--dl-trades]
[--exchange EXCHANGE]
[-t TIMEFRAMES [TIMEFRAMES ...]] [--erase]
[--data-format-ohlcv {json,jsongz,hdf5}]
[--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}]
[--data-format-trades {json,jsongz,hdf5}]
[--trading-mode {spot,margin,futures}]
[--prepend]
@@ -55,7 +55,7 @@ optional arguments:
list. Default: `1m 5m`.
--erase Clean all existing data for the selected
exchange/pairs/timeframes.
--data-format-ohlcv {json,jsongz,hdf5}
--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}
Storage format for downloaded candle (OHLCV) data.
(default: `json`).
--data-format-trades {json,jsongz,hdf5}
@@ -76,7 +76,7 @@ Common arguments:
`userdir/config.json` or `config.json` whichever
exists). Multiple --config options may be used. Can be
set to `-` to read config from stdin.
-d PATH, --datadir PATH
-d PATH, --datadir PATH, --data-dir PATH
Path to directory with historical backtesting data.
--userdir PATH, --user-data-dir PATH
Path to userdata directory.
@@ -179,9 +179,11 @@ freqtrade download-data --exchange binance --pairs ETH/USDT XRP/USDT BTC/USDT --

Freqtrade currently supports 3 data-formats for both OHLCV and trades data:

* `json` (plain "text" json files)
* `jsongz` (a gzip-zipped version of json files)
* `hdf5` (a high performance datastore)
* `json` - plain "text" json files
* `jsongz` - a gzip-zipped version of json files
* `hdf5` - a high performance datastore
* `feather` - a dataformat based on Apache Arrow
* `parquet` - columnar datastore

By default, OHLCV data is stored as `json` data, while trades data is stored as `jsongz` data.
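As a quick illustration of what these formats contain, a stored candle file can be opened directly with pandas, outside of freqtrade. This is a minimal sketch, assuming data was downloaded in feather format to the default binance data directory; the path, pair, and timeframe below are illustrative and not part of this change:

```python
# Minimal inspection sketch; path, pair and timeframe are assumptions.
import pandas as pd

# freqtrade stores one file per pair/timeframe, e.g. BTC_USDT-5m.feather
df = pd.read_feather("user_data/data/binance/BTC_USDT-5m.feather")

print(df.columns.tolist())  # expected: date, open, high, low, close, volume
print(df.tail())
```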
@@ -200,38 +202,74 @@ If the default data-format has been changed during download, then the keys `data
!!! Note
    You can convert between data-formats using the [convert-data](#sub-command-convert-data) and [convert-trade-data](#sub-command-convert-trade-data) methods.
#### Dataformat comparison

The following comparison was made with the data listed below, using the Linux `time` command.

```
Found 6 pair / timeframe combinations.
+----------+-------------+--------+---------------------+---------------------+
| Pair     | Timeframe   | Type   | From                | To                  |
|----------+-------------+--------+---------------------+---------------------|
| BTC/USDT | 5m          | spot   | 2017-08-17 04:00:00 | 2022-09-13 19:25:00 |
| ETH/USDT | 1m          | spot   | 2017-08-17 04:00:00 | 2022-09-13 19:26:00 |
| BTC/USDT | 1m          | spot   | 2017-08-17 04:00:00 | 2022-09-13 19:30:00 |
| XRP/USDT | 5m          | spot   | 2018-05-04 08:10:00 | 2022-09-13 19:15:00 |
| XRP/USDT | 1m          | spot   | 2018-05-04 08:11:00 | 2022-09-13 19:22:00 |
| ETH/USDT | 5m          | spot   | 2017-08-17 04:00:00 | 2022-09-13 19:20:00 |
+----------+-------------+--------+---------------------+---------------------+
```

Timings were taken in a not very scientific way with the following command, which forces the data to be read into memory.

``` bash
time freqtrade list-data --show-timerange --data-format-ohlcv <dataformat>
```

| Format    | Size   | Timing |
|-----------|--------|--------|
| `json`    | 149 MB | 25.6s  |
| `jsongz`  | 39 MB  | 27s    |
| `hdf5`    | 145 MB | 3.9s   |
| `feather` | 72 MB  | 3.5s   |
| `parquet` | 83 MB  | 3.8s   |

Sizes were taken from the BTC/USDT 1m spot combination for the timerange specified above.

For the best performance/size mix, we recommend using either feather or parquet.
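For a rough cross-check outside the CLI, the same idea can be sketched in a few lines of Python. This is not how the numbers above were produced, and the data directory, pair, and filenames are assumptions:

```python
# Hedged sketch: compare on-disk size and load time of one pair across formats.
# Paths and filenames are assumptions; adjust them to your own data directory.
import time
from pathlib import Path

import pandas as pd

base = Path("user_data/data/binance")
loaders = {"json": pd.read_json, "feather": pd.read_feather, "parquet": pd.read_parquet}

for fmt, loader in loaders.items():
    path = base / f"BTC_USDT-1m.{fmt}"
    if not path.exists():
        continue
    start = time.perf_counter()
    df = loader(path)
    elapsed = time.perf_counter() - start
    print(f"{fmt:8} {path.stat().st_size / 1e6:7.1f} MB  {elapsed:6.2f}s  {len(df)} rows")
```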
#### Sub-command convert data

```
usage: freqtrade convert-data [-h] [-v] [--logfile FILE] [-V] [-c PATH]
[-d PATH] [--userdir PATH]
[-p PAIRS [PAIRS ...]] --format-from
{json,jsongz,hdf5} --format-to
{json,jsongz,hdf5} [--erase]
[-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...]]
{json,jsongz,hdf5,feather,parquet} --format-to
{json,jsongz,hdf5,feather,parquet} [--erase]
[--exchange EXCHANGE]
[-t TIMEFRAMES [TIMEFRAMES ...]]
[--trading-mode {spot,margin,futures}]
[--candle-types {spot,,futures,mark,index,premiumIndex,funding_rate} [{spot,,futures,mark,index,premiumIndex,funding_rate} ...]]
[--candle-types {spot,futures,mark,index,premiumIndex,funding_rate} [{spot,futures,mark,index,premiumIndex,funding_rate} ...]]

optional arguments:
-h, --help show this help message and exit
-p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
Limit command to these pairs. Pairs are space-
separated.
--format-from {json,jsongz,hdf5}
--format-from {json,jsongz,hdf5,feather,parquet}
Source format for data conversion.
--format-to {json,jsongz,hdf5}
--format-to {json,jsongz,hdf5,feather,parquet}
Destination format for data conversion.
--erase Clean all existing data for the selected
exchange/pairs/timeframes.
-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...], --timeframes {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...]
Specify which tickers to download. Space-separated
list. Default: `1m 5m`.
--exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no
config is provided.
--trading-mode {spot,margin,futures}
-t TIMEFRAMES [TIMEFRAMES ...], --timeframes TIMEFRAMES [TIMEFRAMES ...]
Specify which tickers to download. Space-separated
list. Default: `1m 5m`.
--trading-mode {spot,margin,futures}, --tradingmode {spot,margin,futures}
Select Trading mode
--candle-types {spot,,futures,mark,index,premiumIndex,funding_rate} [{spot,,futures,mark,index,premiumIndex,funding_rate} ...]
--candle-types {spot,futures,mark,index,premiumIndex,funding_rate} [{spot,futures,mark,index,premiumIndex,funding_rate} ...]
Select candle type to use

Common arguments:
@@ -245,7 +283,7 @@ Common arguments:
`userdir/config.json` or `config.json` whichever
exists). Multiple --config options may be used. Can be
set to `-` to read config from stdin.
-d PATH, --datadir PATH
-d PATH, --datadir PATH, --data-dir PATH
Path to directory with historical backtesting data.
--userdir PATH, --user-data-dir PATH
Path to userdata directory.
@@ -267,20 +305,24 @@ freqtrade convert-data --format-from json --format-to jsongz --datadir ~/.freqtr
usage: freqtrade convert-trade-data [-h] [-v] [--logfile FILE] [-V] [-c PATH]
[-d PATH] [--userdir PATH]
[-p PAIRS [PAIRS ...]] --format-from
{json,jsongz,hdf5} --format-to
{json,jsongz,hdf5} [--erase]
{json,jsongz,hdf5,feather,parquet}
--format-to
{json,jsongz,hdf5,feather,parquet}
[--erase] [--exchange EXCHANGE]

optional arguments:
-h, --help show this help message and exit
-p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
Show profits for only these pairs. Pairs are space-
Limit command to these pairs. Pairs are space-
separated.
--format-from {json,jsongz,hdf5}
--format-from {json,jsongz,hdf5,feather,parquet}
Source format for data conversion.
--format-to {json,jsongz,hdf5}
--format-to {json,jsongz,hdf5,feather,parquet}
Destination format for data conversion.
--erase Clean all existing data for the selected
exchange/pairs/timeframes.
--exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no
config is provided.

Common arguments:
-v, --verbose Verbose mode (-vv for more, -vvv to get all messages).
@@ -293,7 +335,7 @@ Common arguments:
`userdir/config.json` or `config.json` whichever
exists). Multiple --config options may be used. Can be
set to `-` to read config from stdin.
-d PATH, --datadir PATH
-d PATH, --datadir PATH, --data-dir PATH
Path to directory with historical backtesting data.
--userdir PATH, --user-data-dir PATH
Path to userdata directory.
@@ -318,9 +360,9 @@ This command will allow you to repeat this last step for additional timeframes w
usage: freqtrade trades-to-ohlcv [-h] [-v] [--logfile FILE] [-V] [-c PATH]
[-d PATH] [--userdir PATH]
[-p PAIRS [PAIRS ...]]
[-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...]]
[-t TIMEFRAMES [TIMEFRAMES ...]]
[--exchange EXCHANGE]
[--data-format-ohlcv {json,jsongz,hdf5}]
[--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}]
[--data-format-trades {json,jsongz,hdf5}]

optional arguments:
@@ -328,12 +370,12 @@ optional arguments:
-p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
Limit command to these pairs. Pairs are space-
separated.
-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...], --timeframes {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...]
-t TIMEFRAMES [TIMEFRAMES ...], --timeframes TIMEFRAMES [TIMEFRAMES ...]
Specify which tickers to download. Space-separated
list. Default: `1m 5m`.
--exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no
config is provided.
--data-format-ohlcv {json,jsongz,hdf5}
--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}
Storage format for downloaded candle (OHLCV) data.
(default: `json`).
--data-format-trades {json,jsongz,hdf5}
@@ -351,7 +393,7 @@ Common arguments:
`userdir/config.json` or `config.json` whichever
exists). Multiple --config options may be used. Can be
set to `-` to read config from stdin.
-d PATH, --datadir PATH
-d PATH, --datadir PATH, --data-dir PATH
Path to directory with historical backtesting data.
--userdir PATH, --user-data-dir PATH
Path to userdata directory.
@@ -371,7 +413,7 @@ You can get a list of downloaded data using the `list-data` sub-command.
```
usage: freqtrade list-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH]
[--userdir PATH] [--exchange EXCHANGE]
[--data-format-ohlcv {json,jsongz,hdf5}]
[--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}]
[-p PAIRS [PAIRS ...]]
[--trading-mode {spot,margin,futures}]
[--show-timerange]
@@ -380,13 +422,13 @@ optional arguments:
-h, --help show this help message and exit
--exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no
config is provided.
--data-format-ohlcv {json,jsongz,hdf5}
--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}
Storage format for downloaded candle (OHLCV) data.
(default: `json`).
-p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
Limit command to these pairs. Pairs are space-
separated.
--trading-mode {spot,margin,futures}
--trading-mode {spot,margin,futures}, --tradingmode {spot,margin,futures}
Select Trading mode
--show-timerange Show timerange available for available data. (May take
a while to calculate).
@@ -402,7 +444,7 @@ Common arguments:
`userdir/config.json` or `config.json` whichever
exists). Multiple --config options may be used. Can be
set to `-` to read config from stdin.
-d PATH, --datadir PATH
-d PATH, --datadir PATH, --data-dir PATH
Path to directory with historical backtesting data.
--userdir PATH, --user-data-dir PATH
Path to userdata directory.
@@ -34,6 +34,7 @@ dependencies:
- schedule
- python-dateutil
- joblib
- pyarrow

# ============================
@@ -440,7 +440,7 @@ AVAILABLE_CLI_OPTIONS = {
    "dataformat_trades": Arg(
        '--data-format-trades',
        help='Storage format for downloaded trades data. (default: `jsongz`).',
        choices=constants.AVAILABLE_DATAHANDLERS,
        choices=constants.AVAILABLE_DATAHANDLERS_TRADES,
    ),
    "show_timerange": Arg(
        '--show-timerange',
@@ -36,7 +36,8 @@ AVAILABLE_PAIRLISTS = ['StaticPairList', 'VolumePairList',
                       'PrecisionFilter', 'PriceFilter', 'RangeStabilityFilter',
                       'ShuffleFilter', 'SpreadFilter', 'VolatilityFilter']
AVAILABLE_PROTECTIONS = ['CooldownPeriod', 'LowProfitPairs', 'MaxDrawdown', 'StoplossGuard']
AVAILABLE_DATAHANDLERS = ['json', 'jsongz', 'hdf5']
AVAILABLE_DATAHANDLERS_TRADES = ['json', 'jsongz', 'hdf5']
AVAILABLE_DATAHANDLERS = AVAILABLE_DATAHANDLERS_TRADES + ['feather', 'parquet']
BACKTEST_BREAKDOWNS = ['day', 'week', 'month']
BACKTEST_CACHE_AGE = ['none', 'day', 'week', 'month']
BACKTEST_CACHE_DEFAULT = 'day'

@@ -434,7 +435,7 @@ CONF_SCHEMA = {
        },
        'dataformat_trades': {
            'type': 'string',
            'enum': AVAILABLE_DATAHANDLERS,
            'enum': AVAILABLE_DATAHANDLERS_TRADES,
            'default': 'jsongz'
        },
        'position_adjustment_enable': {'type': 'boolean'},
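The practical effect of this split, spelled out as a small illustrative snippet (not part of the diff itself), is that OHLCV storage gains the two new formats while trades storage keeps the previous three, which matches the `--data-format-ohlcv` and `--data-format-trades` choices shown in the help output above:

```python
# What the two constants evaluate to after this change (illustrative only).
AVAILABLE_DATAHANDLERS_TRADES = ['json', 'jsongz', 'hdf5']
AVAILABLE_DATAHANDLERS = AVAILABLE_DATAHANDLERS_TRADES + ['feather', 'parquet']

assert AVAILABLE_DATAHANDLERS == ['json', 'jsongz', 'hdf5', 'feather', 'parquet']
```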
freqtrade/data/history/featherdatahandler.py (new file, 130 lines)
@@ -0,0 +1,130 @@
import logging
from typing import Optional

from pandas import DataFrame, read_feather, to_datetime

from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, TradeList
from freqtrade.enums import CandleType

from .idatahandler import IDataHandler


logger = logging.getLogger(__name__)


class FeatherDataHandler(IDataHandler):

    _columns = DEFAULT_DATAFRAME_COLUMNS

    def ohlcv_store(
            self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType) -> None:
        """
        Store ohlcv data in feather format.
        :param pair: Pair - used to generate filename
        :param timeframe: Timeframe - used to generate filename
        :param data: Dataframe containing OHLCV data
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :return: None
        """
        filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type)
        self.create_dir_if_needed(filename)

        data.reset_index(drop=True).loc[:, self._columns].to_feather(
            filename, compression_level=9, compression='lz4')

    def _ohlcv_load(self, pair: str, timeframe: str,
                    timerange: Optional[TimeRange], candle_type: CandleType
                    ) -> DataFrame:
        """
        Internal method used to load data for one pair from disk.
        Implements the loading and conversion to a Pandas dataframe.
        Timerange trimming and dataframe validation happens outside of this method.
        :param pair: Pair to load data
        :param timeframe: Timeframe (e.g. "5m")
        :param timerange: Limit data to be loaded to this timerange.
                          Optionally implemented by subclasses to avoid loading
                          all data where possible.
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :return: DataFrame with ohlcv data, or empty DataFrame
        """
        filename = self._pair_data_filename(
            self._datadir, pair, timeframe, candle_type=candle_type)
        if not filename.exists():
            # Fallback mode for 1M files
            filename = self._pair_data_filename(
                self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)
            if not filename.exists():
                return DataFrame(columns=self._columns)

        pairdata = read_feather(filename)
        pairdata.columns = self._columns
        pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                          'low': 'float', 'close': 'float', 'volume': 'float'})
        pairdata['date'] = to_datetime(pairdata['date'],
                                       unit='ms',
                                       utc=True,
                                       infer_datetime_format=True)
        return pairdata

    def ohlcv_append(
        self,
        pair: str,
        timeframe: str,
        data: DataFrame,
        candle_type: CandleType
    ) -> None:
        """
        Append data to existing data structures
        :param pair: Pair
        :param timeframe: Timeframe this ohlcv data is for
        :param data: Data to append.
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        """
        raise NotImplementedError()

    def trades_store(self, pair: str, data: TradeList) -> None:
        """
        Store trades data (list of Dicts) to file
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """
        # filename = self._pair_trades_filename(self._datadir, pair)

        raise NotImplementedError()
        # array = pa.array(data)
        # array
        # feather.write_feather(data, filename)

    def trades_append(self, pair: str, data: TradeList):
        """
        Append data to existing files
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """
        raise NotImplementedError()

    def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
        """
        Load a pair from file, either .json.gz or .json
        # TODO: respect timerange ...
        :param pair: Load trades for this pair
        :param timerange: Timerange to load trades for - currently not implemented
        :return: List of trades
        """
        raise NotImplementedError()
        # filename = self._pair_trades_filename(self._datadir, pair)
        # tradesdata = misc.file_load_json(filename)

        # if not tradesdata:
        #     return []

        # return tradesdata

    @classmethod
    def _get_file_extension(cls):
        return "feather"
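For orientation, a minimal, hedged usage sketch of the new handler via the factory function follows; the data directory and pair are assumptions, and it mirrors how the tests further down exercise the handlers:

```python
# Minimal sketch, assuming candles were downloaded with --data-format-ohlcv feather;
# the directory and pair below are illustrative only.
from pathlib import Path

from freqtrade.data.history.idatahandler import get_datahandler

dh = get_datahandler(Path("user_data/data/binance"), "feather")
candles = dh.ohlcv_load("BTC/USDT", "5m", candle_type="spot")
print(candles.tail())
```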
@@ -81,6 +81,7 @@ class HDF5DataHandler(IDataHandler):
            raise ValueError("Wrong dataframe format")
        pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                          'low': 'float', 'close': 'float', 'volume': 'float'})
        pairdata = pairdata.reset_index(drop=True)
        return pairdata

    def ohlcv_append(
@@ -375,6 +375,12 @@ def get_datahandlerclass(datatype: str) -> Type[IDataHandler]:
    elif datatype == 'hdf5':
        from .hdf5datahandler import HDF5DataHandler
        return HDF5DataHandler
    elif datatype == 'feather':
        from .featherdatahandler import FeatherDataHandler
        return FeatherDataHandler
    elif datatype == 'parquet':
        from .parquetdatahandler import ParquetDataHandler
        return ParquetDataHandler
    else:
        raise ValueError(f"No datahandler for datatype {datatype} available.")
freqtrade/data/history/parquetdatahandler.py (new file, 129 lines)
@@ -0,0 +1,129 @@
import logging
from typing import Optional

from pandas import DataFrame, read_parquet, to_datetime

from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, TradeList
from freqtrade.enums import CandleType

from .idatahandler import IDataHandler


logger = logging.getLogger(__name__)


class ParquetDataHandler(IDataHandler):

    _columns = DEFAULT_DATAFRAME_COLUMNS

    def ohlcv_store(
            self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType) -> None:
        """
        Store ohlcv data in parquet format.
        :param pair: Pair - used to generate filename
        :param timeframe: Timeframe - used to generate filename
        :param data: Dataframe containing OHLCV data
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :return: None
        """
        filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type)
        self.create_dir_if_needed(filename)

        data.reset_index(drop=True).loc[:, self._columns].to_parquet(filename)

    def _ohlcv_load(self, pair: str, timeframe: str,
                    timerange: Optional[TimeRange], candle_type: CandleType
                    ) -> DataFrame:
        """
        Internal method used to load data for one pair from disk.
        Implements the loading and conversion to a Pandas dataframe.
        Timerange trimming and dataframe validation happens outside of this method.
        :param pair: Pair to load data
        :param timeframe: Timeframe (e.g. "5m")
        :param timerange: Limit data to be loaded to this timerange.
                          Optionally implemented by subclasses to avoid loading
                          all data where possible.
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        :return: DataFrame with ohlcv data, or empty DataFrame
        """
        filename = self._pair_data_filename(
            self._datadir, pair, timeframe, candle_type=candle_type)
        if not filename.exists():
            # Fallback mode for 1M files
            filename = self._pair_data_filename(
                self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)
            if not filename.exists():
                return DataFrame(columns=self._columns)

        pairdata = read_parquet(filename)
        pairdata.columns = self._columns
        pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                          'low': 'float', 'close': 'float', 'volume': 'float'})
        pairdata['date'] = to_datetime(pairdata['date'],
                                       unit='ms',
                                       utc=True,
                                       infer_datetime_format=True)
        return pairdata

    def ohlcv_append(
        self,
        pair: str,
        timeframe: str,
        data: DataFrame,
        candle_type: CandleType
    ) -> None:
        """
        Append data to existing data structures
        :param pair: Pair
        :param timeframe: Timeframe this ohlcv data is for
        :param data: Data to append.
        :param candle_type: Any of the enum CandleType (must match trading mode!)
        """
        raise NotImplementedError()

    def trades_store(self, pair: str, data: TradeList) -> None:
        """
        Store trades data (list of Dicts) to file
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """
        # filename = self._pair_trades_filename(self._datadir, pair)

        raise NotImplementedError()
        # array = pa.array(data)
        # array
        # feather.write_feather(data, filename)

    def trades_append(self, pair: str, data: TradeList):
        """
        Append data to existing files
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """
        raise NotImplementedError()

    def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
        """
        Load a pair from file, either .json.gz or .json
        # TODO: respect timerange ...
        :param pair: Load trades for this pair
        :param timerange: Timerange to load trades for - currently not implemented
        :return: List of trades
        """
        raise NotImplementedError()
        # filename = self._pair_trades_filename(self._datadir, pair)
        # tradesdata = misc.file_load_json(filename)

        # if not tradesdata:
        #     return []

        # return tradesdata

    @classmethod
    def _get_file_extension(cls):
        return "parquet"
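A small, hedged round-trip sketch of the store/load pair on the new handler is shown below; the directory, pair, and sample candles are made up for illustration, and the generic test further down exercises the same pattern against real fixture data:

```python
# Round-trip sketch; directory, pair and sample candles are illustrative only.
from pathlib import Path

from pandas import DataFrame, to_datetime

from freqtrade.data.history.parquetdatahandler import ParquetDataHandler

datadir = Path("/tmp/parquet-demo")
datadir.mkdir(parents=True, exist_ok=True)

data = DataFrame({
    "date": to_datetime(["2022-09-13 19:00", "2022-09-13 19:05"], utc=True),
    "open": [20000.0, 20010.0],
    "high": [20020.0, 20030.0],
    "low": [19990.0, 20000.0],
    "close": [20010.0, 20025.0],
    "volume": [12.3, 9.8],
})

dh = ParquetDataHandler(datadir)
dh.ohlcv_store("BTC/USDT", "5m", data, candle_type="spot")
loaded = dh._ohlcv_load("BTC/USDT", "5m", timerange=None, candle_type="spot")
print(loaded)
```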
@@ -21,6 +21,7 @@ jinja2==3.1.2
tables==3.7.0
blosc==1.10.6
joblib==1.2.0
pyarrow==9.0.0

# find first, C search in arrays
py_find_1st==1.1.5

@@ -54,3 +55,4 @@ schedule==1.1.0
#WS Messages
websockets==10.3
janus==1.0.0
setup.py
@@ -8,13 +8,11 @@ hyperopt = [
    'scikit-learn',
    'scikit-optimize>=0.7.0',
    'filelock',
    'joblib',
    'progressbar2',
]

freqai = [
    'scikit-learn',
    'joblib',
    'catboost; platform_machine != "aarch64"',
    'lightgbm',
]

@@ -74,6 +72,8 @@ setup(
        'pandas',
        'tables',
        'blosc',
        'joblib',
        'pyarrow',
        'fastapi',
        'uvicorn',
        'psutil',
@@ -9,9 +9,11 @@ from pandas import DataFrame

from freqtrade.configuration import TimeRange
from freqtrade.constants import AVAILABLE_DATAHANDLERS
from freqtrade.data.history.featherdatahandler import FeatherDataHandler
from freqtrade.data.history.hdf5datahandler import HDF5DataHandler
from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler, get_datahandlerclass
from freqtrade.data.history.jsondatahandler import JsonDataHandler, JsonGzDataHandler
from freqtrade.data.history.parquetdatahandler import ParquetDataHandler
from freqtrade.enums import CandleType, TradingMode
from tests.conftest import log_has

@@ -152,6 +154,15 @@ def test_jsondatahandler_ohlcv_load(testdatadir, caplog):
    assert df.columns.equals(df1.columns)


@pytest.mark.parametrize('datahandler', ['feather', 'parquet'])
def test_datahandler_trades_not_supported(datahandler, testdatadir, ):
    dh = get_datahandler(testdatadir, datahandler)
    with pytest.raises(NotImplementedError):
        dh.trades_load('UNITTEST/ETH')
    with pytest.raises(NotImplementedError):
        dh.trades_store('UNITTEST/ETH', MagicMock())


def test_jsondatahandler_trades_load(testdatadir, caplog):
    dh = JsonGzDataHandler(testdatadir)
    logmsg = "Old trades format detected - converting"

@@ -312,6 +323,67 @@ def test_hdf5datahandler_ohlcv_load_and_resave(
    assert ohlcv.empty


@pytest.mark.parametrize('pair,timeframe,candle_type,candle_append,startdt,enddt', [
    # Data goes from 2018-01-10 - 2018-01-30
    ('UNITTEST/BTC', '5m', 'spot', '', '2018-01-15', '2018-01-19'),
    # Mark data goes from to 2021-11-15 2021-11-19
    ('UNITTEST/USDT', '1h', 'mark', '-mark', '2021-11-16', '2021-11-18'),
])
@pytest.mark.parametrize('datahandler', ['hdf5', 'feather', 'parquet'])
def test_generic_datahandler_ohlcv_load_and_resave(
    datahandler,
    testdatadir,
    tmpdir,
    pair,
    timeframe,
    candle_type,
    candle_append,
    startdt, enddt
):
    tmpdir1 = Path(tmpdir)
    tmpdir2 = tmpdir1
    if candle_type not in ('', 'spot'):
        tmpdir2 = tmpdir1 / 'futures'
        tmpdir2.mkdir()
    # Load data from one common file
    dhbase = get_datahandler(testdatadir, 'json')
    ohlcv = dhbase._ohlcv_load(pair, timeframe, None, candle_type=candle_type)
    assert isinstance(ohlcv, DataFrame)
    assert len(ohlcv) > 0

    # Get data to test
    dh = get_datahandler(testdatadir, datahandler)

    file = tmpdir2 / f"UNITTEST_NEW-{timeframe}{candle_append}.{dh._get_file_extension()}"
    assert not file.is_file()

    dh1 = get_datahandler(tmpdir1, datahandler)
    dh1.ohlcv_store('UNITTEST/NEW', timeframe, ohlcv, candle_type=candle_type)
    assert file.is_file()

    assert not ohlcv[ohlcv['date'] < startdt].empty

    timerange = TimeRange.parse_timerange(f"{startdt.replace('-', '')}-{enddt.replace('-', '')}")

    ohlcv = dhbase.ohlcv_load(pair, timeframe, timerange=timerange, candle_type=candle_type)
    if datahandler == 'hdf5':
        ohlcv1 = dh1._ohlcv_load('UNITTEST/NEW', timeframe, timerange, candle_type=candle_type)
        if candle_type == 'mark':
            ohlcv1['volume'] = 0.0
    else:
        ohlcv1 = dh1.ohlcv_load('UNITTEST/NEW', timeframe,
                                timerange=timerange, candle_type=candle_type)

    assert len(ohlcv) == len(ohlcv1)
    assert ohlcv.equals(ohlcv1)
    assert ohlcv[ohlcv['date'] < startdt].empty
    assert ohlcv[ohlcv['date'] > enddt].empty

    # Try loading inexisting file
    ohlcv = dh.ohlcv_load('UNITTEST/NONEXIST', timeframe, candle_type=candle_type)
    assert ohlcv.empty


def test_hdf5datahandler_ohlcv_purge(mocker, testdatadir):
    mocker.patch.object(Path, "exists", MagicMock(return_value=False))
    unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())

@@ -330,13 +402,24 @@ def test_gethandlerclass():
    cl = get_datahandlerclass('json')
    assert cl == JsonDataHandler
    assert issubclass(cl, IDataHandler)

    cl = get_datahandlerclass('jsongz')
    assert cl == JsonGzDataHandler
    assert issubclass(cl, IDataHandler)
    assert issubclass(cl, JsonDataHandler)

    cl = get_datahandlerclass('hdf5')
    assert cl == HDF5DataHandler
    assert issubclass(cl, IDataHandler)

    cl = get_datahandlerclass('feather')
    assert cl == FeatherDataHandler
    assert issubclass(cl, IDataHandler)

    cl = get_datahandlerclass('parquet')
    assert cl == ParquetDataHandler
    assert issubclass(cl, IDataHandler)

    with pytest.raises(ValueError, match=r"No datahandler for .*"):
        get_datahandlerclass('DeadBeef')