Add feather/parquet docs
This commit is contained in:
parent 48352b8a37
commit 7e1e388b9c

@@ -179,9 +179,11 @@ freqtrade download-data --exchange binance --pairs ETH/USDT XRP/USDT BTC/USDT --

Freqtrade currently supports the following data-formats for both OHLCV and trades data:

* `json` - plain "text" json files
* `jsongz` - a gzip-zipped version of json files
* `hdf5` - a high performance datastore
* `feather` - a dataformat based on Apache Arrow
* `parquet` - columnar datastore

By default, OHLCV data is stored as `json` data, while trades data is stored as `jsongz` data.

@@ -200,6 +202,42 @@ If the default data-format has been changed during download, then the keys `data

!!! Note
    You can convert between data-formats using the [convert-data](#sub-command-convert-data) and [convert-trade-data](#sub-command-convert-trade-data) methods.

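As a rough illustration (a sketch only; the exchange, timeframes and exact options are placeholders and can vary between freqtrade versions), converting previously downloaded data to feather could look like this:

``` bash
# Convert OHLCV candle data from the default json format to feather
freqtrade convert-data --format-from json --format-to feather --exchange binance -t 1m 5m

# Convert trades data from the default jsongz format to feather
freqtrade convert-trade-data --format-from jsongz --format-to feather --exchange binance
```
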
#### Dataformat comparison

The following comparisons have been made with the data listed below, using the Linux `time` command.

```
Found 6 pair / timeframe combinations.
+----------+-------------+--------+---------------------+---------------------+
|     Pair |   Timeframe |   Type |                From |                  To |
|----------+-------------+--------+---------------------+---------------------|
| BTC/USDT |          5m |   spot | 2017-08-17 04:00:00 | 2022-09-13 19:25:00 |
| ETH/USDT |          1m |   spot | 2017-08-17 04:00:00 | 2022-09-13 19:26:00 |
| BTC/USDT |          1m |   spot | 2017-08-17 04:00:00 | 2022-09-13 19:30:00 |
| XRP/USDT |          5m |   spot | 2018-05-04 08:10:00 | 2022-09-13 19:15:00 |
| XRP/USDT |          1m |   spot | 2018-05-04 08:11:00 | 2022-09-13 19:22:00 |
| ETH/USDT |          5m |   spot | 2017-08-17 04:00:00 | 2022-09-13 19:20:00 |
+----------+-------------+--------+---------------------+---------------------+
```

Timings have been taken in a not very scientific way with the following command, which forces reading the data into memory.

``` bash
time freqtrade list-data --show-timerange --data-format-ohlcv <dataformat>
```

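To repeat the measurement for every supported format in one go, the same command can be wrapped in a small shell loop (a sketch, not part of the original benchmark):

``` bash
# Time the same read for each supported OHLCV data format
for fmt in json jsongz hdf5 feather parquet; do
    echo "== ${fmt} =="
    time freqtrade list-data --show-timerange --data-format-ohlcv "${fmt}"
done
```
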
| Format | Size | Timing |
|------------|-------------|-------------|
| `json` | 149Mb | 25.6s |
| `jsongz` | 39Mb | 27s |
| `hdf5` | 145Mb | 3.9s |
| `feather` | 72Mb | 3.5s |
| `parquet` | 83Mb | 3.8s |


Size has been taken from the BTC/USDT 1m spot combination for the timerange specified above.

For the best performance/size mix, we recommend using either feather or parquet.

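If new downloads should use one of these formats right away, the format can also be selected at download time with the same `--data-format-ohlcv` option used above (a sketch; exchange, pairs and timeframes are placeholders):

``` bash
# Download OHLCV data directly into the feather format
freqtrade download-data --exchange binance --pairs BTC/USDT ETH/USDT -t 1m 5m --data-format-ohlcv feather
```
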
#### Sub-command convert data

```

@@ -58,12 +58,9 @@ class FeatherDataHandler(IDataHandler):
            self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)
        if not filename.exists():
            return DataFrame(columns=self._columns)
        try:
            pairdata = read_feather(filename)
            pairdata.columns = self._columns
        except ValueError:
            logger.error(f"Could not load data for {pair}.")
            return DataFrame(columns=self._columns)
        pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                          'low': 'float', 'close': 'float', 'volume': 'float'})
        pairdata['date'] = to_datetime(pairdata['date'],

@@ -57,12 +57,9 @@ class ParquetDataHandler(IDataHandler):
            self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)
        if not filename.exists():
            return DataFrame(columns=self._columns)
        try:
            pairdata = read_parquet(filename)
            pairdata.columns = self._columns
        except ValueError:
            logger.error(f"Could not load data for {pair}.")
            return DataFrame(columns=self._columns)
        pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                          'low': 'float', 'close': 'float', 'volume': 'float'})
        pairdata['date'] = to_datetime(pairdata['date'],