Merge pull request #7458 from xmatthias/dataformat/feather

new Dataformats feather and Parquet
This commit is contained in:
Matthias 2022-09-25 09:00:22 +02:00 committed by GitHub
commit f4fac53a13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 433 additions and 38 deletions

View File

@ -26,7 +26,7 @@ usage: freqtrade download-data [-h] [-v] [--logfile FILE] [-V] [-c PATH]
[--timerange TIMERANGE] [--dl-trades]
[--exchange EXCHANGE]
[-t TIMEFRAMES [TIMEFRAMES ...]] [--erase]
[--data-format-ohlcv {json,jsongz,hdf5}]
[--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}]
[--data-format-trades {json,jsongz,hdf5}]
[--trading-mode {spot,margin,futures}]
[--prepend]
@ -55,7 +55,7 @@ optional arguments:
list. Default: `1m 5m`.
--erase Clean all existing data for the selected
exchange/pairs/timeframes.
--data-format-ohlcv {json,jsongz,hdf5}
--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}
Storage format for downloaded candle (OHLCV) data.
(default: `json`).
--data-format-trades {json,jsongz,hdf5}
@ -76,7 +76,7 @@ Common arguments:
`userdir/config.json` or `config.json` whichever
exists). Multiple --config options may be used. Can be
set to `-` to read config from stdin.
-d PATH, --datadir PATH
-d PATH, --datadir PATH, --data-dir PATH
Path to directory with historical backtesting data.
--userdir PATH, --user-data-dir PATH
Path to userdata directory.
@ -179,9 +179,11 @@ freqtrade download-data --exchange binance --pairs ETH/USDT XRP/USDT BTC/USDT --
Freqtrade currently supports 3 data-formats for both OHLCV and trades data:
* `json` (plain "text" json files)
* `jsongz` (a gzip-zipped version of json files)
* `hdf5` (a high performance datastore)
* `json` - plain "text" json files
* `jsongz` - a gzip-zipped version of json files
* `hdf5` - a high performance datastore
* `feather` - a dataformat based on Apache Arrow
* `parquet` - columnar datastore
By default, OHLCV data is stored as `json` data, while trades data is stored as `jsongz` data.
@ -200,38 +202,74 @@ If the default data-format has been changed during download, then the keys `data
!!! Note
You can convert between data-formats using the [convert-data](#sub-command-convert-data) and [convert-trade-data](#sub-command-convert-trade-data) methods.
#### Dataformat comparison
The following comparisons have been made with the following data, and by using the linux `time` command.
```
Found 6 pair / timeframe combinations.
+----------+-------------+--------+---------------------+---------------------+
| Pair | Timeframe | Type | From | To |
|----------+-------------+--------+---------------------+---------------------|
| BTC/USDT | 5m | spot | 2017-08-17 04:00:00 | 2022-09-13 19:25:00 |
| ETH/USDT | 1m | spot | 2017-08-17 04:00:00 | 2022-09-13 19:26:00 |
| BTC/USDT | 1m | spot | 2017-08-17 04:00:00 | 2022-09-13 19:30:00 |
| XRP/USDT | 5m | spot | 2018-05-04 08:10:00 | 2022-09-13 19:15:00 |
| XRP/USDT | 1m | spot | 2018-05-04 08:11:00 | 2022-09-13 19:22:00 |
| ETH/USDT | 5m | spot | 2017-08-17 04:00:00 | 2022-09-13 19:20:00 |
+----------+-------------+--------+---------------------+---------------------+
```
Timings have been taken in a not very scientific way with the following command, which forces reading the data into memory.
``` bash
time freqtrade list-data --show-timerange --data-format-ohlcv <dataformat>
```
| Format | Size | timing |
|------------|-------------|-------------|
| `json` | 149Mb | 25.6s |
| `jsongz` | 39Mb | 27s |
| `hdf5` | 145Mb | 3.9s |
| `feather` | 72Mb | 3.5s |
| `parquet` | 83Mb | 3.8s |
Size has been taken from the BTC/USDT 1m spot combination for the timerange specified above.
To have a best performance/size mix, we recommend the use of either feather or parquet.
#### Sub-command convert data
```
usage: freqtrade convert-data [-h] [-v] [--logfile FILE] [-V] [-c PATH]
[-d PATH] [--userdir PATH]
[-p PAIRS [PAIRS ...]] --format-from
{json,jsongz,hdf5} --format-to
{json,jsongz,hdf5} [--erase]
[-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...]]
{json,jsongz,hdf5,feather,parquet} --format-to
{json,jsongz,hdf5,feather,parquet} [--erase]
[--exchange EXCHANGE]
[-t TIMEFRAMES [TIMEFRAMES ...]]
[--trading-mode {spot,margin,futures}]
[--candle-types {spot,,futures,mark,index,premiumIndex,funding_rate} [{spot,,futures,mark,index,premiumIndex,funding_rate} ...]]
[--candle-types {spot,futures,mark,index,premiumIndex,funding_rate} [{spot,futures,mark,index,premiumIndex,funding_rate} ...]]
optional arguments:
-h, --help show this help message and exit
-p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
Limit command to these pairs. Pairs are space-
separated.
--format-from {json,jsongz,hdf5}
--format-from {json,jsongz,hdf5,feather,parquet}
Source format for data conversion.
--format-to {json,jsongz,hdf5}
--format-to {json,jsongz,hdf5,feather,parquet}
Destination format for data conversion.
--erase Clean all existing data for the selected
exchange/pairs/timeframes.
-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...], --timeframes {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...]
Specify which tickers to download. Space-separated
list. Default: `1m 5m`.
--exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no
config is provided.
--trading-mode {spot,margin,futures}
-t TIMEFRAMES [TIMEFRAMES ...], --timeframes TIMEFRAMES [TIMEFRAMES ...]
Specify which tickers to download. Space-separated
list. Default: `1m 5m`.
--trading-mode {spot,margin,futures}, --tradingmode {spot,margin,futures}
Select Trading mode
--candle-types {spot,,futures,mark,index,premiumIndex,funding_rate} [{spot,,futures,mark,index,premiumIndex,funding_rate} ...]
--candle-types {spot,futures,mark,index,premiumIndex,funding_rate} [{spot,futures,mark,index,premiumIndex,funding_rate} ...]
Select candle type to use
Common arguments:
@ -245,7 +283,7 @@ Common arguments:
`userdir/config.json` or `config.json` whichever
exists). Multiple --config options may be used. Can be
set to `-` to read config from stdin.
-d PATH, --datadir PATH
-d PATH, --datadir PATH, --data-dir PATH
Path to directory with historical backtesting data.
--userdir PATH, --user-data-dir PATH
Path to userdata directory.
@ -267,20 +305,24 @@ freqtrade convert-data --format-from json --format-to jsongz --datadir ~/.freqtr
usage: freqtrade convert-trade-data [-h] [-v] [--logfile FILE] [-V] [-c PATH]
[-d PATH] [--userdir PATH]
[-p PAIRS [PAIRS ...]] --format-from
{json,jsongz,hdf5} --format-to
{json,jsongz,hdf5} [--erase]
{json,jsongz,hdf5,feather,parquet}
--format-to
{json,jsongz,hdf5,feather,parquet}
[--erase] [--exchange EXCHANGE]
optional arguments:
-h, --help show this help message and exit
-p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
Show profits for only these pairs. Pairs are space-
Limit command to these pairs. Pairs are space-
separated.
--format-from {json,jsongz,hdf5}
--format-from {json,jsongz,hdf5,feather,parquet}
Source format for data conversion.
--format-to {json,jsongz,hdf5}
--format-to {json,jsongz,hdf5,feather,parquet}
Destination format for data conversion.
--erase Clean all existing data for the selected
exchange/pairs/timeframes.
--exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no
config is provided.
Common arguments:
-v, --verbose Verbose mode (-vv for more, -vvv to get all messages).
@ -293,7 +335,7 @@ Common arguments:
`userdir/config.json` or `config.json` whichever
exists). Multiple --config options may be used. Can be
set to `-` to read config from stdin.
-d PATH, --datadir PATH
-d PATH, --datadir PATH, --data-dir PATH
Path to directory with historical backtesting data.
--userdir PATH, --user-data-dir PATH
Path to userdata directory.
@ -318,9 +360,9 @@ This command will allow you to repeat this last step for additional timeframes w
usage: freqtrade trades-to-ohlcv [-h] [-v] [--logfile FILE] [-V] [-c PATH]
[-d PATH] [--userdir PATH]
[-p PAIRS [PAIRS ...]]
[-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...]]
[-t TIMEFRAMES [TIMEFRAMES ...]]
[--exchange EXCHANGE]
[--data-format-ohlcv {json,jsongz,hdf5}]
[--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}]
[--data-format-trades {json,jsongz,hdf5}]
optional arguments:
@ -328,12 +370,12 @@ optional arguments:
-p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
Limit command to these pairs. Pairs are space-
separated.
-t {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...], --timeframes {1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} [{1m,3m,5m,15m,30m,1h,2h,4h,6h,8h,12h,1d,3d,1w,2w,1M,1y} ...]
-t TIMEFRAMES [TIMEFRAMES ...], --timeframes TIMEFRAMES [TIMEFRAMES ...]
Specify which tickers to download. Space-separated
list. Default: `1m 5m`.
--exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no
config is provided.
--data-format-ohlcv {json,jsongz,hdf5}
--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}
Storage format for downloaded candle (OHLCV) data.
(default: `json`).
--data-format-trades {json,jsongz,hdf5}
@ -351,7 +393,7 @@ Common arguments:
`userdir/config.json` or `config.json` whichever
exists). Multiple --config options may be used. Can be
set to `-` to read config from stdin.
-d PATH, --datadir PATH
-d PATH, --datadir PATH, --data-dir PATH
Path to directory with historical backtesting data.
--userdir PATH, --user-data-dir PATH
Path to userdata directory.
@ -371,7 +413,7 @@ You can get a list of downloaded data using the `list-data` sub-command.
```
usage: freqtrade list-data [-h] [-v] [--logfile FILE] [-V] [-c PATH] [-d PATH]
[--userdir PATH] [--exchange EXCHANGE]
[--data-format-ohlcv {json,jsongz,hdf5}]
[--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}]
[-p PAIRS [PAIRS ...]]
[--trading-mode {spot,margin,futures}]
[--show-timerange]
@ -380,13 +422,13 @@ optional arguments:
-h, --help show this help message and exit
--exchange EXCHANGE Exchange name (default: `bittrex`). Only valid if no
config is provided.
--data-format-ohlcv {json,jsongz,hdf5}
--data-format-ohlcv {json,jsongz,hdf5,feather,parquet}
Storage format for downloaded candle (OHLCV) data.
(default: `json`).
-p PAIRS [PAIRS ...], --pairs PAIRS [PAIRS ...]
Limit command to these pairs. Pairs are space-
separated.
--trading-mode {spot,margin,futures}
--trading-mode {spot,margin,futures}, --tradingmode {spot,margin,futures}
Select Trading mode
--show-timerange Show timerange available for available data. (May take
a while to calculate).
@ -402,7 +444,7 @@ Common arguments:
`userdir/config.json` or `config.json` whichever
exists). Multiple --config options may be used. Can be
set to `-` to read config from stdin.
-d PATH, --datadir PATH
-d PATH, --datadir PATH, --data-dir PATH
Path to directory with historical backtesting data.
--userdir PATH, --user-data-dir PATH
Path to userdata directory.

View File

@ -34,6 +34,7 @@ dependencies:
- schedule
- python-dateutil
- joblib
- pyarrow
# ============================

View File

@ -440,7 +440,7 @@ AVAILABLE_CLI_OPTIONS = {
"dataformat_trades": Arg(
'--data-format-trades',
help='Storage format for downloaded trades data. (default: `jsongz`).',
choices=constants.AVAILABLE_DATAHANDLERS,
choices=constants.AVAILABLE_DATAHANDLERS_TRADES,
),
"show_timerange": Arg(
'--show-timerange',

View File

@ -36,7 +36,8 @@ AVAILABLE_PAIRLISTS = ['StaticPairList', 'VolumePairList',
'PrecisionFilter', 'PriceFilter', 'RangeStabilityFilter',
'ShuffleFilter', 'SpreadFilter', 'VolatilityFilter']
AVAILABLE_PROTECTIONS = ['CooldownPeriod', 'LowProfitPairs', 'MaxDrawdown', 'StoplossGuard']
AVAILABLE_DATAHANDLERS = ['json', 'jsongz', 'hdf5']
AVAILABLE_DATAHANDLERS_TRADES = ['json', 'jsongz', 'hdf5']
AVAILABLE_DATAHANDLERS = AVAILABLE_DATAHANDLERS_TRADES + ['feather', 'parquet']
BACKTEST_BREAKDOWNS = ['day', 'week', 'month']
BACKTEST_CACHE_AGE = ['none', 'day', 'week', 'month']
BACKTEST_CACHE_DEFAULT = 'day'
@ -434,7 +435,7 @@ CONF_SCHEMA = {
},
'dataformat_trades': {
'type': 'string',
'enum': AVAILABLE_DATAHANDLERS,
'enum': AVAILABLE_DATAHANDLERS_TRADES,
'default': 'jsongz'
},
'position_adjustment_enable': {'type': 'boolean'},

View File

@ -0,0 +1,130 @@
import logging
from typing import Optional
from pandas import DataFrame, read_feather, to_datetime
from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, TradeList
from freqtrade.enums import CandleType
from .idatahandler import IDataHandler
logger = logging.getLogger(__name__)
class FeatherDataHandler(IDataHandler):
_columns = DEFAULT_DATAFRAME_COLUMNS
def ohlcv_store(
self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType) -> None:
"""
Store data in json format "values".
format looks as follows:
[[<date>,<open>,<high>,<low>,<close>]]
:param pair: Pair - used to generate filename
:param timeframe: Timeframe - used to generate filename
:param data: Dataframe containing OHLCV data
:param candle_type: Any of the enum CandleType (must match trading mode!)
:return: None
"""
filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type)
self.create_dir_if_needed(filename)
data.reset_index(drop=True).loc[:, self._columns].to_feather(
filename, compression_level=9, compression='lz4')
def _ohlcv_load(self, pair: str, timeframe: str,
timerange: Optional[TimeRange], candle_type: CandleType
) -> DataFrame:
"""
Internal method used to load data for one pair from disk.
Implements the loading and conversion to a Pandas dataframe.
Timerange trimming and dataframe validation happens outside of this method.
:param pair: Pair to load data
:param timeframe: Timeframe (e.g. "5m")
:param timerange: Limit data to be loaded to this timerange.
Optionally implemented by subclasses to avoid loading
all data where possible.
:param candle_type: Any of the enum CandleType (must match trading mode!)
:return: DataFrame with ohlcv data, or empty DataFrame
"""
filename = self._pair_data_filename(
self._datadir, pair, timeframe, candle_type=candle_type)
if not filename.exists():
# Fallback mode for 1M files
filename = self._pair_data_filename(
self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)
if not filename.exists():
return DataFrame(columns=self._columns)
pairdata = read_feather(filename)
pairdata.columns = self._columns
pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
'low': 'float', 'close': 'float', 'volume': 'float'})
pairdata['date'] = to_datetime(pairdata['date'],
unit='ms',
utc=True,
infer_datetime_format=True)
return pairdata
def ohlcv_append(
self,
pair: str,
timeframe: str,
data: DataFrame,
candle_type: CandleType
) -> None:
"""
Append data to existing data structures
:param pair: Pair
:param timeframe: Timeframe this ohlcv data is for
:param data: Data to append.
:param candle_type: Any of the enum CandleType (must match trading mode!)
"""
raise NotImplementedError()
def trades_store(self, pair: str, data: TradeList) -> None:
"""
Store trades data (list of Dicts) to file
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
column sequence as in DEFAULT_TRADES_COLUMNS
"""
# filename = self._pair_trades_filename(self._datadir, pair)
raise NotImplementedError()
# array = pa.array(data)
# array
# feather.write_feather(data, filename)
def trades_append(self, pair: str, data: TradeList):
"""
Append data to existing files
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
column sequence as in DEFAULT_TRADES_COLUMNS
"""
raise NotImplementedError()
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
"""
Load a pair from file, either .json.gz or .json
# TODO: respect timerange ...
:param pair: Load trades for this pair
:param timerange: Timerange to load trades for - currently not implemented
:return: List of trades
"""
raise NotImplementedError()
# filename = self._pair_trades_filename(self._datadir, pair)
# tradesdata = misc.file_load_json(filename)
# if not tradesdata:
# return []
# return tradesdata
@classmethod
def _get_file_extension(cls):
return "feather"

View File

@ -81,6 +81,7 @@ class HDF5DataHandler(IDataHandler):
raise ValueError("Wrong dataframe format")
pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
'low': 'float', 'close': 'float', 'volume': 'float'})
pairdata = pairdata.reset_index(drop=True)
return pairdata
def ohlcv_append(

View File

@ -375,6 +375,12 @@ def get_datahandlerclass(datatype: str) -> Type[IDataHandler]:
elif datatype == 'hdf5':
from .hdf5datahandler import HDF5DataHandler
return HDF5DataHandler
elif datatype == 'feather':
from .featherdatahandler import FeatherDataHandler
return FeatherDataHandler
elif datatype == 'parquet':
from .parquetdatahandler import ParquetDataHandler
return ParquetDataHandler
else:
raise ValueError(f"No datahandler for datatype {datatype} available.")

View File

@ -0,0 +1,129 @@
import logging
from typing import Optional
from pandas import DataFrame, read_parquet, to_datetime
from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, TradeList
from freqtrade.enums import CandleType
from .idatahandler import IDataHandler
logger = logging.getLogger(__name__)
class ParquetDataHandler(IDataHandler):
_columns = DEFAULT_DATAFRAME_COLUMNS
def ohlcv_store(
self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType) -> None:
"""
Store data in json format "values".
format looks as follows:
[[<date>,<open>,<high>,<low>,<close>]]
:param pair: Pair - used to generate filename
:param timeframe: Timeframe - used to generate filename
:param data: Dataframe containing OHLCV data
:param candle_type: Any of the enum CandleType (must match trading mode!)
:return: None
"""
filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type)
self.create_dir_if_needed(filename)
data.reset_index(drop=True).loc[:, self._columns].to_parquet(filename)
def _ohlcv_load(self, pair: str, timeframe: str,
timerange: Optional[TimeRange], candle_type: CandleType
) -> DataFrame:
"""
Internal method used to load data for one pair from disk.
Implements the loading and conversion to a Pandas dataframe.
Timerange trimming and dataframe validation happens outside of this method.
:param pair: Pair to load data
:param timeframe: Timeframe (e.g. "5m")
:param timerange: Limit data to be loaded to this timerange.
Optionally implemented by subclasses to avoid loading
all data where possible.
:param candle_type: Any of the enum CandleType (must match trading mode!)
:return: DataFrame with ohlcv data, or empty DataFrame
"""
filename = self._pair_data_filename(
self._datadir, pair, timeframe, candle_type=candle_type)
if not filename.exists():
# Fallback mode for 1M files
filename = self._pair_data_filename(
self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True)
if not filename.exists():
return DataFrame(columns=self._columns)
pairdata = read_parquet(filename)
pairdata.columns = self._columns
pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
'low': 'float', 'close': 'float', 'volume': 'float'})
pairdata['date'] = to_datetime(pairdata['date'],
unit='ms',
utc=True,
infer_datetime_format=True)
return pairdata
def ohlcv_append(
self,
pair: str,
timeframe: str,
data: DataFrame,
candle_type: CandleType
) -> None:
"""
Append data to existing data structures
:param pair: Pair
:param timeframe: Timeframe this ohlcv data is for
:param data: Data to append.
:param candle_type: Any of the enum CandleType (must match trading mode!)
"""
raise NotImplementedError()
def trades_store(self, pair: str, data: TradeList) -> None:
"""
Store trades data (list of Dicts) to file
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
column sequence as in DEFAULT_TRADES_COLUMNS
"""
# filename = self._pair_trades_filename(self._datadir, pair)
raise NotImplementedError()
# array = pa.array(data)
# array
# feather.write_feather(data, filename)
def trades_append(self, pair: str, data: TradeList):
"""
Append data to existing files
:param pair: Pair - used for filename
:param data: List of Lists containing trade data,
column sequence as in DEFAULT_TRADES_COLUMNS
"""
raise NotImplementedError()
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
"""
Load a pair from file, either .json.gz or .json
# TODO: respect timerange ...
:param pair: Load trades for this pair
:param timerange: Timerange to load trades for - currently not implemented
:return: List of trades
"""
raise NotImplementedError()
# filename = self._pair_trades_filename(self._datadir, pair)
# tradesdata = misc.file_load_json(filename)
# if not tradesdata:
# return []
# return tradesdata
@classmethod
def _get_file_extension(cls):
return "parquet"

View File

@ -21,6 +21,7 @@ jinja2==3.1.2
tables==3.7.0
blosc==1.10.6
joblib==1.2.0
pyarrow==9.0.0
# find first, C search in arrays
py_find_1st==1.1.5
@ -54,3 +55,4 @@ schedule==1.1.0
#WS Messages
websockets==10.3
janus==1.0.0

View File

@ -8,13 +8,11 @@ hyperopt = [
'scikit-learn',
'scikit-optimize>=0.7.0',
'filelock',
'joblib',
'progressbar2',
]
freqai = [
'scikit-learn',
'joblib',
'catboost; platform_machine != "aarch64"',
'lightgbm',
]
@ -74,6 +72,8 @@ setup(
'pandas',
'tables',
'blosc',
'joblib',
'pyarrow',
'fastapi',
'uvicorn',
'psutil',

View File

@ -9,9 +9,11 @@ from pandas import DataFrame
from freqtrade.configuration import TimeRange
from freqtrade.constants import AVAILABLE_DATAHANDLERS
from freqtrade.data.history.featherdatahandler import FeatherDataHandler
from freqtrade.data.history.hdf5datahandler import HDF5DataHandler
from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler, get_datahandlerclass
from freqtrade.data.history.jsondatahandler import JsonDataHandler, JsonGzDataHandler
from freqtrade.data.history.parquetdatahandler import ParquetDataHandler
from freqtrade.enums import CandleType, TradingMode
from tests.conftest import log_has
@ -152,6 +154,15 @@ def test_jsondatahandler_ohlcv_load(testdatadir, caplog):
assert df.columns.equals(df1.columns)
@pytest.mark.parametrize('datahandler', ['feather', 'parquet'])
def test_datahandler_trades_not_supported(datahandler, testdatadir, ):
dh = get_datahandler(testdatadir, datahandler)
with pytest.raises(NotImplementedError):
dh.trades_load('UNITTEST/ETH')
with pytest.raises(NotImplementedError):
dh.trades_store('UNITTEST/ETH', MagicMock())
def test_jsondatahandler_trades_load(testdatadir, caplog):
dh = JsonGzDataHandler(testdatadir)
logmsg = "Old trades format detected - converting"
@ -312,6 +323,67 @@ def test_hdf5datahandler_ohlcv_load_and_resave(
assert ohlcv.empty
@pytest.mark.parametrize('pair,timeframe,candle_type,candle_append,startdt,enddt', [
# Data goes from 2018-01-10 - 2018-01-30
('UNITTEST/BTC', '5m', 'spot', '', '2018-01-15', '2018-01-19'),
# Mark data goes from to 2021-11-15 2021-11-19
('UNITTEST/USDT', '1h', 'mark', '-mark', '2021-11-16', '2021-11-18'),
])
@pytest.mark.parametrize('datahandler', ['hdf5', 'feather', 'parquet'])
def test_generic_datahandler_ohlcv_load_and_resave(
datahandler,
testdatadir,
tmpdir,
pair,
timeframe,
candle_type,
candle_append,
startdt, enddt
):
tmpdir1 = Path(tmpdir)
tmpdir2 = tmpdir1
if candle_type not in ('', 'spot'):
tmpdir2 = tmpdir1 / 'futures'
tmpdir2.mkdir()
# Load data from one common file
dhbase = get_datahandler(testdatadir, 'json')
ohlcv = dhbase._ohlcv_load(pair, timeframe, None, candle_type=candle_type)
assert isinstance(ohlcv, DataFrame)
assert len(ohlcv) > 0
# Get data to test
dh = get_datahandler(testdatadir, datahandler)
file = tmpdir2 / f"UNITTEST_NEW-{timeframe}{candle_append}.{dh._get_file_extension()}"
assert not file.is_file()
dh1 = get_datahandler(tmpdir1, datahandler)
dh1.ohlcv_store('UNITTEST/NEW', timeframe, ohlcv, candle_type=candle_type)
assert file.is_file()
assert not ohlcv[ohlcv['date'] < startdt].empty
timerange = TimeRange.parse_timerange(f"{startdt.replace('-', '')}-{enddt.replace('-', '')}")
ohlcv = dhbase.ohlcv_load(pair, timeframe, timerange=timerange, candle_type=candle_type)
if datahandler == 'hdf5':
ohlcv1 = dh1._ohlcv_load('UNITTEST/NEW', timeframe, timerange, candle_type=candle_type)
if candle_type == 'mark':
ohlcv1['volume'] = 0.0
else:
ohlcv1 = dh1.ohlcv_load('UNITTEST/NEW', timeframe,
timerange=timerange, candle_type=candle_type)
assert len(ohlcv) == len(ohlcv1)
assert ohlcv.equals(ohlcv1)
assert ohlcv[ohlcv['date'] < startdt].empty
assert ohlcv[ohlcv['date'] > enddt].empty
# Try loading inexisting file
ohlcv = dh.ohlcv_load('UNITTEST/NONEXIST', timeframe, candle_type=candle_type)
assert ohlcv.empty
def test_hdf5datahandler_ohlcv_purge(mocker, testdatadir):
mocker.patch.object(Path, "exists", MagicMock(return_value=False))
unlinkmock = mocker.patch.object(Path, "unlink", MagicMock())
@ -330,13 +402,24 @@ def test_gethandlerclass():
cl = get_datahandlerclass('json')
assert cl == JsonDataHandler
assert issubclass(cl, IDataHandler)
cl = get_datahandlerclass('jsongz')
assert cl == JsonGzDataHandler
assert issubclass(cl, IDataHandler)
assert issubclass(cl, JsonDataHandler)
cl = get_datahandlerclass('hdf5')
assert cl == HDF5DataHandler
assert issubclass(cl, IDataHandler)
cl = get_datahandlerclass('feather')
assert cl == FeatherDataHandler
assert issubclass(cl, IDataHandler)
cl = get_datahandlerclass('parquet')
assert cl == ParquetDataHandler
assert issubclass(cl, IDataHandler)
with pytest.raises(ValueError, match=r"No datahandler for .*"):
get_datahandlerclass('DeadBeef')