Add prepend option to download-data

This commit is contained in:
Matthias 2022-04-30 17:24:57 +02:00
parent 11d447cd5a
commit f6a7e6b785
7 changed files with 49 additions and 14 deletions

View File

@ -30,6 +30,7 @@ usage: freqtrade download-data [-h] [-v] [--logfile FILE] [-V] [-c PATH]
[--data-format-ohlcv {json,jsongz,hdf5}] [--data-format-ohlcv {json,jsongz,hdf5}]
[--data-format-trades {json,jsongz,hdf5}] [--data-format-trades {json,jsongz,hdf5}]
[--trading-mode {spot,margin,futures}] [--trading-mode {spot,margin,futures}]
[--prepend]
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
@ -62,6 +63,7 @@ optional arguments:
`jsongz`). `jsongz`).
--trading-mode {spot,margin,futures} --trading-mode {spot,margin,futures}
Select Trading mode Select Trading mode
--prepend Allow data prepending.
Common arguments: Common arguments:
-v, --verbose Verbose mode (-vv for more, -vvv to get all messages). -v, --verbose Verbose mode (-vv for more, -vvv to get all messages).
@ -157,10 +159,21 @@ freqtrade download-data --exchange binance --pairs .*/USDT
- To change the exchange used to download the historical data from, please use a different configuration file (you'll probably need to adjust rate limits etc.) - To change the exchange used to download the historical data from, please use a different configuration file (you'll probably need to adjust rate limits etc.)
- To use `pairs.json` from some other directory, use `--pairs-file some_other_dir/pairs.json`. - To use `pairs.json` from some other directory, use `--pairs-file some_other_dir/pairs.json`.
- To download historical candle (OHLCV) data for only 10 days, use `--days 10` (defaults to 30 days). - To download historical candle (OHLCV) data for only 10 days, use `--days 10` (defaults to 30 days).
- To download historical candle (OHLCV) data from a fixed starting point, use `--timerange 20200101-` - which will download all data from January 1st, 2020. Eventually set end dates are ignored. - To download historical candle (OHLCV) data from a fixed starting point, use `--timerange 20200101-` - which will download all data from January 1st, 2020.
- Use `--timeframes` to specify what timeframe download the historical candle (OHLCV) data for. Default is `--timeframes 1m 5m` which will download 1-minute and 5-minute data. - Use `--timeframes` to specify what timeframe download the historical candle (OHLCV) data for. Default is `--timeframes 1m 5m` which will download 1-minute and 5-minute data.
- To use exchange, timeframe and list of pairs as defined in your configuration file, use the `-c/--config` option. With this, the script uses the whitelist defined in the config as the list of currency pairs to download data for and does not require the pairs.json file. You can combine `-c/--config` with most other options. - To use exchange, timeframe and list of pairs as defined in your configuration file, use the `-c/--config` option. With this, the script uses the whitelist defined in the config as the list of currency pairs to download data for and does not require the pairs.json file. You can combine `-c/--config` with most other options.
#### Download additional data before the current timerange
Assuming you downloaded all data from 2022 (`--timerange 20220101-`) - but you'd now like to also backtest with earlier data.
You can do so by using the `--prepend` flag, combined with
``` bash
freqtrade download-data --exchange binance --pairs ETH/USDT XRP/USDT BTC/USDT --prepend --timerange 20210101-20220101
```
!!! Note
Freqtrade will ignore the end-date in this mode if data is available, updating the end-date to the existing data start point.
### Data format ### Data format

View File

@ -72,7 +72,8 @@ ARGS_LIST_DATA = ["exchange", "dataformat_ohlcv", "pairs", "trading_mode"]
ARGS_DOWNLOAD_DATA = ["pairs", "pairs_file", "days", "new_pairs_days", "include_inactive", ARGS_DOWNLOAD_DATA = ["pairs", "pairs_file", "days", "new_pairs_days", "include_inactive",
"timerange", "download_trades", "exchange", "timeframes", "timerange", "download_trades", "exchange", "timeframes",
"erase", "dataformat_ohlcv", "dataformat_trades", "trading_mode"] "erase", "dataformat_ohlcv", "dataformat_trades", "trading_mode",
"prepend_data"]
ARGS_PLOT_DATAFRAME = ["pairs", "indicators1", "indicators2", "plot_limit", ARGS_PLOT_DATAFRAME = ["pairs", "indicators1", "indicators2", "plot_limit",
"db_url", "trade_source", "export", "exportfilename", "db_url", "trade_source", "export", "exportfilename",

View File

@ -443,6 +443,11 @@ AVAILABLE_CLI_OPTIONS = {
default=['1m', '5m'], default=['1m', '5m'],
nargs='+', nargs='+',
), ),
"prepend_data": Arg(
'--prepend',
help='Allow data prepending.',
action='store_true',
),
"erase": Arg( "erase": Arg(
'--erase', '--erase',
help='Clean all existing data for the selected exchange/pairs/timeframes.', help='Clean all existing data for the selected exchange/pairs/timeframes.',

View File

@ -85,6 +85,7 @@ def start_download_data(args: Dict[str, Any]) -> None:
new_pairs_days=config['new_pairs_days'], new_pairs_days=config['new_pairs_days'],
erase=bool(config.get('erase')), data_format=config['dataformat_ohlcv'], erase=bool(config.get('erase')), data_format=config['dataformat_ohlcv'],
trading_mode=config.get('trading_mode', 'spot'), trading_mode=config.get('trading_mode', 'spot'),
prepend=config.get('prepend_data', False)
) )
except KeyboardInterrupt: except KeyboardInterrupt:

View File

@ -393,6 +393,8 @@ class Configuration:
self._args_to_config(config, argname='trade_source', self._args_to_config(config, argname='trade_source',
logstring='Using trades from: {}') logstring='Using trades from: {}')
self._args_to_config(config, argname='prepend_data',
logstring='Prepend detected. Allowing data prepending.')
self._args_to_config(config, argname='erase', self._args_to_config(config, argname='erase',
logstring='Erase detected. Deleting existing data.') logstring='Erase detected. Deleting existing data.')

View File

@ -172,7 +172,6 @@ def _load_cached_data_for_updating(
end = data.iloc[0]['date'] end = data.iloc[0]['date']
else: else:
start = data.iloc[-1]['date'] start = data.iloc[-1]['date']
start_ms = int(start.timestamp() * 1000) if start else None start_ms = int(start.timestamp() * 1000) if start else None
end_ms = int(end.timestamp() * 1000) if end else None end_ms = int(end.timestamp() * 1000) if end else None
return data, start_ms, end_ms return data, start_ms, end_ms
@ -188,6 +187,7 @@ def _download_pair_history(pair: str, *,
timerange: Optional[TimeRange] = None, timerange: Optional[TimeRange] = None,
candle_type: CandleType, candle_type: CandleType,
erase: bool = False, erase: bool = False,
prepend: bool = False,
) -> bool: ) -> bool:
""" """
Download latest candles from the exchange for the pair and timeframe passed in parameters Download latest candles from the exchange for the pair and timeframe passed in parameters
@ -195,8 +195,6 @@ def _download_pair_history(pair: str, *,
exists in a cache. If timerange starts earlier than the data in the cache, exists in a cache. If timerange starts earlier than the data in the cache,
the full data will be redownloaded the full data will be redownloaded
Based on @Rybolov work: https://github.com/rybolov/freqtrade-data
:param pair: pair to download :param pair: pair to download
:param timeframe: Timeframe (e.g "5m") :param timeframe: Timeframe (e.g "5m")
:param timerange: range of time to download :param timerange: range of time to download
@ -211,17 +209,17 @@ def _download_pair_history(pair: str, *,
if data_handler.ohlcv_purge(pair, timeframe, candle_type=candle_type): if data_handler.ohlcv_purge(pair, timeframe, candle_type=candle_type):
logger.info(f'Deleting existing data for pair {pair}, {timeframe}, {candle_type}.') logger.info(f'Deleting existing data for pair {pair}, {timeframe}, {candle_type}.')
logger.info(
f'Download history data for pair: "{pair}" ({process}), timeframe: {timeframe}, '
f'candle type: {candle_type} and store in {datadir}.'
)
data, since_ms, until_ms = _load_cached_data_for_updating( data, since_ms, until_ms = _load_cached_data_for_updating(
pair, timeframe, timerange, pair, timeframe, timerange,
data_handler=data_handler, data_handler=data_handler,
candle_type=candle_type, candle_type=candle_type,
prepend=False) prepend=prepend)
# TODO: Prepend should come from a param
logger.info(f'Download history data for "{pair}" ({process}), timeframe: {timeframe}, '
f'candle type: {candle_type} and store in {datadir}.'
f'From {format_ms_time(since_ms) if since_ms else "start"} to '
f'{format_ms_time(until_ms) if until_ms else "now"}'
)
logger.debug("Current Start: %s", logger.debug("Current Start: %s",
f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None') f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')
@ -269,6 +267,7 @@ def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes
timerange: Optional[TimeRange] = None, timerange: Optional[TimeRange] = None,
new_pairs_days: int = 30, erase: bool = False, new_pairs_days: int = 30, erase: bool = False,
data_format: str = None, data_format: str = None,
prepend: bool = False,
) -> List[str]: ) -> List[str]:
""" """
Refresh stored ohlcv data for backtesting and hyperopt operations. Refresh stored ohlcv data for backtesting and hyperopt operations.
@ -292,7 +291,7 @@ def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes
timerange=timerange, data_handler=data_handler, timerange=timerange, data_handler=data_handler,
timeframe=str(timeframe), new_pairs_days=new_pairs_days, timeframe=str(timeframe), new_pairs_days=new_pairs_days,
candle_type=candle_type, candle_type=candle_type,
erase=erase) erase=erase, prepend=prepend)
if trading_mode == 'futures': if trading_mode == 'futures':
# Predefined candletype (and timeframe) depending on exchange # Predefined candletype (and timeframe) depending on exchange
# Downloads what is necessary to backtest based on futures data. # Downloads what is necessary to backtest based on futures data.
@ -306,7 +305,7 @@ def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes
timerange=timerange, data_handler=data_handler, timerange=timerange, data_handler=data_handler,
timeframe=str(tf_mark), new_pairs_days=new_pairs_days, timeframe=str(tf_mark), new_pairs_days=new_pairs_days,
candle_type=funding_candle_type, candle_type=funding_candle_type,
erase=erase) erase=erase, prepend=prepend)
return pairs_not_available return pairs_not_available

View File

@ -1983,6 +1983,20 @@ async def test__async_get_historic_ohlcv(default_conf, mocker, caplog, exchange_
assert exchange._api_async.fetch_ohlcv.call_count > 200 assert exchange._api_async.fetch_ohlcv.call_count > 200
assert res[0] == ohlcv[0] assert res[0] == ohlcv[0]
exchange._api_async.fetch_ohlcv.reset_mock()
end_ts = 1_500_500_000_000
start_ts = 1_500_000_000_000
respair, restf, _, res = await exchange._async_get_historic_ohlcv(
pair, "5m", since_ms=start_ts, candle_type=candle_type, is_new_pair=False,
until_ms=end_ts
)
# Required candles
candles = (end_ts - start_ts) / 300_000
exp = candles // exchange.ohlcv_candle_limit('5m') + 1
# Depending on the exchange, this should be called between 1 and 6 times.
assert exchange._api_async.fetch_ohlcv.call_count == exp
@pytest.mark.parametrize('candle_type', [CandleType.FUTURES, CandleType.MARK, CandleType.SPOT]) @pytest.mark.parametrize('candle_type', [CandleType.FUTURES, CandleType.MARK, CandleType.SPOT])
def test_refresh_latest_ohlcv(mocker, default_conf, caplog, candle_type) -> None: def test_refresh_latest_ohlcv(mocker, default_conf, caplog, candle_type) -> None: