Merge pull request #1893 from hroff-1902/refactor-download-script

refactoring download_backtest_data.py
This commit is contained in:
Matthias 2019-06-14 20:12:07 +02:00 committed by GitHub
commit 2965931a78
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 134 additions and 91 deletions

View File

@ -123,11 +123,12 @@ python scripts/download_backtest_data.py --exchange binance
This will download ticker data for all the currency pairs you defined in `pairs.json`.
- To use a different folder than the exchange specific default, use `--export user_data/data/some_directory`.
- To use a different folder than the exchange specific default, use `--datadir user_data/data/some_directory`.
- To change the exchange used to download the tickers, use `--exchange`. Default is `bittrex`.
- To use `pairs.json` from some other folder, use `--pairs-file some_other_dir/pairs.json`.
- To download ticker data for only 10 days, use `--days 10`.
- Use `--timeframes` to specify which tickers to download. Default is `--timeframes 1m 5m` which will download 1-minute and 5-minute tickers.
- To use exchange, timeframe and list of pairs as defined in your configuration file, use the `-c/--config` option. With this, the script uses the whitelist defined in the config as the list of currency pairs to download data for and does not require the pairs.json file. You can combine `-c/--config` with other options.
For help about backtesting usage, please refer to [Backtesting commands](#backtesting-commands).

View File

@ -47,7 +47,7 @@ class Arguments(object):
return self.parsed_arg
def parse_args(self) -> argparse.Namespace:
def parse_args(self, no_default_config: bool = False) -> argparse.Namespace:
"""
Parses given arguments and returns an argparse Namespace instance.
"""
@ -55,7 +55,7 @@ class Arguments(object):
# Workaround issue in argparse with action='append' and default value
# (see https://bugs.python.org/issue16399)
if parsed_arg.config is None:
if parsed_arg.config is None and not no_default_config:
parsed_arg.config = [constants.DEFAULT_CONFIG]
return parsed_arg
@ -427,26 +427,24 @@ class Arguments(object):
default=None
)
def testdata_dl_options(self) -> None:
def download_data_options(self) -> None:
"""
Parses given arguments for testdata download
"""
self.parser.add_argument(
'--pairs-file',
help='File containing a list of pairs to download.',
dest='pairs_file',
default=None,
metavar='PATH',
'-v', '--verbose',
help='Verbose mode (-vv for more, -vvv to get all messages).',
action='count',
dest='loglevel',
default=0,
)
self.parser.add_argument(
'--export',
help='Export files to given dir.',
dest='export',
default=None,
metavar='PATH',
'--logfile',
help='Log to the file specified',
dest='logfile',
type=str,
metavar='FILE',
)
self.parser.add_argument(
'-c', '--config',
help='Specify configuration file (default: %(default)s). '
@ -456,35 +454,39 @@ class Arguments(object):
type=str,
metavar='PATH',
)
self.parser.add_argument(
'-d', '--datadir',
help='Path to backtest data.',
dest='datadir',
metavar='PATH',
)
self.parser.add_argument(
'--pairs-file',
help='File containing a list of pairs to download.',
dest='pairs_file',
metavar='FILE',
)
self.parser.add_argument(
'--days',
help='Download data for given number of days.',
dest='days',
type=int,
type=Arguments.check_int_positive,
metavar='INT',
default=None
)
self.parser.add_argument(
'--exchange',
help='Exchange name (default: %(default)s). Only valid if no config is provided.',
dest='exchange',
type=str,
default='bittrex'
)
self.parser.add_argument(
'-t', '--timeframes',
help='Specify which tickers to download. Space separated list. \
Default: %(default)s.',
choices=['1m', '3m', '5m', '15m', '30m', '1h', '2h', '4h',
'6h', '8h', '12h', '1d', '3d', '1w'],
default=['1m', '5m'],
nargs='+',
dest='timeframes',
)
self.parser.add_argument(
'--erase',
help='Clean all existing data for the selected exchange/pairs/timeframes.',

View File

@ -122,12 +122,11 @@ class Configuration(object):
return conf
def _load_common_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
def _load_logging_config(self, config: Dict[str, Any]) -> None:
"""
Extract information for sys.argv and load common configuration
:return: configuration as dictionary
Extract information for sys.argv and load logging configuration:
the --loglevel, --logfile options
"""
# Log level
if 'loglevel' in self.args and self.args.loglevel:
config.update({'verbosity': self.args.loglevel})
@ -153,6 +152,13 @@ class Configuration(object):
set_loggers(config['verbosity'])
logger.info('Verbosity set to %s', config['verbosity'])
def _load_common_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
"""
Extract information for sys.argv and load common configuration
:return: configuration as dictionary
"""
self._load_logging_config(config)
# Support for sd_notify
if self.args.sd_notify:
config['internals'].update({'sd_notify': True})
@ -228,6 +234,17 @@ class Configuration(object):
else:
logger.info(logstring.format(config[argname]))
def _load_datadir_config(self, config: Dict[str, Any]) -> None:
"""
Extract information for sys.argv and load datadir configuration:
the --datadir option

Stores the value returned by self._create_datadir() under config['datadir'],
passing the --datadir argument when it is present and non-empty and None
otherwise, then logs the resulting data folder.
:param config: configuration dictionary, updated in place
"""
# Only honour --datadir when the option exists on the parsed args and is truthy
if 'datadir' in self.args and self.args.datadir:
config.update({'datadir': self._create_datadir(config, self.args.datadir)})
else:
# No --datadir given: pass None and leave the choice of folder to _create_datadir()
# (presumably the exchange-specific default -- confirm against _create_datadir)
config.update({'datadir': self._create_datadir(config, None)})
logger.info('Using data folder: %s ...', config.get('datadir'))
def _load_optimize_config(self, config: Dict[str, Any]) -> Dict[str, Any]:
"""
Extract information for sys.argv and load Optimize configuration
@ -263,11 +280,7 @@ class Configuration(object):
self._args_to_config(config, argname='timerange',
logstring='Parameter --timerange detected: {} ...')
if 'datadir' in self.args and self.args.datadir:
config.update({'datadir': self._create_datadir(config, self.args.datadir)})
else:
config.update({'datadir': self._create_datadir(config, None)})
logger.info('Using data folder: %s ...', config.get('datadir'))
self._load_datadir_config(config)
self._args_to_config(config, argname='refresh_pairs',
logstring='Parameter -r/--refresh-pairs-cached detected ...')

View File

@ -170,18 +170,18 @@ def test_parse_args_hyperopt_custom() -> None:
assert call_args.func is not None
def test_testdata_dl_options() -> None:
def test_download_data_options() -> None:
args = [
'--pairs-file', 'file_with_pairs',
'--export', 'export/folder',
'--datadir', 'datadir/folder',
'--days', '30',
'--exchange', 'binance'
]
arguments = Arguments(args, '')
arguments.testdata_dl_options()
arguments.download_data_options()
args = arguments.parse_args()
assert args.pairs_file == 'file_with_pairs'
assert args.export == 'export/folder'
assert args.datadir == 'datadir/folder'
assert args.days == 30
assert args.exchange == 'binance'

View File

@ -1,55 +1,66 @@
#!/usr/bin/env python3
"""
This script generates json data
This script generates json files with pairs history data
"""
import arrow
import json
import sys
from pathlib import Path
import arrow
from typing import Any, Dict
from typing import Any, Dict, List
from freqtrade.arguments import Arguments
from freqtrade.arguments import TimeRange
from freqtrade.exchange import Exchange
from freqtrade.arguments import Arguments, TimeRange
from freqtrade.configuration import Configuration
from freqtrade.data.history import download_pair_history
from freqtrade.configuration import Configuration, set_loggers
from freqtrade.exchange import Exchange
from freqtrade.misc import deep_merge_dicts
import logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
set_loggers(0)
logger = logging.getLogger('download_backtest_data')
DEFAULT_DL_PATH = 'user_data/data'
arguments = Arguments(sys.argv[1:], 'download utility')
arguments.testdata_dl_options()
args = arguments.parse_args()
arguments.download_data_options()
timeframes = args.timeframes
# Do not read the default config if config is not specified
# in the command line options explicitly
args = arguments.parse_args(no_default_config=True)
# Use bittrex as default exchange
exchange_name = args.exchange or 'bittrex'
pairs: List = []
configuration = Configuration(args)
config: Dict[str, Any] = {}
if args.config:
configuration = Configuration(args)
config: Dict[str, Any] = {}
# Now expecting a list of config filenames here, not a string
for path in args.config:
print(f"Using config: {path}...")
logger.info(f"Using config: {path}...")
# Merge config options, overwriting old values
config = deep_merge_dicts(configuration._load_config_file(path), config)
config['stake_currency'] = ''
# Ensure we do not use Exchange credentials
config['exchange']['dry_run'] = True
config['exchange']['key'] = ''
config['exchange']['secret'] = ''
pairs = config['exchange']['pair_whitelist']
if config.get('ticker_interval'):
timeframes = args.timeframes or [config.get('ticker_interval')]
else:
timeframes = args.timeframes or ['1m', '5m']
else:
config = {
'stake_currency': '',
'dry_run': True,
'exchange': {
'name': args.exchange,
'name': exchange_name,
'key': '',
'secret': '',
'pair_whitelist': [],
@ -59,56 +70,72 @@ else:
}
}
}
timeframes = args.timeframes or ['1m', '5m']
configuration._load_logging_config(config)
dl_path = Path(DEFAULT_DL_PATH).joinpath(config['exchange']['name'])
if args.export:
dl_path = Path(args.export)
if args.config and args.exchange:
logger.warning("The --exchange option is ignored, "
"using exchange settings from the configuration file.")
if not dl_path.is_dir():
sys.exit(f'Directory {dl_path} does not exist.')
# Check if the exchange set by the user is supported
configuration.check_exchange(config)
configuration._load_datadir_config(config)
dl_path = Path(config['datadir'])
pairs_file = Path(args.pairs_file) if args.pairs_file else dl_path.joinpath('pairs.json')
if not pairs or args.pairs_file:
logger.info(f'Reading pairs file "{pairs_file}".')
# Download pairs from the pairs file if no config is specified
# or if pairs file is specified explicitly
if not pairs_file.exists():
sys.exit(f'No pairs file found with path {pairs_file}.')
sys.exit(f'No pairs file found with path "{pairs_file}".')
with pairs_file.open() as file:
PAIRS = list(set(json.load(file)))
PAIRS.sort()
pairs = list(set(json.load(file)))
pairs.sort()
timerange = TimeRange()
if args.days:
time_since = arrow.utcnow().shift(days=-args.days).strftime("%Y%m%d")
timerange = arguments.parse_timerange(f'{time_since}-')
logger.info(f'About to download pairs: {pairs}, intervals: {timeframes} to {dl_path}')
print(f'About to download pairs: {PAIRS} to {dl_path}')
# Init exchange
exchange = Exchange(config)
pairs_not_available = []
for pair in PAIRS:
try:
# Init exchange
exchange = Exchange(config)
for pair in pairs:
if pair not in exchange._api.markets:
pairs_not_available.append(pair)
print(f"skipping pair {pair}")
logger.info(f"Skipping pair {pair}...")
continue
for ticker_interval in timeframes:
pair_print = pair.replace('/', '_')
filename = f'{pair_print}-{ticker_interval}.json'
dl_file = dl_path.joinpath(filename)
if args.erase and dl_file.exists():
print(f'Deleting existing data for pair {pair}, interval {ticker_interval}')
logger.info(
f'Deleting existing data for pair {pair}, interval {ticker_interval}.')
dl_file.unlink()
print(f'downloading pair {pair}, interval {ticker_interval}')
logger.info(f'Downloading pair {pair}, interval {ticker_interval}.')
download_pair_history(datadir=dl_path, exchange=exchange,
pair=pair,
ticker_interval=ticker_interval,
pair=pair, ticker_interval=str(ticker_interval),
timerange=timerange)
except KeyboardInterrupt:
sys.exit("SIGINT received, aborting ...")
finally:
if pairs_not_available:
print(f"Pairs [{','.join(pairs_not_available)}] not availble.")
logger.info(
f"Pairs [{','.join(pairs_not_available)}] not available "
f"on exchange {config['exchange']['name']}.")