stable/freqtrade/optimize/__init__.py

# pragma pylint: disable=missing-docstring

import gzip
import json
import logging
import os
import arrow
from typing import Optional, List, Dict, Tuple

from freqtrade import misc, constants
from freqtrade.exchange import get_ticker_history

from user_data.hyperopt_conf import hyperopt_optimize_conf

logger = logging.getLogger(__name__)


def trim_tickerlist(tickerlist: List[Dict], timerange: Tuple[Tuple, int, int]) -> List[Dict]:
    """
    Cut a ticker list down to the part selected by ``timerange``.

    :param tickerlist: list of candles; element ``[0]`` of every candle is the
        value compared against the 'date' bounds
    :param timerange: ``((start_type, stop_type), start, stop)``. Each type may be
        'line', 'index' or 'date'; any other value leaves that side unbounded.
        For 'date' bounds ``start``/``stop`` are multiplied by 1000 before the
        comparison (presumably seconds vs. millisecond timestamps).
    :return: the selected slice; an empty ``tickerlist`` is returned unchanged
    :raises ValueError: if the resulting start index lies behind the stop index
    """
    if not tickerlist:
        return tickerlist

    stype, start, stop = timerange
    length = len(tickerlist)

    start_index = 0
    stop_index = length

    if stype[0] == 'line':
        stop_index = start
    if stype[0] == 'index':
        start_index = start
    elif stype[0] == 'date':
        # Bounded scan: a start date behind the last candle must produce an
        # empty result instead of running off the end of the list.
        while start_index < length and tickerlist[start_index][0] < start * 1000:
            start_index += 1

    if stype[1] == 'line':
        start_index = length + stop
    if stype[1] == 'index':
        stop_index = stop
    elif stype[1] == 'date':
        # Bounded scan: without the lower limit the index would wrap around
        # through negative positions when every candle is newer than ``stop``.
        while stop_index > 0 and tickerlist[stop_index - 1][0] > stop * 1000:
            stop_index -= 1

    if start_index > stop_index:
        raise ValueError(f'The timerange [{start},{stop}] is incorrect')

    return tickerlist[start_index:stop_index]


def load_tickerdata_file(
        datadir: str, pair: str,
        ticker_interval: str,
        timerange: Optional[Tuple[Tuple, int, int]] = None) -> Optional[List[Dict]]:
    """
    Read the cached ticker data of a single pair from disk.

    The file is looked up as ``<pair with '/' replaced by '_'>-<ticker_interval>.json``
    inside the directory returned by ``make_testdata_path``; a ``.gz`` variant
    takes precedence over the plain file.

    :param datadir: data directory, passed to ``make_testdata_path``
    :param pair: pair name, e.g. 'XXX/BTC'
    :param ticker_interval: interval string used in the file name
    :param timerange: optional range; when truthy the data is passed through
        ``trim_tickerlist``
    :return: the parsed JSON content (optionally trimmed), or None when neither
        file exists
    """
    directory = make_testdata_path(datadir)
    basename = '{pair}-{ticker_interval}.json'.format(
        pair=pair.replace('/', '_'),
        ticker_interval=ticker_interval,
    )
    plain_path = os.path.join(directory, basename)
    packed_path = plain_path + '.gz'

    # Pick the reader for whichever file is present; a missing file is reported
    # as None so the caller can decide to download the data.
    if os.path.isfile(packed_path):
        logger.debug('Loading ticker data from file %s', packed_path)
        opener, source = gzip.open, packed_path
    elif os.path.isfile(plain_path):
        logger.debug('Loading ticker data from file %s', plain_path)
        opener, source = open, plain_path
    else:
        return None

    with opener(source) as handle:
        loaded = json.load(handle)

    return trim_tickerlist(loaded, timerange) if timerange else loaded


def load_data(datadir: str,
              ticker_interval: str,
              pairs: Optional[List[str]] = None,
              refresh_pairs: Optional[bool] = False,
              timerange: Optional[Tuple[Tuple, int, int]] = None) -> Dict[str, List]:
    """
    Load the ticker history of several pairs.

    :param datadir: directory holding the cached ticker files
    :param ticker_interval: interval string of the data to load
    :param pairs: pairs to load; when empty or None the 'pair_whitelist' from
        ``hyperopt_optimize_conf()['exchange']`` is used
    :param refresh_pairs: when truthy, all pairs are downloaded before loading
    :param timerange: optional range forwarded to the download and load helpers
    :return: dict mapping each pair to the data read for it
    """
    history = {}

    wanted = pairs or hyperopt_optimize_conf()['exchange']['pair_whitelist']

    def read_cache(symbol):
        # single place for the (repeated) cache lookup of one pair
        return load_tickerdata_file(datadir, symbol, ticker_interval, timerange=timerange)

    # Forced refresh requested by the user
    if refresh_pairs:
        logger.info('Download data for all pairs and store them in %s', datadir)
        download_pairs(datadir, wanted, ticker_interval, timerange=timerange)

    for symbol in wanted:
        candles = read_cache(symbol)
        if not candles:
            # nothing usable on disk: fetch from the exchange, then read again
            download_backtesting_testdata(datadir,
                                          pair=symbol,
                                          tick_interval=ticker_interval,
                                          timerange=timerange)
            candles = read_cache(symbol)
        history[symbol] = candles

    return history


def make_testdata_path(datadir: str) -> str:
    """
    Return the directory where testdata files are stored.

    :param datadir: explicit directory; returned unchanged when truthy
    :return: ``datadir``, or the absolute path of ``../tests/testdata`` relative
        to this module's directory when ``datadir`` is empty/None
    """
    if datadir:
        return datadir

    module_dir = os.path.dirname(__file__)
    fallback = os.path.join(module_dir, '..', 'tests', 'testdata')
    return os.path.abspath(fallback)


def download_pairs(datadir, pairs: List[str],
                   ticker_interval: str,
                   timerange: Optional[Tuple[Tuple, int, int]] = None) -> bool:
    """
    Download the ticker intervals for every pair passed in ``pairs``.

    Processing stops at the first pair whose download fails.

    :param datadir: directory the data is stored in
    :param pairs: pairs to download
    :param ticker_interval: interval string of the data to download
    :param timerange: optional range forwarded to the download helper
    :return: True when every pair was downloaded, False as soon as one fails
    """
    for pair in pairs:
        try:
            download_backtesting_testdata(datadir,
                                          pair=pair,
                                          tick_interval=ticker_interval,
                                          timerange=timerange)
        except (KeyboardInterrupt, SystemExit):
            # A user interrupt or interpreter shutdown is not a download
            # failure and must not be swallowed.
            raise
        except BaseException:
            # Kept as broad as before for every other error; exc_info records
            # the reason of the failure alongside the unchanged message.
            logger.info(
                'Failed to download the pair: "%s", Interval: %s',
                pair,
                ticker_interval,
                exc_info=True
            )
            return False
    return True


def load_cached_data_for_updating(filename: str,
                                  tick_interval: str,
                                  timerange: Optional[Tuple[Tuple, int, int]]) -> Tuple[list, int]:
    """
    Load cached data and decide from which point on it has to be updated.

    :param filename: path of the JSON cache file
    :param tick_interval: interval string, used to look up the interval length
        in ``constants.TICKER_INTERVAL_MINUTES`` for 'line' ranges
    :param timerange: optional ``((start_type, stop_type), start, stop)`` tuple
    :return: ``(data, since_ms)`` - the reusable cached entries and the start
        value for the download. ``since_ms`` is None when neither the timerange
        nor the cache provides a starting point.
    """
    start_ms = None

    # A user supplied timerange determines the requested start time
    if timerange:
        kinds = timerange[0]
        if kinds[0] == 'date':
            start_ms = timerange[1] * 1000
        elif kinds[1] == 'line':
            span_minutes = timerange[2] * constants.TICKER_INTERVAL_MINUTES[tick_interval]
            start_ms = arrow.utcnow().shift(minutes=span_minutes).timestamp * 1000

    # Read the cache file, if there is one
    cached = []
    if os.path.isfile(filename):
        with open(filename, "rt") as handle:
            cached = json.load(handle)
        # The newest entry may have been fetched while its candle was still
        # incomplete, so it is dropped and downloaded again.
        if cached:
            cached.pop()

    if not cached:
        return (cached, start_ms)

    if start_ms and start_ms < cached[0][0]:
        # Requested period begins before the cache does: discard the cache
        # and download everything from the requested start.
        return ([], start_ms)

    # Part of the data is already there: continue right after the last entry.
    return (cached, cached[-1][0] + 1)


def download_backtesting_testdata(datadir: str,
                                  pair: str,
                                  tick_interval: str = '5m',
                                  timerange: Optional[Tuple[Tuple, int, int]] = None) -> None:
    """
    Download the latest ticker intervals from the exchange for the given pair.

    The data is downloaded starting from the last correct ticker interval data that
    exists in the cache. If timerange starts earlier than the data in the cache,
    the full data will be redownloaded.

    Based on @Rybolov work: https://github.com/rybolov/freqtrade-data
    :param datadir: data directory, resolved through ``make_testdata_path``
    :param pair: pair to download
    :param tick_interval: ticker interval
    :param timerange: range of time to download
    :return: None
    """

    path = make_testdata_path(datadir)
    filepair = pair.replace("/", "_")
    filename = os.path.join(path, f'{filepair}-{tick_interval}.json')

    logger.info(
        'Download the pair: "%s", Interval: %s',
        pair,
        tick_interval
    )

    data, since_ms = load_cached_data_for_updating(filename, tick_interval, timerange)

    # The start of the cached data is its first entry (index 0).
    logger.debug("Current Start: %s", misc.format_ms_time(data[0][0]) if data else 'None')
    logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')

    new_data = get_ticker_history(pair=pair, tick_interval=tick_interval, since_ms=since_ms)
    data.extend(new_data)

    # Cache and download may both be empty; guard the log lines the same way
    # as above so that logging cannot raise an IndexError.
    logger.debug("New Start: %s", misc.format_ms_time(data[0][0]) if data else 'None')
    logger.debug("New End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')

    misc.file_dump_json(filename, data)
integrate hyperopt and implement subcommand 2017-11-25 00:04:11 +00:00			`# pragma pylint: disable=missing-docstring`

optimize imports 2018-03-17 21:44:47 +00:00			`import gzip`
integrate hyperopt and implement subcommand 2017-11-25 00:04:11 +00:00			`import json`
use native python logger 2018-03-25 19:37:14 +00:00			`import logging`
integrate hyperopt and implement subcommand 2017-11-25 00:04:11 +00:00			`import os`
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00			`import arrow`
optimize: set correct typehints 2018-03-17 21:43:36 +00:00			`from typing import Optional, List, Dict, Tuple`
Move hyperopt_conf.py into user_data/ 2018-01-18 07:10:48 +00:00
Fix conflicts 2018-05-04 10:38:51 +00:00			`from freqtrade import misc, constants`
optimize imports 2018-03-17 21:44:47 +00:00			`from freqtrade.exchange import get_ticker_history`
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00
Move hyperopt_conf.py into user_data/ 2018-01-18 07:10:48 +00:00			`from user_data.hyperopt_conf import hyperopt_optimize_conf`
integrate hyperopt and implement subcommand 2017-11-25 00:04:11 +00:00
use native python logger 2018-03-25 19:37:14 +00:00			`logger = logging.getLogger(__name__)`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00
integrate hyperopt and implement subcommand 2017-11-25 00:04:11 +00:00
optimize: set correct typehints 2018-03-17 21:43:36 +00:00			`def trim_tickerlist(tickerlist: List[Dict], timerange: Tuple[Tuple, int, int]) -> List[Dict]:`
Fix case with empty dict 2018-04-27 22:40:48 +00:00			`if not tickerlist:`
			`return tickerlist`

optimize: set correct typehints 2018-03-17 21:43:36 +00:00			`stype, start, stop = timerange`
remove unnecessary else 2018-01-26 16:41:41 +00:00
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00			`start_index = 0`
			`stop_index = len(tickerlist)`

			`if stype[0] == 'line':`
			`stop_index = start`
			`if stype[0] == 'index':`
			`start_index = start`
			`elif stype[0] == 'date':`
			`while tickerlist[start_index][0] < start * 1000:`
			`start_index += 1`

			`if stype[1] == 'line':`
			`start_index = len(tickerlist) + stop`
			`if stype[1] == 'index':`
			`stop_index = stop`
			`elif stype[1] == 'date':`
			`while tickerlist[stop_index-1][0] > stop * 1000:`
			`stop_index -= 1`

			`if start_index > stop_index:`
			`raise ValueError(f'The timerange [{start},{stop}] is incorrect')`

			`return tickerlist[start_index:stop_index]`
more advanced use of --timerange 2018-01-15 21:25:02 +00:00

optimize: set correct typehints 2018-03-17 21:43:36 +00:00			`def load_tickerdata_file(`
			`datadir: str, pair: str,`
Change ticker interval from minutes as integer to string (1m, 5m, 1h,...) 2018-03-24 09:21:59 +00:00			`ticker_interval: str,`
optimize: set correct typehints 2018-03-17 21:43:36 +00:00			`timerange: Optional[Tuple[Tuple, int, int]] = None) -> Optional[List[Dict]]:`
split load tickerdata function 2018-01-05 09:20:48 +00:00			`"""`
			`Load a pair from file,`
			`:return dict OR empty if unsuccesful`
			`"""`
--datadir <path> argument This argument enables usage of different backtesting directories. Useful if one wants compare backtesting performance over time. 2018-01-06 22:24:35 +00:00			`path = make_testdata_path(datadir)`
Replace 'BTC_XXX' with 'XXX/BTC' for pairs and 'XXX_BTC' for files 2018-02-03 16:15:40 +00:00			`pair_file_string = pair.replace('/', '_')`
correctly join paths and debug log the found results 2018-02-15 06:56:13 +00:00			`file = os.path.join(path, '{pair}-{ticker_interval}.json'.format(`
Replace 'BTC_XXX' with 'XXX/BTC' for pairs and 'XXX_BTC' for files 2018-02-03 16:15:40 +00:00			`pair=pair_file_string,`
split load tickerdata function 2018-01-05 09:20:48 +00:00			`ticker_interval=ticker_interval,`
correctly join paths and debug log the found results 2018-02-15 06:56:13 +00:00			`))`
Added support in /optimize for gzip ticker data files if they exist. 2018-01-28 13:52:27 +00:00			`gzipfile = file + '.gz'`

optimize/__init__.py: Added support for gzip ticker data files if they exist. 2018-01-28 13:57:25 +00:00			`# If the file does not exist we download it when None is returned.`
Added support in /optimize for gzip ticker data files if they exist. 2018-01-28 13:52:27 +00:00			`# If file exists, read the file, load the json`
			`if os.path.isfile(gzipfile):`
correctly join paths and debug log the found results 2018-02-15 06:56:13 +00:00			`logger.debug('Loading ticker data from file %s', gzipfile)`
Added support in /optimize for gzip ticker data files if they exist. 2018-01-28 13:52:27 +00:00			`with gzip.open(gzipfile) as tickerdata:`
			`pairdata = json.load(tickerdata)`
			`elif os.path.isfile(file):`
correctly join paths and debug log the found results 2018-02-15 06:56:13 +00:00			`logger.debug('Loading ticker data from file %s', file)`
Added support in /optimize for gzip ticker data files if they exist. 2018-01-28 13:52:27 +00:00			`with open(file) as tickerdata:`
			`pairdata = json.load(tickerdata)`
			`else:`
split load tickerdata function 2018-01-05 09:20:48 +00:00			`return None`

more advanced use of --timerange 2018-01-15 21:25:02 +00:00			`if timerange:`
			`pairdata = trim_tickerlist(pairdata, timerange)`
split load tickerdata function 2018-01-05 09:20:48 +00:00			`return pairdata`


Change ticker interval from minutes as integer to string (1m, 5m, 1h,...) 2018-03-24 09:21:59 +00:00			`def load_data(datadir: str,`
			`ticker_interval: str,`
optimize: set correct typehints 2018-03-17 21:43:36 +00:00			`pairs: Optional[List[str]] = None,`
			`refresh_pairs: Optional[bool] = False,`
			`timerange: Optional[Tuple[Tuple, int, int]] = None) -> Dict[str, List]:`
integrate hyperopt and implement subcommand 2017-11-25 00:04:11 +00:00			`"""`
			`Loads ticker history data for the given parameters`
			`:return: dict`
			`"""`
			`result = {}`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00
Fix hyperopt when using MongoDB 2017-12-21 07:31:26 +00:00			`_pairs = pairs or hyperopt_optimize_conf()['exchange']['pair_whitelist']`

Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00			`# If the user force the refresh of pairs`
			`if refresh_pairs:`
--datadir <path> argument This argument enables usage of different backtesting directories. Useful if one wants compare backtesting performance over time. 2018-01-06 22:24:35 +00:00			`logger.info('Download data for all pairs and store them in %s', datadir)`
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00			`download_pairs(datadir, _pairs, ticker_interval, timerange=timerange)`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00
Fix hyperopt when using MongoDB 2017-12-21 07:31:26 +00:00			`for pair in _pairs:`
more advanced use of --timerange 2018-01-15 21:25:02 +00:00			`pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange)`
split load tickerdata function 2018-01-05 09:20:48 +00:00			`if not pairdata:`
			`# download the tickerdata from exchange`
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`download_backtesting_testdata(datadir,`
			`pair=pair,`
			`tick_interval=ticker_interval,`
			`timerange=timerange)`
split load tickerdata function 2018-01-05 09:20:48 +00:00			`# and retry reading the pair`
more advanced use of --timerange 2018-01-15 21:25:02 +00:00			`pairdata = load_tickerdata_file(datadir, pair, ticker_interval, timerange=timerange)`
split load tickerdata function 2018-01-05 09:20:48 +00:00			`result[pair] = pairdata`
integrate hyperopt and implement subcommand 2017-11-25 00:04:11 +00:00			`return result`


--datadir <path> argument This argument enables usage of different backtesting directories. Useful if one wants compare backtesting performance over time. 2018-01-06 22:24:35 +00:00			`def make_testdata_path(datadir: str) -> str:`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00			`"""Return the path where testdata files are stored"""`
Move Backtesting to a class and add unit tests 2018-02-09 07:35:38 +00:00			`return datadir or os.path.abspath(`
			`os.path.join(`
			`os.path.dirname(__file__), '..', 'tests', 'testdata'`
			`)`
			`)`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00

Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00			`def download_pairs(datadir, pairs: List[str],`
			`ticker_interval: str,`
			`timerange: Optional[Tuple[Tuple, int, int]] = None) -> bool:`
Refreshing pair of only selected ticker_interval 2018-01-13 07:32:44 +00:00			`"""For each pairs passed in parameters, download the ticker intervals"""`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00			`for pair in pairs:`
			`try:`
Fix codestyle 2018-04-27 21:30:42 +00:00			`download_backtesting_testdata(datadir,`
			`pair=pair,`
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`tick_interval=ticker_interval,`
Fix codestyle 2018-04-27 21:30:42 +00:00			`timerange=timerange)`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00			`except BaseException:`
Make Pylint Happy chapter 1 2018-03-02 15:22:00 +00:00			`logger.info(`
Change ticker interval from minutes as integer to string (1m, 5m, 1h,...) 2018-03-24 09:21:59 +00:00			`'Failed to download the pair: "%s", Interval: %s',`
Make Pylint Happy chapter 1 2018-03-02 15:22:00 +00:00			`pair,`
			`ticker_interval`
			`)`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00			`return False`
			`return True`


Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`def load_cached_data_for_updating(filename: str,`
			`tick_interval: str,`
			`timerange: Optional[Tuple[Tuple, int, int]]) -> Tuple[list, int]:`
			`"""`
			`Load cached data and choose what part of the data should be updated`
			`"""`

			`since_ms = None`

			`# user sets timerange, so find the start time`
			`if timerange:`
			`if timerange[0][0] == 'date':`
			`since_ms = timerange[1] * 1000`
			`elif timerange[0][1] == 'line':`
Fix conflicts 2018-05-04 10:38:51 +00:00			`num_minutes = timerange[2] * constants.TICKER_INTERVAL_MINUTES[tick_interval]`
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`since_ms = arrow.utcnow().shift(minutes=num_minutes).timestamp * 1000`
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`# read the cached file`
			`if os.path.isfile(filename):`
			`with open(filename, "rt") as file:`
			`data = json.load(file)`
			`# remove the last item, because we are not sure if it is correct`
			`# it could be fetched when the candle was incompleted`
			`if data:`
			`data.pop()`
			`else:`
			`data = []`
Fix codestyle 2018-04-27 21:30:42 +00:00
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`if data:`
			`if since_ms and since_ms < data[0][0]:`
			`# the data is requested for earlier period than the cache has`
			`# so fully redownload all the data`
			`data = []`
			`else:`
			`# a part of the data was already downloaded, so`
			`# download unexist data only`
			`since_ms = data[-1][0] + 1`
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`return (data, since_ms)`
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00

			`def download_backtesting_testdata(datadir: str,`
			`pair: str,`
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`tick_interval: str = '5m',`
Fix conflicts 2018-05-03 08:16:29 +00:00			`timerange: Optional[Tuple[Tuple, int, int]] = None) -> None:`

Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00			`"""`
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00			`Download the latest ticker intervals from the exchange for the pairs passed in parameters`
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`The data is downloaded starting from the last correct ticker interval data that`
			`esists in a cache. If timerange starts earlier than the data in the cache,`
			`the full data will be redownloaded`

Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00			`Based on @Rybolov work: https://github.com/rybolov/freqtrade-data`
			`:param pairs: list of pairs to download`
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`:param tick_interval: ticker interval`
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00			`:param timerange: range of time to download`
Fix conflicts 2018-05-03 08:16:29 +00:00			`:return: None`

Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00			`"""`

--datadir <path> argument This argument enables usage of different backtesting directories. Useful if one wants compare backtesting performance over time. 2018-01-06 22:24:35 +00:00			`path = make_testdata_path(datadir)`
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`filepair = pair.replace("/", "_")`
			`filename = os.path.join(path, f'{filepair}-{tick_interval}.json')`

Make Pylint Happy chapter 1 2018-03-02 15:22:00 +00:00			`logger.info(`
Change ticker interval from minutes as integer to string (1m, 5m, 1h,...) 2018-03-24 09:21:59 +00:00			`'Download the pair: "%s", Interval: %s',`
Make Pylint Happy chapter 1 2018-03-02 15:22:00 +00:00			`pair,`
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`tick_interval`
Make Pylint Happy chapter 1 2018-03-02 15:22:00 +00:00			`)`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00
Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`data, since_ms = load_cached_data_for_updating(filename, tick_interval, timerange)`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00			`logger.debug("Current Start: %s", misc.format_ms_time(data[1][0]) if data else 'None')`
			`logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')`

Fix review comments, documenation update 2018-04-30 21:27:05 +00:00			`new_data = get_ticker_history(pair=pair, tick_interval=tick_interval, since_ms=since_ms)`
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00			`data.extend(new_data)`

refactor format_ms_time to misc.py 2018-03-25 11:38:17 +00:00			`logger.debug("New Start: %s", misc.format_ms_time(data[0][0]))`
Fix review comments. Add support of datetime timeganges 2018-04-27 21:16:34 +00:00			`logger.debug("New End: %s", misc.format_ms_time(data[-1][0]))`
Allow user to update testdata files with parameter --refresh-pairs-cached (#174) 2017-12-16 14:42:28 +00:00
file_dump_json 2018-01-11 14:49:04 +00:00			`misc.file_dump_json(filename, data)`