Merge pull request #2138 from freqtrade/history_docstrings

Refactorings to history
Matthias 2019-08-20 06:35:54 +02:00 committed by GitHub
commit 9e8ca8d4bf
6 changed files with 53 additions and 62 deletions


@@ -43,7 +43,7 @@ def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
             start_index += 1

     if timerange.stoptype == 'line':
-        start_index = len(tickerlist) + timerange.stopts
+        start_index = max(len(tickerlist) + timerange.stopts, 0)
     if timerange.stoptype == 'index':
         stop_index = timerange.stopts
     elif timerange.stoptype == 'date':
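
The only behavioural change in this hunk is the max(..., 0) clamp. With a 'line' stoptype, stopts holds the requested number of candles as a negative offset (as in the tests further down), and asking for more candles than the file holds used to produce a negative start index that silently truncated the result. A minimal illustration, with made-up numbers:

# Illustration only (not part of the diff): why the max(..., 0) clamp matters for
# a 'line' stoptype, where stopts is the negative number of requested candles.
tickerlist = list(range(10))    # pretend the file holds 10 candles
stopts = -15                    # caller asks for the last 15 candles
stop_index = len(tickerlist)

old_start = len(tickerlist) + stopts           # -5: the slice keeps only the last 5 candles
new_start = max(len(tickerlist) + stopts, 0)   # 0: the slice keeps everything that exists

print(tickerlist[old_start:stop_index])  # [5, 6, 7, 8, 9]
print(tickerlist[new_start:stop_index])  # [0, 1, ..., 9]
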
@@ -57,9 +57,7 @@ def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
     return tickerlist[start_index:stop_index]


-def load_tickerdata_file(
-        datadir: Optional[Path], pair: str,
-        ticker_interval: str,
-        timerange: Optional[TimeRange] = None) -> Optional[list]:
+def load_tickerdata_file(datadir: Optional[Path], pair: str, ticker_interval: str,
+                         timerange: Optional[TimeRange] = None) -> Optional[list]:
     """
     Load a pair from file, either .json.gz or .json
@@ -68,13 +66,22 @@ def load_tickerdata_file(
     filename = pair_data_filename(datadir, pair, ticker_interval)
     pairdata = misc.file_load_json(filename)
     if not pairdata:
-        return None
+        return []

     if timerange:
         pairdata = trim_tickerlist(pairdata, timerange)
     return pairdata


+def store_tickerdata_file(datadir: Optional[Path], pair: str,
+                          ticker_interval: str, data: list, is_zip: bool = False):
+    """
+    Stores tickerdata to file
+    """
+    filename = pair_data_filename(datadir, pair, ticker_interval)
+    misc.file_dump_json(filename, data, is_zip=is_zip)
+
+
 def load_pair_history(pair: str,
                       ticker_interval: str,
                       datadir: Optional[Path],
@@ -177,11 +184,14 @@ def pair_data_filename(datadir: Optional[Path], pair: str, ticker_interval: str)
     return filename


-def load_cached_data_for_updating(filename: Path, ticker_interval: str,
-                                  timerange: Optional[TimeRange]) -> Tuple[List[Any],
-                                                                           Optional[int]]:
+def load_cached_data_for_updating(datadir: Optional[Path], pair: str, ticker_interval: str,
+                                  timerange: Optional[TimeRange]) -> Tuple[List[Any],
+                                                                           Optional[int]]:
     """
-    Load cached data and choose what part of the data should be updated
+    Load cached data to download more data.
+    If timerange is passed in, checks whether data from before the stored data will be downloaded.
+    If that's the case, then what's available should be completely overwritten.
+    Only used by download_pair_history().
     """

     since_ms = None
@@ -195,9 +205,8 @@ def load_cached_data_for_updating(filename: Path, ticker_interval: str,
         since_ms = arrow.utcnow().shift(minutes=num_minutes).timestamp * 1000

     # read the cached file
-    if filename.is_file():
-        with open(filename, "rt") as file:
-            data = misc.json_load(file)
+    # Intentionally don't pass timerange in - since we need to load the full dataset.
+    data = load_tickerdata_file(datadir, pair, ticker_interval)
     # remove the last item, could be incomplete candle
     if data:
         data.pop()
@@ -239,14 +248,12 @@ def download_pair_history(datadir: Optional[Path],
     )
     try:
-        filename = pair_data_filename(datadir, pair, ticker_interval)
-
         logger.info(
             f'Download history data for pair: "{pair}", interval: {ticker_interval} '
             f'and store in {datadir}.'
         )

-        data, since_ms = load_cached_data_for_updating(filename, ticker_interval, timerange)
+        data, since_ms = load_cached_data_for_updating(datadir, pair, ticker_interval, timerange)

         logger.debug("Current Start: %s", misc.format_ms_time(data[1][0]) if data else 'None')
         logger.debug("Current End: %s", misc.format_ms_time(data[-1][0]) if data else 'None')
@@ -262,7 +269,7 @@ def download_pair_history(datadir: Optional[Path],
         logger.debug("New Start: %s", misc.format_ms_time(data[0][0]))
         logger.debug("New End: %s", misc.format_ms_time(data[-1][0]))

-        misc.file_dump_json(filename, data)
+        store_tickerdata_file(datadir, pair, ticker_interval, data=data)
         return True

     except Exception as e:
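
Taken together, download_pair_history no longer deals with filenames at all: data is addressed by (datadir, pair, ticker_interval), loading goes through load_cached_data_for_updating / load_tickerdata_file, and writing goes through the new store_tickerdata_file helper. A rough sketch of the resulting flow; download_new_candles() is a hypothetical stand-in for the exchange call that this diff does not show:

# Sketch only, under the assumptions stated above.
from freqtrade.data.history import load_cached_data_for_updating, store_tickerdata_file

def download_pair_history_sketch(datadir, pair, ticker_interval, timerange=None):
    # 1. Load whatever is already cached; since_ms tells us where to resume.
    data, since_ms = load_cached_data_for_updating(datadir, pair, ticker_interval, timerange)

    # 2. Fetch the missing candles from the exchange (hypothetical placeholder).
    new_data = download_new_candles(pair, ticker_interval, since_ms)
    data.extend(new_data)

    # 3. Persist via the new helper instead of calling misc.file_dump_json directly.
    store_tickerdata_file(datadir, pair, ticker_interval, data=data)
    return True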


@@ -5,11 +5,11 @@ import gzip
 import logging
 import re
 from datetime import datetime
+from pathlib import Path

 import numpy as np
 import rapidjson

 logger = logging.getLogger(__name__)
@@ -39,7 +39,7 @@ def datesarray_to_datetimearray(dates: np.ndarray) -> np.ndarray:
     return dates.dt.to_pydatetime()


-def file_dump_json(filename, data, is_zip=False) -> None:
+def file_dump_json(filename: Path, data, is_zip=False) -> None:
     """
     Dump JSON data into a file
     :param filename: file to create
@@ -49,8 +49,8 @@ def file_dump_json(filename: Path, data, is_zip=False) -> None:
     logger.info(f'dumping json to "{filename}"')

     if is_zip:
-        if not filename.endswith('.gz'):
-            filename = filename + '.gz'
+        if filename.suffix != '.gz':
+            filename = filename.with_suffix('.gz')
         with gzip.open(filename, 'w') as fp:
             rapidjson.dump(data, fp, default=str, number_mode=rapidjson.NM_NATIVE)
     else:
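
Note one subtlety in the Path-based version: with_suffix('.gz') replaces the final suffix instead of appending to it, so a zipped dump of 'backtest.json' ends up as 'backtest.gz' rather than the old 'backtest.json.gz'. A small sketch with made-up filenames:

from pathlib import Path

# Hypothetical filenames, just to show the suffix handling difference.
old_style = 'user_data/backtest.json' + '.gz'                   # -> 'user_data/backtest.json.gz'
new_style = Path('user_data/backtest.json').with_suffix('.gz')  # -> Path('user_data/backtest.gz')

# A path that already ends in .gz passes the new check untouched.
already_zipped = Path('user_data/backtest.json.gz')
assert already_zipped.suffix == '.gz'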


@@ -190,7 +190,7 @@ class Backtesting(object):
         return tabulate(tabular_data, headers=headers,  # type: ignore
                         floatfmt=floatfmt, tablefmt="pipe")

-    def _store_backtest_result(self, recordfilename: str, results: DataFrame,
+    def _store_backtest_result(self, recordfilename: Path, results: DataFrame,
                                strategyname: Optional[str] = None) -> None:

         records = [(t.pair, t.profit_percent, t.open_time.timestamp(),
@@ -201,10 +201,10 @@ class Backtesting(object):
         if records:
             if strategyname:
                 # Inject strategyname to filename
-                recname = Path(recordfilename)
-                recordfilename = str(Path.joinpath(
-                    recname.parent, f'{recname.stem}-{strategyname}').with_suffix(recname.suffix))
-            logger.info('Dumping backtest results to %s', recordfilename)
+                recordfilename = Path.joinpath(
+                    recordfilename.parent,
+                    f'{recordfilename.stem}-{strategyname}').with_suffix(recordfilename.suffix)
+            logger.info(f'Dumping backtest results to {recordfilename}')
             file_dump_json(recordfilename, records)

     def _get_ticker_list(self, processed) -> Dict[str, DataFrame]:
@@ -458,7 +458,7 @@ class Backtesting(object):
         for strategy, results in all_results.items():
             if self.config.get('export', False):
-                self._store_backtest_result(self.config['exportfilename'], results,
+                self._store_backtest_result(Path(self.config['exportfilename']), results,
                                             strategy if len(self.strategylist) > 1 else None)
             print(f"Result for strategy {strategy}")


@@ -178,16 +178,13 @@ def test_load_cached_data_for_updating(mocker) -> None:
     # timeframe starts earlier than the cached data
     # should fully update data
     timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
-    data, start_ts = load_cached_data_for_updating(test_filename,
-                                                   '1m',
-                                                   timerange)
+    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == []
     assert start_ts == test_data[0][0] - 1000

     # same with 'line' timeframe
     num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 120
-    data, start_ts = load_cached_data_for_updating(test_filename,
-                                                   '1m',
-                                                   TimeRange(None, 'line', 0, -num_lines))
+    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m',
+                                                   TimeRange(None, 'line', 0, -num_lines))
     assert data == []
     assert start_ts < test_data[0][0] - 1
@@ -195,36 +192,29 @@ def test_load_cached_data_for_updating(mocker) -> None:
     # timeframe starts in the center of the cached data
     # should return the chached data w/o the last item
     timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0)
-    data, start_ts = load_cached_data_for_updating(test_filename,
-                                                   '1m',
-                                                   timerange)
+    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == test_data[:-1]
     assert test_data[-2][0] < start_ts < test_data[-1][0]

     # same with 'line' timeframe
     num_lines = (test_data[-1][0] - test_data[1][0]) / 1000 / 60 + 30
     timerange = TimeRange(None, 'line', 0, -num_lines)
-    data, start_ts = load_cached_data_for_updating(test_filename,
-                                                   '1m',
-                                                   timerange)
+    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == test_data[:-1]
     assert test_data[-2][0] < start_ts < test_data[-1][0]

     # timeframe starts after the chached data
     # should return the chached data w/o the last item
     timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 1, 0)
-    data, start_ts = load_cached_data_for_updating(test_filename,
-                                                   '1m',
-                                                   timerange)
+    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == test_data[:-1]
     assert test_data[-2][0] < start_ts < test_data[-1][0]

-    # same with 'line' timeframe
+    # Try loading last 30 lines.
+    # Not supported by load_cached_data_for_updating, we always need to get the full data.
     num_lines = 30
     timerange = TimeRange(None, 'line', 0, -num_lines)
-    data, start_ts = load_cached_data_for_updating(test_filename,
-                                                   '1m',
-                                                   timerange)
+    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == test_data[:-1]
     assert test_data[-2][0] < start_ts < test_data[-1][0]
@@ -232,35 +222,27 @@ def test_load_cached_data_for_updating(mocker) -> None:
     # should return the chached data w/o the last item
     num_lines = 30
     timerange = TimeRange(None, 'line', 0, -num_lines)
-    data, start_ts = load_cached_data_for_updating(test_filename,
-                                                   '1m',
-                                                   timerange)
+    data, start_ts = load_cached_data_for_updating(datadir, 'UNITTEST/BTC', '1m', timerange)
     assert data == test_data[:-1]
     assert test_data[-2][0] < start_ts < test_data[-1][0]

     # no datafile exist
     # should return timestamp start time
     timerange = TimeRange('date', None, now_ts - 10000, 0)
-    data, start_ts = load_cached_data_for_updating(test_filename.with_name('unexist'),
-                                                   '1m',
-                                                   timerange)
+    data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
     assert data == []
     assert start_ts == (now_ts - 10000) * 1000

     # same with 'line' timeframe
     num_lines = 30
     timerange = TimeRange(None, 'line', 0, -num_lines)
-    data, start_ts = load_cached_data_for_updating(test_filename.with_name('unexist'),
-                                                   '1m',
-                                                   timerange)
+    data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', timerange)
    assert data == []
    assert start_ts == (now_ts - num_lines * 60) * 1000

     # no datafile exist, no timeframe is set
     # should return an empty array and None
-    data, start_ts = load_cached_data_for_updating(test_filename.with_name('unexist'),
-                                                   '1m',
-                                                   None)
+    data, start_ts = load_cached_data_for_updating(datadir, 'NONEXIST/BTC', '1m', None)
     assert data == []
     assert start_ts is None


@@ -2,6 +2,7 @@
 import math
 import random
+from pathlib import Path
 from unittest.mock import MagicMock

 import numpy as np
@@ -785,10 +786,10 @@ def test_backtest_record(default_conf, fee, mocker):
     # reset test to test with strategy name
     names = []
     records = []
-    backtesting._store_backtest_result("backtest-result.json", results, "DefStrat")
+    backtesting._store_backtest_result(Path("backtest-result.json"), results, "DefStrat")
     assert len(results) == 4
     # Assert file_dump_json was only called once
-    assert names == ['backtest-result-DefStrat.json']
+    assert names == [Path('backtest-result-DefStrat.json')]
     records = records[0]
     # Ensure records are of correct type
     assert len(records) == 4


@@ -1,6 +1,7 @@
 # pragma pylint: disable=missing-docstring,C0103
 import datetime
+from pathlib import Path
 from unittest.mock import MagicMock

 from freqtrade.data.converter import parse_ticker_dataframe
@@ -34,12 +35,12 @@ def test_datesarray_to_datetimearray(ticker_history_list):
 def test_file_dump_json(mocker) -> None:
     file_open = mocker.patch('freqtrade.misc.open', MagicMock())
     json_dump = mocker.patch('rapidjson.dump', MagicMock())
-    file_dump_json('somefile', [1, 2, 3])
+    file_dump_json(Path('somefile'), [1, 2, 3])
     assert file_open.call_count == 1
     assert json_dump.call_count == 1
     file_open = mocker.patch('freqtrade.misc.gzip.open', MagicMock())
     json_dump = mocker.patch('rapidjson.dump', MagicMock())
-    file_dump_json('somefile', [1, 2, 3], True)
+    file_dump_json(Path('somefile'), [1, 2, 3], True)
     assert file_open.call_count == 1
     assert json_dump.call_count == 1