Stream hyperopt-result in small batches

Avoiding memory-exhaustion on huge hyperopt results

closes #5305
closes #5149
This commit is contained in:
Matthias 2021-08-10 09:48:26 +02:00
parent cf27968b97
commit 039d6384ed
4 changed files with 78 additions and 36 deletions

View File

@ -7,7 +7,7 @@ from freqtrade.exceptions import OperationalException
logger = logging.getLogger(__name__)
def hyperopt_filter_epochs(epochs: List, filteroptions: dict) -> List:
def hyperopt_filter_epochs(epochs: List, filteroptions: dict, log: bool = True) -> List:
"""
Filter our items from the list of hyperopt results
"""
@ -24,11 +24,11 @@ def hyperopt_filter_epochs(epochs: List, filteroptions: dict) -> List:
epochs = _hyperopt_filter_epochs_profit(epochs, filteroptions)
epochs = _hyperopt_filter_epochs_objective(epochs, filteroptions)
logger.info(f"{len(epochs)} " +
("best " if filteroptions['only_best'] else "") +
("profitable " if filteroptions['only_profitable'] else "") +
"epochs found.")
if log:
logger.info(f"{len(epochs)} " +
("best " if filteroptions['only_best'] else "") +
("profitable " if filteroptions['only_profitable'] else "") +
"epochs found.")
return epochs

View File

@ -4,7 +4,7 @@ import logging
from copy import deepcopy
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, Iterator, List, Optional, Tuple
import numpy as np
import rapidjson
@ -90,37 +90,33 @@ class HyperoptTools():
return any(s in config['spaces'] for s in [space, 'all', 'default'])
@staticmethod
def _read_results(results_file: Path) -> List:
def _read_results(results_file: Path, batch_size: int = 10) -> Iterator[List[Any]]:
"""
Read hyperopt results from file
Stream hyperopt results from file
"""
import rapidjson
logger.info(f"Reading epochs from '{results_file}'")
with results_file.open('r') as f:
data = [rapidjson.loads(line) for line in f]
return data
data = []
for line in f:
data += [rapidjson.loads(line)]
if len(data) >= batch_size:
yield data
data = []
yield data
@staticmethod
def load_previous_results(results_file: Path) -> List:
"""
Load data for epochs from the file if we have one
"""
epochs: List = []
def _test_hyperopt_results_exist(results_file) -> bool:
if results_file.is_file() and results_file.stat().st_size > 0:
if results_file.suffix == '.pickle':
raise OperationalException(
"Legacy hyperopt results are no longer supported."
"Please rerun hyperopt or use an older version to load this file."
)
else:
epochs = HyperoptTools._read_results(results_file)
# Detection of some old format, without 'is_best' field saved
if epochs[0].get('is_best') is None:
raise OperationalException(
"The file with HyperoptTools results is incompatible with this version "
"of Freqtrade and cannot be loaded.")
logger.info(f"Loaded {len(epochs)} previous evaluations from disk.")
return epochs
return True
else:
# No file found.
return False
@staticmethod
def load_filtered_results(results_file: Path, config: Dict[str, Any]) -> Tuple[List, int]:
@ -138,12 +134,24 @@ class HyperoptTools():
'filter_min_objective': config.get('hyperopt_list_min_objective', None),
'filter_max_objective': config.get('hyperopt_list_max_objective', None),
}
if not HyperoptTools._test_hyperopt_results_exist(results_file):
# No file found.
return [], 0
# Previous evaluations
epochs = HyperoptTools.load_previous_results(results_file)
total_epochs = len(epochs)
epochs = []
total_epochs = 0
for epochs_tmp in HyperoptTools._read_results(results_file):
if total_epochs == 0 and epochs_tmp[0].get('is_best') is None:
raise OperationalException(
"The file with HyperoptTools results is incompatible with this version "
"of Freqtrade and cannot be loaded.")
total_epochs += len(epochs_tmp)
epochs += hyperopt_filter_epochs(epochs_tmp, filteroptions, log=False)
epochs = hyperopt_filter_epochs(epochs, filteroptions)
logger.info(f"Loaded {total_epochs} previous evaluations from disk.")
# Final filter run ...
epochs = hyperopt_filter_epochs(epochs, filteroptions, log=True)
return epochs, total_epochs

View File

@ -941,8 +941,16 @@ def test_start_test_pairlist(mocker, caplog, tickers, default_conf, capsys):
def test_hyperopt_list(mocker, capsys, caplog, saved_hyperopt_results, tmpdir):
csv_file = Path(tmpdir) / "test.csv"
mocker.patch(
'freqtrade.optimize.hyperopt_tools.HyperoptTools.load_previous_results',
MagicMock(return_value=saved_hyperopt_results)
'freqtrade.optimize.hyperopt_tools.HyperoptTools._test_hyperopt_results_exist',
return_value=True
)
def fake_iterator(*args, **kwargs):
yield from [saved_hyperopt_results]
mocker.patch(
'freqtrade.optimize.hyperopt_tools.HyperoptTools._read_results',
side_effect=fake_iterator
)
args = [
@ -1175,8 +1183,16 @@ def test_hyperopt_list(mocker, capsys, caplog, saved_hyperopt_results, tmpdir):
def test_hyperopt_show(mocker, capsys, saved_hyperopt_results):
mocker.patch(
'freqtrade.optimize.hyperopt_tools.HyperoptTools.load_previous_results',
MagicMock(return_value=saved_hyperopt_results)
'freqtrade.optimize.hyperopt_tools.HyperoptTools._test_hyperopt_results_exist',
return_value=True
)
def fake_iterator(*args, **kwargs):
yield from [saved_hyperopt_results]
mocker.patch(
'freqtrade.optimize.hyperopt_tools.HyperoptTools._read_results',
side_effect=fake_iterator
)
mocker.patch('freqtrade.commands.hyperopt_commands.show_backtest_result')

View File

@ -20,9 +20,14 @@ def create_results() -> List[Dict]:
def test_save_results_saves_epochs(hyperopt, tmpdir, caplog) -> None:
hyperopt.results_file = Path(tmpdir / 'ut_results.fthypt')
hyperopt_epochs = HyperoptTools.load_filtered_results(hyperopt.results_file, {})
assert hyperopt_epochs == ([], 0)
# Test writing to temp dir and reading again
epochs = create_results()
hyperopt.results_file = Path(tmpdir / 'ut_results.fthypt')
caplog.set_level(logging.DEBUG)
@ -33,15 +38,28 @@ def test_save_results_saves_epochs(hyperopt, tmpdir, caplog) -> None:
hyperopt._save_result(epochs[0])
assert log_has(f"2 epochs saved to '{hyperopt.results_file}'.", caplog)
hyperopt_epochs = HyperoptTools.load_previous_results(hyperopt.results_file)
hyperopt_epochs = HyperoptTools.load_filtered_results(hyperopt.results_file, {})
assert len(hyperopt_epochs) == 2
assert hyperopt_epochs[1] == 2
assert len(hyperopt_epochs[0]) == 2
result_gen = HyperoptTools._read_results(hyperopt.results_file, 1)
epoch = next(result_gen)
assert len(epoch) == 1
assert epoch[0] == epochs[0]
epoch = next(result_gen)
assert len(epoch) == 1
epoch = next(result_gen)
assert len(epoch) == 0
with pytest.raises(StopIteration):
next(result_gen)
def test_load_previous_results2(mocker, testdatadir, caplog) -> None:
results_file = testdatadir / 'hyperopt_results_SampleStrategy.pickle'
with pytest.raises(OperationalException,
match=r"Legacy hyperopt results are no longer supported.*"):
HyperoptTools.load_previous_results(results_file)
HyperoptTools.load_filtered_results(results_file, {})
@pytest.mark.parametrize("spaces, expected_results", [