From 039d6384edf79d87556f5fa66b5917d1a77f9a6b Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 10 Aug 2021 09:48:26 +0200 Subject: [PATCH] Stream hyperopt-result in small batches Avoiding memory-exhaustion on huge hyperopt results closes #5305 closes #5149 --- freqtrade/optimize/hyperopt_epoch_filters.py | 12 ++--- freqtrade/optimize/hyperopt_tools.py | 54 +++++++++++--------- tests/commands/test_commands.py | 24 +++++++-- tests/optimize/test_hyperopt_tools.py | 24 +++++++-- 4 files changed, 78 insertions(+), 36 deletions(-) diff --git a/freqtrade/optimize/hyperopt_epoch_filters.py b/freqtrade/optimize/hyperopt_epoch_filters.py index b70db94af..80cc89d4b 100644 --- a/freqtrade/optimize/hyperopt_epoch_filters.py +++ b/freqtrade/optimize/hyperopt_epoch_filters.py @@ -7,7 +7,7 @@ from freqtrade.exceptions import OperationalException logger = logging.getLogger(__name__) -def hyperopt_filter_epochs(epochs: List, filteroptions: dict) -> List: +def hyperopt_filter_epochs(epochs: List, filteroptions: dict, log: bool = True) -> List: """ Filter our items from the list of hyperopt results """ @@ -24,11 +24,11 @@ def hyperopt_filter_epochs(epochs: List, filteroptions: dict) -> List: epochs = _hyperopt_filter_epochs_profit(epochs, filteroptions) epochs = _hyperopt_filter_epochs_objective(epochs, filteroptions) - - logger.info(f"{len(epochs)} " + - ("best " if filteroptions['only_best'] else "") + - ("profitable " if filteroptions['only_profitable'] else "") + - "epochs found.") + if log: + logger.info(f"{len(epochs)} " + + ("best " if filteroptions['only_best'] else "") + + ("profitable " if filteroptions['only_profitable'] else "") + + "epochs found.") return epochs diff --git a/freqtrade/optimize/hyperopt_tools.py b/freqtrade/optimize/hyperopt_tools.py index 0bb6aba15..b2e024f65 100755 --- a/freqtrade/optimize/hyperopt_tools.py +++ b/freqtrade/optimize/hyperopt_tools.py @@ -4,7 +4,7 @@ import logging from copy import deepcopy from datetime import datetime, timezone from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Iterator, List, Optional, Tuple import numpy as np import rapidjson @@ -90,37 +90,33 @@ class HyperoptTools(): return any(s in config['spaces'] for s in [space, 'all', 'default']) @staticmethod - def _read_results(results_file: Path) -> List: + def _read_results(results_file: Path, batch_size: int = 10) -> Iterator[List[Any]]: """ - Read hyperopt results from file + Stream hyperopt results from file """ import rapidjson logger.info(f"Reading epochs from '{results_file}'") with results_file.open('r') as f: - data = [rapidjson.loads(line) for line in f] - return data + data = [] + for line in f: + data += [rapidjson.loads(line)] + if len(data) >= batch_size: + yield data + data = [] + yield data @staticmethod - def load_previous_results(results_file: Path) -> List: - """ - Load data for epochs from the file if we have one - """ - epochs: List = [] + def _test_hyperopt_results_exist(results_file) -> bool: if results_file.is_file() and results_file.stat().st_size > 0: if results_file.suffix == '.pickle': raise OperationalException( "Legacy hyperopt results are no longer supported." "Please rerun hyperopt or use an older version to load this file." ) - else: - epochs = HyperoptTools._read_results(results_file) - # Detection of some old format, without 'is_best' field saved - if epochs[0].get('is_best') is None: - raise OperationalException( - "The file with HyperoptTools results is incompatible with this version " - "of Freqtrade and cannot be loaded.") - logger.info(f"Loaded {len(epochs)} previous evaluations from disk.") - return epochs + return True + else: + # No file found. + return False @staticmethod def load_filtered_results(results_file: Path, config: Dict[str, Any]) -> Tuple[List, int]: @@ -138,12 +134,24 @@ class HyperoptTools(): 'filter_min_objective': config.get('hyperopt_list_min_objective', None), 'filter_max_objective': config.get('hyperopt_list_max_objective', None), } + if not HyperoptTools._test_hyperopt_results_exist(results_file): + # No file found. + return [], 0 - # Previous evaluations - epochs = HyperoptTools.load_previous_results(results_file) - total_epochs = len(epochs) + epochs = [] + total_epochs = 0 + for epochs_tmp in HyperoptTools._read_results(results_file): + if total_epochs == 0 and epochs_tmp[0].get('is_best') is None: + raise OperationalException( + "The file with HyperoptTools results is incompatible with this version " + "of Freqtrade and cannot be loaded.") + total_epochs += len(epochs_tmp) + epochs += hyperopt_filter_epochs(epochs_tmp, filteroptions, log=False) - epochs = hyperopt_filter_epochs(epochs, filteroptions) + logger.info(f"Loaded {total_epochs} previous evaluations from disk.") + + # Final filter run ... + epochs = hyperopt_filter_epochs(epochs, filteroptions, log=True) return epochs, total_epochs diff --git a/tests/commands/test_commands.py b/tests/commands/test_commands.py index 80dd04b27..fc5101979 100644 --- a/tests/commands/test_commands.py +++ b/tests/commands/test_commands.py @@ -941,8 +941,16 @@ def test_start_test_pairlist(mocker, caplog, tickers, default_conf, capsys): def test_hyperopt_list(mocker, capsys, caplog, saved_hyperopt_results, tmpdir): csv_file = Path(tmpdir) / "test.csv" mocker.patch( - 'freqtrade.optimize.hyperopt_tools.HyperoptTools.load_previous_results', - MagicMock(return_value=saved_hyperopt_results) + 'freqtrade.optimize.hyperopt_tools.HyperoptTools._test_hyperopt_results_exist', + return_value=True + ) + + def fake_iterator(*args, **kwargs): + yield from [saved_hyperopt_results] + + mocker.patch( + 'freqtrade.optimize.hyperopt_tools.HyperoptTools._read_results', + side_effect=fake_iterator ) args = [ @@ -1175,8 +1183,16 @@ def test_hyperopt_list(mocker, capsys, caplog, saved_hyperopt_results, tmpdir): def test_hyperopt_show(mocker, capsys, saved_hyperopt_results): mocker.patch( - 'freqtrade.optimize.hyperopt_tools.HyperoptTools.load_previous_results', - MagicMock(return_value=saved_hyperopt_results) + 'freqtrade.optimize.hyperopt_tools.HyperoptTools._test_hyperopt_results_exist', + return_value=True + ) + + def fake_iterator(*args, **kwargs): + yield from [saved_hyperopt_results] + + mocker.patch( + 'freqtrade.optimize.hyperopt_tools.HyperoptTools._read_results', + side_effect=fake_iterator ) mocker.patch('freqtrade.commands.hyperopt_commands.show_backtest_result') diff --git a/tests/optimize/test_hyperopt_tools.py b/tests/optimize/test_hyperopt_tools.py index d59a44da7..cbcb13384 100644 --- a/tests/optimize/test_hyperopt_tools.py +++ b/tests/optimize/test_hyperopt_tools.py @@ -20,9 +20,14 @@ def create_results() -> List[Dict]: def test_save_results_saves_epochs(hyperopt, tmpdir, caplog) -> None: + + hyperopt.results_file = Path(tmpdir / 'ut_results.fthypt') + + hyperopt_epochs = HyperoptTools.load_filtered_results(hyperopt.results_file, {}) + assert hyperopt_epochs == ([], 0) + # Test writing to temp dir and reading again epochs = create_results() - hyperopt.results_file = Path(tmpdir / 'ut_results.fthypt') caplog.set_level(logging.DEBUG) @@ -33,15 +38,28 @@ def test_save_results_saves_epochs(hyperopt, tmpdir, caplog) -> None: hyperopt._save_result(epochs[0]) assert log_has(f"2 epochs saved to '{hyperopt.results_file}'.", caplog) - hyperopt_epochs = HyperoptTools.load_previous_results(hyperopt.results_file) + hyperopt_epochs = HyperoptTools.load_filtered_results(hyperopt.results_file, {}) assert len(hyperopt_epochs) == 2 + assert hyperopt_epochs[1] == 2 + assert len(hyperopt_epochs[0]) == 2 + + result_gen = HyperoptTools._read_results(hyperopt.results_file, 1) + epoch = next(result_gen) + assert len(epoch) == 1 + assert epoch[0] == epochs[0] + epoch = next(result_gen) + assert len(epoch) == 1 + epoch = next(result_gen) + assert len(epoch) == 0 + with pytest.raises(StopIteration): + next(result_gen) def test_load_previous_results2(mocker, testdatadir, caplog) -> None: results_file = testdatadir / 'hyperopt_results_SampleStrategy.pickle' with pytest.raises(OperationalException, match=r"Legacy hyperopt results are no longer supported.*"): - HyperoptTools.load_previous_results(results_file) + HyperoptTools.load_filtered_results(results_file, {}) @pytest.mark.parametrize("spaces, expected_results", [