Merge pull request #4930 from freqtrade/hyperopt_memory

Hyperopt memory problems
This commit is contained in:
Matthias 2021-05-15 07:12:57 +02:00 committed by GitHub
commit a6c644161d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 139 additions and 132 deletions

View File

@ -9,11 +9,11 @@ import random
import warnings
from datetime import datetime, timezone
from math import ceil
from operator import itemgetter
from pathlib import Path
from typing import Any, Dict, List, Optional
import progressbar
import rapidjson
from colorama import Fore, Style
from colorama import init as colorama_init
from joblib import Parallel, cpu_count, delayed, dump, load, wrap_non_picklable_objects
@ -86,7 +86,7 @@ class Hyperopt:
time_now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
strategy = str(self.config['strategy'])
self.results_file: Path = (self.config['user_data_dir'] / 'hyperopt_results' /
f'strategy_{strategy}_hyperopt_results_{time_now}.pickle')
f'strategy_{strategy}_{time_now}.fthypt')
self.data_pickle_file = (self.config['user_data_dir'] /
'hyperopt_results' / 'hyperopt_tickerdata.pkl')
self.total_epochs = config.get('epochs', 0)
@ -96,9 +96,7 @@ class Hyperopt:
self.clean_hyperopt()
self.num_epochs_saved = 0
# Previous evaluations
self.epochs: List = []
self.current_best_epoch: Optional[Dict[str, Any]] = None
# Populate functions here (hasattr is slow so should not be run during "regular" operations)
if hasattr(self.custom_hyperopt, 'populate_indicators'):
@ -156,15 +154,18 @@ class Hyperopt:
# and the values are taken from the list of parameters.
return {d.name: v for d, v in zip(dimensions, raw_params)}
def _save_results(self) -> None:
def _save_result(self, epoch: Dict) -> None:
"""
Save hyperopt results to file
Store one line per epoch.
While not a valid json object - this allows appending easily.
:param epoch: result dictionary for this epoch.
"""
num_epochs = len(self.epochs)
if num_epochs > self.num_epochs_saved:
logger.debug(f"Saving {num_epochs} {plural(num_epochs, 'epoch')}.")
dump(self.epochs, self.results_file)
self.num_epochs_saved = num_epochs
with self.results_file.open('a') as f:
rapidjson.dump(epoch, f, default=str, number_mode=rapidjson.NM_NATIVE)
f.write("\n")
self.num_epochs_saved += 1
logger.debug(f"{self.num_epochs_saved} {plural(self.num_epochs_saved, 'epoch')} "
f"saved to '{self.results_file}'.")
# Store hyperopt filename
@ -268,7 +269,7 @@ class Hyperopt:
self.backtesting.strategy.trailing_only_offset_is_reached = \
d['trailing_only_offset_is_reached']
processed = load(self.data_pickle_file)
processed = load(self.data_pickle_file, mmap_mode='r+')
bt_results = self.backtesting.backtest(
processed=processed,
@ -343,12 +344,7 @@ class Hyperopt:
def _set_random_state(self, random_state: Optional[int]) -> int:
return random_state or random.randint(1, 2**16 - 1)
def start(self) -> None:
self.random_state = self._set_random_state(self.config.get('hyperopt_random_state', None))
logger.info(f"Using optimizer random state: {self.random_state}")
self.hyperopt_table_header = -1
# Initialize spaces ...
self.init_spaces()
def prepare_hyperopt_data(self) -> None:
data, timerange = self.backtesting.load_bt_data()
logger.info("Dataload complete. Calculating indicators")
preprocessed = self.backtesting.strategy.ohlcvdata_to_dataframe(data)
@ -365,6 +361,15 @@ class Hyperopt:
dump(preprocessed, self.data_pickle_file)
def start(self) -> None:
self.random_state = self._set_random_state(self.config.get('hyperopt_random_state', None))
logger.info(f"Using optimizer random state: {self.random_state}")
self.hyperopt_table_header = -1
# Initialize spaces ...
self.init_spaces()
self.prepare_hyperopt_data()
# We don't need exchange instance anymore while running hyperopt
self.backtesting.exchange.close()
self.backtesting.exchange._api = None # type: ignore
@ -442,25 +447,21 @@ class Hyperopt:
if is_best:
self.current_best_loss = val['loss']
self.epochs.append(val)
self.current_best_epoch = val
# Save results after each best epoch and every 100 epochs
if is_best or current % 100 == 0:
self._save_results()
self._save_result(val)
pbar.update(current)
except KeyboardInterrupt:
print('User interrupted..')
self._save_results()
logger.info(f"{self.num_epochs_saved} {plural(self.num_epochs_saved, 'epoch')} "
f"saved to '{self.results_file}'.")
if self.epochs:
sorted_epochs = sorted(self.epochs, key=itemgetter('loss'))
best_epoch = sorted_epochs[0]
HyperoptTools.print_epoch_details(best_epoch, self.total_epochs, self.print_json)
if self.current_best_epoch:
HyperoptTools.print_epoch_details(self.current_best_epoch, self.total_epochs,
self.print_json)
else:
# This is printed when Ctrl+C is pressed quickly, before first epochs have
# a chance to be evaluated.

View File

@ -31,15 +31,27 @@ class HyperoptTools():
else:
return any(s in config['spaces'] for s in [space, 'all', 'default'])
@staticmethod
def _read_results_pickle(results_file: Path) -> List:
"""
Read hyperopt results from pickle file
LEGACY method - new files are written as json and cannot be read with this method.
"""
from joblib import load
logger.info(f"Reading pickled epochs from '{results_file}'")
data = load(results_file)
return data
@staticmethod
def _read_results(results_file: Path) -> List:
"""
Read hyperopt results from file
"""
from joblib import load
logger.info("Reading epochs from '%s'", results_file)
data = load(results_file)
import rapidjson
logger.info(f"Reading epochs from '{results_file}'")
with results_file.open('r') as f:
data = [rapidjson.loads(line) for line in f]
return data
@staticmethod
@ -49,6 +61,9 @@ class HyperoptTools():
"""
epochs: List = []
if results_file.is_file() and results_file.stat().st_size > 0:
if results_file.suffix == '.pickle':
epochs = HyperoptTools._read_results_pickle(results_file)
else:
epochs = HyperoptTools._read_results(results_file)
# Detection of some old format, without 'is_best' field saved
if epochs[0].get('is_best') is None:
@ -167,7 +182,7 @@ class HyperoptTools():
@staticmethod
def is_best_loss(results, current_best_loss: float) -> bool:
return results['loss'] < current_best_loss
return bool(results['loss'] < current_best_loss)
@staticmethod
def format_results_explanation_string(results_metrics: Dict, stake_currency: str) -> str:

View File

@ -313,9 +313,9 @@ def generate_strategy_stats(btdata: Dict[str, DataFrame],
'profit_median': results['profit_ratio'].median() if len(results) > 0 else 0,
'profit_total': results['profit_abs'].sum() / starting_balance,
'profit_total_abs': results['profit_abs'].sum(),
'backtest_start': min_date,
'backtest_start': min_date.strftime(DATETIME_PRINT_FORMAT),
'backtest_start_ts': int(min_date.timestamp() * 1000),
'backtest_end': max_date,
'backtest_end': max_date.strftime(DATETIME_PRINT_FORMAT),
'backtest_end_ts': int(max_date.timestamp() * 1000),
'backtest_days': backtest_days,
@ -362,9 +362,9 @@ def generate_strategy_stats(btdata: Dict[str, DataFrame],
strat_stats.update({
'max_drawdown': max_drawdown,
'max_drawdown_abs': drawdown_abs,
'drawdown_start': drawdown_start,
'drawdown_start': drawdown_start.strftime(DATETIME_PRINT_FORMAT),
'drawdown_start_ts': drawdown_start.timestamp() * 1000,
'drawdown_end': drawdown_end,
'drawdown_end': drawdown_end.strftime(DATETIME_PRINT_FORMAT),
'drawdown_end_ts': drawdown_end.timestamp() * 1000,
'max_drawdown_low': low_val,
@ -497,8 +497,8 @@ def text_table_add_metrics(strat_results: Dict) -> str:
best_trade = max(strat_results['trades'], key=lambda x: x['profit_ratio'])
worst_trade = min(strat_results['trades'], key=lambda x: x['profit_ratio'])
metrics = [
('Backtesting from', strat_results['backtest_start'].strftime(DATETIME_PRINT_FORMAT)),
('Backtesting to', strat_results['backtest_end'].strftime(DATETIME_PRINT_FORMAT)),
('Backtesting from', strat_results['backtest_start']),
('Backtesting to', strat_results['backtest_end']),
('Max open trades', strat_results['max_open_trades']),
('', ''), # Empty line to improve readability
('Total trades', strat_results['total_trades']),
@ -546,8 +546,8 @@ def text_table_add_metrics(strat_results: Dict) -> str:
strat_results['stake_currency'])),
('Drawdown low', round_coin_value(strat_results['max_drawdown_low'],
strat_results['stake_currency'])),
('Drawdown Start', strat_results['drawdown_start'].strftime(DATETIME_PRINT_FORMAT)),
('Drawdown End', strat_results['drawdown_end'].strftime(DATETIME_PRINT_FORMAT)),
('Drawdown Start', strat_results['drawdown_start']),
('Drawdown End', strat_results['drawdown_end']),
('Market change', f"{round(strat_results['market_change'] * 100, 2)}%"),
]

View File

@ -31,23 +31,7 @@ from .hyperopts.default_hyperopt import DefaultHyperOpt
# Functions for recurrent object patching
def create_results(mocker, hyperopt, testdatadir) -> List[Dict]:
"""
When creating results, mock the hyperopt so that *by default*
- we don't create any pickle'd files in the filesystem
- we might have a pickle'd file so make sure that we return
false when looking for it
"""
hyperopt.results_file = testdatadir / 'optimize/ut_results.pickle'
mocker.patch.object(Path, "is_file", MagicMock(return_value=False))
stat_mock = MagicMock()
stat_mock.st_size = 1
mocker.patch.object(Path, "stat", MagicMock(return_value=stat_mock))
mocker.patch.object(Path, "unlink", MagicMock(return_value=True))
mocker.patch('freqtrade.optimize.hyperopt.dump', return_value=None)
mocker.patch('freqtrade.optimize.hyperopt.file_dump_json')
def create_results() -> List[Dict]:
return [{'loss': 1, 'result': 'foo', 'params': {}, 'is_best': True}]
@ -321,54 +305,49 @@ def test_no_log_if_loss_does_not_improve(hyperopt, caplog) -> None:
assert caplog.record_tuples == []
def test_save_results_saves_epochs(mocker, hyperopt, testdatadir, caplog) -> None:
epochs = create_results(mocker, hyperopt, testdatadir)
mock_dump = mocker.patch('freqtrade.optimize.hyperopt.dump', return_value=None)
mock_dump_json = mocker.patch('freqtrade.optimize.hyperopt.file_dump_json', return_value=None)
results_file = testdatadir / 'optimize' / 'ut_results.pickle'
def test_save_results_saves_epochs(mocker, hyperopt, tmpdir, caplog) -> None:
# Test writing to temp dir and reading again
epochs = create_results()
hyperopt.results_file = Path(tmpdir / 'ut_results.fthypt')
caplog.set_level(logging.DEBUG)
hyperopt.epochs = epochs
hyperopt._save_results()
assert log_has(f"1 epoch saved to '{results_file}'.", caplog)
mock_dump.assert_called_once()
mock_dump_json.assert_called_once()
for epoch in epochs:
hyperopt._save_result(epoch)
assert log_has(f"1 epoch saved to '{hyperopt.results_file}'.", caplog)
hyperopt.epochs = epochs + epochs
hyperopt._save_results()
assert log_has(f"2 epochs saved to '{results_file}'.", caplog)
hyperopt._save_result(epochs[0])
assert log_has(f"2 epochs saved to '{hyperopt.results_file}'.", caplog)
hyperopt_epochs = HyperoptTools.load_previous_results(hyperopt.results_file)
assert len(hyperopt_epochs) == 2
def test_read_results_returns_epochs(mocker, hyperopt, testdatadir, caplog) -> None:
epochs = create_results(mocker, hyperopt, testdatadir)
mock_load = mocker.patch('joblib.load', return_value=epochs)
results_file = testdatadir / 'optimize' / 'ut_results.pickle'
hyperopt_epochs = HyperoptTools._read_results(results_file)
assert log_has(f"Reading epochs from '{results_file}'", caplog)
assert hyperopt_epochs == epochs
mock_load.assert_called_once()
def test_load_previous_results(testdatadir, caplog) -> None:
def test_load_previous_results(mocker, hyperopt, testdatadir, caplog) -> None:
epochs = create_results(mocker, hyperopt, testdatadir)
mock_load = mocker.patch('joblib.load', return_value=epochs)
mocker.patch.object(Path, 'is_file', MagicMock(return_value=True))
statmock = MagicMock()
statmock.st_size = 5
# mocker.patch.object(Path, 'stat', MagicMock(return_value=statmock))
results_file = testdatadir / 'optimize' / 'ut_results.pickle'
results_file = testdatadir / 'hyperopt_results_SampleStrategy.pickle'
hyperopt_epochs = HyperoptTools.load_previous_results(results_file)
assert hyperopt_epochs == epochs
mock_load.assert_called_once()
assert len(hyperopt_epochs) == 5
assert log_has_re(r"Reading pickled epochs from .*", caplog)
del epochs[0]['is_best']
mock_load = mocker.patch('joblib.load', return_value=epochs)
caplog.clear()
with pytest.raises(OperationalException):
# Modern version
results_file = testdatadir / 'strategy_SampleStrategy.fthypt'
hyperopt_epochs = HyperoptTools.load_previous_results(results_file)
assert len(hyperopt_epochs) == 5
assert log_has_re(r"Reading epochs from .*", caplog)
def test_load_previous_results2(mocker, testdatadir, caplog) -> None:
mocker.patch('freqtrade.optimize.hyperopt_tools.HyperoptTools._read_results_pickle',
return_value=[{'asdf': '222'}])
results_file = testdatadir / 'hyperopt_results_SampleStrategy.pickle'
with pytest.raises(OperationalException, match=r"The file .* incompatible.*"):
HyperoptTools.load_previous_results(results_file)
@ -386,7 +365,8 @@ def test_roi_table_generation(hyperopt) -> None:
def test_start_calls_optimizer(mocker, hyperopt_conf, capsys) -> None:
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump', MagicMock())
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump')
dumper2 = mocker.patch('freqtrade.optimize.hyperopt.Hyperopt._save_result')
mocker.patch('freqtrade.optimize.hyperopt.file_dump_json')
mocker.patch('freqtrade.optimize.backtesting.Backtesting.load_bt_data',
@ -425,9 +405,9 @@ def test_start_calls_optimizer(mocker, hyperopt_conf, capsys) -> None:
out, err = capsys.readouterr()
assert 'Best result:\n\n* 1/1: foo result Objective: 1.00000\n' in out
assert dumper.called
# Should be called twice, once for historical candle data, once to save evaluations
assert dumper.call_count == 2
# Should be called for historical candle data
assert dumper.call_count == 1
assert dumper2.call_count == 1
assert hasattr(hyperopt.backtesting.strategy, "advise_sell")
assert hasattr(hyperopt.backtesting.strategy, "advise_buy")
assert hasattr(hyperopt, "max_open_trades")
@ -714,7 +694,8 @@ def test_clean_hyperopt(mocker, hyperopt_conf, caplog):
def test_print_json_spaces_all(mocker, hyperopt_conf, capsys) -> None:
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump', MagicMock())
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump')
dumper2 = mocker.patch('freqtrade.optimize.hyperopt.Hyperopt._save_result')
mocker.patch('freqtrade.optimize.hyperopt.file_dump_json')
mocker.patch('freqtrade.optimize.backtesting.Backtesting.load_bt_data',
@ -765,13 +746,14 @@ def test_print_json_spaces_all(mocker, hyperopt_conf, capsys) -> None:
':{},"stoploss":null,"trailing_stop":null}'
)
assert result_str in out # noqa: E501
assert dumper.called
# Should be called twice, once for historical candle data, once to save evaluations
assert dumper.call_count == 2
# Should be called for historical candle data
assert dumper.call_count == 1
assert dumper2.call_count == 1
def test_print_json_spaces_default(mocker, hyperopt_conf, capsys) -> None:
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump', MagicMock())
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump')
dumper2 = mocker.patch('freqtrade.optimize.hyperopt.Hyperopt._save_result')
mocker.patch('freqtrade.optimize.hyperopt.file_dump_json')
mocker.patch('freqtrade.optimize.backtesting.Backtesting.load_bt_data',
MagicMock(return_value=(MagicMock(), None)))
@ -813,13 +795,14 @@ def test_print_json_spaces_default(mocker, hyperopt_conf, capsys) -> None:
out, err = capsys.readouterr()
assert '{"params":{"mfi-value":null,"sell-mfi-value":null},"minimal_roi":{},"stoploss":null}' in out # noqa: E501
assert dumper.called
# Should be called twice, once for historical candle data, once to save evaluations
assert dumper.call_count == 2
# Should be called for historical candle data
assert dumper.call_count == 1
assert dumper2.call_count == 1
def test_print_json_spaces_roi_stoploss(mocker, hyperopt_conf, capsys) -> None:
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump', MagicMock())
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump')
dumper2 = mocker.patch('freqtrade.optimize.hyperopt.Hyperopt._save_result')
mocker.patch('freqtrade.optimize.hyperopt.file_dump_json')
mocker.patch('freqtrade.optimize.backtesting.Backtesting.load_bt_data',
MagicMock(return_value=(MagicMock(), None)))
@ -860,13 +843,14 @@ def test_print_json_spaces_roi_stoploss(mocker, hyperopt_conf, capsys) -> None:
out, err = capsys.readouterr()
assert '{"minimal_roi":{},"stoploss":null}' in out
assert dumper.called
# Should be called twice, once for historical candle data, once to save evaluations
assert dumper.call_count == 2
assert dumper.call_count == 1
assert dumper2.call_count == 1
def test_simplified_interface_roi_stoploss(mocker, hyperopt_conf, capsys) -> None:
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump', MagicMock())
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump')
dumper2 = mocker.patch('freqtrade.optimize.hyperopt.Hyperopt._save_result')
mocker.patch('freqtrade.optimize.hyperopt.file_dump_json')
mocker.patch('freqtrade.optimize.backtesting.Backtesting.load_bt_data',
MagicMock(return_value=(MagicMock(), None)))
@ -908,9 +892,9 @@ def test_simplified_interface_roi_stoploss(mocker, hyperopt_conf, capsys) -> Non
out, err = capsys.readouterr()
assert 'Best result:\n\n* 1/1: foo result Objective: 1.00000\n' in out
assert dumper.called
# Should be called twice, once for historical candle data, once to save evaluations
assert dumper.call_count == 2
assert dumper.call_count == 1
assert dumper2.call_count == 1
assert hasattr(hyperopt.backtesting.strategy, "advise_sell")
assert hasattr(hyperopt.backtesting.strategy, "advise_buy")
assert hasattr(hyperopt, "max_open_trades")
@ -946,7 +930,8 @@ def test_simplified_interface_all_failed(mocker, hyperopt_conf) -> None:
def test_simplified_interface_buy(mocker, hyperopt_conf, capsys) -> None:
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump', MagicMock())
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump')
dumper2 = mocker.patch('freqtrade.optimize.hyperopt.Hyperopt._save_result')
mocker.patch('freqtrade.optimize.hyperopt.file_dump_json')
mocker.patch('freqtrade.optimize.backtesting.Backtesting.load_bt_data',
MagicMock(return_value=(MagicMock(), None)))
@ -989,8 +974,8 @@ def test_simplified_interface_buy(mocker, hyperopt_conf, capsys) -> None:
out, err = capsys.readouterr()
assert 'Best result:\n\n* 1/1: foo result Objective: 1.00000\n' in out
assert dumper.called
# Should be called twice, once for historical candle data, once to save evaluations
assert dumper.call_count == 2
assert dumper.call_count == 1
assert dumper2.call_count == 1
assert hasattr(hyperopt.backtesting.strategy, "advise_sell")
assert hasattr(hyperopt.backtesting.strategy, "advise_buy")
assert hasattr(hyperopt, "max_open_trades")
@ -999,7 +984,8 @@ def test_simplified_interface_buy(mocker, hyperopt_conf, capsys) -> None:
def test_simplified_interface_sell(mocker, hyperopt_conf, capsys) -> None:
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump', MagicMock())
dumper = mocker.patch('freqtrade.optimize.hyperopt.dump')
dumper2 = mocker.patch('freqtrade.optimize.hyperopt.Hyperopt._save_result')
mocker.patch('freqtrade.optimize.hyperopt.file_dump_json')
mocker.patch('freqtrade.optimize.backtesting.Backtesting.load_bt_data',
MagicMock(return_value=(MagicMock(), None)))
@ -1042,8 +1028,8 @@ def test_simplified_interface_sell(mocker, hyperopt_conf, capsys) -> None:
out, err = capsys.readouterr()
assert 'Best result:\n\n* 1/1: foo result Objective: 1.00000\n' in out
assert dumper.called
# Should be called twice, once for historical candle data, once to save evaluations
assert dumper.call_count == 2
assert dumper.call_count == 1
assert dumper2.call_count == 1
assert hasattr(hyperopt.backtesting.strategy, "advise_sell")
assert hasattr(hyperopt.backtesting.strategy, "advise_buy")
assert hasattr(hyperopt, "max_open_trades")

View File

@ -1,5 +1,5 @@
import re
from datetime import datetime, timedelta, timezone
from datetime import timedelta
from pathlib import Path
import pandas as pd
@ -7,7 +7,7 @@ import pytest
from arrow import Arrow
from freqtrade.configuration import TimeRange
from freqtrade.constants import LAST_BT_RESULT_FN
from freqtrade.constants import DATETIME_PRINT_FORMAT, LAST_BT_RESULT_FN
from freqtrade.data import history
from freqtrade.data.btanalysis import get_latest_backtest_filename, load_backtest_data
from freqtrade.edge import PairInfo
@ -97,8 +97,8 @@ def test_generate_backtest_stats(default_conf, testdatadir):
assert 'DefStrat' in stats['strategy']
assert 'strategy_comparison' in stats
strat_stats = stats['strategy']['DefStrat']
assert strat_stats['backtest_start'] == min_date.datetime
assert strat_stats['backtest_end'] == max_date.datetime
assert strat_stats['backtest_start'] == min_date.strftime(DATETIME_PRINT_FORMAT)
assert strat_stats['backtest_end'] == max_date.strftime(DATETIME_PRINT_FORMAT)
assert strat_stats['total_trades'] == len(results['DefStrat']['results'])
# Above sample had no loosing trade
assert strat_stats['max_drawdown'] == 0.0
@ -141,8 +141,8 @@ def test_generate_backtest_stats(default_conf, testdatadir):
strat_stats = stats['strategy']['DefStrat']
assert strat_stats['max_drawdown'] == 0.013803
assert strat_stats['drawdown_start'] == datetime(2017, 11, 14, 22, 10, tzinfo=timezone.utc)
assert strat_stats['drawdown_end'] == datetime(2017, 11, 14, 22, 43, tzinfo=timezone.utc)
assert strat_stats['drawdown_start'] == '2017-11-14 22:10:00'
assert strat_stats['drawdown_end'] == '2017-11-14 22:43:00'
assert strat_stats['drawdown_end_ts'] == 1510699380000
assert strat_stats['drawdown_start_ts'] == 1510697400000
assert strat_stats['pairlist'] == ['UNITTEST/BTC']

Binary file not shown.

File diff suppressed because one or more lines are too long