stable/freqtrade/optimize/hyperopt.py

370 lines
14 KiB
Python
Raw Normal View History

# pragma pylint: disable=too-many-instance-attributes, pointless-string-statement
"""
This module contains the hyperopt logic
"""
import logging
import os
import sys
from operator import itemgetter
2019-01-06 13:47:38 +00:00
from pathlib import Path
from pprint import pprint
from typing import Any, Dict, List
from joblib import Parallel, delayed, dump, load, wrap_non_picklable_objects, cpu_count
2019-01-06 13:47:38 +00:00
from pandas import DataFrame
2018-06-19 06:09:54 +00:00
from skopt import Optimizer
from skopt.space import Dimension
2018-06-18 19:40:36 +00:00
from freqtrade.configuration import Arguments
from freqtrade.data.history import load_data, get_timeframe
2018-03-02 15:22:00 +00:00
from freqtrade.optimize.backtesting import Backtesting
from freqtrade.resolvers.hyperopt_resolver import HyperOptResolver
from freqtrade.optimize.hyperopt_loss import hyperopt_loss_legacy
2018-03-25 19:37:14 +00:00
logger = logging.getLogger(__name__)
2019-05-10 07:54:44 +00:00
# Passed to the optimizer as `n_initial_points`; epochs whose index is below
# this value are marked with a leading `*` in the per-epoch log output.
INITIAL_POINTS = 30
# Loss assigned to an evaluation that produced fewer trades than the configured
# `hyperopt_min_trades`, so the optimizer steers away from that point.
MAX_LOSS = 100000 # just a big enough number to be a bad result in loss optimization
# Preprocessed ticker data: written once in `start()` and re-loaded by every
# `generate_optimizer()` call.
TICKERDATA_PICKLE = os.path.join('user_data', 'hyperopt_tickerdata.pkl')
# Default location of the stored evaluations (`Hyperopt.trials_file`).
TRIALSDATA_PICKLE = os.path.join('user_data', 'hyperopt_results.pickle')
# NOTE(review): not referenced in the visible part of this module - presumably
# used by the caller to prevent concurrent hyperopt runs; confirm.
HYPEROPT_LOCKFILE = os.path.join('user_data', 'hyperopt.lock')
2018-03-25 19:37:14 +00:00
class Hyperopt(Backtesting):
"""
Hyperopt class, this class contains all the logic to run a hyperopt simulation
To run a hyperopt:
hyperopt = Hyperopt(config)
hyperopt.start()
"""
def __init__(self, config: Dict[str, Any]) -> None:
    """
    Prepare a hyperopt run on top of the backtesting setup.

    Resolves the custom hyperopt class, initialises the tuning constants and
    the trial bookkeeping, selects the loss function and wires the buy/sell
    trend populators provided by the custom hyperopt.

    :param config: configuration dict. Keys read here: 'epochs',
        'hyperopt_clean_state', 'loss_function' (defaults to 'legacy'),
        'use_max_market_positions' and 'max_open_trades'.
    """
    super().__init__(config)
    self.custom_hyperopt = HyperOptResolver(self.config).hyperopt

    # set TARGET_TRADES to suit your number concurrent trades so its realistic
    # to the number of days
    self.target_trades = 600
    self.total_tries = config.get('epochs', 0)
    self.current_best_loss = 100

    # max average trade duration in minutes
    # if eval ends with higher value, we consider it a failed eval
    self.max_accepted_trade_duration = 300

    # This is assumed to be expected avg profit * expected trade count.
    # For example, for 0.35% avg per trade (or 0.0035 as ratio) and 1100 trades,
    # self.expected_max_profit = 3.85
    # Check that the reported Σ% values do not exceed this!
    # Note, this is ratio. 3.85 stated above means 385Σ%.
    self.expected_max_profit = 3.0

    if self.config.get('hyperopt_clean_state'):
        self.clean_hyperopt()

    # Previous evaluations
    self.trials_file = TRIALSDATA_PICKLE
    self.trials: List = []

    # Assign loss function. `calculate_loss` is always assigned here: the custom
    # loss is used only when it was requested AND the custom hyperopt provides
    # it; every other combination ends up with the legacy loss.
    loss_function = self.config.get('loss_function', 'legacy')
    custom_loss = getattr(self.custom_hyperopt, 'hyperopt_loss_custom', None)
    if loss_function == 'custom' and custom_loss is not None:
        self.calculate_loss = custom_loss  # type: ignore
    else:
        if loss_function != 'legacy':
            # Fallback to avoid odd crashes when the requested loss function
            # is unknown or the custom hyperopt does not implement it.
            # TODO: Maybe this should just stop hyperopt completely?
            logger.warning("Could not load hyperopt configuration. "
                           "Falling back to legacy configuration.")
        self.calculate_loss = hyperopt_loss_legacy

    # Populate functions here (hasattr is slow so should not be run during "regular" operations)
    if hasattr(self.custom_hyperopt, 'populate_buy_trend'):
        self.advise_buy = self.custom_hyperopt.populate_buy_trend  # type: ignore

    if hasattr(self.custom_hyperopt, 'populate_sell_trend'):
        self.advise_sell = self.custom_hyperopt.populate_sell_trend  # type: ignore

    # Use max_open_trades for hyperopt as well, except --disable-max-market-positions is set
    if self.config.get('use_max_market_positions', True):
        self.max_open_trades = self.config['max_open_trades']
    else:
        logger.debug('Ignoring max_open_trades (--disable-max-market-positions was used) ...')
        self.max_open_trades = 0
def clean_hyperopt(self):
    """
    Delete the ticker-data pickle and the trials pickle, when they exist,
    so that hyperopt restarts without any stored state.
    """
    candidates = (Path(name) for name in (TICKERDATA_PICKLE, TRIALSDATA_PICKLE))
    for pickle_path in candidates:
        if not pickle_path.is_file():
            # Nothing stored under this name - skip it.
            continue
        logger.info(f"Removing `{pickle_path}`.")
        pickle_path.unlink()
2018-06-19 06:09:54 +00:00
def get_args(self, params):
    """
    Turn a flat sequence of parameter values into a name -> value mapping.

    :param params: values, ordered like the dimensions returned by
        `self.hyperopt_space()`.
    :return: dict keyed by dimension name.
    :raises ValueError: when the number of values differs from the number
        of search-space dimensions.
    """
    space = self.hyperopt_space()
    n_dims, n_params = len(space), len(params)

    # Both sequences are paired positionally, so their lengths must agree.
    if n_dims != n_params:
        raise ValueError('Mismatch in number of search-space dimensions. '
                         f'len(dimensions)=={n_dims} and len(x)=={n_params}')

    names = [dimension.name for dimension in space]
    return dict(zip(names, params))
def save_trials(self) -> None:
    """
    Persist the collected evaluations to `self.trials_file`.

    Does nothing when no evaluation has been collected.
    """
    if not self.trials:
        return

    n_trials = len(self.trials)
    logger.info("Saving %d evaluations to '%s'", n_trials, self.trials_file)
    dump(self.trials, self.trials_file)
def read_trials(self) -> List:
    """
    Load the stored evaluations from `self.trials_file` and delete the file.

    NOTE: `load` unpickles the file content - only read files written by a
    trusted hyperopt run.

    :return: the evaluations read from disk.
    """
    source = self.trials_file
    logger.info("Reading Trials from '%s'", source)
    stored = load(source)
    # The file is dropped as soon as its content is held in memory.
    os.remove(source)
    return stored
def log_trials_result(self) -> None:
    """
    Display the best hyperopt result.

    The best trial is the one with the lowest 'loss'; on ties the earliest
    trial wins. Its parameters are pretty-printed, followed by the ROI table
    when the parameters contain 'roi_t1'. When there are no trials at all
    (e.g. the run was interrupted before the first evaluation finished) a
    warning is logged instead of failing.
    """
    if not self.trials:
        logger.warning('No evaluations available, nothing to report.')
        return

    # min() picks the same element a stable ascending sort would put first,
    # without sorting the whole list.
    best_result = min(self.trials, key=itemgetter('loss'))
    logger.info(
        'Best result:\n%s\nwith values:\n',
        best_result['result']
    )
    pprint(best_result['params'], indent=4)
    if 'roi_t1' in best_result['params']:
        logger.info('ROI table:')
        pprint(self.custom_hyperopt.generate_roi_table(best_result['params']), indent=4)
def log_results(self, results) -> None:
    """
    Print one evaluation if it beats every previous one (or always, when
    the 'print_all' config option is set); otherwise print a progress dot.

    `self.current_best_loss` is only lowered by a strictly better loss, so
    it keeps tracking the best value even when 'print_all' forces worse
    results to be printed.

    :param results: dict with the keys 'loss', 'current_tries' (0-based
        index), 'total_tries', 'result' and 'initial_point'.
    """
    print_all = self.config.get('print_all', False)
    loss = results['loss']
    is_best = loss < self.current_best_loss

    if not (print_all or is_best):
        # Progress marker for evaluations that are not printed.
        print('.', end='')
        sys.stdout.flush()
        return

    if is_best:
        self.current_best_loss = loss

    # Output human-friendly index here (starting from 1)
    current = results['current_tries'] + 1
    total = results['total_tries']
    res = results['result']
    log_msg = f'{current:5d}/{total}: {res} Objective: {loss:.5f}'
    # Initial points are flagged with a leading asterisk.
    log_msg = f'*{log_msg}' if results['initial_point'] else f' {log_msg}'
    if print_all:
        print(log_msg)
    else:
        # Start a new line so the message does not follow the progress dots.
        print('\n' + log_msg)
2018-03-17 21:43:36 +00:00
def has_space(self, space: str) -> bool:
    """
    Report whether `space` is enabled in the configuration.

    A space counts as enabled when it is listed in config['spaces'] or when
    'all' is listed there.
    """
    enabled = self.config['spaces']
    return any(candidate in enabled for candidate in (space, 'all'))
def hyperopt_space(self) -> List[Dimension]:
    """
    Collect the search-space dimensions of every enabled space.

    Dimensions are appended in the order buy, sell, roi, stoploss. Enabling
    the sell space also switches on config['experimental']['use_sell_signal'].

    :return: list of dimensions provided by the custom hyperopt.
    """
    # (space name, custom-hyperopt method providing its dimensions)
    providers = (
        ('buy', 'indicator_space'),
        ('sell', 'sell_indicator_space'),
        ('roi', 'roi_space'),
        ('stoploss', 'stoploss_space'),
    )
    dimensions: List[Dimension] = []
    for space_name, provider in providers:
        if not self.has_space(space_name):
            continue
        dimensions.extend(getattr(self.custom_hyperopt, provider)())
        if space_name == 'sell':
            # Make sure experimental is enabled
            self.config.setdefault('experimental', {})['use_sell_signal'] = True
    return dimensions
2017-12-26 08:08:10 +00:00
2018-11-07 18:46:04 +00:00
def generate_optimizer(self, _params: Dict) -> Dict:
    """
    Evaluate one point of the search space. Called once per epoch, so keep
    this function as lean as possible!

    The point is applied to the strategy (ROI table, buy/sell generators,
    stoploss - each only when its space is enabled), a backtest is run on
    the pickled ticker data and the outcome is turned into a loss.

    :param _params: parameter values, ordered like `self.hyperopt_space()`.
    :return: dict with 'loss', 'params' (name -> value) and 'result'
        (formatted summary). 'loss' is MAX_LOSS when the backtest produced
        fewer trades than config['hyperopt_min_trades'].
    """
    params = self.get_args(_params)

    if self.has_space('roi'):
        self.strategy.minimal_roi = self.custom_hyperopt.generate_roi_table(params)

    if self.has_space('buy'):
        self.advise_buy = self.custom_hyperopt.buy_strategy_generator(params)

    if self.has_space('sell'):
        self.advise_sell = self.custom_hyperopt.sell_strategy_generator(params)

    if self.has_space('stoploss'):
        self.strategy.stoploss = params['stoploss']

    processed = load(TICKERDATA_PICKLE)
    min_date, max_date = get_timeframe(processed)

    backtest_args = {
        'stake_amount': self.config['stake_amount'],
        'processed': processed,
        'max_open_trades': self.max_open_trades,
        'position_stacking': self.config.get('position_stacking', False),
        'start_date': min_date,
        'end_date': max_date,
    }
    results = self.backtest(backtest_args)
    result_explanation = self.format_results(results)
    trade_count = len(results.index)

    if trade_count < self.config['hyperopt_min_trades']:
        # Too few trades to be interesting: assign the maximum loss to cast
        # this point away from the optimization path. We do not want to
        # optimize 'hodl' strategies.
        loss = MAX_LOSS
    else:
        loss = self.calculate_loss(results=results, trade_count=trade_count,
                                   min_date=min_date.datetime, max_date=max_date.datetime)

    return {
        'loss': loss,
        'params': params,
        'result': result_explanation,
    }
def format_results(self, results: DataFrame) -> str:
    """
    Summarise backtest results in a single line of text.

    :param results: DataFrame with the columns `profit_percent`,
        `profit_abs` and `trade_duration`, one row per trade.
    :return: trade count, average and total profit, and average duration.
    """
    n_trades = len(results.index)
    mean_pct = results.profit_percent.mean() * 100.0
    abs_total = results.profit_abs.sum()
    currency = self.config['stake_currency']
    sum_pct = results.profit_percent.sum() * 100.0
    mean_duration = results.trade_duration.mean()

    pieces = [
        f'{n_trades:6d} trades. ',
        f'Avg profit {mean_pct: 5.2f}%. ',
        f'Total profit {abs_total: 11.8f} {currency} ',
        f'({sum_pct: 7.2f}Σ%). ',
        f'Avg duration {mean_duration:5.1f} mins.',
    ]
    return ''.join(pieces)
2018-07-03 08:46:56 +00:00
def get_optimizer(self, cpu_count) -> Optimizer:
    """
    Build the optimizer for the current search space.

    :param cpu_count: forwarded as `n_jobs` in `acq_optimizer_kwargs`.
    :return: Optimizer over `self.hyperopt_space()`, seeded with
        config['hyperopt_random_state'] when that key is set.
    """
    search_space = self.hyperopt_space()
    settings = {
        'base_estimator': "ET",
        'acq_optimizer': "auto",
        'n_initial_points': INITIAL_POINTS,
        'acq_optimizer_kwargs': {'n_jobs': cpu_count},
        'random_state': self.config.get('hyperopt_random_state'),
    }
    return Optimizer(search_space, **settings)
def run_optimizer_parallel(self, parallel, asked) -> List:
    """
    Evaluate every asked point through the given parallel executor.

    :param parallel: callable executor that consumes delayed tasks.
    :param asked: iterable of points, each handed to `generate_optimizer`.
    :return: whatever `parallel` returns for the submitted tasks.
    """
    def as_task(point):
        # One delayed call of the (wrapped) objective per point.
        return delayed(wrap_non_picklable_objects(self.generate_optimizer))(point)

    return parallel(as_task(point) for point in asked)
2018-06-24 12:27:53 +00:00
def load_previous_results(self):
    """
    Restore earlier evaluations into `self.trials` when a non-empty
    trials file exists; otherwise leave `self.trials` untouched.
    """
    trials_path = self.trials_file
    if not os.path.exists(trials_path) or os.path.getsize(trials_path) <= 0:
        # No file, or an empty one - nothing to restore.
        return

    self.trials = self.read_trials()
    logger.info('Loaded %d previous evaluations from disk.', len(self.trials))
2018-03-17 21:43:36 +00:00
def start(self) -> None:
    """
    Run the hyperopt process.

    Loads and preprocesses the ticker data, pickles it for the worker
    processes, restores earlier evaluations and then repeatedly asks the
    optimizer for points, evaluates them in parallel and reports the
    outcome back to the optimizer.

    Exactly `self.total_tries` points are evaluated: the last batch is
    shortened when the epoch count is not a multiple of the worker count.
    (For a non-positive `total_tries` one full batch is still run.)

    The collected trials are saved when the loop ends for any reason -
    normal completion, Ctrl-C or an unexpected error - because
    `read_trials()` removes the previous results file when loading it.
    """
    configured_range = self.config.get('timerange')
    timerange = Arguments.parse_timerange(
        None if configured_range is None else str(configured_range))
    data = load_data(
        datadir=Path(self.config['datadir']) if self.config.get('datadir') else None,
        pairs=self.config['exchange']['pair_whitelist'],
        ticker_interval=self.ticker_interval,
        refresh_pairs=self.config.get('refresh_pairs', False),
        exchange=self.exchange,
        timerange=timerange
    )

    if not data:
        logger.critical("No data found. Terminating.")
        return

    min_date, max_date = get_timeframe(data)

    logger.info(
        'Hyperopting with data from %s up to %s (%s days)..',
        min_date.isoformat(),
        max_date.isoformat(),
        (max_date - min_date).days
    )

    self.strategy.advise_indicators = \
        self.custom_hyperopt.populate_indicators  # type: ignore

    preprocessed = self.strategy.tickerdata_to_dataframe(data)

    # Workers re-load the data from this pickle in generate_optimizer().
    dump(preprocessed, TICKERDATA_PICKLE)

    # We don't need exchange instance anymore while running hyperopt
    self.exchange = None  # type: ignore

    self.load_previous_results()

    cpus = cpu_count()
    logger.info(f'Found {cpus} CPU cores. Let\'s make them scream!')
    config_jobs = self.config.get('hyperopt_jobs', -1)
    logger.info(f'Number of parallel jobs set as: {config_jobs}')

    opt = self.get_optimizer(config_jobs)
    try:
        with Parallel(n_jobs=config_jobs) as parallel:
            jobs = parallel._effective_n_jobs()
            logger.info(f'Effective number of parallel workers used: {jobs}')

            total = self.total_tries
            # Ceiling division; always run at least one batch.
            batches = max(-(-total // jobs), 1)
            done = 0
            for _ in range(batches):
                remaining = total - done
                # Shorten the last batch so that exactly `total` points run.
                n_points = min(jobs, remaining) if remaining > 0 else jobs

                asked = opt.ask(n_points=n_points)
                f_val = self.run_optimizer_parallel(parallel, asked)
                opt.tell(asked, [val['loss'] for val in f_val])
                self.trials += f_val

                for offset, val in enumerate(f_val):
                    current = done + offset
                    self.log_results({
                        'loss': val['loss'],
                        'current_tries': current,
                        'initial_point': current < INITIAL_POINTS,
                        'total_tries': self.total_tries,
                        'result': val['result'],
                    })
                    logger.debug(f"Optimizer params: {val['params']}")
                for j in range(len(f_val)):
                    logger.debug(f"Optimizer state: Xi: {opt.Xi[-j-1]}, yi: {opt.yi[-j-1]}")
                done += len(f_val)
    except KeyboardInterrupt:
        print('User interrupted..')
    finally:
        # Persist whatever has been evaluated, also when an unexpected
        # error aborts the run.
        self.save_trials()

    if self.trials:
        self.log_trials_result()
    else:
        logger.warning('No evaluations were completed, nothing to report.')