hyperopt shared mode

- shared mode uses one optimizer with shared results
- multi mode runs as many optimizers as there are jobs; results are
  only shared on ask
- add a flag (--lie-strat) to override the lie strategy used when asking
  for more than one point (see the example invocation after this list)
- make sure to ask with n_points `None` to avoid computing more
  points than needed in shared mode
- reduce the number of models to 1 in multi mode
- don't load more than the specified number of jobs when loading
  previous optimizers
- stretch the batch length to reach the epochs limit
- warn when no epochs are logged
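Example invocation (illustrative, not part of the commit): --mode accepts
single, multi, or shared per the setup code below; --lie-strat values other
than 'default' and 'random' are passed straight to scikit-optimize, which
accepts cl_min, cl_mean, and cl_max:

    freqtrade hyperopt --mode shared --n-points 4 --lie-strat cl_min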
orehunt 2020-03-14 20:53:56 +01:00
parent ef6efb7117
commit a5b44de0f6
6 changed files with 155 additions and 97 deletions

View File

@@ -26,7 +26,7 @@ ARGS_HYPEROPT = ARGS_COMMON_OPTIMIZE + [
     "hyperopt", "hyperopt_path", "position_stacking", "epochs", "spaces",
     "use_max_market_positions", "print_all", "print_colorized", "print_json", "hyperopt_jobs",
     "hyperopt_random_state", "hyperopt_min_trades", "hyperopt_continue", "hyperopt_loss", "effort",
-    "multi_opt", "points_per_opt"
+    "mode", "n_points", "lie_strat"
 ]
 ARGS_EDGE = ARGS_COMMON_OPTIMIZE + ["stoploss_range"]

View File

@@ -204,20 +204,26 @@ AVAILABLE_CLI_OPTIONS = {
         metavar='FLOAT',
         default=constants.HYPEROPT_EFFORT,
     ),
-    "multi_opt":
-    Arg('--multi',
+    "mode":
+    Arg('--mode',
         help=('Switches hyperopt to use one optimizer per job, use it',
               'when backtesting iterations are cheap (default: %(default)d).'),
-        action='store_true',
-        default=False),
-    "points_per_opt":
-    Arg('--points-per-opt',
-        help=('Controls how many points to ask at each job dispatch to each',
-              'optimizer in multi opt mode, increase if cpu usage of each core',
+        metavar='NAME',
+        default=constants.HYPEROPT_MODE),
+    "n_points":
+    Arg('--n-points',
+        help=('Controls how many points to ask to the optimizer',
+              'increase if cpu usage of each core',
               'appears low (default: %(default)d).'),
         type=int,
         metavar='INT',
-        default=constants.HYPEROPT_POINTS_PER_OPT),
+        default=constants.HYPEROPT_N_POINTS),
+    "lie_strat":
+    Arg('--lie-strat',
+        help=('Sets the strategy that the optimizer uses to lie',
+              'when asking for more than one point, ',
+              'no effect if n_point is one (default: %(default)d).'),
+        default=constants.HYPEROPT_LIE_STRAT),
     "spaces":
     Arg(
         '--spaces',

View File

@@ -270,10 +270,13 @@ class Configuration:
                              logstring='Parameter --effort detected ... '
                              'Parameter --effort detected: {}')
         self._args_to_config(config,
-                             argname='multi_opt',
-                             logstring='Hyperopt will use multiple optimizers ...')
+                             argname='mode',
+                             logstring='Hyperopt will run in {} mode ...')
         self._args_to_config(config,
-                             argname='points_per_opt',
+                             argname='explore',
+                             logstring='Acquisition strategy set to random {}...')
+        self._args_to_config(config,
+                             argname='n_points',
                              logstring='Optimizers will be asked for {} points...')
         self._args_to_config(config,
                              argname='spaces',

View File

@@ -8,7 +8,9 @@ DEFAULT_EXCHANGE = 'bittrex'
@ -8,7 +8,9 @@ DEFAULT_EXCHANGE = 'bittrex'
PROCESS_THROTTLE_SECS = 5 # sec PROCESS_THROTTLE_SECS = 5 # sec
HYPEROPT_EPOCH = 0 # epochs HYPEROPT_EPOCH = 0 # epochs
HYPEROPT_EFFORT = 0. # tune max epoch count HYPEROPT_EFFORT = 0. # tune max epoch count
HYPEROPT_POINTS_PER_OPT = 2 # tune iterations between estimations HYPEROPT_N_POINTS = 2 # tune iterations between estimations
HYPEROPT_MODE = 'single'
HYPEROPT_LIE_STRAT = 'default'
RETRY_TIMEOUT = 30 # sec RETRY_TIMEOUT = 30 # sec
DEFAULT_HYPEROPT_LOSS = 'DefaultHyperOptLoss' DEFAULT_HYPEROPT_LOSS = 'DefaultHyperOptLoss'
DEFAULT_DB_PROD_URL = 'sqlite:///tradesv3.sqlite' DEFAULT_DB_PROD_URL = 'sqlite:///tradesv3.sqlite'

View File

@@ -97,7 +97,7 @@ class Hyperopt:
         # a guessed number extracted by the space dimensions
         self.search_space_size = 0
         # total number of candles being backtested
-        self.n_samples = 0
+        self.n_candles = 0
         self.current_best_loss = VOID_LOSS
         self.current_best_epoch = 0
@@ -113,37 +113,9 @@ class Hyperopt:
         # evaluations
         self.trials: List = []
-        # optimizers
-        self.opts: List[Optimizer] = []
-        self.opt: Optimizer = None
-        backend.manager = Manager()
-        if 'multi_opt' in self.config and self.config['multi_opt']:
-            self.multi = True
-            backend.optimizers = backend.manager.Queue()
-            backend.results_board = backend.manager.Queue(maxsize=1)
-            backend.results_board.put([])
-            self.opt_base_estimator = 'GBRT'
-            self.opt_acq_optimizer = 'sampling'
-            default_n_points = 2
-        else:
-            self.multi = False
-            backend.results = backend.manager.Queue()
-            self.opt_base_estimator = 'GP'
-            self.opt_acq_optimizer = 'lbfgs'
-            default_n_points = 1
-        # in single opt assume runs are expensive so default to 1 point per ask
-        self.n_points = self.config.get('points_per_opt', default_n_points)
-        # if 0 n_points are given, don't use any base estimator (akin to random search)
-        if self.n_points < 1:
-            self.n_points = 1
-            self.opt_base_estimator = "DUMMY"
-            self.opt_acq_optimizer = "sampling"
-        # var used in epochs and batches calculation
-        self.opt_points = self.n_jobs * self.n_points
-        # models are only needed for posterior eval
-        self.n_models = max(16, self.n_jobs)
+        # configure multi mode
+        self.setup_multi()

         # Populate functions here (hasattr is slow so should not be run during "regular" operations)
         if hasattr(self.custom_hyperopt, 'populate_indicators'):
@@ -174,6 +146,60 @@ class Hyperopt:
         self.print_colorized = self.config.get('print_colorized', False)
         self.print_json = self.config.get('print_json', False)

+    def setup_multi(self):
+        # optimizers
+        self.opts: List[Optimizer] = []
+        self.opt: Optimizer = None
+        backend.manager = Manager()
+        self.mode = self.config.get('mode', 'single')
+        self.shared = False
+        if self.mode in ('multi', 'shared'):
+            self.multi = True
+            if self.mode == 'shared':
+                self.shared = True
+            backend.optimizers = backend.manager.Queue()
+            backend.results_board = backend.manager.Queue(maxsize=1)
+            backend.results_board.put({})
+            self.opt_base_estimator = 'GBRT'
+            self.opt_acq_optimizer = 'sampling'
+            # in multi opt one model is enough
+            self.n_models = 1
+            default_n_points = 2
+        else:
+            self.multi = False
+            backend.results = backend.manager.Queue()
+            self.opt_base_estimator = 'GP'
+            self.opt_acq_optimizer = 'lbfgs'
+            # models are only needed for posterior eval
+            self.n_models = min(16, self.n_jobs)
+            default_n_points = 1
+        # in single opt assume runs are expensive so default to 1 point per ask
+        self.n_points = self.config.get('n_points', default_n_points)
+        # if 0 n_points are given, don't use any base estimator (akin to random search)
+        if self.n_points < 1:
+            self.n_points = 1
+            self.opt_base_estimator = "DUMMY"
+            self.opt_acq_optimizer = "sampling"
+        if self.n_points < 2:
+            # ask_points is what is used in the ask call
+            # because when n_points is None, it doesn't
+            # waste time generating new points
+            self.ask_points = None
+        else:
+            self.ask_points = self.n_points
+        # var used in epochs and batches calculation
+        self.opt_points = self.n_jobs * (self.n_points or 1)
+        # lie strategy
+        lie_strat = self.config.get('lie_strat', 'default')
+        if lie_strat == 'default':
+            self.lie_strat = lambda: 'cl_min'
+        elif lie_strat == 'random':
+            self.lie_strat = self.get_next_point_strategy
+        else:
+            self.lie_strat = lambda: lie_strat
+
     @staticmethod
     def get_lock_filename(config: Dict[str, Any]) -> str:
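For reference: the strategy argument that lie_strat feeds into opt.ask() is
scikit-optimize's "constant liar" batching; when more than one point is asked,
pending points are told a lied objective value (the minimum, mean, or maximum
of the observed losses) so repeated asks don't return duplicates. A minimal
standalone sketch, assuming scikit-optimize is installed; the 1-d bounds and
toy objective are made up:

    from skopt import Optimizer

    # mirrors the single-opt defaults chosen above ('GP' + 'lbfgs')
    opt = Optimizer([(-2.0, 2.0)], base_estimator="GP", acq_optimizer="lbfgs")

    def objective(x):
        return (x[0] - 1.0) ** 2  # stand-in for a backtest loss

    # n_points=None yields a single point and skips the lie machinery,
    # which is why setup_multi maps n_points < 2 to ask_points = None
    x = opt.ask()
    opt.tell(x, objective(x))

    # a batched ask lies about pending points; "cl_min" is what
    # lie_strat resolves to by default
    batch = opt.ask(n_points=4, strategy="cl_min")
    opt.tell(batch, [objective(p) for p in batch])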
@@ -631,7 +657,7 @@ class Hyperopt:
                 del vals[:], void_filtered[:]
             if not to_ask:
-                to_ask.extend(opt.ask(n_points=self.n_points))
+                to_ask.extend(opt.ask(n_points=self.n_points, strategy=self.lie_strat()))
             a = tuple(to_ask.popleft())
             while a in evald:
                 logger.info("this point was evaluated before...")
@@ -642,55 +668,70 @@ class Hyperopt:
             evald.add(a)
             yield a

+    @staticmethod
+    def opt_get_past_points(asked: dict, results_board: Queue) -> dict:
+        """ fetch shared results between optimizers """
+        results = results_board.get()
+        results_board.put(results)
+        for a in asked:
+            if a in results:
+                asked[a] = results[a]
+        return asked
+
     def parallel_opt_objective(self, n: int, optimizers: Queue, jobs: int, results_board: Queue):
         """
         objective run in multi opt mode, optimizers share the results as soon as they are completed
         """
         self.log_results_immediate(n)
-        # fetch an optimizer instance
+        is_shared = self.shared
+        # get an optimizer instance
         opt = optimizers.get()
-        # tell new points if any
-        results = results_board.get()
-        past_Xi = []
-        past_yi = []
-        for idx, res in enumerate(results):
-            unsubscribe = False
-            vals = res[0]  # res[1] is the counter
-            for v in vals:
-                if list(v['params_dict'].values()) not in opt.Xi:
-                    past_Xi.append(list(v['params_dict'].values()))
-                    past_yi.append(v['loss'])
-                    # decrease counter
-                    if not unsubscribe:
-                        unsubscribe = True
-            if unsubscribe:
-                results[idx][1] -= 1
-                if results[idx][1] < 1:
-                    del results[idx]
-        # put back the updated results
-        results_board.put(results)
-        if len(past_Xi) > 0:
-            opt.tell(past_Xi, past_yi, fit=True)
+        if is_shared:
+            # get a random number before putting it back to avoid
+            # replication with other workers
+            rand = opt.rng.randint(0, VOID_LOSS)
+            optimizers.put(opt)
+            # switch the seed to get a different point
+            opt.rng.seed(rand)
+            opt.update_next()
         # ask for points according to config
-        asked = opt.ask(n_points=self.n_points, strategy=self.get_next_point_strategy())
-        # run the backtest for each point
-        f_val = [self.backtest_params(e) for e in asked]
+        asked = opt.ask(n_points=self.ask_points, strategy=self.lie_strat())
+        # check if some points have been evaluated by other optimizers
+        p_asked = self.opt_get_past_points({tuple(a): None for a in asked}, results_board)
+        Xi_d = []  # done
+        Xi_t = []  # to do
+        for a in p_asked:
+            if p_asked[a] is not None:
+                Xi_d.append(a)
+            else:
+                Xi_t.append(a)
+        # run the backtest for each point to do (Xi_t)
+        f_val = [self.backtest_params(a) for a in Xi_t]
         # filter losses
         void_filtered = self.filter_void_losses(f_val, opt)
-        # tell the optimizer the results
+        # add points of the current dispatch if any
         if opt.void_loss != VOID_LOSS or len(void_filtered) > 0:
-            Xi = [list(v['params_dict'].values()) for v in void_filtered]
-            yi = [v['loss'] for v in void_filtered]
-            # because we fit with points from other runs
-            # only fit if at the current dispatch there were no points
-            opt.tell(Xi, yi, fit=(len(past_Xi) < 1))
-            # update the board with the new results
-            results = results_board.get()
-            results.append([void_filtered, jobs - 1])
-            results_board.put(results)
-        # send back the updated optimizer
-        optimizers.put(opt)
+            Xi = [*Xi_d, *[list(v['params_dict'].values()) for v in void_filtered]]
+            yi = [*[p_asked[a] for a in Xi_d], *[v['loss'] for v in void_filtered]]
+            void = False
+            if is_shared:
+                # refresh the optimizer that stores all the points
+                opt = optimizers.get()
+            opt.tell(Xi, yi, fit=False)
+        else:
+            void = True
+        if not void or not is_shared:
+            # send back the updated optimizer only in non shared mode
+            # because in shared mode if all results are void we don't
+            # fetch it at all
+            optimizers.put(opt)
+        # update the board used to skip already computed points
+        results = results_board.get()
+        for v in void_filtered:
+            results[tuple(v['params_dict'].values())] = v['loss']
+        results_board.put(results)
         return void_filtered

     def parallel_objective(self, asked, results: Queue, n=0):
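For reference: the results board is a maxsize=1 managed Queue holding a single
dict, so get() doubles as a lock acquire and put() as the release;
opt_get_past_points above is the read path, and the loop before the return is
the write path. A standalone sketch of the same pattern (the helper names here
are hypothetical, not from the patch):

    from multiprocessing import Manager

    manager = Manager()
    results_board = manager.Queue(maxsize=1)
    results_board.put({})  # the shared dict[tuple(Xi)] = yi map

    def publish_result(board, Xi, yi):
        # taking the only item gives exclusive access to the dict
        results = board.get()
        results[tuple(Xi)] = yi
        board.put(results)  # putting it back releases the "lock"

    def lookup_points(board, points):
        results = board.get()
        board.put(results)  # read-only access, release immediately
        return {tuple(p): results.get(tuple(p)) for p in points}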
@@ -839,14 +880,19 @@ class Hyperopt:
         """ Setup the optimizers objects, try to load from disk, or create new ones """
         # try to load previous optimizers
         opts = self.load_previous_optimizers(self.opts_file)
+        n_opts = len(opts)
+        max_opts = self.n_jobs

         if self.multi:
-            if len(opts) > 0:
-                # put the restored optimizers in the queue and clear them from the object
-                for opt in opts:
-                    backend.optimizers.put(opt)
+            # when sharing results there is only one optimizer that gets copied
+            if self.shared:
+                max_opts = 1
+            # put the restored optimizers in the queue
+            if n_opts > 0:
+                for n in range(n_opts):
+                    backend.optimizers.put(opts[n])
             # generate as many optimizers as are still needed to fill the job count
-            remaining = self.n_jobs - backend.optimizers.qsize()
+            remaining = max_opts - backend.optimizers.qsize()
             if remaining > 0:
                 opt = self.get_optimizer(self.dimensions, self.n_jobs, self.opt_n_initial_points)
                 for _ in range(remaining):  # generate optimizers
@@ -859,7 +905,7 @@ class Hyperopt:
         else:
             # if we have more than 1 optimizer but are using single opt,
             # pick one discard the rest...
-            if len(opts) > 0:
+            if n_opts > 0:
                 self.opt = opts[-1]
             else:
                 self.opt = self.get_optimizer(
@@ -897,7 +943,7 @@ class Hyperopt:
         # Trim startup period from analyzed dataframe
         for pair, df in preprocessed.items():
             preprocessed[pair] = trim_dataframe(df, timerange)
-            self.n_samples += len(preprocessed[pair])
+            self.n_candles += len(preprocessed[pair])
         min_date, max_date = get_timerange(data)
         logger.info(
@@ -944,7 +990,8 @@ class Hyperopt:
             # if self.multi: batch_len = batch_len // self.n_points
             # don't go over the limit
             if epochs_so_far + batch_len * n_points > epochs_limit():
-                batch_len = (epochs_limit() - epochs_so_far) // n_points
+                q, r = divmod(epochs_limit() - epochs_so_far, n_points)
+                batch_len = q + r
             print(
                 f"{epochs_so_far+1}-{epochs_so_far+batch_len*n_points}"
                 f"/{epochs_limit()}: ",
@@ -952,8 +999,8 @@ class Hyperopt:
             f_val = jobs_scheduler(parallel, batch_len, epochs_so_far, self.n_jobs)
             saved = self.log_results(f_val, epochs_so_far, epochs_limit())
             # stop if no epochs have been evaluated
-            if len(f_val) < 1:
-                logger.warning("All epochs evaluated were void, "
+            if len(f_val) < batch_len:
+                logger.warning("Some evaluated epochs were void, "
                                "check the loss function and the search space.")
             if (not saved and len(f_val) > 1) or batch_len < 1:
                 break

View File

@@ -7,7 +7,7 @@ manager: SyncManager
 # stores the optimizers in multi opt mode
 optimizers: Queue
 # stores a list of the results to share between optimizers
-# each result is a tuple of the params_dict and a decreasing counter
+# in the form of dict[tuple(Xi)] = yi
 results_board: Queue
 # store the results in single opt mode
 results: Queue