hyperopt shared mode
- shared mode uses one optimizer with shared results
- multi mode runs as many optimizers as jobs and results are only shared on ask
- a flag to override the strategy when asking for more points (--lie-strat)
- make sure to ask with n_points `None` to avoid computing more points than needed in shared mode
- reduce the number of models to 1 in multi mode
- don't load more than the specified number of jobs when loading previous optimizers
- stretch the batch length to reach the epochs limit
- a warning for when no epochs are logged
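For context on the n_points and --lie-strat bullets above: the Optimizer API used in this diff matches scikit-optimize, where ask() with n_points left as None returns a single point and n_points > 1 produces a batch via a constant-liar strategy such as 'cl_min'. A minimal sketch, assuming scikit-optimize is installed; the single toy dimension and the quadratic objective are illustrative only, not one of the hyperopt spaces:

from skopt import Optimizer

# one toy dimension; the real dimensions come from the hyperopt spaces
opt = Optimizer([(-2.0, 2.0)], base_estimator="GP", acq_optimizer="sampling")

# n_points=None (the default): one point, no lying, no extra surrogate work
x_single = opt.ask()

# n_points > 1: a batch of points generated with a constant-liar strategy
x_batch = opt.ask(n_points=4, strategy="cl_min")

# report an observation back; fit=True refits the surrogate model
opt.tell(x_single, sum(v * v for v in x_single), fit=True)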
@@ -26,7 +26,7 @@ ARGS_HYPEROPT = ARGS_COMMON_OPTIMIZE + [
     "hyperopt", "hyperopt_path", "position_stacking", "epochs", "spaces",
     "use_max_market_positions", "print_all", "print_colorized", "print_json", "hyperopt_jobs",
     "hyperopt_random_state", "hyperopt_min_trades", "hyperopt_continue", "hyperopt_loss", "effort",
-    "multi_opt", "points_per_opt"
+    "mode", "n_points", "lie_strat"
 ]

 ARGS_EDGE = ARGS_COMMON_OPTIMIZE + ["stoploss_range"]
@@ -204,20 +204,26 @@ AVAILABLE_CLI_OPTIONS = {
        metavar='FLOAT',
        default=constants.HYPEROPT_EFFORT,
        ),
-    "multi_opt":
-    Arg('--multi',
+    "mode":
+    Arg('--mode',
        help=('Switches hyperopt to use one optimizer per job, use it',
              'when backtesting iterations are cheap (default: %(default)d).'),
-        action='store_true',
-        default=False),
-    "points_per_opt":
-    Arg('--points-per-opt',
-        help=('Controls how many points to ask at each job dispatch to each',
-              'optimizer in multi opt mode, increase if cpu usage of each core',
+        metavar='NAME',
+        default=constants.HYPEROPT_MODE),
+    "n_points":
+    Arg('--n-points',
+        help=('Controls how many points to ask to the optimizer',
+              'increase if cpu usage of each core',
              'appears low (default: %(default)d).'),
        type=int,
        metavar='INT',
-        default=constants.HYPEROPT_POINTS_PER_OPT),
+        default=constants.HYPEROPT_N_POINTS),
+    "lie_strat":
+    Arg('--lie-strat',
+        help=('Sets the strategy that the optimizer uses to lie',
+              'when asking for more than one point, ',
+              'no effect if n_point is one (default: %(default)d).'),
+        default=constants.HYPEROPT_LIE_STRAT),
    "spaces":
    Arg(
        '--spaces',
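For reference, the three new flags behave like ordinary argparse options. The sketch below uses plain argparse rather than the project's Arg wrapper, so the exact wiring is an assumption; the defaults mirror the constants introduced further down, and the accepted mode names come from the code in this diff:

import argparse

parser = argparse.ArgumentParser(prog="hyperopt-sketch")
parser.add_argument("--mode", metavar="NAME", default="single",
                    help="optimizer mode: single, multi or shared")
parser.add_argument("--n-points", dest="n_points", type=int, metavar="INT",
                    default=2, help="points asked to the optimizer per dispatch")
parser.add_argument("--lie-strat", dest="lie_strat", default="default",
                    help="constant-liar strategy used when asking for several points")

# e.g. the shared mode described in the commit message
args = parser.parse_args(["--mode", "shared", "--n-points", "4", "--lie-strat", "cl_mean"])
print(args.mode, args.n_points, args.lie_strat)  # shared 4 cl_mean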
@@ -270,10 +270,13 @@ class Configuration:
                             logstring='Parameter --effort detected ... '
                             'Parameter --effort detected: {}')
        self._args_to_config(config,
-                             argname='multi_opt',
-                             logstring='Hyperopt will use multiple optimizers ...')
+                             argname='mode',
+                             logstring='Hyperopt will run in {} mode ...')
        self._args_to_config(config,
-                             argname='points_per_opt',
+                             argname='explore',
+                             logstring='Acquisition strategy set to random {}...')
+        self._args_to_config(config,
+                             argname='n_points',
                             logstring='Optimizers will be asked for {} points...')
        self._args_to_config(config,
                             argname='spaces',
@@ -8,7 +8,9 @@ DEFAULT_EXCHANGE = 'bittrex'
 PROCESS_THROTTLE_SECS = 5  # sec
 HYPEROPT_EPOCH = 0  # epochs
 HYPEROPT_EFFORT = 0.  # tune max epoch count
-HYPEROPT_POINTS_PER_OPT = 2  # tune iterations between estimations
+HYPEROPT_N_POINTS = 2  # tune iterations between estimations
+HYPEROPT_MODE = 'single'
+HYPEROPT_LIE_STRAT = 'default'
 RETRY_TIMEOUT = 30  # sec
 DEFAULT_HYPEROPT_LOSS = 'DefaultHyperOptLoss'
 DEFAULT_DB_PROD_URL = 'sqlite:///tradesv3.sqlite'
@@ -97,7 +97,7 @@ class Hyperopt:
         # a guessed number extracted by the space dimensions
         self.search_space_size = 0
         # total number of candles being backtested
-        self.n_samples = 0
+        self.n_candles = 0

         self.current_best_loss = VOID_LOSS
         self.current_best_epoch = 0
@@ -113,37 +113,9 @@ class Hyperopt:

         # evaluations
         self.trials: List = []
-        # optimizers
-        self.opts: List[Optimizer] = []
-        self.opt: Optimizer = None
-
-        backend.manager = Manager()
-        if 'multi_opt' in self.config and self.config['multi_opt']:
-            self.multi = True
-            backend.optimizers = backend.manager.Queue()
-            backend.results_board = backend.manager.Queue(maxsize=1)
-            backend.results_board.put([])
-            self.opt_base_estimator = 'GBRT'
-            self.opt_acq_optimizer = 'sampling'
-            default_n_points = 2
-        else:
-            self.multi = False
-            backend.results = backend.manager.Queue()
-            self.opt_base_estimator = 'GP'
-            self.opt_acq_optimizer = 'lbfgs'
-            default_n_points = 1
-
-        # in single opt assume runs are expensive so default to 1 point per ask
-        self.n_points = self.config.get('points_per_opt', default_n_points)
-        # if 0 n_points are given, don't use any base estimator (akin to random search)
-        if self.n_points < 1:
-            self.n_points = 1
-            self.opt_base_estimator = "DUMMY"
-            self.opt_acq_optimizer = "sampling"
-        # var used in epochs and batches calculation
-        self.opt_points = self.n_jobs * self.n_points
-        # models are only needed for posterior eval
-        self.n_models = max(16, self.n_jobs)
+        # configure multi mode
+        self.setup_multi()

         # Populate functions here (hasattr is slow so should not be run during "regular" operations)
         if hasattr(self.custom_hyperopt, 'populate_indicators'):
@@ -174,6 +146,60 @@ class Hyperopt:
         self.print_colorized = self.config.get('print_colorized', False)
         self.print_json = self.config.get('print_json', False)

+    def setup_multi(self):
+        # optimizers
+        self.opts: List[Optimizer] = []
+        self.opt: Optimizer = None
+
+        backend.manager = Manager()
+        self.mode = self.config.get('mode', 'single')
+        self.shared = False
+        if self.mode in ('multi', 'shared'):
+            self.multi = True
+            if self.mode == 'shared':
+                self.shared = True
+            backend.optimizers = backend.manager.Queue()
+            backend.results_board = backend.manager.Queue(maxsize=1)
+            backend.results_board.put({})
+            self.opt_base_estimator = 'GBRT'
+            self.opt_acq_optimizer = 'sampling'
+            # in multi opt one model is enough
+            self.n_models = 1
+            default_n_points = 2
+        else:
+            self.multi = False
+            backend.results = backend.manager.Queue()
+            self.opt_base_estimator = 'GP'
+            self.opt_acq_optimizer = 'lbfgs'
+            # models are only needed for posterior eval
+            self.n_models = min(16, self.n_jobs)
+            default_n_points = 1
+
+        # in single opt assume runs are expensive so default to 1 point per ask
+        self.n_points = self.config.get('n_points', default_n_points)
+        # if 0 n_points are given, don't use any base estimator (akin to random search)
+        if self.n_points < 1:
+            self.n_points = 1
+            self.opt_base_estimator = "DUMMY"
+            self.opt_acq_optimizer = "sampling"
+        if self.n_points < 2:
+            # ask_points is what is used in the ask call
+            # because when n_points is None, it doesn't
+            # waste time generating new points
+            self.ask_points = None
+        else:
+            self.ask_points = self.n_points
+        # var used in epochs and batches calculation
+        self.opt_points = self.n_jobs * (self.n_points or 1)
+        # lie strategy
+        lie_strat = self.config.get('lie_strat', 'default')
+        if lie_strat == 'default':
+            self.lie_strat = lambda: 'cl_min'
+        elif lie_strat == 'random':
+            self.lie_strat = self.get_next_point_strategy
+        else:
+            self.lie_strat = lambda: lie_strat
+
     @staticmethod
     def get_lock_filename(config: Dict[str, Any]) -> str:

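The lie-strategy block at the end of setup_multi resolves the configured name to a zero-argument callable so the later ask calls stay uniform. A standalone sketch of the same idea follows; random.choice stands in for get_next_point_strategy, which is not shown in this diff, and the function name is illustrative:

import random
from typing import Callable

# constant-liar strategies accepted by scikit-optimize's Optimizer.ask
LIE_STRATS = ("cl_min", "cl_mean", "cl_max")

def make_lie_strat(name: str = "default") -> Callable[[], str]:
    """Map a configured name to a callable that returns a lie strategy."""
    if name == "default":
        return lambda: "cl_min"                    # fixed, the skopt default
    if name == "random":
        return lambda: random.choice(LIE_STRATS)   # vary the lie on every dispatch
    return lambda: name                            # pass an explicit strategy through

lie_strat = make_lie_strat("random")
print(lie_strat())  # e.g. cl_mean

Returning a callable rather than a string keeps the call sites identical whether the strategy is fixed or drawn per dispatch.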
@@ -631,7 +657,7 @@ class Hyperopt:
                 del vals[:], void_filtered[:]

             if not to_ask:
-                to_ask.extend(opt.ask(n_points=self.n_points))
+                to_ask.extend(opt.ask(n_points=self.n_points, strategy=self.lie_strat()))
             a = tuple(to_ask.popleft())
             while a in evald:
                 logger.info("this point was evaluated before...")
@@ -642,55 +668,70 @@ class Hyperopt:
             evald.add(a)
             yield a

+    @staticmethod
+    def opt_get_past_points(asked: dict, results_board: Queue) -> dict:
+        """ fetch shared results between optimizers """
+        results = results_board.get()
+        results_board.put(results)
+        for a in asked:
+            if a in results:
+                asked[a] = results[a]
+        return asked
+
     def parallel_opt_objective(self, n: int, optimizers: Queue, jobs: int, results_board: Queue):
         """
         objective run in multi opt mode, optimizers share the results as soon as they are completed
         """
         self.log_results_immediate(n)
-        # fetch an optimizer instance
+        is_shared = self.shared
+        # get an optimizer instance
         opt = optimizers.get()
-        # tell new points if any
-        results = results_board.get()
-        past_Xi = []
-        past_yi = []
-        for idx, res in enumerate(results):
-            unsubscribe = False
-            vals = res[0]  # res[1] is the counter
-            for v in vals:
-                if list(v['params_dict'].values()) not in opt.Xi:
-                    past_Xi.append(list(v['params_dict'].values()))
-                    past_yi.append(v['loss'])
-                    # decrease counter
-                    if not unsubscribe:
-                        unsubscribe = True
-            if unsubscribe:
-                results[idx][1] -= 1
-                if results[idx][1] < 1:
-                    del results[idx]
-        # put back the updated results
-        results_board.put(results)
-        if len(past_Xi) > 0:
-            opt.tell(past_Xi, past_yi, fit=True)
+        if is_shared:
+            # get a random number before putting it back to avoid
+            # replication with other workers
+            rand = opt.rng.randint(0, VOID_LOSS)
+            optimizers.put(opt)
+            # switch the seed to get a different point
+            opt.rng.seed(rand)
+            opt.update_next()

         # ask for points according to config
-        asked = opt.ask(n_points=self.n_points, strategy=self.get_next_point_strategy())
-        # run the backtest for each point
-        f_val = [self.backtest_params(e) for e in asked]
+        asked = opt.ask(n_points=self.ask_points, strategy=self.lie_strat())
+        # check if some points have been evaluated by other optimizers
+        p_asked = self.opt_get_past_points({tuple(a): None for a in asked}, results_board)
+        Xi_d = []  # done
+        Xi_t = []  # to do
+        for a in p_asked:
+            if p_asked[a] is not None:
+                Xi_d.append(a)
+            else:
+                Xi_t.append(a)
+        # run the backtest for each point to do (Xi_t)
+        f_val = [self.backtest_params(a) for a in Xi_t]
         # filter losses
         void_filtered = self.filter_void_losses(f_val, opt)
-        # tell the optimizer the results
+        # add points of the current dispatch if any
         if opt.void_loss != VOID_LOSS or len(void_filtered) > 0:
-            Xi = [list(v['params_dict'].values()) for v in void_filtered]
-            yi = [v['loss'] for v in void_filtered]
-            # because we fit with points from other runs
-            # only fit if at the current dispatch there were no points
-            opt.tell(Xi, yi, fit=(len(past_Xi) < 1))
-        # update the board with the new results
-        results = results_board.get()
-        results.append([void_filtered, jobs - 1])
-        results_board.put(results)
-        # send back the updated optimizer
+            Xi = [*Xi_d, *[list(v['params_dict'].values()) for v in void_filtered]]
+            yi = [*[p_asked[a] for a in Xi_d], *[v['loss'] for v in void_filtered]]
+            void = False
+            if is_shared:
+                # refresh the optimizer that stores all the points
+                opt = optimizers.get()
+            opt.tell(Xi, yi, fit=False)
+        else:
+            void = True
+        if not void or not is_shared:
+            # send back the updated optimizer only in non shared mode
+            # because in shared mode if all results are void we don't
+            # fetch it at all
             optimizers.put(opt)
+        # update the board used to skip already computed points
+        results = results_board.get()
+        for v in void_filtered:
+            results[tuple(v['params_dict'].values())] = v['loss']
+        results_board.put(results)
         return void_filtered

     def parallel_objective(self, asked, results: Queue, n=0):
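The results board used above is a one-slot managed queue holding a plain dict keyed by parameter tuples, so a worker can look up points another optimizer already evaluated before backtesting them, and publish its own afterwards. A self-contained sketch of that read-and-put-back pattern; the helper names and the sample losses are illustrative, not the module's API:

from multiprocessing import Manager

def lookup_past(results_board, asked):
    """Fill in losses for points another worker already evaluated."""
    results = results_board.get()   # take the shared dict ...
    results_board.put(results)      # ... and put it straight back
    return {a: results.get(a) for a in asked}

def publish(results_board, evaluated):
    """Merge freshly evaluated points into the shared board."""
    results = results_board.get()
    results.update(evaluated)
    results_board.put(results)

if __name__ == "__main__":
    manager = Manager()
    results_board = manager.Queue(maxsize=1)  # one-slot queue acting as lock + store
    results_board.put({})                     # dict[tuple(Xi)] = yi

    publish(results_board, {(0.1, 3): 1.25})
    print(lookup_past(results_board, [(0.1, 3), (0.2, 5)]))
    # {(0.1, 3): 1.25, (0.2, 5): None}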
@@ -839,14 +880,19 @@ class Hyperopt:
         """ Setup the optimizers objects, try to load from disk, or create new ones """
         # try to load previous optimizers
         opts = self.load_previous_optimizers(self.opts_file)
+        n_opts = len(opts)
+        max_opts = self.n_jobs

         if self.multi:
-            if len(opts) > 0:
-                # put the restored optimizers in the queue and clear them from the object
-                for opt in opts:
-                    backend.optimizers.put(opt)
+            # when sharing results there is only one optimizer that gets copied
+            if self.shared:
+                max_opts = 1
+            # put the restored optimizers in the queue
+            if n_opts > 0:
+                for n in range(n_opts):
+                    backend.optimizers.put(opts[n])
             # generate as many optimizers as are still needed to fill the job count
-            remaining = self.n_jobs - backend.optimizers.qsize()
+            remaining = max_opts - backend.optimizers.qsize()
             if remaining > 0:
                 opt = self.get_optimizer(self.dimensions, self.n_jobs, self.opt_n_initial_points)
                 for _ in range(remaining):  # generate optimizers
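A condensed stand-in for the queue setup above: restored optimizers are enqueued first and new ones are only created up to the per-mode cap (one per job in multi mode, a single optimizer in shared mode). Function and argument names are illustrative and a plain queue.Queue replaces the managed one:

from queue import Queue

def setup_optimizer_queue(saved, n_jobs, shared, make_new):
    """Reuse saved optimizers, then top up with new ones until the cap is met."""
    max_opts = 1 if shared else n_jobs      # shared mode copies a single optimizer
    q: Queue = Queue()
    for opt in saved:
        q.put(opt)
    remaining = max_opts - q.qsize()
    for _ in range(remaining):              # range() of a negative number is empty
        q.put(make_new())
    return q

q = setup_optimizer_queue(saved=["restored"], n_jobs=4, shared=False,
                          make_new=lambda: "fresh")
print(q.qsize())  # 4: one restored optimizer plus three new ones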
@@ -859,7 +905,7 @@ class Hyperopt:
         else:
             # if we have more than 1 optimizer but are using single opt,
             # pick one discard the rest...
-            if len(opts) > 0:
+            if n_opts > 0:
                 self.opt = opts[-1]
             else:
                 self.opt = self.get_optimizer(
@@ -897,7 +943,7 @@ class Hyperopt:
         # Trim startup period from analyzed dataframe
         for pair, df in preprocessed.items():
             preprocessed[pair] = trim_dataframe(df, timerange)
-            self.n_samples += len(preprocessed[pair])
+            self.n_candles += len(preprocessed[pair])
         min_date, max_date = get_timerange(data)

         logger.info(
@@ -944,7 +990,8 @@ class Hyperopt:
             # if self.multi: batch_len = batch_len // self.n_points
             # don't go over the limit
             if epochs_so_far + batch_len * n_points > epochs_limit():
-                batch_len = (epochs_limit() - epochs_so_far) // n_points
+                q, r = divmod(epochs_limit() - epochs_so_far, n_points)
+                batch_len = q + r
             print(
                 f"{epochs_so_far+1}-{epochs_so_far+batch_len*n_points}"
                 f"/{epochs_limit()}: ",
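A quick check of the divmod arithmetic above: the old floor division could leave the last few epochs unscheduled, while q + r stretches the final batch so the limit is reached, at the cost of possibly overshooting by up to r * (n_points - 1) epochs; the numbers below are only an example:

remaining, n_points = 70, 4            # epochs left before the limit, points per ask

old_batch_len = remaining // n_points  # 17 -> 68 epochs scheduled, 2 left over
q, r = divmod(remaining, n_points)     # 17, 2
new_batch_len = q + r                  # 19 -> 76 epochs scheduled, limit reached

print(old_batch_len * n_points, new_batch_len * n_points)  # 68 76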
@@ -952,8 +999,8 @@ class Hyperopt:
             f_val = jobs_scheduler(parallel, batch_len, epochs_so_far, self.n_jobs)
             saved = self.log_results(f_val, epochs_so_far, epochs_limit())
             # stop if no epochs have been evaluated
-            if len(f_val) < 1:
-                logger.warning("All epochs evaluated were void, "
+            if len(f_val) < batch_len:
+                logger.warning("Some evaluated epochs were void, "
                                "check the loss function and the search space.")
             if (not saved and len(f_val) > 1) or batch_len < 1:
                 break
@@ -7,7 +7,7 @@ manager: SyncManager
 # stores the optimizers in multi opt mode
 optimizers: Queue
 # stores a list of the results to share between optimizers
-# each result is a tuple of the params_dict and a decreasing counter
+# in the form of dict[tuple(Xi)] = yi
 results_board: Queue
 # store the results in single opt mode
 results: Queue