Optimize only new points

Ensure that points returned from `self.opt.ask` have not already been evaluated.
This commit is contained in:
Italo 2022-03-20 16:02:03 +00:00
parent fcec071a08
commit e16bb1b34e

View File

@ -410,6 +410,35 @@ class Hyperopt:
# Store non-trimmed data - will be trimmed after signal generation. # Store non-trimmed data - will be trimmed after signal generation.
dump(preprocessed, self.data_pickle_file) dump(preprocessed, self.data_pickle_file)
def get_asked_points(self, n_points: int) -> List[List[Any]]:
    """
    Return up to `n_points` candidate points that have not been evaluated yet.

    A point counts as already evaluated when it is contained in `self.opt.Xi`.

    Steps:
    1. Try to get points using `self.opt.ask` first
    2. Discard points that have already been evaluated, as well as duplicates of
       points that were already collected (including duplicates within one batch)
    3. Retry using `self.opt.ask` up to 3 times
    4. If points are still missing with respect to `n_points`, randomly sample
       `n_points * 5` points per attempt using `self.opt.space.rvs`
    5. Repeat until `n_points` points are collected or 100 attempts were made
    6. Return the collected list truncated to `n_points`

    :param n_points: Number of points requested.
    :return: List of unique, not yet evaluated points. It may contain fewer than
        `n_points` entries if 100 attempts did not yield enough new points.
    """
    max_attempts = 100
    max_ask_attempts = 3
    asked_non_tried: List[List[Any]] = []
    for attempt in range(max_attempts):
        if len(asked_non_tried) >= n_points:
            break
        if attempt < max_ask_attempts:
            asked = self.opt.ask(n_points=n_points)
        else:
            # Use random samples if `self.opt.ask` keeps returning points already tried.
            asked = self.opt.space.rvs(n_samples=n_points * 5)
        for point in asked:
            # Test and append one point at a time: filtering the whole batch against
            # the list as it was before the batch lets in-batch duplicates through.
            if point not in self.opt.Xi and point not in asked_non_tried:
                asked_non_tried.append(point)
    return asked_non_tried[:n_points]
def start(self) -> None: def start(self) -> None:
self.random_state = self._set_random_state(self.config.get('hyperopt_random_state', None)) self.random_state = self._set_random_state(self.config.get('hyperopt_random_state', None))
logger.info(f"Using optimizer random state: {self.random_state}") logger.info(f"Using optimizer random state: {self.random_state}")
@ -474,7 +503,7 @@ class Hyperopt:
n_rest = (i + 1) * jobs - self.total_epochs n_rest = (i + 1) * jobs - self.total_epochs
current_jobs = jobs - n_rest if n_rest > 0 else jobs current_jobs = jobs - n_rest if n_rest > 0 else jobs
asked = self.opt.ask(n_points=current_jobs) asked = self.get_asked_points(n_points=current_jobs)
f_val = self.run_optimizer_parallel(parallel, asked, i) f_val = self.run_optimizer_parallel(parallel, asked, i)
self.opt.tell(asked, [v['loss'] for v in f_val]) self.opt.tell(asked, [v['loss'] for v in f_val])