Fix monitor bug; set default values in case the user doesn't set params
This commit is contained in:
		| @@ -42,7 +42,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): | |||||||
|         self.eval_callback: EvalCallback = None |         self.eval_callback: EvalCallback = None | ||||||
|         self.model_type = self.freqai_info['rl_config']['model_type'] |         self.model_type = self.freqai_info['rl_config']['model_type'] | ||||||
|         self.rl_config = self.freqai_info['rl_config'] |         self.rl_config = self.freqai_info['rl_config'] | ||||||
|         self.continual_retraining = self.rl_config['continual_retraining'] |         self.continual_retraining = self.rl_config.get('continual_retraining', False) | ||||||
|         if self.model_type in SB3_MODELS: |         if self.model_type in SB3_MODELS: | ||||||
|             import_str = 'stable_baselines3' |             import_str = 'stable_baselines3' | ||||||
|         elif self.model_type in SB3_CONTRIB_MODELS: |         elif self.model_type in SB3_CONTRIB_MODELS: | ||||||
| @@ -289,7 +289,7 @@ class MyRLEnv(Base5ActionRLEnv): | |||||||
|             return 0. |             return 0. | ||||||
|  |  | ||||||
|         pnl = self.get_unrealized_profit() |         pnl = self.get_unrealized_profit() | ||||||
|         max_trade_duration = self.rl_config['max_trade_duration_candles'] |         max_trade_duration = self.rl_config.get('max_trade_duration_candles', 100) | ||||||
|         trade_duration = self._current_tick - self._last_trade_tick |         trade_duration = self._current_tick - self._last_trade_tick | ||||||
|  |  | ||||||
|         factor = 1 |         factor = 1 | ||||||
|   | |||||||
| @@ -32,6 +32,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel): | |||||||
|             logger.info('Continual training activated - starting training from previously ' |             logger.info('Continual training activated - starting training from previously ' | ||||||
|                         'trained agent.') |                         'trained agent.') | ||||||
|             model = self.dd.model_dictionary[dk.pair] |             model = self.dd.model_dictionary[dk.pair] | ||||||
|  |             model.tensorboard_log = Path(dk.data_path / "tensorboard") | ||||||
|             model.set_env(self.train_env) |             model.set_env(self.train_env) | ||||||
|  |  | ||||||
|         model.learn( |         model.learn( | ||||||
| @@ -61,7 +62,7 @@ class MyRLEnv(Base5ActionRLEnv): | |||||||
|             return 0. |             return 0. | ||||||
|  |  | ||||||
|         pnl = self.get_unrealized_profit() |         pnl = self.get_unrealized_profit() | ||||||
|         max_trade_duration = self.rl_config['max_trade_duration_candles'] |         max_trade_duration = self.rl_config.get('max_trade_duration_candles', 100) | ||||||
|         trade_duration = self._current_tick - self._last_trade_tick |         trade_duration = self._current_tick - self._last_trade_tick | ||||||
|  |  | ||||||
|         factor = 1 |         factor = 1 | ||||||
|   | |||||||
| @@ -26,10 +26,10 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel): | |||||||
|  |  | ||||||
|         # model arch |         # model arch | ||||||
|         policy_kwargs = dict(activation_fn=th.nn.ReLU, |         policy_kwargs = dict(activation_fn=th.nn.ReLU, | ||||||
|                              net_arch=[512, 512, 512]) |                              net_arch=[512, 512, 256]) | ||||||
|  |  | ||||||
|         model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, |         model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, | ||||||
|                                 tensorboard_log=Path(dk.data_path / "tensorboard"), |                                 tensorboard_log=Path(dk.full_path / "tensorboard"), | ||||||
|                                 **self.freqai_info['model_training_parameters'] |                                 **self.freqai_info['model_training_parameters'] | ||||||
|                                 ) |                                 ) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user