PPO policy change + verbose=1

This commit is contained in:
MukavaValkku 2022-08-15 14:05:01 +03:00 committed by robcaulk
parent 926023935f
commit 13cd18dc9a

View File

@@ -53,8 +53,8 @@ class ReinforcementLearningPPO(BaseReinforcementLearningModel):
policy_kwargs = dict(activation_fn=th.nn.ReLU, policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=[256, 256, 128]) net_arch=[256, 256, 128])
model = PPO('MultiInputPolicy', train_env, policy_kwargs=policy_kwargs, model = PPO('MlpPolicy', train_env, policy_kwargs=policy_kwargs,
tensorboard_log=f"{path}/ppo/tensorboard/", learning_rate=0.00025, gamma=0.9 tensorboard_log=f"{path}/ppo/tensorboard/", learning_rate=0.00025, gamma=0.9, verbose=1
) )
model.learn( model.learn(