Merge pull request #7866 from initrv/cleanup-tensorboard-callback

Cleanup tensorboard callback
This commit is contained in:
Robert Caulk
2022-12-13 09:05:46 +01:00
committed by GitHub
6 changed files with 73 additions and 36 deletions

View File

@@ -46,9 +46,9 @@ class Base4ActionRLEnv(BaseEnvironment):
self._done = True
self._update_unrealized_total_profit()
step_reward = self.calculate_reward(action)
self.total_reward += step_reward
self.tensorboard_log(self.actions._member_names_[action])
trade_type = None
if self.is_tradesignal(action):

View File

@@ -49,6 +49,7 @@ class Base5ActionRLEnv(BaseEnvironment):
self._update_unrealized_total_profit()
step_reward = self.calculate_reward(action)
self.total_reward += step_reward
self.tensorboard_log(self.actions._member_names_[action])
trade_type = None
if self.is_tradesignal(action):

View File

@@ -2,7 +2,7 @@ import logging
import random
from abc import abstractmethod
from enum import Enum
from typing import Optional, Type
from typing import Optional, Type, Union
import gym
import numpy as np
@@ -78,7 +78,7 @@ class BaseEnvironment(gym.Env):
# set here to default 5Ac, but all children envs can override this
self.actions: Type[Enum] = BaseActions
self.custom_info: dict = {}
self.tensorboard_metrics: dict = {}
self.live: bool = False
if dp:
self.live = dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE)
@@ -139,20 +139,38 @@ class BaseEnvironment(gym.Env):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def tensorboard_log(self, metric: str, value: Union[int, float] = 1, inc: bool = True):
"""
Function builds the tensorboard_metrics dictionary
to be parsed by the TensorboardCallback. This
function is designed for tracking incremented objects,
events, actions inside the training environment.
For example, a user can call this to track the
frequency of occurence of an `is_valid` call in
their `calculate_reward()`:
def calculate_reward(self, action: int) -> float:
if not self._is_valid(action):
self.tensorboard_log("is_valid")
return -2
:param metric: metric to be tracked and incremented
:param value: value to increment `metric` by
:param inc: sets whether the `value` is incremented or not
"""
if not inc or metric not in self.tensorboard_metrics:
self.tensorboard_metrics[metric] = value
else:
self.tensorboard_metrics[metric] += value
def reset_tensorboard_log(self):
self.tensorboard_metrics = {}
def reset(self):
"""
Reset is called at the beginning of every episode
"""
# custom_info is used for episodic reports and tensorboard logging
self.custom_info["Invalid"] = 0
self.custom_info["Hold"] = 0
self.custom_info["Unknown"] = 0
self.custom_info["pnl_factor"] = 0
self.custom_info["duration_factor"] = 0
self.custom_info["reward_exit"] = 0
self.custom_info["reward_hold"] = 0
for action in self.actions:
self.custom_info[f"{action.name}"] = 0
self.reset_tensorboard_log()
self._done = False

View File

@@ -42,19 +42,18 @@ class TensorboardCallback(BaseCallback):
)
def _on_step(self) -> bool:
custom_info = self.training_env.get_attr("custom_info")[0]
self.logger.record("_state/position", self.locals["infos"][0]["position"])
self.logger.record("_state/trade_duration", self.locals["infos"][0]["trade_duration"])
self.logger.record("_state/current_profit_pct", self.locals["infos"]
[0]["current_profit_pct"])
self.logger.record("_reward/total_profit", self.locals["infos"][0]["total_profit"])
self.logger.record("_reward/total_reward", self.locals["infos"][0]["total_reward"])
self.logger.record_mean("_reward/mean_trade_duration", self.locals["infos"]
[0]["trade_duration"])
self.logger.record("_actions/action", self.locals["infos"][0]["action"])
self.logger.record("_actions/_Invalid", custom_info["Invalid"])
self.logger.record("_actions/_Unknown", custom_info["Unknown"])
self.logger.record("_actions/Hold", custom_info["Hold"])
for action in self.actions:
self.logger.record(f"_actions/{action.name}", custom_info[action.name])
local_info = self.locals["infos"][0]
tensorboard_metrics = self.training_env.get_attr("tensorboard_metrics")[0]
for info in local_info:
if info not in ["episode", "terminal_observation"]:
self.logger.record(f"_info/{info}", local_info[info])
for info in tensorboard_metrics:
if info in [action.name for action in self.actions]:
self.logger.record(f"_actions/{info}", tensorboard_metrics[info])
else:
self.logger.record(f"_custom/{info}", tensorboard_metrics[info])
return True

View File

@@ -100,7 +100,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
"""
# first, penalize if the action is not valid
if not self._is_valid(action):
self.custom_info["Invalid"] += 1
self.tensorboard_log("is_valid")
return -2
pnl = self.get_unrealized_profit()
@@ -109,15 +109,12 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
# reward agent for entering trades
if (action == Actions.Long_enter.value
and self._position == Positions.Neutral):
self.custom_info[f"{Actions.Long_enter.name}"] += 1
return 25
if (action == Actions.Short_enter.value
and self._position == Positions.Neutral):
self.custom_info[f"{Actions.Short_enter.name}"] += 1
return 25
# discourage agent from not entering trades
if action == Actions.Neutral.value and self._position == Positions.Neutral:
self.custom_info[f"{Actions.Neutral.name}"] += 1
return -1
max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
@@ -131,22 +128,18 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
# discourage sitting in position
if (self._position in (Positions.Short, Positions.Long) and
action == Actions.Neutral.value):
self.custom_info["Hold"] += 1
return -1 * trade_duration / max_trade_duration
# close long
if action == Actions.Long_exit.value and self._position == Positions.Long:
if pnl > self.profit_aim * self.rr:
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
self.custom_info[f"{Actions.Long_exit.name}"] += 1
return float(pnl * factor)
# close short
if action == Actions.Short_exit.value and self._position == Positions.Short:
if pnl > self.profit_aim * self.rr:
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
self.custom_info[f"{Actions.Short_exit.name}"] += 1
return float(pnl * factor)
self.custom_info["Unknown"] += 1
return 0.