Merge pull request #7866 from initrv/cleanup-tensorboard-callback
Cleanup tensorboard callback
This commit is contained in:
@@ -46,9 +46,9 @@ class Base4ActionRLEnv(BaseEnvironment):
|
||||
self._done = True
|
||||
|
||||
self._update_unrealized_total_profit()
|
||||
|
||||
step_reward = self.calculate_reward(action)
|
||||
self.total_reward += step_reward
|
||||
self.tensorboard_log(self.actions._member_names_[action])
|
||||
|
||||
trade_type = None
|
||||
if self.is_tradesignal(action):
|
||||
|
@@ -49,6 +49,7 @@ class Base5ActionRLEnv(BaseEnvironment):
|
||||
self._update_unrealized_total_profit()
|
||||
step_reward = self.calculate_reward(action)
|
||||
self.total_reward += step_reward
|
||||
self.tensorboard_log(self.actions._member_names_[action])
|
||||
|
||||
trade_type = None
|
||||
if self.is_tradesignal(action):
|
||||
|
@@ -2,7 +2,7 @@ import logging
|
||||
import random
|
||||
from abc import abstractmethod
|
||||
from enum import Enum
|
||||
from typing import Optional, Type
|
||||
from typing import Optional, Type, Union
|
||||
|
||||
import gym
|
||||
import numpy as np
|
||||
@@ -78,7 +78,7 @@ class BaseEnvironment(gym.Env):
|
||||
|
||||
# set here to default 5Ac, but all children envs can override this
|
||||
self.actions: Type[Enum] = BaseActions
|
||||
self.custom_info: dict = {}
|
||||
self.tensorboard_metrics: dict = {}
|
||||
self.live: bool = False
|
||||
if dp:
|
||||
self.live = dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE)
|
||||
@@ -139,20 +139,38 @@ class BaseEnvironment(gym.Env):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def tensorboard_log(self, metric: str, value: Union[int, float] = 1, inc: bool = True):
|
||||
"""
|
||||
Function builds the tensorboard_metrics dictionary
|
||||
to be parsed by the TensorboardCallback. This
|
||||
function is designed for tracking incremented objects,
|
||||
events, actions inside the training environment.
|
||||
For example, a user can call this to track the
|
||||
frequency of occurence of an `is_valid` call in
|
||||
their `calculate_reward()`:
|
||||
|
||||
def calculate_reward(self, action: int) -> float:
|
||||
if not self._is_valid(action):
|
||||
self.tensorboard_log("is_valid")
|
||||
return -2
|
||||
|
||||
:param metric: metric to be tracked and incremented
|
||||
:param value: value to increment `metric` by
|
||||
:param inc: sets whether the `value` is incremented or not
|
||||
"""
|
||||
if not inc or metric not in self.tensorboard_metrics:
|
||||
self.tensorboard_metrics[metric] = value
|
||||
else:
|
||||
self.tensorboard_metrics[metric] += value
|
||||
|
||||
def reset_tensorboard_log(self):
|
||||
self.tensorboard_metrics = {}
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Reset is called at the beginning of every episode
|
||||
"""
|
||||
# custom_info is used for episodic reports and tensorboard logging
|
||||
self.custom_info["Invalid"] = 0
|
||||
self.custom_info["Hold"] = 0
|
||||
self.custom_info["Unknown"] = 0
|
||||
self.custom_info["pnl_factor"] = 0
|
||||
self.custom_info["duration_factor"] = 0
|
||||
self.custom_info["reward_exit"] = 0
|
||||
self.custom_info["reward_hold"] = 0
|
||||
for action in self.actions:
|
||||
self.custom_info[f"{action.name}"] = 0
|
||||
self.reset_tensorboard_log()
|
||||
|
||||
self._done = False
|
||||
|
||||
|
@@ -42,19 +42,18 @@ class TensorboardCallback(BaseCallback):
|
||||
)
|
||||
|
||||
def _on_step(self) -> bool:
|
||||
custom_info = self.training_env.get_attr("custom_info")[0]
|
||||
self.logger.record("_state/position", self.locals["infos"][0]["position"])
|
||||
self.logger.record("_state/trade_duration", self.locals["infos"][0]["trade_duration"])
|
||||
self.logger.record("_state/current_profit_pct", self.locals["infos"]
|
||||
[0]["current_profit_pct"])
|
||||
self.logger.record("_reward/total_profit", self.locals["infos"][0]["total_profit"])
|
||||
self.logger.record("_reward/total_reward", self.locals["infos"][0]["total_reward"])
|
||||
self.logger.record_mean("_reward/mean_trade_duration", self.locals["infos"]
|
||||
[0]["trade_duration"])
|
||||
self.logger.record("_actions/action", self.locals["infos"][0]["action"])
|
||||
self.logger.record("_actions/_Invalid", custom_info["Invalid"])
|
||||
self.logger.record("_actions/_Unknown", custom_info["Unknown"])
|
||||
self.logger.record("_actions/Hold", custom_info["Hold"])
|
||||
for action in self.actions:
|
||||
self.logger.record(f"_actions/{action.name}", custom_info[action.name])
|
||||
|
||||
local_info = self.locals["infos"][0]
|
||||
tensorboard_metrics = self.training_env.get_attr("tensorboard_metrics")[0]
|
||||
|
||||
for info in local_info:
|
||||
if info not in ["episode", "terminal_observation"]:
|
||||
self.logger.record(f"_info/{info}", local_info[info])
|
||||
|
||||
for info in tensorboard_metrics:
|
||||
if info in [action.name for action in self.actions]:
|
||||
self.logger.record(f"_actions/{info}", tensorboard_metrics[info])
|
||||
else:
|
||||
self.logger.record(f"_custom/{info}", tensorboard_metrics[info])
|
||||
|
||||
return True
|
||||
|
@@ -100,7 +100,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
|
||||
"""
|
||||
# first, penalize if the action is not valid
|
||||
if not self._is_valid(action):
|
||||
self.custom_info["Invalid"] += 1
|
||||
self.tensorboard_log("is_valid")
|
||||
return -2
|
||||
|
||||
pnl = self.get_unrealized_profit()
|
||||
@@ -109,15 +109,12 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
|
||||
# reward agent for entering trades
|
||||
if (action == Actions.Long_enter.value
|
||||
and self._position == Positions.Neutral):
|
||||
self.custom_info[f"{Actions.Long_enter.name}"] += 1
|
||||
return 25
|
||||
if (action == Actions.Short_enter.value
|
||||
and self._position == Positions.Neutral):
|
||||
self.custom_info[f"{Actions.Short_enter.name}"] += 1
|
||||
return 25
|
||||
# discourage agent from not entering trades
|
||||
if action == Actions.Neutral.value and self._position == Positions.Neutral:
|
||||
self.custom_info[f"{Actions.Neutral.name}"] += 1
|
||||
return -1
|
||||
|
||||
max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
|
||||
@@ -131,22 +128,18 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
|
||||
# discourage sitting in position
|
||||
if (self._position in (Positions.Short, Positions.Long) and
|
||||
action == Actions.Neutral.value):
|
||||
self.custom_info["Hold"] += 1
|
||||
return -1 * trade_duration / max_trade_duration
|
||||
|
||||
# close long
|
||||
if action == Actions.Long_exit.value and self._position == Positions.Long:
|
||||
if pnl > self.profit_aim * self.rr:
|
||||
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||
self.custom_info[f"{Actions.Long_exit.name}"] += 1
|
||||
return float(pnl * factor)
|
||||
|
||||
# close short
|
||||
if action == Actions.Short_exit.value and self._position == Positions.Short:
|
||||
if pnl > self.profit_aim * self.rr:
|
||||
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||
self.custom_info[f"{Actions.Short_exit.name}"] += 1
|
||||
return float(pnl * factor)
|
||||
|
||||
self.custom_info["Unknown"] += 1
|
||||
return 0.
|
||||
|
Reference in New Issue
Block a user