From 58604c747e759161f25ad4c90571fbaf6a1c5233 Mon Sep 17 00:00:00 2001
From: initrv
Date: Wed, 7 Dec 2022 14:37:55 +0300
Subject: [PATCH 1/5] cleanup tensorboard callback

---
 freqtrade/freqai/RL/BaseEnvironment.py        | 10 ++-----
 freqtrade/freqai/RL/TensorboardCallback.py    | 27 +++++++++----------
 .../prediction_models/ReinforcementLearner.py | 14 +++++-----
 3 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
index a31ded0c6..71b423844 100644
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -137,15 +137,9 @@ class BaseEnvironment(gym.Env):
         Reset is called at the beginning of every episode
         """
         # custom_info is used for episodic reports and tensorboard logging
-        self.custom_info["Invalid"] = 0
-        self.custom_info["Hold"] = 0
-        self.custom_info["Unknown"] = 0
-        self.custom_info["pnl_factor"] = 0
-        self.custom_info["duration_factor"] = 0
-        self.custom_info["reward_exit"] = 0
-        self.custom_info["reward_hold"] = 0
+        self.custom_info: dict = {}
         for action in self.actions:
-            self.custom_info[f"{action.name}"] = 0
+            self.custom_info[action.name] = 0
 
         self._done = False
 
diff --git a/freqtrade/freqai/RL/TensorboardCallback.py b/freqtrade/freqai/RL/TensorboardCallback.py
index f590bdf84..d03c040d4 100644
--- a/freqtrade/freqai/RL/TensorboardCallback.py
+++ b/freqtrade/freqai/RL/TensorboardCallback.py
@@ -42,19 +42,18 @@ class TensorboardCallback(BaseCallback):
         )
 
     def _on_step(self) -> bool:
+
+        local_info = self.locals["infos"][0]
         custom_info = self.training_env.get_attr("custom_info")[0]
-        self.logger.record("_state/position", self.locals["infos"][0]["position"])
-        self.logger.record("_state/trade_duration", self.locals["infos"][0]["trade_duration"])
-        self.logger.record("_state/current_profit_pct", self.locals["infos"]
-                           [0]["current_profit_pct"])
-        self.logger.record("_reward/total_profit", self.locals["infos"][0]["total_profit"])
-        self.logger.record("_reward/total_reward", self.locals["infos"][0]["total_reward"])
-        self.logger.record_mean("_reward/mean_trade_duration", self.locals["infos"]
-                                [0]["trade_duration"])
-        self.logger.record("_actions/action", self.locals["infos"][0]["action"])
-        self.logger.record("_actions/_Invalid", custom_info["Invalid"])
-        self.logger.record("_actions/_Unknown", custom_info["Unknown"])
-        self.logger.record("_actions/Hold", custom_info["Hold"])
-        for action in self.actions:
-            self.logger.record(f"_actions/{action.name}", custom_info[action.name])
+
+        for info in local_info:
+            if info not in ["episode", "terminal_observation"]:
+                self.logger.record(f"_info/{info}", local_info[info])
+
+        for info in custom_info:
+            if info in [action.name for action in self.actions]:
+                self.logger.record(f"_actions/{info}", custom_info[info])
+            else:
+                self.logger.record(f"_custom/{info}", custom_info[info])
+
         return True
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
index 47dbaf99e..1383ad15e 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
@@ -100,7 +100,6 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
             """
             # first, penalize if the action is not valid
             if not self._is_valid(action):
-                self.custom_info["Invalid"] += 1
                 return -2
 
             pnl = self.get_unrealized_profit()
@@ -109,15 +108,15 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
             # reward agent for entering trades
            if (action == Actions.Long_enter.value
                    and self._position == Positions.Neutral):
-                self.custom_info[f"{Actions.Long_enter.name}"] += 1
+                self.custom_info[Actions.Long_enter.name] += 1
                return 25
            if (action == Actions.Short_enter.value
                    and self._position == Positions.Neutral):
-                self.custom_info[f"{Actions.Short_enter.name}"] += 1
+                self.custom_info[Actions.Short_enter.name] += 1
                return 25
            # discourage agent from not entering trades
            if action == Actions.Neutral.value and self._position == Positions.Neutral:
-                self.custom_info[f"{Actions.Neutral.name}"] += 1
+                self.custom_info[Actions.Neutral.name] += 1
                return -1

            max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
@@ -131,22 +130,21 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
            # discourage sitting in position
            if (self._position in (Positions.Short, Positions.Long) and
                    action == Actions.Neutral.value):
-                self.custom_info["Hold"] += 1
+                self.custom_info[Actions.Neutral.name] += 1
                return -1 * trade_duration / max_trade_duration

            # close long
            if action == Actions.Long_exit.value and self._position == Positions.Long:
                if pnl > self.profit_aim * self.rr:
                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                self.custom_info[f"{Actions.Long_exit.name}"] += 1
+                self.custom_info[Actions.Long_exit.name] += 1
                return float(pnl * factor)

            # close short
            if action == Actions.Short_exit.value and self._position == Positions.Short:
                if pnl > self.profit_aim * self.rr:
                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                self.custom_info[f"{Actions.Short_exit.name}"] += 1
+                self.custom_info[Actions.Short_exit.name] += 1
                return float(pnl * factor)

-            self.custom_info["Unknown"] += 1
            return 0.
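
The rewritten `_on_step()` above no longer hard-codes individual `logger.record()` calls: every key the environment places in its step `info` dict is logged under `_info/`, and the episodic `custom_info` counters are split between `_actions/` (keys matching an action name) and `_custom/` (everything else). A minimal standalone sketch of the `_info/` filtering, with made-up values and key names taken from the `record()` calls removed above (`print` stands in for `self.logger.record`):

```py
# Sketch only: illustrates the key filtering added in this patch; values are invented.
info = {
    "position": 1,
    "trade_duration": 12,
    "current_profit_pct": 0.014,
    "total_profit": 1.02,
    "total_reward": 3.5,
    "action": 2,
    "episode": {"r": 3.5, "l": 300},  # example of a key the filter below skips
}

for key, value in info.items():
    if key not in ["episode", "terminal_observation"]:
        print(f"_info/{key}", value)  # stands in for self.logger.record(...)
```
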
From cb8fc3c8c7c392b75493d8da7f748760372040a9 Mon Sep 17 00:00:00 2001
From: initrv
Date: Sun, 11 Dec 2022 15:37:45 +0300
Subject: [PATCH 2/5] custom info to tensorboard_metrics

---
 freqtrade/freqai/RL/Base4ActionRLEnv.py              | 2 +-
 freqtrade/freqai/RL/Base5ActionRLEnv.py              | 1 +
 freqtrade/freqai/RL/BaseEnvironment.py               | 8 ++++----
 freqtrade/freqai/RL/TensorboardCallback.py           | 8 ++++----
 .../freqai/prediction_models/ReinforcementLearner.py | 6 ------
 5 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/freqtrade/freqai/RL/Base4ActionRLEnv.py b/freqtrade/freqai/RL/Base4ActionRLEnv.py
index 79616d778..02e182bbd 100644
--- a/freqtrade/freqai/RL/Base4ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base4ActionRLEnv.py
@@ -46,9 +46,9 @@ class Base4ActionRLEnv(BaseEnvironment):
             self._done = True
 
         self._update_unrealized_total_profit()
-
         step_reward = self.calculate_reward(action)
         self.total_reward += step_reward
+        self.tensorboard_metrics[self.actions._member_names_[action]] += 1
 
         trade_type = None
         if self.is_tradesignal(action):
diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py
index 1c09f9386..baf7dde9f 100644
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -49,6 +49,7 @@ class Base5ActionRLEnv(BaseEnvironment):
         self._update_unrealized_total_profit()
         step_reward = self.calculate_reward(action)
         self.total_reward += step_reward
+        self.tensorboard_metrics[self.actions._member_names_[action]] += 1
 
         trade_type = None
         if self.is_tradesignal(action):
diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
index 71b423844..0da13db7c 100644
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -77,7 +77,7 @@ class BaseEnvironment(gym.Env):
 
         # set here to default 5Ac, but all children envs can override this
         self.actions: Type[Enum] = BaseActions
-        self.custom_info: dict = {}
+        self.tensorboard_metrics: dict = {}
 
     def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,
                   reward_kwargs: dict, starting_point=True):
@@ -136,10 +136,10 @@ class BaseEnvironment(gym.Env):
         """
         Reset is called at the beginning of every episode
         """
-        # custom_info is used for episodic reports and tensorboard logging
-        self.custom_info: dict = {}
+        # tensorboard_metrics is used for episodic reports and tensorboard logging
+        self.tensorboard_metrics: dict = {}
         for action in self.actions:
-            self.custom_info[action.name] = 0
+            self.tensorboard_metrics[action.name] = 0
 
         self._done = False
 
diff --git a/freqtrade/freqai/RL/TensorboardCallback.py b/freqtrade/freqai/RL/TensorboardCallback.py
index d03c040d4..b596742e9 100644
--- a/freqtrade/freqai/RL/TensorboardCallback.py
+++ b/freqtrade/freqai/RL/TensorboardCallback.py
@@ -44,16 +44,16 @@ class TensorboardCallback(BaseCallback):
     def _on_step(self) -> bool:
 
         local_info = self.locals["infos"][0]
-        custom_info = self.training_env.get_attr("custom_info")[0]
+        tensorboard_metrics = self.training_env.get_attr("tensorboard_metrics")[0]
 
         for info in local_info:
             if info not in ["episode", "terminal_observation"]:
                 self.logger.record(f"_info/{info}", local_info[info])
 
-        for info in custom_info:
+        for info in tensorboard_metrics:
             if info in [action.name for action in self.actions]:
-                self.logger.record(f"_actions/{info}", custom_info[info])
+                self.logger.record(f"_actions/{info}", tensorboard_metrics[info])
             else:
-                self.logger.record(f"_custom/{info}", custom_info[info])
+                self.logger.record(f"_custom/{info}", tensorboard_metrics[info])
 
         return True
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
index 1383ad15e..e015b138a 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
@@ -108,15 +108,12 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
             # reward agent for entering trades
            if (action == Actions.Long_enter.value
                    and self._position == Positions.Neutral):
-                self.custom_info[Actions.Long_enter.name] += 1
                return 25
            if (action == Actions.Short_enter.value
                    and self._position == Positions.Neutral):
-                self.custom_info[Actions.Short_enter.name] += 1
                return 25
            # discourage agent from not entering trades
            if action == Actions.Neutral.value and self._position == Positions.Neutral:
-                self.custom_info[Actions.Neutral.name] += 1
                return -1

            max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
@@ -130,21 +127,18 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
            # discourage sitting in position
            if (self._position in (Positions.Short, Positions.Long) and
                    action == Actions.Neutral.value):
-                self.custom_info[Actions.Neutral.name] += 1
                return -1 * trade_duration / max_trade_duration

            # close long
            if action == Actions.Long_exit.value and self._position == Positions.Long:
                if pnl > self.profit_aim * self.rr:
                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                self.custom_info[Actions.Long_exit.name] += 1
                return float(pnl * factor)

            # close short
            if action == Actions.Short_exit.value and self._position == Positions.Short:
                if pnl > self.profit_aim * self.rr:
                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                self.custom_info[Actions.Short_exit.name] += 1
                return float(pnl * factor)

            return 0.
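
PATCH 2/5 moves the per-action counting out of `calculate_reward()` and into the environments' `step()`, keyed by `self.actions._member_names_[action]`. `_member_names_` is the enum's list of member names in definition order, so indexing it with the integer action chosen by the agent recovers the action's name. A small runnable illustration (the enum below is a stand-in with assumed values; the real environments define their own action enums):

```py
from enum import Enum


# Hypothetical action enum for illustration only.
class Actions(Enum):
    Neutral = 0
    Long_enter = 1
    Long_exit = 2
    Short_enter = 3
    Short_exit = 4


# Mirrors the per-episode counters initialised in BaseEnvironment.reset()
tensorboard_metrics = {name: 0 for name in Actions._member_names_}

action = 1  # integer action as received by step()
tensorboard_metrics[Actions._member_names_[action]] += 1
print(tensorboard_metrics["Long_enter"])  # -> 1
```
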
From 0fd8e214e4f95a4c2c1929e9b26da43c70fd47dc Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sun, 11 Dec 2022 15:31:29 +0100
Subject: [PATCH 3/5] add documentation for tensorboard_log, change how users interact with tensorboard_log

---
 docs/freqai-reinforcement-learning.md         | 26 +++++++++++++++
 freqtrade/freqai/RL/Base4ActionRLEnv.py       |  2 +-
 freqtrade/freqai/RL/Base5ActionRLEnv.py       |  2 +-
 freqtrade/freqai/RL/BaseEnvironment.py        | 33 ++++++++++++++++---
 .../prediction_models/ReinforcementLearner.py |  1 +
 5 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md
index b1a212a92..b831c90a0 100644
--- a/docs/freqai-reinforcement-learning.md
+++ b/docs/freqai-reinforcement-learning.md
@@ -247,6 +247,32 @@ where `unique-id` is the `identifier` set in the `freqai` configuration file. Th
 
 ![tensorboard](assets/tensorboard.jpg)
 
+
+### Custom logging
+
+FreqAI also provides a built in episodic summary logger called `self.tensorboard_log` for adding custom information to the Tensorboard log. By default, this function is already called once per step inside the environment to record the agent actions. All values accumulated for all steps in a single episode are reported at the conclusion of each episode, followed by a full reset of all metrics to 0 in preparation for the subsequent episode.
+
+
+`self.tensorboard_log` can also be used anywhere inside the environment, for example, it can be added to the `calculate_reward` function to collect more detailed information about how often various parts of the reward were called:
+
+```py
+    class MyRLEnv(Base5ActionRLEnv):
+        """
+        User made custom environment. This class inherits from BaseEnvironment and gym.Env.
+        Users can override any functions from those parent classes. Here is an example
+        of a user customized `calculate_reward()` function.
+        """
+        def calculate_reward(self, action: int) -> float:
+            if not self._is_valid(action):
+                self.tensorboard_log("is_valid")
+                return -2
+
+```
+
+!!! Note
+    The `self.tensorboard_log()` function is designed for tracking incremented objects only i.e. events, actions inside the training environment. If the event of interest is a float, the float can be passed as the second argument e.g. `self.tensorboard_log("float_metric1", 0.23)` would add 0.23 to `float_metric`.
+
+
 ### Choosing a base environment
 
 FreqAI provides two base environments, `Base4ActionEnvironment` and `Base5ActionEnvironment`. As the names imply, the environments are customized for agents that can select from 4 or 5 actions. In the `Base4ActionEnvironment`, the agent can enter long, enter short, hold neutral, or exit position. Meanwhile, in the `Base5ActionEnvironment`, the agent has the same actions as Base4, but instead of a single exit action, it separates exit long and exit short. The main changes stemming from the environment selection include:
diff --git a/freqtrade/freqai/RL/Base4ActionRLEnv.py b/freqtrade/freqai/RL/Base4ActionRLEnv.py
index 02e182bbd..a3ebfdbfa 100644
--- a/freqtrade/freqai/RL/Base4ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base4ActionRLEnv.py
@@ -48,7 +48,7 @@ class Base4ActionRLEnv(BaseEnvironment):
         self._update_unrealized_total_profit()
         step_reward = self.calculate_reward(action)
         self.total_reward += step_reward
-        self.tensorboard_metrics[self.actions._member_names_[action]] += 1
+        self.tensorboard_log(self.actions._member_names_[action])
 
         trade_type = None
         if self.is_tradesignal(action):
diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py
index baf7dde9f..22d3cae30 100644
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -49,7 +49,7 @@ class Base5ActionRLEnv(BaseEnvironment):
         self._update_unrealized_total_profit()
         step_reward = self.calculate_reward(action)
         self.total_reward += step_reward
-        self.tensorboard_metrics[self.actions._member_names_[action]] += 1
+        self.tensorboard_log(self.actions._member_names_[action])
 
         trade_type = None
         if self.is_tradesignal(action):
diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
index 0da13db7c..a5cee4def 100644
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -2,7 +2,7 @@ import logging
 import random
 from abc import abstractmethod
 from enum import Enum
-from typing import Optional, Type
+from typing import Optional, Type, Union
 
 import gym
 import numpy as np
@@ -132,14 +132,37 @@ class BaseEnvironment(gym.Env):
         self.np_random, seed = seeding.np_random(seed)
         return [seed]
 
+    def tensorboard_log(self, metric: str, inc: Union[int, float] = 1):
+        """
+        Function builds the tensorboard_metrics dictionary
+        to be parsed by the TensorboardCallback. This
+        function is designed for tracking incremented objects,
+        events, actions inside the training environment.
+        For example, a user can call this to track the
+        frequency of occurrence of an `is_valid` call in
+        their `calculate_reward()`:
+
+        def calculate_reward(self, action: int) -> float:
+            if not self._is_valid(action):
+                self.tensorboard_log("is_valid")
+                return -2
+
+        :param metric: metric to be tracked and incremented
+        :param inc: value to increment `metric` by
+        """
+        if metric not in self.tensorboard_metrics:
+            self.tensorboard_metrics[metric] = inc
+        else:
+            self.tensorboard_metrics[metric] += inc
+
+    def reset_tensorboard_log(self):
+        self.tensorboard_metrics = {}
+
     def reset(self):
         """
         Reset is called at the beginning of every episode
         """
-        # tensorboard_metrics is used for episodic reports and tensorboard logging
-        self.tensorboard_metrics: dict = {}
-        for action in self.actions:
-            self.tensorboard_metrics[action.name] = 0
+        self.reset_tensorboard_log()
 
         self._done = False
 
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
index e015b138a..38ea67e69 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
@@ -100,6 +100,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
             """
             # first, penalize if the action is not valid
             if not self._is_valid(action):
+                self.tensorboard_log("is_valid")
                 return -2
 
             pnl = self.get_unrealized_profit()
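
Any metric recorded with `tensorboard_log()` reaches TensorBoard through the `TensorboardCallback` loops introduced in PATCH 1/5: keys that match an action name are written under `_actions/`, everything else under `_custom/`. A runnable sketch of that split, with illustrative counts and with `print` standing in for `self.logger.record`:

```py
# Sketch only: mirrors the _actions/_custom split in TensorboardCallback._on_step().
action_names = ["Neutral", "Long_enter", "Long_exit", "Short_enter", "Short_exit"]  # assumed 5-action set
tensorboard_metrics = {"Neutral": 120, "Long_enter": 3, "is_valid": 7}  # made-up episode counts

for info in tensorboard_metrics:
    section = "_actions" if info in action_names else "_custom"
    print(f"{section}/{info}", tensorboard_metrics[info])
```
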
From f9b7d35900b50cc786f8fee4943d5e301e3123b8 Mon Sep 17 00:00:00 2001
From: initrv
Date: Mon, 12 Dec 2022 14:14:23 +0300
Subject: [PATCH 4/5] add increment param for tensorboard_log

---
 freqtrade/freqai/RL/BaseEnvironment.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
index 5a90d381e..5a5a950e7 100644
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -139,7 +139,7 @@ class BaseEnvironment(gym.Env):
         self.np_random, seed = seeding.np_random(seed)
         return [seed]
 
-    def tensorboard_log(self, metric: str, inc: Union[int, float] = 1):
+    def tensorboard_log(self, metric: str, value: Union[int, float] = 1, inc: bool = True):
         """
         Function builds the tensorboard_metrics dictionary
         to be parsed by the TensorboardCallback. This
@@ -155,12 +155,13 @@ class BaseEnvironment(gym.Env):
                 return -2
 
         :param metric: metric to be tracked and incremented
-        :param inc: value to increment `metric` by
+        :param value: value to increment `metric` by
+        :param inc: sets whether the `value` is incremented or not
         """
-        if metric not in self.tensorboard_metrics:
-            self.tensorboard_metrics[metric] = inc
+        if not inc or metric not in self.tensorboard_metrics:
+            self.tensorboard_metrics[metric] = value
         else:
-            self.tensorboard_metrics[metric] += inc
+            self.tensorboard_metrics[metric] += value
 
     def reset_tensorboard_log(self):
         self.tensorboard_metrics = {}

From f940280d5e82d3574628af99f29d1fa0e2dd695a Mon Sep 17 00:00:00 2001
From: initrv
Date: Mon, 12 Dec 2022 14:35:44 +0300
Subject: [PATCH 5/5] Fix tensorboard_log incrementing note

---
 docs/freqai-reinforcement-learning.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md
index b831c90a0..f3d6c97f8 100644
--- a/docs/freqai-reinforcement-learning.md
+++ b/docs/freqai-reinforcement-learning.md
@@ -270,7 +270,7 @@ FreqAI also provides a built in episodic summary logger called `self.tensorboard
 ```
 
 !!! Note
-    The `self.tensorboard_log()` function is designed for tracking incremented objects only i.e. events, actions inside the training environment. If the event of interest is a float, the float can be passed as the second argument e.g. `self.tensorboard_log("float_metric1", 0.23)` would add 0.23 to `float_metric`.
+    The `self.tensorboard_log()` function is designed for tracking incremented objects only i.e. events, actions inside the training environment. If the event of interest is a float, the float can be passed as the second argument e.g. `self.tensorboard_log("float_metric1", 0.23)` would add 0.23 to `float_metric1`. In this case you can also disable incrementing using the `inc=False` parameter.
 
 
 ### Choosing a base environment
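
After PATCH 4/5 and 5/5, `tensorboard_log()` takes a `value` and an `inc` flag, so the same helper can act as an episodic counter or as a plain gauge. A short usage sketch from inside the environment (the calls assume the final signature from PATCH 4/5; `pnl_factor` is an illustrative metric name borrowed from the counters removed in PATCH 1/5):

```py
# Counter: adds 1 to this episode's "Long_enter" total.
self.tensorboard_log("Long_enter")

# Counter with a float increment: adds 0.23 to "float_metric1", as in the docs note.
self.tensorboard_log("float_metric1", 0.23)

# Gauge: with inc=False the stored value is overwritten rather than accumulated.
self.tensorboard_log("pnl_factor", 1.7, inc=False)
```
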