From 81fd2e588ff8f97225f45071c59a46d42c88a269 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 26 Nov 2022 12:11:59 +0100
Subject: [PATCH] ensure typing, remove unused code

---
 docs/freqai-reinforcement-learning.md         |  2 +-
 freqtrade/freqai/RL/Base5ActionRLEnv.py       |  2 +-
 freqtrade/freqai/RL/BaseEnvironment.py        | 74 ++++++++++---------
 .../prediction_models/ReinforcementLearner.py |  6 +-
 .../ReinforcementLearner_test_4ac.py          |  6 +-
 5 files changed, 46 insertions(+), 44 deletions(-)

diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md
index 48118bb2a..2a1ffc250 100644
--- a/docs/freqai-reinforcement-learning.md
+++ b/docs/freqai-reinforcement-learning.md
@@ -195,7 +195,7 @@ As you begin to modify the strategy and the prediction model, you will quickly r
         Users can override any functions from those parent classes. Here is an example
         of a user customized `calculate_reward()` function.
         """
-        def calculate_reward(self, action):
+        def calculate_reward(self, action: int) -> float:
             # first, penalize if the action is not valid
             if not self._is_valid(action):
                 return -2
diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py
index 0d7672b2f..8012ff1af 100644
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -158,7 +158,7 @@ class Base5ActionRLEnv(BaseEnvironment):
                     (action == Actions.Long_exit.value and self._position == Positions.Short) or
                     (action == Actions.Long_exit.value and self._position == Positions.Neutral))

-    def _is_valid(self, action: int):
+    def _is_valid(self, action: int) -> bool:
         # trade signal
         """
         Determine if the signal is valid.
diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
index 6853377cb..7aa571697 100644
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -208,13 +208,13 @@ class BaseEnvironment(gym.Env):
         """
         return

-    def _is_valid(self, action: int):
+    def _is_valid(self, action: int) -> bool:
         """
         Determine if the signal is valid.This is unique to the actions in the
         environment, and therefore must be inherited.
         """
-        return
+        return True

     def add_entry_fee(self, price):
         return price * (1 + self.fee)
@@ -230,7 +230,7 @@ class BaseEnvironment(gym.Env):
             self.history[key].append(value)

     @abstractmethod
-    def calculate_reward(self, action):
+    def calculate_reward(self, action: int) -> float:
         """
         An example reward function. This is the one function that users will likely
         wish to inject their own creativity into.
@@ -263,38 +263,40 @@ class BaseEnvironment(gym.Env):
         # assumes unit stake and no compounding
         self._total_profit += pnl

-    def most_recent_return(self, action: int):
-        """
-        Calculate the tick to tick return if in a trade.
-        Return is generated from rising prices in Long
-        and falling prices in Short positions.
-        The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
-        """
-        # Long positions
-        if self._position == Positions.Long:
-            current_price = self.prices.iloc[self._current_tick].open
-            previous_price = self.prices.iloc[self._current_tick - 1].open
-
-            if (self._position_history[self._current_tick - 1] == Positions.Short
-                    or self._position_history[self._current_tick - 1] == Positions.Neutral):
-                previous_price = self.add_entry_fee(previous_price)
-
-            return np.log(current_price) - np.log(previous_price)
-
-        # Short positions
-        if self._position == Positions.Short:
-            current_price = self.prices.iloc[self._current_tick].open
-            previous_price = self.prices.iloc[self._current_tick - 1].open
-            if (self._position_history[self._current_tick - 1] == Positions.Long
-                    or self._position_history[self._current_tick - 1] == Positions.Neutral):
-                previous_price = self.add_exit_fee(previous_price)
-
-            return np.log(previous_price) - np.log(current_price)
-
-        return 0
-
-    def update_portfolio_log_returns(self, action):
-        self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
-
     def current_price(self) -> float:
         return self.prices.iloc[self._current_tick].open
+
+    # Keeping around in case we want to start building more complex environment
+    # templates in the future.
+    # def most_recent_return(self):
+    #     """
+    #     Calculate the tick to tick return if in a trade.
+    #     Return is generated from rising prices in Long
+    #     and falling prices in Short positions.
+    #     The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
+    #     """
+    #     # Long positions
+    #     if self._position == Positions.Long:
+    #         current_price = self.prices.iloc[self._current_tick].open
+    #         previous_price = self.prices.iloc[self._current_tick - 1].open
+
+    #         if (self._position_history[self._current_tick - 1] == Positions.Short
+    #                 or self._position_history[self._current_tick - 1] == Positions.Neutral):
+    #             previous_price = self.add_entry_fee(previous_price)
+
+    #         return np.log(current_price) - np.log(previous_price)
+
+    #     # Short positions
+    #     if self._position == Positions.Short:
+    #         current_price = self.prices.iloc[self._current_tick].open
+    #         previous_price = self.prices.iloc[self._current_tick - 1].open
+    #         if (self._position_history[self._current_tick - 1] == Positions.Long
+    #                 or self._position_history[self._current_tick - 1] == Positions.Neutral):
+    #             previous_price = self.add_exit_fee(previous_price)
+
+    #         return np.log(previous_price) - np.log(current_price)
+
+    #     return 0
+
+    # def update_portfolio_log_returns(self, action):
+    #     self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
index dcf7cf54b..61b01e21b 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
@@ -89,7 +89,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
         sets a custom reward based on profit and trade duration.
         """

-        def calculate_reward(self, action):
+        def calculate_reward(self, action: int) -> float:
             """
             An example reward function. This is the one function that users will likely
             wish to inject their own creativity into.
@@ -103,7 +103,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
                 return -2

             pnl = self.get_unrealized_profit()
-            factor = 100
+            factor = 100.
# reward agent for entering trades if (action in (Actions.Long_enter.value, Actions.Short_enter.value) @@ -114,7 +114,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel): return -1 max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300) - trade_duration = self._current_tick - self._last_trade_tick + trade_duration = self._current_tick - self._last_trade_tick # type: ignore if trade_duration <= max_trade_duration: factor *= 1.5 diff --git a/tests/freqai/test_models/ReinforcementLearner_test_4ac.py b/tests/freqai/test_models/ReinforcementLearner_test_4ac.py index 9861acfd8..29e3e3b64 100644 --- a/tests/freqai/test_models/ReinforcementLearner_test_4ac.py +++ b/tests/freqai/test_models/ReinforcementLearner_test_4ac.py @@ -20,7 +20,7 @@ class ReinforcementLearner_test_4ac(ReinforcementLearner): sets a custom reward based on profit and trade duration. """ - def calculate_reward(self, action): + def calculate_reward(self, action: int) -> float: # first, penalize if the action is not valid if not self._is_valid(action): @@ -28,7 +28,7 @@ class ReinforcementLearner_test_4ac(ReinforcementLearner): pnl = self.get_unrealized_profit() rew = np.sign(pnl) * (pnl + 1) - factor = 100 + factor = 100. # reward agent for entering trades if (action in (Actions.Long_enter.value, Actions.Short_enter.value) @@ -39,7 +39,7 @@ class ReinforcementLearner_test_4ac(ReinforcementLearner): return -1 max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300) - trade_duration = self._current_tick - self._last_trade_tick + trade_duration = self._current_tick - self._last_trade_tick # type: ignore if trade_duration <= max_trade_duration: factor *= 1.5