ensure typing, remove unused code
parent 8dbfd2cacf
commit 81fd2e588f
@@ -195,7 +195,7 @@ As you begin to modify the strategy and the prediction model, you will quickly r
     Users can override any functions from those parent classes. Here is an example
     of a user customized `calculate_reward()` function.
     """
-    def calculate_reward(self, action):
+    def calculate_reward(self, action: int) -> float:
         # first, penalize if the action is not valid
         if not self._is_valid(action):
             return -2
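For orientation, a user override written against the newly typed signature might look like the sketch below. It is illustrative only: the `MyRLEnv` name, the import paths, and the reward body are assumptions, not part of this commit; only the `calculate_reward(self, action: int) -> float` signature comes from the hunk above.

# Sketch only; module paths are assumed from freqtrade's layout at the time.
from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv
from freqtrade.freqai.RL.BaseEnvironment import Positions


class MyRLEnv(Base5ActionRLEnv):
    """Hypothetical user environment overriding calculate_reward() with the new typing."""

    def calculate_reward(self, action: int) -> float:
        # penalize invalid actions first, as in the documented example
        if not self._is_valid(action):
            return -2.
        # otherwise reward the unrealized profit of an open position
        pnl = self.get_unrealized_profit()
        if self._position in (Positions.Long, Positions.Short):
            return float(pnl)
        return 0.
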
@@ -158,7 +158,7 @@ class Base5ActionRLEnv(BaseEnvironment):
                     (action == Actions.Long_exit.value and self._position == Positions.Short) or
                     (action == Actions.Long_exit.value and self._position == Positions.Neutral))
 
-    def _is_valid(self, action: int):
+    def _is_valid(self, action: int) -> bool:
         # trade signal
         """
         Determine if the signal is valid.
@@ -208,13 +208,13 @@ class BaseEnvironment(gym.Env):
         """
         return
 
-    def _is_valid(self, action: int):
+    def _is_valid(self, action: int) -> bool:
         """
         Determine if the signal is valid.This is
         unique to the actions in the environment, and therefore must be
         inherited.
         """
-        return
+        return True
 
     def add_entry_fee(self, price):
         return price * (1 + self.fee)
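Because `_is_valid()` is meant to be specialized per environment (the base class now simply returns `True`), an override against the new `-> bool` signature could look roughly like the fragment below. This is a sketch assuming the five-action set (`Neutral`, `Long_enter`, `Long_exit`, `Short_enter`, `Short_exit`); `Short_exit` does not appear in the hunks above and is an assumption.

# Hypothetical override inside a Base5ActionRLEnv subclass (method fragment).
def _is_valid(self, action: int) -> bool:
    # exits require a matching open position; entries require a neutral position
    if action == Actions.Long_exit.value:
        return self._position == Positions.Long
    if action == Actions.Short_exit.value:
        return self._position == Positions.Short
    if action in (Actions.Long_enter.value, Actions.Short_enter.value):
        return self._position == Positions.Neutral
    # holding (Neutral) is always allowed
    return True
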
@@ -230,7 +230,7 @@ class BaseEnvironment(gym.Env):
             self.history[key].append(value)
 
     @abstractmethod
-    def calculate_reward(self, action):
+    def calculate_reward(self, action: int) -> float:
         """
         An example reward function. This is the one function that users will likely
         wish to inject their own creativity into.
@@ -263,38 +263,40 @@ class BaseEnvironment(gym.Env):
         # assumes unit stake and no compounding
         self._total_profit += pnl
 
-    def most_recent_return(self, action: int):
-        """
-        Calculate the tick to tick return if in a trade.
-        Return is generated from rising prices in Long
-        and falling prices in Short positions.
-        The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
-        """
-        # Long positions
-        if self._position == Positions.Long:
-            current_price = self.prices.iloc[self._current_tick].open
-            previous_price = self.prices.iloc[self._current_tick - 1].open
-
-            if (self._position_history[self._current_tick - 1] == Positions.Short
-                    or self._position_history[self._current_tick - 1] == Positions.Neutral):
-                previous_price = self.add_entry_fee(previous_price)
-
-            return np.log(current_price) - np.log(previous_price)
-
-        # Short positions
-        if self._position == Positions.Short:
-            current_price = self.prices.iloc[self._current_tick].open
-            previous_price = self.prices.iloc[self._current_tick - 1].open
-            if (self._position_history[self._current_tick - 1] == Positions.Long
-                    or self._position_history[self._current_tick - 1] == Positions.Neutral):
-                previous_price = self.add_exit_fee(previous_price)
-
-            return np.log(previous_price) - np.log(current_price)
-
-        return 0
-
-    def update_portfolio_log_returns(self, action):
-        self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
-
     def current_price(self) -> float:
         return self.prices.iloc[self._current_tick].open
 
+    # Keeping around incase we want to start building more complex environment
+    # templates in the future.
+    # def most_recent_return(self):
+    #     """
+    #     Calculate the tick to tick return if in a trade.
+    #     Return is generated from rising prices in Long
+    #     and falling prices in Short positions.
+    #     The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
+    #     """
+    #     # Long positions
+    #     if self._position == Positions.Long:
+    #         current_price = self.prices.iloc[self._current_tick].open
+    #         previous_price = self.prices.iloc[self._current_tick - 1].open
+
+    #         if (self._position_history[self._current_tick - 1] == Positions.Short
+    #                 or self._position_history[self._current_tick - 1] == Positions.Neutral):
+    #             previous_price = self.add_entry_fee(previous_price)
+
+    #         return np.log(current_price) - np.log(previous_price)
+
+    #     # Short positions
+    #     if self._position == Positions.Short:
+    #         current_price = self.prices.iloc[self._current_tick].open
+    #         previous_price = self.prices.iloc[self._current_tick - 1].open
+    #         if (self._position_history[self._current_tick - 1] == Positions.Long
+    #                 or self._position_history[self._current_tick - 1] == Positions.Neutral):
+    #             previous_price = self.add_exit_fee(previous_price)
+
+    #         return np.log(previous_price) - np.log(current_price)
+
+    #     return 0
+
+    # def update_portfolio_log_returns(self, action):
+    #     self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
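The `most_recent_return()` method removed (and kept only as a comment) above computes a tick-to-tick log return: long positions gain when the open price rises, shorts when it falls, and the entry/exit fee is folded into the previous price on the tick where the position was opened. A minimal standalone sketch of the core calculation, with hypothetical names and without the fee handling:

import numpy as np


def tick_log_return(current_price: float, previous_price: float, is_long: bool) -> float:
    """Tick-to-tick log return, positive when the price moves in the trade's favour."""
    if is_long:
        return float(np.log(current_price) - np.log(previous_price))
    return float(np.log(previous_price) - np.log(current_price))


# e.g. a long position over a 100 -> 101 move:
# tick_log_return(101.0, 100.0, is_long=True) ~= 0.00995
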
@@ -89,7 +89,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
         sets a custom reward based on profit and trade duration.
         """
 
-        def calculate_reward(self, action):
+        def calculate_reward(self, action: int) -> float:
             """
             An example reward function. This is the one function that users will likely
             wish to inject their own creativity into.
@@ -103,7 +103,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
                 return -2
 
             pnl = self.get_unrealized_profit()
-            factor = 100
+            factor = 100.
 
             # reward agent for entering trades
             if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
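The `factor = 100` to `factor = 100.` change reads like a typing fix: with the integer literal, mypy infers `factor` as `int`, and the later `factor *= 1.5` then assigns a `float` result back into an `int` variable, which mypy reports. A minimal illustration (not project code; runtime behaviour is unchanged):

factor = 100      # mypy infers int here ...
factor *= 1.5     # ... so this line is flagged: float assigned to an int variable

factor_ok = 100.  # float literal from the start
factor_ok *= 1.5  # type-checks cleanly
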
@@ -114,7 +114,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
                 return -1
 
             max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
-            trade_duration = self._current_tick - self._last_trade_tick
+            trade_duration = self._current_tick - self._last_trade_tick  # type: ignore
 
             if trade_duration <= max_trade_duration:
                 factor *= 1.5
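Taken together, the duration logic above scales the eventual exit reward: trades closed within `max_trade_duration` get a boosted factor. The sketch below shows the shape of that computation with the same names as the diff; the `* 0.5` damping branch for overlong trades and the final `pnl * factor` product are assumptions based on the surrounding code, not lines shown in these hunks.

def duration_scaled_reward(pnl: float, trade_duration: int,
                           max_trade_duration: int = 300) -> float:
    """Illustrative exit reward: profit scaled up for short trades, down for long ones."""
    factor = 100.
    if trade_duration <= max_trade_duration:
        factor *= 1.5   # branch shown in the diff above
    elif trade_duration > max_trade_duration:
        factor *= 0.5   # assumed damping branch, not shown in these hunks
    return float(pnl * factor)
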
@@ -20,7 +20,7 @@ class ReinforcementLearner_test_4ac(ReinforcementLearner):
         sets a custom reward based on profit and trade duration.
         """
 
-        def calculate_reward(self, action):
+        def calculate_reward(self, action: int) -> float:
 
             # first, penalize if the action is not valid
             if not self._is_valid(action):
@@ -28,7 +28,7 @@ class ReinforcementLearner_test_4ac(ReinforcementLearner):
 
             pnl = self.get_unrealized_profit()
             rew = np.sign(pnl) * (pnl + 1)
-            factor = 100
+            factor = 100.
 
             # reward agent for entering trades
             if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
@@ -39,7 +39,7 @@ class ReinforcementLearner_test_4ac(ReinforcementLearner):
                 return -1
 
             max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
-            trade_duration = self._current_tick - self._last_trade_tick
+            trade_duration = self._current_tick - self._last_trade_tick  # type: ignore
 
             if trade_duration <= max_trade_duration:
                 factor *= 1.5