From 81fd2e588ff8f97225f45071c59a46d42c88a269 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 26 Nov 2022 12:11:59 +0100
Subject: [PATCH] ensure typing, remove unused code

---
 docs/freqai-reinforcement-learning.md         |  2 +-
 freqtrade/freqai/RL/Base5ActionRLEnv.py       |  2 +-
 freqtrade/freqai/RL/BaseEnvironment.py        | 74 ++++++++++---------
 .../prediction_models/ReinforcementLearner.py |  6 +-
 .../ReinforcementLearner_test_4ac.py          |  6 +-
 5 files changed, 46 insertions(+), 44 deletions(-)

diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md
index 48118bb2a..2a1ffc250 100644
--- a/docs/freqai-reinforcement-learning.md
+++ b/docs/freqai-reinforcement-learning.md
@@ -195,7 +195,7 @@ As you begin to modify the strategy and the prediction model, you will quickly r
         Users can override any functions from those parent classes. Here is an example
         of a user customized `calculate_reward()` function.
         """
-        def calculate_reward(self, action):
+        def calculate_reward(self, action: int) -> float:
             # first, penalize if the action is not valid
             if not self._is_valid(action):
                 return -2
diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py
index 0d7672b2f..8012ff1af 100644
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -158,7 +158,7 @@ class Base5ActionRLEnv(BaseEnvironment):
                     (action == Actions.Long_exit.value and self._position == Positions.Short) or
                     (action == Actions.Long_exit.value and self._position == Positions.Neutral))

-    def _is_valid(self, action: int):
+    def _is_valid(self, action: int) -> bool:
         # trade signal
         """
         Determine if the signal is valid.
diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
index 6853377cb..7aa571697 100644
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -208,13 +208,13 @@ class BaseEnvironment(gym.Env):
         """
         return

-    def _is_valid(self, action: int):
+    def _is_valid(self, action: int) -> bool:
         """
         Determine if the signal is valid.This is unique to the actions in the
         environment, and therefore must be inherited.
         """
-        return
+        return True

     def add_entry_fee(self, price):
         return price * (1 + self.fee)
@@ -230,7 +230,7 @@ class BaseEnvironment(gym.Env):
             self.history[key].append(value)

     @abstractmethod
-    def calculate_reward(self, action):
+    def calculate_reward(self, action: int) -> float:
         """
         An example reward function. This is the one function that users will likely
         wish to inject their own creativity into.
@@ -263,38 +263,40 @@ class BaseEnvironment(gym.Env):
         # assumes unit stake and no compounding
         self._total_profit += pnl

-    def most_recent_return(self, action: int):
-        """
-        Calculate the tick to tick return if in a trade.
-        Return is generated from rising prices in Long
-        and falling prices in Short positions.
-        The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
-        """
-        # Long positions
-        if self._position == Positions.Long:
-            current_price = self.prices.iloc[self._current_tick].open
-            previous_price = self.prices.iloc[self._current_tick - 1].open
-
-            if (self._position_history[self._current_tick - 1] == Positions.Short
-                    or self._position_history[self._current_tick - 1] == Positions.Neutral):
-                previous_price = self.add_entry_fee(previous_price)
-
-            return np.log(current_price) - np.log(previous_price)
-
-        # Short positions
-        if self._position == Positions.Short:
-            current_price = self.prices.iloc[self._current_tick].open
-            previous_price = self.prices.iloc[self._current_tick - 1].open
-            if (self._position_history[self._current_tick - 1] == Positions.Long
-                    or self._position_history[self._current_tick - 1] == Positions.Neutral):
-                previous_price = self.add_exit_fee(previous_price)
-
-            return np.log(previous_price) - np.log(current_price)
-
-        return 0
-
-    def update_portfolio_log_returns(self, action):
-        self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
-
     def current_price(self) -> float:
         return self.prices.iloc[self._current_tick].open
+
+    # Keeping around in case we want to start building more complex environment
+    # templates in the future.
+    # def most_recent_return(self):
+    #     """
+    #     Calculate the tick to tick return if in a trade.
+    #     Return is generated from rising prices in Long
+    #     and falling prices in Short positions.
+    #     The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
+    #     """
+    #     # Long positions
+    #     if self._position == Positions.Long:
+    #         current_price = self.prices.iloc[self._current_tick].open
+    #         previous_price = self.prices.iloc[self._current_tick - 1].open
+
+    #         if (self._position_history[self._current_tick - 1] == Positions.Short
+    #                 or self._position_history[self._current_tick - 1] == Positions.Neutral):
+    #             previous_price = self.add_entry_fee(previous_price)
+
+    #         return np.log(current_price) - np.log(previous_price)
+
+    #     # Short positions
+    #     if self._position == Positions.Short:
+    #         current_price = self.prices.iloc[self._current_tick].open
+    #         previous_price = self.prices.iloc[self._current_tick - 1].open
+    #         if (self._position_history[self._current_tick - 1] == Positions.Long
+    #                 or self._position_history[self._current_tick - 1] == Positions.Neutral):
+    #             previous_price = self.add_exit_fee(previous_price)
+
+    #         return np.log(previous_price) - np.log(current_price)
+
+    #     return 0
+
+    # def update_portfolio_log_returns(self, action):
+    #     self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
index dcf7cf54b..61b01e21b 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
@@ -89,7 +89,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
         sets a custom reward based on profit and trade duration.
         """

-        def calculate_reward(self, action):
+        def calculate_reward(self, action: int) -> float:
             """
             An example reward function. This is the one function that users will likely
             wish to inject their own creativity into.
@@ -103,7 +103,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
                 return -2

             pnl = self.get_unrealized_profit()
-            factor = 100
+            factor = 100.
# reward agent for entering trades if (action in (Actions.Long_enter.value, Actions.Short_enter.value) @@ -114,7 +114,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel): return -1 max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300) - trade_duration = self._current_tick - self._last_trade_tick + trade_duration = self._current_tick - self._last_trade_tick # type: ignore if trade_duration <= max_trade_duration: factor *= 1.5 diff --git a/tests/freqai/test_models/ReinforcementLearner_test_4ac.py b/tests/freqai/test_models/ReinforcementLearner_test_4ac.py index 9861acfd8..29e3e3b64 100644 --- a/tests/freqai/test_models/ReinforcementLearner_test_4ac.py +++ b/tests/freqai/test_models/ReinforcementLearner_test_4ac.py @@ -20,7 +20,7 @@ class ReinforcementLearner_test_4ac(ReinforcementLearner): sets a custom reward based on profit and trade duration. """ - def calculate_reward(self, action): + def calculate_reward(self, action: int) -> float: # first, penalize if the action is not valid if not self._is_valid(action): @@ -28,7 +28,7 @@ class ReinforcementLearner_test_4ac(ReinforcementLearner): pnl = self.get_unrealized_profit() rew = np.sign(pnl) * (pnl + 1) - factor = 100 + factor = 100. # reward agent for entering trades if (action in (Actions.Long_enter.value, Actions.Short_enter.value) @@ -39,7 +39,7 @@ class ReinforcementLearner_test_4ac(ReinforcementLearner): return -1 max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300) - trade_duration = self._current_tick - self._last_trade_tick + trade_duration = self._current_tick - self._last_trade_tick # type: ignore if trade_duration <= max_trade_duration: factor *= 1.5