diff --git a/freqtrade/freqai/RL/Base4ActionRLEnv.py b/freqtrade/freqai/RL/Base4ActionRLEnv.py
index b4fe78b71..0c719ea92 100644
--- a/freqtrade/freqai/RL/Base4ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base4ActionRLEnv.py
@@ -74,6 +74,7 @@ class Base4ActionRLEnv(BaseEnvironment):
                 self._last_trade_tick = self._current_tick
             elif action == Actions.Exit.value:
                 self._position = Positions.Neutral
+                self._update_total_profit()
                 trade_type = "neutral"
                 self._last_trade_tick = None
             else:
diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py
index 663ecc77e..b6ebcf703 100644
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -75,8 +75,6 @@ class Base5ActionRLEnv(BaseEnvironment):
         if self._current_tick == self._end_tick:
             self._done = True
 
-        self.update_portfolio_log_returns(action)
-
         self._update_unrealized_total_profit()
         step_reward = self.calculate_reward(action)
         self.total_reward += step_reward
diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
index 6633bf3e8..3b56fc2c4 100644
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -165,12 +165,12 @@ class BaseEnvironment(gym.Env):
         if self._position == Positions.Neutral:
             return 0.
         elif self._position == Positions.Short:
-            current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
-            last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
-            return (last_trade_price - current_price) / last_trade_price
-        elif self._position == Positions.Long:
             current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
             last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
+            return (last_trade_price - current_price) / last_trade_price
+        elif self._position == Positions.Long:
+            current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
+            last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
             return (current_price - last_trade_price) / last_trade_price
         else:
             return 0.
@@ -210,9 +210,8 @@ class BaseEnvironment(gym.Env):
         """
         An example reward function. This is the one function that users will likely
         wish to inject their own creativity into.
-        :params:
-        action: int = The action made by the agent for the current candle.
-        :returns:
+        :param action: int = The action made by the agent for the current candle.
+        :return:
         float = the reward to give to the agent for current step (used for optimization
             of weights in NN)
         """
@@ -234,7 +233,7 @@ class BaseEnvironment(gym.Env):
     def _update_total_profit(self):
         pnl = self.get_unrealized_profit()
         if self.compound_trades:
-            # assumes unite stake and compounding
+            # assumes unit stake and compounding
             self._total_profit = self._total_profit * (1 + pnl)
         else:
             # assumes unit stake and no compounding
diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
index 885918ffb..85756ad8f 100644
--- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
+++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
@@ -180,17 +180,12 @@ class BaseReinforcementLearningModel(IFreqaiModel):
                 if self.data_provider._exchange is None:  # type: ignore
                     logger.error('No exchange available.')
                 else:
-                    current_value = self.data_provider._exchange.get_rate(  # type: ignore
+                    current_rate = self.data_provider._exchange.get_rate(  # type: ignore
                         pair, refresh=False, side="exit", is_short=trade.is_short)
-                openrate = trade.open_rate
+
                 now = datetime.now(timezone.utc).timestamp()
-                trade_duration = int((now - trade.open_date.timestamp()) / self.base_tf_seconds)
-                if 'long' in str(trade.enter_tag):
-                    market_side = 1
-                    current_profit = (current_value - openrate) / openrate
-                else:
-                    market_side = 0
-                    current_profit = (openrate - current_value) / openrate
+                trade_duration = int((now - trade.open_date_utc.timestamp()) / self.base_tf_seconds)
+                current_profit = trade.calc_profit_ratio(current_rate)
 
         return market_side, current_profit, int(trade_duration)