diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py index 00b031e54..574e71857 100644 --- a/freqtrade/freqai/RL/Base5ActionRLEnv.py +++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py @@ -26,23 +26,23 @@ class Positions(Enum): def opposite(self): return Positions.Short if self == Positions.Long else Positions.Long - def mean_over_std(x): std = np.std(x, ddof=1) mean = np.mean(x) return mean / std if std > 0 else 0 - class Base5ActionRLEnv(gym.Env): """ Base class for a 5 action environment """ metadata = {'render.modes': ['human']} - def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, ): + def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, + id: str = 'baseenv-1', seed: int = 1): assert df.ndim == 2 - self.seed() + self.id = id + self.seed(seed) self.df = df self.signal_features = self.df self.prices = prices @@ -73,7 +73,7 @@ class Base5ActionRLEnv(gym.Env): self.history = None self.trade_history = [] - def seed(self, seed=None): + def seed(self, seed: int = 1): self.np_random, seed = seeding.np_random(seed) return [seed] @@ -102,7 +102,7 @@ class Base5ActionRLEnv(gym.Env): return self._get_observation() - def step(self, action): + def step(self, action: int): self._done = False self._current_tick += 1 @@ -191,7 +191,7 @@ class Base5ActionRLEnv(gym.Env): else: return 0. - def is_tradesignal(self, action): + def is_tradesignal(self, action: int): # trade signal """ not trade signal is : @@ -200,29 +200,29 @@ class Base5ActionRLEnv(gym.Env): Action: Short, position: Short -> Hold Short """ return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or + (action == Actions.Neutral.value and self._position == Positions.Short) or + (action == Actions.Neutral.value and self._position == Positions.Long) or (action == Actions.Short_buy.value and self._position == Positions.Short) or - (action == Actions.Short_sell.value and self._position == Positions.Short) or (action == Actions.Short_buy.value and self._position == Positions.Long) or + (action == Actions.Short_sell.value and self._position == Positions.Short) or (action == Actions.Short_sell.value and self._position == Positions.Long) or - + (action == Actions.Short_sell.value and self._position == Positions.Neutral) or (action == Actions.Long_buy.value and self._position == Positions.Long) or - (action == Actions.Long_sell.value and self._position == Positions.Long) or (action == Actions.Long_buy.value and self._position == Positions.Short) or - (action == Actions.Long_sell.value and self._position == Positions.Short)) + (action == Actions.Long_sell.value and self._position == Positions.Long) or + (action == Actions.Long_sell.value and self._position == Positions.Short) or + (action == Actions.Long_sell.value and self._position == Positions.Neutral)) def _is_trade(self, action: Actions): - return ((action == Actions.Long_buy.value and self._position == Positions.Short) or - (action == Actions.Short_buy.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Short) or - - (action == Actions.Neutral.Short_sell and self._position == Positions.Long) or - (action == Actions.Neutral.Long_sell and self._position == Positions.Short) - ) + return ((action == Actions.Long_buy.value and self._position == Positions.Neutral) or + (action == Actions.Short_buy.value and self._position == Positions.Neutral)) def is_hold(self, action): - return ((action == Actions.Short.value and self._position == Positions.Short) - or (action == Actions.Long.value and self._position == Positions.Long)) + return ((action == Actions.Short_buy.value and self._position == Positions.Short) or + (action == Actions.Long_buy.value and self._position == Positions.Long) or + (action == Actions.Neutral.value and self._position == Positions.Long) or + (action == Actions.Neutral.value and self._position == Positions.Short) or + (action == Actions.Neutral.value and self._position == Positions.Neutral)) def add_buy_fee(self, price): return price * (1 + self.fee) @@ -240,6 +240,52 @@ class Base5ActionRLEnv(gym.Env): def get_sharpe_ratio(self): return mean_over_std(self.get_portfolio_log_returns()) + def calculate_reward(self, action): + + if self._last_trade_tick is None: + return 0. + + # close long + if action == Actions.Long_sell.value and self._position == Positions.Long: + if len(self.close_trade_profit): + # aim x2 rw + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + return float((np.log(current_price) - np.log(last_trade_price)) * 2) + # less than aim x1 rw + elif self.close_trade_profit[-1] < self.profit_aim * self.rr: + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + return float(np.log(current_price) - np.log(last_trade_price)) + # # less than RR SL x2 neg rw + # elif self.close_trade_profit[-1] < (self.profit_aim * -1): + # last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + # current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + # return float((np.log(current_price) - np.log(last_trade_price)) * 2) * -1 + + + # close short + if action == Actions.Short_buy.value and self._position == Positions.Short: + if len(self.close_trade_profit): + # aim x2 rw + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + return float((np.log(last_trade_price) - np.log(current_price)) * 2) + # less than aim x1 rw + elif self.close_trade_profit[-1] < self.profit_aim * self.rr: + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + return float(np.log(last_trade_price) - np.log(current_price)) + # # less than RR SL x2 neg rw + # elif self.close_trade_profit[-1] > self.profit_aim * self.rr: + # last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + # current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + # return float((np.log(last_trade_price) - np.log(current_price)) * 2) * -1 + return 0. + + def _update_profit(self, action): # if self._is_trade(action) or self._done: if self._is_trade(action) or self._done: @@ -255,7 +301,7 @@ class Base5ActionRLEnv(gym.Env): self._profits.append((self._current_tick, self._total_profit)) self.close_trade_profit.append(pnl) - def most_recent_return(self, action): + def most_recent_return(self, action: int): """ We support Long, Neutral and Short positions. Return is generated from rising prices in Long @@ -265,7 +311,6 @@ class Base5ActionRLEnv(gym.Env): # Long positions if self._position == Positions.Long: current_price = self.prices.iloc[self._current_tick].open - # if action == Actions.Short.value or action == Actions.Neutral.value: if action == Actions.Short_buy.value or action == Actions.Neutral.value: current_price = self.add_sell_fee(current_price) @@ -280,7 +325,6 @@ class Base5ActionRLEnv(gym.Env): # Short positions if self._position == Positions.Short: current_price = self.prices.iloc[self._current_tick].open - # if action == Actions.Long.value or action == Actions.Neutral.value: if action == Actions.Long_buy.value or action == Actions.Neutral.value: current_price = self.add_buy_fee(current_price) @@ -296,9 +340,6 @@ class Base5ActionRLEnv(gym.Env): def get_portfolio_log_returns(self): return self.portfolio_log_returns[1:self._current_tick + 1] - def get_trading_log_return(self): - return self.portfolio_log_returns[self._start_tick:] - def update_portfolio_log_returns(self, action): self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) @@ -314,37 +355,3 @@ class Base5ActionRLEnv(gym.Env): returns = np.array(self.close_trade_profit) reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) return reward - - def get_bnh_log_return(self): - return np.diff(np.log(self.prices['open'][self._start_tick:])) - - def calculate_reward(self, action): - - if self._last_trade_tick is None: - return 0. - - # close long - if action == Actions.Long_sell.value and self._position == Positions.Long: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(current_price) - np.log(last_trade_price)) - - if action == Actions.Long_sell.value and self._position == Positions.Long: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float((np.log(current_price) - np.log(last_trade_price)) * 2) - - # close short - if action == Actions.Short_buy.value and self._position == Positions.Short: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(last_trade_price) - np.log(current_price)) - - if action == Actions.Short_buy.value and self._position == Positions.Short: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float((np.log(last_trade_price) - np.log(current_price)) * 2) - - return 0.