5-action base environment (Base5ActionRLEnv): fixes in logic

2022-08-17 08:36:10 +03:00
parent 16cec7dfbd
commit 2080ff86ed
1 changed files with 68 additions and 61 deletions
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -26,23 +26,23 @@ class Positions(Enum):
    def opposite(self):
        return Positions.Short if self == Positions.Long else Positions.Long

-
 def mean_over_std(x):
    std = np.std(x, ddof=1)
    mean = np.mean(x)
    return mean / std if std > 0 else 0

-
 class Base5ActionRLEnv(gym.Env):
    """
    Base class for a 5 action environment
    """
    metadata = {'render.modes': ['human']}

-    def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, ):
+    def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True,
+                 id: str = 'baseenv-1', seed: int = 1):
        assert df.ndim == 2

-        self.seed()
+        self.id = id
+        self.seed(seed)
        self.df = df
        self.signal_features = self.df
        self.prices = prices
@@ -73,7 +73,7 @@ class Base5ActionRLEnv(gym.Env):
        self.history = None
        self.trade_history = []

-    def seed(self, seed=None):
+    def seed(self, seed: int = 1):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

@@ -102,7 +102,7 @@ class Base5ActionRLEnv(gym.Env):

        return self._get_observation()

-    def step(self, action):
+    def step(self, action: int):
        self._done = False
        self._current_tick += 1

@@ -191,7 +191,7 @@ class Base5ActionRLEnv(gym.Env):
        else:
            return 0.

-    def is_tradesignal(self, action):
+    def is_tradesignal(self, action: int):
        # trade signal
        """
        not trade signal is :
@@ -200,29 +200,29 @@ class Base5ActionRLEnv(gym.Env):
        Action: Short, position: Short -> Hold Short
        """
        return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or
+                    (action == Actions.Neutral.value and self._position == Positions.Short) or
+                    (action == Actions.Neutral.value and self._position == Positions.Long) or
                    (action == Actions.Short_buy.value and self._position == Positions.Short) or
-                    (action == Actions.Short_sell.value and self._position == Positions.Short) or
                    (action == Actions.Short_buy.value and self._position == Positions.Long) or
+                    (action == Actions.Short_sell.value and self._position == Positions.Short) or
                    (action == Actions.Short_sell.value and self._position == Positions.Long) or
-
+                    (action == Actions.Short_sell.value and self._position == Positions.Neutral) or
                    (action == Actions.Long_buy.value and self._position == Positions.Long) or
-                    (action == Actions.Long_sell.value and self._position == Positions.Long) or
                    (action == Actions.Long_buy.value and self._position == Positions.Short) or
-                    (action == Actions.Long_sell.value and self._position == Positions.Short))
+                    (action == Actions.Long_sell.value and self._position == Positions.Long) or
+                    (action == Actions.Long_sell.value and self._position == Positions.Short) or
+                    (action == Actions.Long_sell.value and self._position == Positions.Neutral))

    def _is_trade(self, action: Actions):
-        return ((action == Actions.Long_buy.value and self._position == Positions.Short) or
-                (action == Actions.Short_buy.value and self._position == Positions.Long) or
-                (action == Actions.Neutral.value and self._position == Positions.Long) or
-                (action == Actions.Neutral.value and self._position == Positions.Short) or
-
-                (action == Actions.Neutral.Short_sell and self._position == Positions.Long) or
-                (action == Actions.Neutral.Long_sell and self._position == Positions.Short)
-                )
+        return ((action == Actions.Long_buy.value and self._position == Positions.Neutral) or
+                (action == Actions.Short_buy.value and self._position == Positions.Neutral))

    def is_hold(self, action):
-        return ((action == Actions.Short.value and self._position == Positions.Short)
-                or (action == Actions.Long.value and self._position == Positions.Long))
+        return ((action == Actions.Short_buy.value and self._position == Positions.Short) or
+                (action == Actions.Long_buy.value and self._position == Positions.Long) or
+                (action == Actions.Neutral.value and self._position == Positions.Long) or
+                (action == Actions.Neutral.value and self._position == Positions.Short) or
+                (action == Actions.Neutral.value and self._position == Positions.Neutral))

    def add_buy_fee(self, price):
        return price * (1 + self.fee)
@@ -240,6 +240,52 @@ class Base5ActionRLEnv(gym.Env):
    def get_sharpe_ratio(self):
        return mean_over_std(self.get_portfolio_log_returns())

+    def calculate_reward(self, action):
+
+        if self._last_trade_tick is None:
+            return 0.
+
+        # close long
+        if action == Actions.Long_sell.value and self._position == Positions.Long:
+            if len(self.close_trade_profit):
+                # latest close_trade_profit entry above profit_aim * rr: log-return reward x2
+                if self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                    last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
+                    current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
+                    return float((np.log(current_price) - np.log(last_trade_price)) * 2)
+                # below profit_aim * rr: x1 reward (an exactly equal profit matches neither -> 0.)
+                elif self.close_trade_profit[-1] < self.profit_aim * self.rr:
+                    last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
+                    current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
+                    return float(np.log(current_price) - np.log(last_trade_price))
+                # # less than RR SL x2 neg rw
+                # elif self.close_trade_profit[-1] < (self.profit_aim * -1):
+                #     last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
+                #     current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
+                #     return float((np.log(current_price) - np.log(last_trade_price)) * 2) * -1
+
+
+        # close short
+        if action == Actions.Short_buy.value and self._position == Positions.Short:
+            if len(self.close_trade_profit):
+                # latest close_trade_profit entry above profit_aim * rr: log-return reward x2
+                if self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                    last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
+                    current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
+                    return float((np.log(last_trade_price) - np.log(current_price)) * 2)
+                # below profit_aim * rr: x1 reward (an exactly equal profit matches neither -> 0.)
+                elif self.close_trade_profit[-1] < self.profit_aim * self.rr:
+                    last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
+                    current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
+                    return float(np.log(last_trade_price) - np.log(current_price))
+                # # less than RR SL x2 neg rw
+                # elif self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                #     last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
+                #     current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
+                #     return float((np.log(last_trade_price) - np.log(current_price)) * 2) * -1
+        return 0.
+
+
    def _update_profit(self, action):
        # if self._is_trade(action) or self._done:
        if self._is_trade(action) or self._done:
@@ -255,7 +301,7 @@ class Base5ActionRLEnv(gym.Env):
                self._profits.append((self._current_tick, self._total_profit))
                self.close_trade_profit.append(pnl)

-    def most_recent_return(self, action):
+    def most_recent_return(self, action: int):
        """
        We support Long, Neutral and Short positions.
        Return is generated from rising prices in Long
@@ -265,7 +311,6 @@ class Base5ActionRLEnv(gym.Env):
        # Long positions
        if self._position == Positions.Long:
            current_price = self.prices.iloc[self._current_tick].open
-            # if action == Actions.Short.value or action == Actions.Neutral.value:
            if action == Actions.Short_buy.value or action == Actions.Neutral.value:
                current_price = self.add_sell_fee(current_price)

@@ -280,7 +325,6 @@ class Base5ActionRLEnv(gym.Env):
        # Short positions
        if self._position == Positions.Short:
            current_price = self.prices.iloc[self._current_tick].open
-            # if action == Actions.Long.value or action == Actions.Neutral.value:
            if action == Actions.Long_buy.value or action == Actions.Neutral.value:
                current_price = self.add_buy_fee(current_price)

@@ -296,9 +340,6 @@ class Base5ActionRLEnv(gym.Env):
    def get_portfolio_log_returns(self):
        return self.portfolio_log_returns[1:self._current_tick + 1]

-    def get_trading_log_return(self):
-        return self.portfolio_log_returns[self._start_tick:]
-
    def update_portfolio_log_returns(self, action):
        self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)

@@ -314,37 +355,3 @@ class Base5ActionRLEnv(gym.Env):
        returns = np.array(self.close_trade_profit)
        reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9)
        return reward
-
-    def get_bnh_log_return(self):
-        return np.diff(np.log(self.prices['open'][self._start_tick:]))
-
-    def calculate_reward(self, action):
-
-        if self._last_trade_tick is None:
-            return 0.
-
-        # close long
-        if action == Actions.Long_sell.value and self._position == Positions.Long:
-            last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
-            return float(np.log(current_price) - np.log(last_trade_price))
-
-        if action == Actions.Long_sell.value and self._position == Positions.Long:
-            if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
-                current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
-                return float((np.log(current_price) - np.log(last_trade_price)) * 2)
-
-        # close short
-        if action == Actions.Short_buy.value and self._position == Positions.Short:
-            last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
-            return float(np.log(last_trade_price) - np.log(current_price))
-
-        if action == Actions.Short_buy.value and self._position == Positions.Short:
-            if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
-                current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
-                return float((np.log(last_trade_price) - np.log(current_price)) * 2)
-
-        return 0.