5ac base fixes in logic

This commit is contained in:
MukavaValkku 2022-08-17 08:36:10 +03:00 committed by robcaulk
parent 16cec7dfbd
commit 2080ff86ed
1 changed files with 68 additions and 61 deletions

View File

@ -26,23 +26,23 @@ class Positions(Enum):
def opposite(self):
    """
    Return the position on the other side of the market.

    ``Positions.Long`` maps to ``Positions.Short``; every other member
    (``Positions.Short`` included) maps to ``Positions.Long``.
    """
    if self == Positions.Long:
        return Positions.Short
    return Positions.Long
def mean_over_std(x):
    """
    Return the mean of ``x`` divided by its sample standard deviation.

    The standard deviation is computed with ``ddof=1``. Whenever it is not
    strictly positive (this also covers a NaN result), 0 is returned
    instead of performing the division.
    """
    spread = np.std(x, ddof=1)
    centre = np.mean(x)
    # ``not spread > 0`` (rather than ``spread <= 0``) keeps NaN on the
    # zero-returning path.
    if not spread > 0:
        return 0
    return centre / spread
class Base5ActionRLEnv(gym.Env):
"""
Base class for a 5 action environment
"""
metadata = {'render.modes': ['human']}
def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, ):
def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True,
id: str = 'baseenv-1', seed: int = 1):
assert df.ndim == 2
self.seed()
self.id = id
self.seed(seed)
self.df = df
self.signal_features = self.df
self.prices = prices
@ -73,7 +73,7 @@ class Base5ActionRLEnv(gym.Env):
self.history = None
self.trade_history = []
def seed(self, seed: int = 1):
    """
    Seed the environment's random number generator.

    :param seed: value forwarded to ``seeding.np_random``; defaults to 1.
    :return: single-element list holding the seed reported back by
        ``seeding.np_random``.
    """
    # ``seeding.np_random`` returns the generator together with the seed it
    # actually used; the generator is kept on the instance.
    self.np_random, seed = seeding.np_random(seed)
    return [seed]
@ -102,7 +102,7 @@ class Base5ActionRLEnv(gym.Env):
return self._get_observation()
def step(self, action):
def step(self, action: int):
self._done = False
self._current_tick += 1
@ -191,7 +191,7 @@ class Base5ActionRLEnv(gym.Env):
else:
return 0.
def is_tradesignal(self, action):
def is_tradesignal(self, action: int):
# trade signal
"""
not trade signal is :
@ -200,29 +200,29 @@ class Base5ActionRLEnv(gym.Env):
Action: Short, position: Short -> Hold Short
"""
return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or
(action == Actions.Neutral.value and self._position == Positions.Short) or
(action == Actions.Neutral.value and self._position == Positions.Long) or
(action == Actions.Short_buy.value and self._position == Positions.Short) or
(action == Actions.Short_sell.value and self._position == Positions.Short) or
(action == Actions.Short_buy.value and self._position == Positions.Long) or
(action == Actions.Short_sell.value and self._position == Positions.Short) or
(action == Actions.Short_sell.value and self._position == Positions.Long) or
(action == Actions.Short_sell.value and self._position == Positions.Neutral) or
(action == Actions.Long_buy.value and self._position == Positions.Long) or
(action == Actions.Long_sell.value and self._position == Positions.Long) or
(action == Actions.Long_buy.value and self._position == Positions.Short) or
(action == Actions.Long_sell.value and self._position == Positions.Short))
(action == Actions.Long_sell.value and self._position == Positions.Long) or
(action == Actions.Long_sell.value and self._position == Positions.Short) or
(action == Actions.Long_sell.value and self._position == Positions.Neutral))
def _is_trade(self, action: Actions):
    """
    Tell whether ``action`` opens a new position.

    A trade is a ``Long_buy`` or ``Short_buy`` action value received while
    the current position is ``Positions.Neutral``.

    :param action: action value, compared against ``Actions.<member>.value``.
    :return: True when the action enters a position from Neutral,
        False otherwise.
    """
    # Single return only: entering from Neutral is the sole case that
    # counts as a trade here.
    return self._position == Positions.Neutral and (
        action == Actions.Long_buy.value or
        action == Actions.Short_buy.value)
def is_hold(self, action):
    """
    Tell whether ``action`` leaves the current position untouched.

    Holding means one of:
      * ``Short_buy`` while the position is Short,
      * ``Long_buy`` while the position is Long,
      * ``Neutral`` while the position is Long, Short or Neutral.

    :param action: action value, compared against ``Actions.<member>.value``.
    :return: True for the combinations listed above, False otherwise.
    """
    if action == Actions.Neutral.value:
        # Neutral keeps whatever state the environment is in.
        return self._position in (Positions.Long, Positions.Short, Positions.Neutral)
    return ((action == Actions.Short_buy.value and self._position == Positions.Short) or
            (action == Actions.Long_buy.value and self._position == Positions.Long))
def add_buy_fee(self, price):
    """
    Return ``price`` scaled up by the environment fee.

    :param price: price to adjust.
    :return: ``price * (1 + self.fee)``.
    """
    fee_multiplier = 1 + self.fee
    return price * fee_multiplier
@ -240,6 +240,52 @@ class Base5ActionRLEnv(gym.Env):
def get_sharpe_ratio(self):
    """
    Return ``mean_over_std`` applied to the portfolio log returns
    obtained from ``get_portfolio_log_returns``.
    """
    log_returns = self.get_portfolio_log_returns()
    return mean_over_std(log_returns)
def calculate_reward(self, action):
    """
    Reward for closing a position, tiered on the last closed-trade profit.

    Only two situations can yield a non-zero reward: ``Long_sell`` while
    Long, or ``Short_buy`` while Short. The reward is the fee-adjusted log
    return between the open price at ``_last_trade_tick`` and the open
    price at ``_current_tick``:

      * doubled when ``close_trade_profit[-1]`` is strictly above
        ``profit_aim * rr``,
      * unscaled when it is strictly below that threshold (no extra
        penalty is applied to losing trades).

    In every other case the reward is 0: no previous trade tick, any other
    action/position pair, an empty ``close_trade_profit``, or a last
    profit exactly equal to the threshold.

    :param action: action value, compared against ``Actions.<member>.value``.
    :return: reward as a float.
    """
    if self._last_trade_tick is None:
        return 0.

    exits_long = action == Actions.Long_sell.value and self._position == Positions.Long
    # The short check is only evaluated when the long one did not match.
    exits_short = (not exits_long and
                   action == Actions.Short_buy.value and
                   self._position == Positions.Short)
    if not (exits_long or exits_short):
        return 0.

    if len(self.close_trade_profit) == 0:
        return 0.

    last_profit = self.close_trade_profit[-1]
    if last_profit > self.profit_aim * self.rr:
        doubled = True
    elif last_profit < self.profit_aim * self.rr:
        doubled = False
    else:
        # Exactly on the threshold (or not comparable): neither tier applies.
        return 0.

    entry_open = self.prices.iloc[self._last_trade_tick].open
    exit_open = self.prices.iloc[self._current_tick].open
    if exits_long:
        # Long: bought at entry, sold at exit.
        entry_price = self.add_buy_fee(entry_open)
        exit_price = self.add_sell_fee(exit_open)
        log_return = np.log(exit_price) - np.log(entry_price)
    else:
        # Short: sold at entry, bought back at exit.
        entry_price = self.add_sell_fee(entry_open)
        exit_price = self.add_buy_fee(exit_open)
        log_return = np.log(entry_price) - np.log(exit_price)

    if doubled:
        return float(log_return * 2)
    return float(log_return)
def _update_profit(self, action):
# if self._is_trade(action) or self._done:
if self._is_trade(action) or self._done:
@ -255,7 +301,7 @@ class Base5ActionRLEnv(gym.Env):
self._profits.append((self._current_tick, self._total_profit))
self.close_trade_profit.append(pnl)
def most_recent_return(self, action):
def most_recent_return(self, action: int):
"""
We support Long, Neutral and Short positions.
Return is generated from rising prices in Long
@ -265,7 +311,6 @@ class Base5ActionRLEnv(gym.Env):
# Long positions
if self._position == Positions.Long:
current_price = self.prices.iloc[self._current_tick].open
# if action == Actions.Short.value or action == Actions.Neutral.value:
if action == Actions.Short_buy.value or action == Actions.Neutral.value:
current_price = self.add_sell_fee(current_price)
@ -280,7 +325,6 @@ class Base5ActionRLEnv(gym.Env):
# Short positions
if self._position == Positions.Short:
current_price = self.prices.iloc[self._current_tick].open
# if action == Actions.Long.value or action == Actions.Neutral.value:
if action == Actions.Long_buy.value or action == Actions.Neutral.value:
current_price = self.add_buy_fee(current_price)
@ -296,9 +340,6 @@ class Base5ActionRLEnv(gym.Env):
def get_portfolio_log_returns(self):
    """
    Return the stored portfolio log returns from index 1 up to and
    including ``_current_tick``.
    """
    window = slice(1, self._current_tick + 1)
    return self.portfolio_log_returns[window]
def get_trading_log_return(self):
    """
    Return the stored portfolio log returns from ``_start_tick`` onwards.
    """
    from_start = slice(self._start_tick, None)
    return self.portfolio_log_returns[from_start]
def update_portfolio_log_returns(self, action):
    """
    Store the value of ``most_recent_return(action)`` in
    ``portfolio_log_returns`` at index ``_current_tick``.

    :param action: action forwarded unchanged to ``most_recent_return``.
    """
    latest_return = self.most_recent_return(action)
    self.portfolio_log_returns[self._current_tick] = latest_return
@ -314,37 +355,3 @@ class Base5ActionRLEnv(gym.Env):
returns = np.array(self.close_trade_profit)
reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9)
return reward
def get_bnh_log_return(self):
    """
    Return the successive differences of the log of ``prices['open']``,
    taken from ``_start_tick`` onwards.
    """
    open_prices = self.prices['open']
    traded_window = open_prices[self._start_tick:]
    log_prices = np.log(traded_window)
    return np.diff(log_prices)
def calculate_reward(self, action):
    """
    Reward for closing a position; 0 for every other step.

    A non-zero reward is produced only for ``Long_sell`` while Long or
    ``Short_buy`` while Short. It is the fee-adjusted log return between
    the open price at ``_last_trade_tick`` and the open price at
    ``_current_tick``. The reward is doubled when the most recent entry of
    ``close_trade_profit`` is strictly above ``profit_aim * rr``.

    :param action: action value, compared against ``Actions.<member>.value``.
    :return: reward as a float; 0 when there is no previous trade tick or
        the action does not close the current position.
    """
    if self._last_trade_tick is None:
        return 0.

    closing_long = action == Actions.Long_sell.value and self._position == Positions.Long
    closing_short = action == Actions.Short_buy.value and self._position == Positions.Short
    if not (closing_long or closing_short):
        return 0.

    entry_open = self.prices.iloc[self._last_trade_tick].open
    exit_open = self.prices.iloc[self._current_tick].open
    if closing_long:
        # Long: bought at entry, sold at exit.
        log_return = (np.log(self.add_sell_fee(exit_open)) -
                      np.log(self.add_buy_fee(entry_open)))
    else:
        # Short: sold at entry, bought back at exit.
        log_return = (np.log(self.add_sell_fee(entry_open)) -
                      np.log(self.add_buy_fee(exit_open)))

    # The bonus test must run before the plain return, otherwise it can
    # never fire. The length guard keeps an empty profit history on the
    # plain-reward path instead of raising IndexError.
    beat_target = (len(self.close_trade_profit) > 0 and
                   self.close_trade_profit[-1] > self.profit_aim * self.rr)
    if beat_target:
        return float(log_return * 2)
    return float(log_return)