refactor environment inheritance tree to accommodate flexible action types/counts. fix bug in train profit handling
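The refactor described above moves all action-agnostic machinery into a new BaseEnvironment class (added later in this commit), so the 4- and 5-action environments only keep their Actions enum and the action-specific hooks. A minimal sketch of how an environment with a different action count could plug into the new tree; Base3ActionRLEnv and its enum are hypothetical illustrations, not part of this commit:

```python
# Hypothetical sketch: an environment with a different action count only
# supplies its own Actions enum plus the action-specific hooks; the shared
# episode/profit machinery is inherited from BaseEnvironment.
from enum import Enum

from gym import spaces

from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions


class Actions(Enum):
    Neutral = 0
    Enter = 1
    Exit = 2


class Base3ActionRLEnv(BaseEnvironment):
    """Illustrative long-only environment with three actions."""

    def set_action_space(self):
        self.action_space = spaces.Discrete(len(Actions))

    def step(self, action: int):
        # A full implementation would mirror Base4/Base5ActionRLEnv:
        # advance the tick, apply position transitions, update profits and
        # return (observation, step_reward, done, info).
        ...

    def is_tradesignal(self, action: int):
        # Filter out no-op signals, e.g. entering while already in a position.
        return not (action == Actions.Enter.value and self._position == Positions.Long)

    def calculate_reward(self, action: int):
        # Placeholder; users override this with their own reward logic.
        return 0.
```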
		| @@ -1,14 +1,11 @@ | ||||
| import logging | ||||
| from enum import Enum | ||||
| from typing import Optional | ||||
|  | ||||
| import gym | ||||
| import numpy as np | ||||
| from gym import spaces | ||||
| from gym.utils import seeding | ||||
| from pandas import DataFrame | ||||
| import pandas as pd | ||||
| from abc import abstractmethod | ||||
|  | ||||
| from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| @@ -19,95 +16,13 @@ class Actions(Enum): | ||||
|     Short_enter = 3 | ||||
|  | ||||
|  | ||||
|  | ||||
| class Positions(Enum): | ||||
|     Short = 0 | ||||
|     Long = 1 | ||||
|     Neutral = 0.5 | ||||
|  | ||||
|     def opposite(self): | ||||
|         return Positions.Short if self == Positions.Long else Positions.Long | ||||
|  | ||||
|  | ||||
| def mean_over_std(x): | ||||
|     std = np.std(x, ddof=1) | ||||
|     mean = np.mean(x) | ||||
|     return mean / std if std > 0 else 0 | ||||
|  | ||||
|  | ||||
| class Base4ActionRLEnv(gym.Env): | ||||
| class Base4ActionRLEnv(BaseEnvironment): | ||||
|     """ | ||||
|     Base class for a 5 action environment | ||||
|     Base class for a 4 action environment | ||||
|     """ | ||||
|     metadata = {'render.modes': ['human']} | ||||
|  | ||||
|     def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), | ||||
|                  reward_kwargs: dict = {}, window_size=10, starting_point=True, | ||||
|                  id: str = 'baseenv-1', seed: int = 1, config: dict = {}): | ||||
|  | ||||
|         self.rl_config = config['freqai']['rl_config'] | ||||
|         self.id = id | ||||
|         self.seed(seed) | ||||
|         self.reset_env(df, prices, window_size, reward_kwargs, starting_point) | ||||
|  | ||||
|     def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int, | ||||
|                   reward_kwargs: dict, starting_point=True): | ||||
|         self.df = df | ||||
|         self.signal_features = self.df | ||||
|         self.prices = prices | ||||
|         self.window_size = window_size | ||||
|         self.starting_point = starting_point | ||||
|         self.rr = reward_kwargs["rr"] | ||||
|         self.profit_aim = reward_kwargs["profit_aim"] | ||||
|  | ||||
|         self.fee = 0.0015 | ||||
|  | ||||
|         # # spaces | ||||
|         self.shape = (window_size, self.signal_features.shape[1] + 3) | ||||
|     def set_action_space(self): | ||||
|         self.action_space = spaces.Discrete(len(Actions)) | ||||
|         self.observation_space = spaces.Box( | ||||
|             low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) | ||||
|  | ||||
|         # episode | ||||
|         self._start_tick: int = self.window_size | ||||
|         self._end_tick: int = len(self.prices) - 1 | ||||
|         self._done: bool = False | ||||
|         self._current_tick: int = self._start_tick | ||||
|         self._last_trade_tick: Optional[int] = None | ||||
|         self._position = Positions.Neutral | ||||
|         self._position_history: list = [None] | ||||
|         self.total_reward: float = 0 | ||||
|         self._total_profit: float = 1 | ||||
|         self.history: dict = {} | ||||
|         self.trade_history: list = [] | ||||
|  | ||||
|     def seed(self, seed: int = 1): | ||||
|         self.np_random, seed = seeding.np_random(seed) | ||||
|         return [seed] | ||||
|  | ||||
|     def reset(self): | ||||
|  | ||||
|         self._done = False | ||||
|  | ||||
|         if self.starting_point is True: | ||||
|             self._position_history = (self._start_tick * [None]) + [self._position] | ||||
|         else: | ||||
|             self._position_history = (self.window_size * [None]) + [self._position] | ||||
|  | ||||
|         self._current_tick = self._start_tick | ||||
|         self._last_trade_tick = None | ||||
|         self._position = Positions.Neutral | ||||
|  | ||||
|         self.total_reward = 0. | ||||
|         self._total_profit = 1.  # unit | ||||
|         self.history = {} | ||||
|         self.trade_history = [] | ||||
|         self.portfolio_log_returns = np.zeros(len(self.prices)) | ||||
|  | ||||
|         self._profits = [(self._start_tick, 1)] | ||||
|         self.close_trade_profit = [] | ||||
|  | ||||
|         return self._get_observation() | ||||
|  | ||||
|     def step(self, action: int): | ||||
|         self._done = False | ||||
| @@ -181,43 +96,6 @@ class Base4ActionRLEnv(gym.Env): | ||||
|  | ||||
|         return observation, step_reward, self._done, info | ||||
|  | ||||
|     def _get_observation(self): | ||||
|         features_window = self.signal_features[( | ||||
|             self._current_tick - self.window_size):self._current_tick] | ||||
|         features_and_state = DataFrame(np.zeros((len(features_window), 3)), | ||||
|                                        columns=['current_profit_pct', 'position', 'trade_duration'], | ||||
|                                        index=features_window.index) | ||||
|  | ||||
|         features_and_state['current_profit_pct'] = self.get_unrealized_profit() | ||||
|         features_and_state['position'] = self._position.value | ||||
|         features_and_state['trade_duration'] = self.get_trade_duration() | ||||
|         features_and_state = pd.concat([features_window, features_and_state], axis=1) | ||||
|         return features_and_state | ||||
|  | ||||
|     def get_trade_duration(self): | ||||
|         if self._last_trade_tick is None: | ||||
|             return 0 | ||||
|         else: | ||||
|             return self._current_tick - self._last_trade_tick | ||||
|  | ||||
|     def get_unrealized_profit(self): | ||||
|  | ||||
|         if self._last_trade_tick is None: | ||||
|             return 0. | ||||
|  | ||||
|         if self._position == Positions.Neutral: | ||||
|             return 0. | ||||
|         elif self._position == Positions.Short: | ||||
|             current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open) | ||||
|             last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open) | ||||
|             return (last_trade_price - current_price) / last_trade_price | ||||
|         elif self._position == Positions.Long: | ||||
|             current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open) | ||||
|             last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open) | ||||
|             return (current_price - last_trade_price) / last_trade_price | ||||
|         else: | ||||
|             return 0. | ||||
|  | ||||
|     def is_tradesignal(self, action: int): | ||||
|         # trade signal | ||||
|         """ | ||||
| @@ -228,7 +106,7 @@ class Base4ActionRLEnv(gym.Env): | ||||
|                     (action == Actions.Neutral.value and self._position == Positions.Short) or | ||||
|                     (action == Actions.Neutral.value and self._position == Positions.Long) or | ||||
|                     (action == Actions.Short_enter.value and self._position == Positions.Short) or | ||||
|                     (action == Actions.Short_enter.value and self._position == Positions.Long) or                  | ||||
|                     (action == Actions.Short_enter.value and self._position == Positions.Long) or | ||||
|                     (action == Actions.Exit.value and self._position == Positions.Neutral) or | ||||
|                     (action == Actions.Long_enter.value and self._position == Positions.Long) or | ||||
|                     (action == Actions.Long_enter.value and self._position == Positions.Short)) | ||||
| @@ -240,7 +118,7 @@ class Base4ActionRLEnv(gym.Env): | ||||
|         e.g.: agent wants an Actions.Long_exit while it is in a Positions.Short | ||||
|         """ | ||||
|         # Agent should only try to exit if it is in position | ||||
|         if action in (Actions.Exit.value): | ||||
|         if action == Actions.Exit.value: | ||||
|             if self._position not in (Positions.Short, Positions.Long): | ||||
|                 return False | ||||
|  | ||||
| @@ -250,97 +128,3 @@ class Base4ActionRLEnv(gym.Env): | ||||
|                 return False | ||||
|  | ||||
|         return True | ||||
|  | ||||
|     def _is_trade(self, action: Actions): | ||||
|         return ((action == Actions.Long_enter.value and self._position == Positions.Neutral) or | ||||
|                 (action == Actions.Short_enter.value and self._position == Positions.Neutral)) | ||||
|  | ||||
|     def is_hold(self, action): | ||||
|         return ((action == Actions.Short_enter.value and self._position == Positions.Short) or | ||||
|                 (action == Actions.Long_enter.value and self._position == Positions.Long) or | ||||
|                 (action == Actions.Neutral.value and self._position == Positions.Long) or | ||||
|                 (action == Actions.Neutral.value and self._position == Positions.Short) or | ||||
|                 (action == Actions.Neutral.value and self._position == Positions.Neutral)) | ||||
|  | ||||
|     def add_entry_fee(self, price): | ||||
|         return price * (1 + self.fee) | ||||
|  | ||||
|     def add_exit_fee(self, price): | ||||
|         return price / (1 + self.fee) | ||||
|  | ||||
|     def _update_history(self, info): | ||||
|         if not self.history: | ||||
|             self.history = {key: [] for key in info.keys()} | ||||
|  | ||||
|         for key, value in info.items(): | ||||
|             self.history[key].append(value) | ||||
|  | ||||
|     def get_sharpe_ratio(self): | ||||
|         return mean_over_std(self.get_portfolio_log_returns()) | ||||
|  | ||||
|     @abstractmethod | ||||
|     def calculate_reward(self, action): | ||||
|         """ | ||||
|         Reward is created by BaseReinforcementLearningModel and can | ||||
|         be inherited/edited by the user made ReinforcementLearner file. | ||||
|         """ | ||||
|  | ||||
|         return 0. | ||||
|  | ||||
|     def _update_profit(self, action): | ||||
|         if self._is_trade(action) or self._done: | ||||
|             pnl = self.get_unrealized_profit() | ||||
|  | ||||
|             if self._position in (Positions.Long, Positions.Short): | ||||
|                 self._total_profit *= (1 + pnl) | ||||
|                 self._profits.append((self._current_tick, self._total_profit)) | ||||
|                 self.close_trade_profit.append(pnl) | ||||
|  | ||||
|     def most_recent_return(self, action: int): | ||||
|         """ | ||||
|         Calculate the tick to tick return if in a trade. | ||||
|         Return is generated from rising prices in Long | ||||
|         and falling prices in Short positions. | ||||
|         The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. | ||||
|         """ | ||||
|         # Long positions | ||||
|         if self._position == Positions.Long: | ||||
|             current_price = self.prices.iloc[self._current_tick].open | ||||
|             previous_price = self.prices.iloc[self._current_tick - 1].open | ||||
|  | ||||
|             if (self._position_history[self._current_tick - 1] == Positions.Short | ||||
|                     or self._position_history[self._current_tick - 1] == Positions.Neutral): | ||||
|                 previous_price = self.add_entry_fee(previous_price) | ||||
|  | ||||
|             return np.log(current_price) - np.log(previous_price) | ||||
|  | ||||
|         # Short positions | ||||
|         if self._position == Positions.Short: | ||||
|             current_price = self.prices.iloc[self._current_tick].open | ||||
|             previous_price = self.prices.iloc[self._current_tick - 1].open | ||||
|             if (self._position_history[self._current_tick - 1] == Positions.Long | ||||
|                     or self._position_history[self._current_tick - 1] == Positions.Neutral): | ||||
|                 previous_price = self.add_exit_fee(previous_price) | ||||
|  | ||||
|             return np.log(previous_price) - np.log(current_price) | ||||
|  | ||||
|         return 0 | ||||
|  | ||||
|     def get_portfolio_log_returns(self): | ||||
|         return self.portfolio_log_returns[1:self._current_tick + 1] | ||||
|  | ||||
|     def update_portfolio_log_returns(self, action): | ||||
|         self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) | ||||
|  | ||||
|     def current_price(self) -> float: | ||||
|         return self.prices.iloc[self._current_tick].open | ||||
|  | ||||
|     def prev_price(self) -> float: | ||||
|         return self.prices.iloc[self._current_tick - 1].open | ||||
|  | ||||
|     def sharpe_ratio(self): | ||||
|         if len(self.close_trade_profit) == 0: | ||||
|             return 0. | ||||
|         returns = np.array(self.close_trade_profit) | ||||
|         reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) | ||||
|         return reward | ||||
|   | ||||
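For orientation, the 4-action environment above and the 5-action environment below differ only in their action sets: the 4-action variant uses a single shared Exit action, while the 5-action variant splits exits per side. Only Short_enter = 3 and Short_exit = 4 are visible in these hunks, so the enums below are reconstructed from the actions referenced in is_tradesignal()/step(); the remaining integer values and the class names are assumptions for illustration only:

```python
# Reconstructed for illustration; both environments name their enum `Actions`,
# the names here differ only so the two can sit side by side, and all integer
# values except Short_enter = 3 / Short_exit = 4 are assumed.
from enum import Enum


class FourActions(Enum):       # Base4ActionRLEnv: one shared exit action
    Neutral = 0
    Exit = 1
    Long_enter = 2
    Short_enter = 3


class FiveActions(Enum):       # Base5ActionRLEnv: separate long/short exits
    Neutral = 0
    Long_enter = 1
    Long_exit = 2
    Short_enter = 3
    Short_exit = 4
```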
| @@ -1,14 +1,14 @@ | ||||
| import logging | ||||
| from enum import Enum | ||||
| from typing import Optional | ||||
|  | ||||
| import gym | ||||
| import numpy as np | ||||
| from gym import spaces | ||||
| from gym.utils import seeding | ||||
| from pandas import DataFrame | ||||
| import pandas as pd | ||||
| from abc import abstractmethod | ||||
| from gym import spaces | ||||
| from pandas import DataFrame | ||||
|  | ||||
| from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| @@ -20,70 +20,19 @@ class Actions(Enum): | ||||
|     Short_exit = 4 | ||||
|  | ||||
|  | ||||
| class Positions(Enum): | ||||
|     Short = 0 | ||||
|     Long = 1 | ||||
|     Neutral = 0.5 | ||||
|  | ||||
|     def opposite(self): | ||||
|         return Positions.Short if self == Positions.Long else Positions.Long | ||||
|  | ||||
|  | ||||
| def mean_over_std(x): | ||||
|     std = np.std(x, ddof=1) | ||||
|     mean = np.mean(x) | ||||
|     return mean / std if std > 0 else 0 | ||||
|  | ||||
|  | ||||
| class Base5ActionRLEnv(gym.Env): | ||||
| class Base5ActionRLEnv(BaseEnvironment): | ||||
|     """ | ||||
|     Base class for a 5 action environment | ||||
|     """ | ||||
|     metadata = {'render.modes': ['human']} | ||||
|  | ||||
|     def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), | ||||
|                  reward_kwargs: dict = {}, window_size=10, starting_point=True, | ||||
|                  id: str = 'baseenv-1', seed: int = 1, config: dict = {}): | ||||
|  | ||||
|         self.rl_config = config['freqai']['rl_config'] | ||||
|         self.id = id | ||||
|         self.seed(seed) | ||||
|         self.reset_env(df, prices, window_size, reward_kwargs, starting_point) | ||||
|  | ||||
|     def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int, | ||||
|                   reward_kwargs: dict, starting_point=True): | ||||
|         self.df = df | ||||
|         self.signal_features = self.df | ||||
|         self.prices = prices | ||||
|         self.window_size = window_size | ||||
|         self.starting_point = starting_point | ||||
|         self.rr = reward_kwargs["rr"] | ||||
|         self.profit_aim = reward_kwargs["profit_aim"] | ||||
|  | ||||
|         self.fee = 0.0015 | ||||
|  | ||||
|         # # spaces | ||||
|         self.shape = (window_size, self.signal_features.shape[1] + 3) | ||||
|     def set_action_space(self): | ||||
|         self.action_space = spaces.Discrete(len(Actions)) | ||||
|         self.observation_space = spaces.Box( | ||||
|             low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) | ||||
|  | ||||
|         # episode | ||||
|         self._start_tick: int = self.window_size | ||||
|         self._end_tick: int = len(self.prices) - 1 | ||||
|         self._done: bool = False | ||||
|         self._current_tick: int = self._start_tick | ||||
|         self._last_trade_tick: Optional[int] = None | ||||
|         self._position = Positions.Neutral | ||||
|         self._position_history: list = [None] | ||||
|         self.total_reward: float = 0 | ||||
|         self._total_profit: float = 1 | ||||
|         self.history: dict = {} | ||||
|         self.trade_history: list = [] | ||||
|  | ||||
|     def seed(self, seed: int = 1): | ||||
|         self.np_random, seed = seeding.np_random(seed) | ||||
|         return [seed] | ||||
|  | ||||
|     def reset(self): | ||||
|  | ||||
| @@ -106,6 +55,7 @@ class Base5ActionRLEnv(gym.Env): | ||||
|  | ||||
|         self._profits = [(self._start_tick, 1)] | ||||
|         self.close_trade_profit = [] | ||||
|         self._total_unrealized_profit = 1 | ||||
|  | ||||
|         return self._get_observation() | ||||
|  | ||||
| @@ -118,7 +68,7 @@ class Base5ActionRLEnv(gym.Env): | ||||
|  | ||||
|         self.update_portfolio_log_returns(action) | ||||
|  | ||||
|         self._update_profit(action) | ||||
|         self._update_unrealized_total_profit() | ||||
|         step_reward = self.calculate_reward(action) | ||||
|         self.total_reward += step_reward | ||||
|  | ||||
| @@ -148,10 +98,12 @@ class Base5ActionRLEnv(gym.Env): | ||||
|                 trade_type = "short" | ||||
|                 self._last_trade_tick = self._current_tick | ||||
|             elif action == Actions.Long_exit.value: | ||||
|                 self._update_total_profit() | ||||
|                 self._position = Positions.Neutral | ||||
|                 trade_type = "neutral" | ||||
|                 self._last_trade_tick = None | ||||
|             elif action == Actions.Short_exit.value: | ||||
|                 self._update_total_profit() | ||||
|                 self._position = Positions.Neutral | ||||
|                 trade_type = "neutral" | ||||
|                 self._last_trade_tick = None | ||||
| @@ -163,7 +115,8 @@ class Base5ActionRLEnv(gym.Env): | ||||
|                     {'price': self.current_price(), 'index': self._current_tick, | ||||
|                      'type': trade_type}) | ||||
|  | ||||
|         if self._total_profit < 1 - self.rl_config.get('max_training_drawdown_pct', 0.8): | ||||
|         if (self._total_profit < self.max_drawdown or | ||||
|                 self._total_unrealized_profit < self.max_drawdown): | ||||
|             self._done = True | ||||
|  | ||||
|         self._position_history.append(self._position) | ||||
| @@ -200,24 +153,6 @@ class Base5ActionRLEnv(gym.Env): | ||||
|         else: | ||||
|             return self._current_tick - self._last_trade_tick | ||||
|  | ||||
|     def get_unrealized_profit(self): | ||||
|  | ||||
|         if self._last_trade_tick is None: | ||||
|             return 0. | ||||
|  | ||||
|         if self._position == Positions.Neutral: | ||||
|             return 0. | ||||
|         elif self._position == Positions.Short: | ||||
|             current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open) | ||||
|             last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open) | ||||
|             return (last_trade_price - current_price) / last_trade_price | ||||
|         elif self._position == Positions.Long: | ||||
|             current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open) | ||||
|             last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open) | ||||
|             return (current_price - last_trade_price) / last_trade_price | ||||
|         else: | ||||
|             return 0. | ||||
|  | ||||
|     def is_tradesignal(self, action: int): | ||||
|         # trade signal | ||||
|         """ | ||||
| @@ -253,97 +188,3 @@ class Base5ActionRLEnv(gym.Env): | ||||
|                 return False | ||||
|  | ||||
|         return True | ||||
|  | ||||
|     def _is_trade(self, action: Actions): | ||||
|         return ((action == Actions.Long_enter.value and self._position == Positions.Neutral) or | ||||
|                 (action == Actions.Short_enter.value and self._position == Positions.Neutral)) | ||||
|  | ||||
|     def is_hold(self, action): | ||||
|         return ((action == Actions.Short_enter.value and self._position == Positions.Short) or | ||||
|                 (action == Actions.Long_enter.value and self._position == Positions.Long) or | ||||
|                 (action == Actions.Neutral.value and self._position == Positions.Long) or | ||||
|                 (action == Actions.Neutral.value and self._position == Positions.Short) or | ||||
|                 (action == Actions.Neutral.value and self._position == Positions.Neutral)) | ||||
|  | ||||
|     def add_entry_fee(self, price): | ||||
|         return price * (1 + self.fee) | ||||
|  | ||||
|     def add_exit_fee(self, price): | ||||
|         return price / (1 + self.fee) | ||||
|  | ||||
|     def _update_history(self, info): | ||||
|         if not self.history: | ||||
|             self.history = {key: [] for key in info.keys()} | ||||
|  | ||||
|         for key, value in info.items(): | ||||
|             self.history[key].append(value) | ||||
|  | ||||
|     def get_sharpe_ratio(self): | ||||
|         return mean_over_std(self.get_portfolio_log_returns()) | ||||
|  | ||||
|     @abstractmethod | ||||
|     def calculate_reward(self, action): | ||||
|         """ | ||||
|         Reward is created by BaseReinforcementLearningModel and can | ||||
|         be inherited/edited by the user made ReinforcementLearner file. | ||||
|         """ | ||||
|  | ||||
|         return 0. | ||||
|  | ||||
|     def _update_profit(self, action): | ||||
|         if self._is_trade(action) or self._done: | ||||
|             pnl = self.get_unrealized_profit() | ||||
|  | ||||
|             if self._position in (Positions.Long, Positions.Short): | ||||
|                 self._total_profit *= (1 + pnl) | ||||
|                 self._profits.append((self._current_tick, self._total_profit)) | ||||
|                 self.close_trade_profit.append(pnl) | ||||
|  | ||||
|     def most_recent_return(self, action: int): | ||||
|         """ | ||||
|         Calculate the tick to tick return if in a trade. | ||||
|         Return is generated from rising prices in Long | ||||
|         and falling prices in Short positions. | ||||
|         The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. | ||||
|         """ | ||||
|         # Long positions | ||||
|         if self._position == Positions.Long: | ||||
|             current_price = self.prices.iloc[self._current_tick].open | ||||
|             previous_price = self.prices.iloc[self._current_tick - 1].open | ||||
|  | ||||
|             if (self._position_history[self._current_tick - 1] == Positions.Short | ||||
|                     or self._position_history[self._current_tick - 1] == Positions.Neutral): | ||||
|                 previous_price = self.add_entry_fee(previous_price) | ||||
|  | ||||
|             return np.log(current_price) - np.log(previous_price) | ||||
|  | ||||
|         # Short positions | ||||
|         if self._position == Positions.Short: | ||||
|             current_price = self.prices.iloc[self._current_tick].open | ||||
|             previous_price = self.prices.iloc[self._current_tick - 1].open | ||||
|             if (self._position_history[self._current_tick - 1] == Positions.Long | ||||
|                     or self._position_history[self._current_tick - 1] == Positions.Neutral): | ||||
|                 previous_price = self.add_exit_fee(previous_price) | ||||
|  | ||||
|             return np.log(previous_price) - np.log(current_price) | ||||
|  | ||||
|         return 0 | ||||
|  | ||||
|     def get_portfolio_log_returns(self): | ||||
|         return self.portfolio_log_returns[1:self._current_tick + 1] | ||||
|  | ||||
|     def update_portfolio_log_returns(self, action): | ||||
|         self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) | ||||
|  | ||||
|     def current_price(self) -> float: | ||||
|         return self.prices.iloc[self._current_tick].open | ||||
|  | ||||
|     def prev_price(self) -> float: | ||||
|         return self.prices.iloc[self._current_tick - 1].open | ||||
|  | ||||
|     def sharpe_ratio(self): | ||||
|         if len(self.close_trade_profit) == 0: | ||||
|             return 0. | ||||
|         returns = np.array(self.close_trade_profit) | ||||
|         reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) | ||||
|         return reward | ||||
|   | ||||
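The 5-action hunk above also carries the train-profit fix from the commit message: realized profit is updated only when a position is closed, an unrealized running total is refreshed on every step, and the drawdown check now looks at both. A condensed sketch of the new per-step bookkeeping; step_profit_bookkeeping is a hypothetical helper that paraphrases, not reproduces, the step() body:

```python
# Condensed sketch of the revised bookkeeping order in Base5ActionRLEnv.step();
# the real method also gates position transitions with is_tradesignal()/_is_valid().
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions


def step_profit_bookkeeping(env, action: int) -> None:
    env.update_portfolio_log_returns(action)

    # Refresh the unrealized total every step so an open position counts
    # toward the drawdown limit before it is closed.
    env._update_unrealized_total_profit()
    step_reward = env.calculate_reward(action)
    env.total_reward += step_reward

    # Realized profit is booked only when the position is actually exited.
    if action in (Actions.Long_exit.value, Actions.Short_exit.value):
        env._update_total_profit()

    # Episode ends if either realized or unrealized profit breaches the
    # configured training drawdown threshold.
    if (env._total_profit < env.max_drawdown
            or env._total_unrealized_profit < env.max_drawdown):
        env._done = True
```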
freqtrade/freqai/RL/BaseEnvironment.py (new file, 270 lines added)
							| @@ -0,0 +1,270 @@ | ||||
| import logging | ||||
| from abc import abstractmethod | ||||
| from enum import Enum | ||||
| from typing import Optional | ||||
|  | ||||
| import gym | ||||
| import numpy as np | ||||
| import pandas as pd | ||||
| from gym import spaces | ||||
| from gym.utils import seeding | ||||
| from pandas import DataFrame | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| class Positions(Enum): | ||||
|     Short = 0 | ||||
|     Long = 1 | ||||
|     Neutral = 0.5 | ||||
|  | ||||
|     def opposite(self): | ||||
|         return Positions.Short if self == Positions.Long else Positions.Long | ||||
|  | ||||
|  | ||||
| class BaseEnvironment(gym.Env): | ||||
|     """ | ||||
|     Base class for environments. This class is agnostic to action count. | ||||
|     Inherited classes customize this to include varying action counts/types; | ||||
|     see RL/Base5ActionRLEnv.py and RL/Base4ActionRLEnv.py. | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), | ||||
|                  reward_kwargs: dict = {}, window_size=10, starting_point=True, | ||||
|                  id: str = 'baseenv-1', seed: int = 1, config: dict = {}): | ||||
|  | ||||
|         self.rl_config = config['freqai']['rl_config'] | ||||
|         self.id = id | ||||
|         self.seed(seed) | ||||
|         self.reset_env(df, prices, window_size, reward_kwargs, starting_point) | ||||
|         self.max_drawdown = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8) | ||||
|         self.compound_trades = config['stake_amount'] == 'unlimited' | ||||
|  | ||||
|     def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int, | ||||
|                   reward_kwargs: dict, starting_point=True): | ||||
|         self.df = df | ||||
|         self.signal_features = self.df | ||||
|         self.prices = prices | ||||
|         self.window_size = window_size | ||||
|         self.starting_point = starting_point | ||||
|         self.rr = reward_kwargs["rr"] | ||||
|         self.profit_aim = reward_kwargs["profit_aim"] | ||||
|  | ||||
|         self.fee = 0.0015 | ||||
|  | ||||
|         # # spaces | ||||
|         self.shape = (window_size, self.signal_features.shape[1] + 3) | ||||
|         self.set_action_space() | ||||
|         self.observation_space = spaces.Box( | ||||
|             low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) | ||||
|  | ||||
|         # episode | ||||
|         self._start_tick: int = self.window_size | ||||
|         self._end_tick: int = len(self.prices) - 1 | ||||
|         self._done: bool = False | ||||
|         self._current_tick: int = self._start_tick | ||||
|         self._last_trade_tick: Optional[int] = None | ||||
|         self._position = Positions.Neutral | ||||
|         self._position_history: list = [None] | ||||
|         self.total_reward: float = 0 | ||||
|         self._total_profit: float = 1 | ||||
|         self._total_unrealized_profit: float = 1 | ||||
|         self.history: dict = {} | ||||
|         self.trade_history: list = [] | ||||
|  | ||||
|     @abstractmethod | ||||
|     def set_action_space(self): | ||||
|         """ | ||||
|         Unique to the environment action count. Must be inherited. | ||||
|         """ | ||||
|  | ||||
|     def seed(self, seed: int = 1): | ||||
|         self.np_random, seed = seeding.np_random(seed) | ||||
|         return [seed] | ||||
|  | ||||
|     def reset(self): | ||||
|  | ||||
|         self._done = False | ||||
|  | ||||
|         if self.starting_point is True: | ||||
|             self._position_history = (self._start_tick * [None]) + [self._position] | ||||
|         else: | ||||
|             self._position_history = (self.window_size * [None]) + [self._position] | ||||
|  | ||||
|         self._current_tick = self._start_tick | ||||
|         self._last_trade_tick = None | ||||
|         self._position = Positions.Neutral | ||||
|  | ||||
|         self.total_reward = 0. | ||||
|         self._total_profit = 1.  # unit | ||||
|         self.history = {} | ||||
|         self.trade_history = [] | ||||
|         self.portfolio_log_returns = np.zeros(len(self.prices)) | ||||
|  | ||||
|         self._profits = [(self._start_tick, 1)] | ||||
|         self.close_trade_profit = [] | ||||
|         self._total_unrealized_profit = 1 | ||||
|  | ||||
|         return self._get_observation() | ||||
|  | ||||
|     @abstractmethod | ||||
|     def step(self, action: int): | ||||
|         """ | ||||
|         Step depends on action types; this must be inherited. | ||||
|         """ | ||||
|         return | ||||
|  | ||||
|     def _get_observation(self): | ||||
|         """ | ||||
|         This may or may not be independent of action types; the user can override | ||||
|         this in their custom "MyRLEnv" | ||||
|         """ | ||||
|         features_window = self.signal_features[( | ||||
|             self._current_tick - self.window_size):self._current_tick] | ||||
|         features_and_state = DataFrame(np.zeros((len(features_window), 3)), | ||||
|                                        columns=['current_profit_pct', 'position', 'trade_duration'], | ||||
|                                        index=features_window.index) | ||||
|  | ||||
|         features_and_state['current_profit_pct'] = self.get_unrealized_profit() | ||||
|         features_and_state['position'] = self._position.value | ||||
|         features_and_state['trade_duration'] = self.get_trade_duration() | ||||
|         features_and_state = pd.concat([features_window, features_and_state], axis=1) | ||||
|         return features_and_state | ||||
|  | ||||
|     def get_trade_duration(self): | ||||
|         if self._last_trade_tick is None: | ||||
|             return 0 | ||||
|         else: | ||||
|             return self._current_tick - self._last_trade_tick | ||||
|  | ||||
|     def get_unrealized_profit(self): | ||||
|  | ||||
|         if self._last_trade_tick is None: | ||||
|             return 0. | ||||
|  | ||||
|         if self._position == Positions.Neutral: | ||||
|             return 0. | ||||
|         elif self._position == Positions.Short: | ||||
|             current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open) | ||||
|             last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open) | ||||
|             return (last_trade_price - current_price) / last_trade_price | ||||
|         elif self._position == Positions.Long: | ||||
|             current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open) | ||||
|             last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open) | ||||
|             return (current_price - last_trade_price) / last_trade_price | ||||
|         else: | ||||
|             return 0. | ||||
|  | ||||
|     @abstractmethod | ||||
|     def is_tradesignal(self, action: int): | ||||
|         # trade signal | ||||
|         """ | ||||
|         Determine if the signal is a trade signal. This is | ||||
|         unique to the actions in the environment, and therefore must be | ||||
|         inherited. | ||||
|         """ | ||||
|         return | ||||
|  | ||||
|     def _is_valid(self, action: int): | ||||
|         # trade signal | ||||
|         """ | ||||
|         Determine if the signal is valid. This is | ||||
|         unique to the actions in the environment, and therefore must be | ||||
|         inherited. | ||||
|         """ | ||||
|         return | ||||
|  | ||||
|     def add_entry_fee(self, price): | ||||
|         return price * (1 + self.fee) | ||||
|  | ||||
|     def add_exit_fee(self, price): | ||||
|         return price / (1 + self.fee) | ||||
|  | ||||
|     def _update_history(self, info): | ||||
|         if not self.history: | ||||
|             self.history = {key: [] for key in info.keys()} | ||||
|  | ||||
|         for key, value in info.items(): | ||||
|             self.history[key].append(value) | ||||
|  | ||||
|     @abstractmethod | ||||
|     def calculate_reward(self, action): | ||||
|         """ | ||||
|         Reward is created by BaseReinforcementLearningModel and can | ||||
|         be inherited/edited by the user made ReinforcementLearner file. | ||||
|         """ | ||||
|  | ||||
|         return 0. | ||||
|  | ||||
|     def _update_unrealized_total_profit(self): | ||||
|         """ | ||||
|         Update the unrealized total profit in case of episode end. | ||||
|         """ | ||||
|         if self._position in (Positions.Long, Positions.Short): | ||||
|             pnl = self.get_unrealized_profit() | ||||
|             if self.compound_trades: | ||||
|                 # assumes unit stake and compounding | ||||
|                 unrl_profit = self._total_profit * (1 + pnl) | ||||
|             else: | ||||
|                 # assumes unit stake and no compounding | ||||
|                 unrl_profit = self._total_profit + pnl | ||||
|             self._total_unrealized_profit = unrl_profit | ||||
|  | ||||
|     def _update_total_profit(self): | ||||
|         pnl = self.get_unrealized_profit() | ||||
|         if self.compound_trades: | ||||
|             # assumes unit stake and compounding | ||||
|             self._total_profit = self._total_profit * (1 + pnl) | ||||
|         else: | ||||
|             # assumes unit stake and no compounding | ||||
|             self._total_profit += pnl | ||||
|  | ||||
|     def most_recent_return(self, action: int): | ||||
|         """ | ||||
|         Calculate the tick to tick return if in a trade. | ||||
|         Return is generated from rising prices in Long | ||||
|         and falling prices in Short positions. | ||||
|         The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. | ||||
|         """ | ||||
|         # Long positions | ||||
|         if self._position == Positions.Long: | ||||
|             current_price = self.prices.iloc[self._current_tick].open | ||||
|             previous_price = self.prices.iloc[self._current_tick - 1].open | ||||
|  | ||||
|             if (self._position_history[self._current_tick - 1] == Positions.Short | ||||
|                     or self._position_history[self._current_tick - 1] == Positions.Neutral): | ||||
|                 previous_price = self.add_entry_fee(previous_price) | ||||
|  | ||||
|             return np.log(current_price) - np.log(previous_price) | ||||
|  | ||||
|         # Short positions | ||||
|         if self._position == Positions.Short: | ||||
|             current_price = self.prices.iloc[self._current_tick].open | ||||
|             previous_price = self.prices.iloc[self._current_tick - 1].open | ||||
|             if (self._position_history[self._current_tick - 1] == Positions.Long | ||||
|                     or self._position_history[self._current_tick - 1] == Positions.Neutral): | ||||
|                 previous_price = self.add_exit_fee(previous_price) | ||||
|  | ||||
|             return np.log(previous_price) - np.log(current_price) | ||||
|  | ||||
|         return 0 | ||||
|  | ||||
|     def get_portfolio_log_returns(self): | ||||
|         return self.portfolio_log_returns[1:self._current_tick + 1] | ||||
|  | ||||
|     def update_portfolio_log_returns(self, action): | ||||
|         self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) | ||||
|  | ||||
|     def current_price(self) -> float: | ||||
|         return self.prices.iloc[self._current_tick].open | ||||
|  | ||||
|     def prev_price(self) -> float: | ||||
|         return self.prices.iloc[self._current_tick - 1].open | ||||
|  | ||||
|     def sharpe_ratio(self): | ||||
|         if len(self.close_trade_profit) == 0: | ||||
|             return 0. | ||||
|         returns = np.array(self.close_trade_profit) | ||||
|         reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) | ||||
|         return reward | ||||
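BaseEnvironment also introduces the compound_trades switch (config['stake_amount'] == 'unlimited'), which decides whether _update_total_profit() multiplies or simply adds each closed trade's pnl. A small worked example of the difference, assuming a unit starting stake and illustrative pnl values:

```python
# Illustrative comparison of the two accumulation modes used by
# _update_total_profit(): compounding when stake_amount == 'unlimited',
# additive (flat unit stake) otherwise.
closed_trade_pnls = [0.02, -0.01, 0.03]

total_compounded = 1.0   # compound_trades = True
total_flat = 1.0         # compound_trades = False
for pnl in closed_trade_pnls:
    total_compounded *= (1 + pnl)   # 1.0 -> 1.02 -> 1.0098 -> ~1.0401
    total_flat += pnl               # 1.0 -> 1.02 -> 1.01   -> 1.04

print(total_compounded, total_flat)
```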
| @@ -1,25 +1,28 @@ | ||||
| import logging | ||||
| from typing import Any, Dict, Tuple | ||||
| from abc import abstractmethod | ||||
| from datetime import datetime, timezone | ||||
| from pathlib import Path | ||||
| from typing import Any, Callable, Dict, Tuple | ||||
|  | ||||
| import gym | ||||
| import numpy as np | ||||
| import numpy.typing as npt | ||||
| import pandas as pd | ||||
| import torch as th | ||||
| import torch.multiprocessing | ||||
| from pandas import DataFrame | ||||
| from abc import abstractmethod | ||||
| from stable_baselines3.common.callbacks import EvalCallback | ||||
| from stable_baselines3.common.monitor import Monitor | ||||
| from stable_baselines3.common.utils import set_random_seed | ||||
|  | ||||
| from freqtrade.exceptions import OperationalException | ||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen | ||||
| from freqtrade.freqai.freqai_interface import IFreqaiModel | ||||
| from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions | ||||
| from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv | ||||
| from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions | ||||
| from freqtrade.persistence import Trade | ||||
| import torch.multiprocessing | ||||
| from stable_baselines3.common.callbacks import EvalCallback | ||||
| from stable_baselines3.common.monitor import Monitor | ||||
| import torch as th | ||||
| from typing import Callable | ||||
| from datetime import datetime, timezone | ||||
| from stable_baselines3.common.utils import set_random_seed | ||||
| import gym | ||||
| from pathlib import Path | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| torch.multiprocessing.set_sharing_strategy('file_system') | ||||
| @@ -37,8 +40,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): | ||||
|         super().__init__(config=kwargs['config']) | ||||
|         th.set_num_threads(self.freqai_info['rl_config'].get('thread_count', 4)) | ||||
|         self.reward_params = self.freqai_info['rl_config']['model_reward_parameters'] | ||||
|         self.train_env: Base5ActionRLEnv = None | ||||
|         self.eval_env: Base5ActionRLEnv = None | ||||
|         self.train_env: BaseEnvironment = None | ||||
|         self.eval_env: BaseEnvironment = None | ||||
|         self.eval_callback: EvalCallback = None | ||||
|         self.model_type = self.freqai_info['rl_config']['model_type'] | ||||
|         self.rl_config = self.freqai_info['rl_config'] | ||||
| @@ -194,7 +197,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): | ||||
|         def _predict(window): | ||||
|             market_side, current_profit, trade_duration = self.get_state_info(dk.pair) | ||||
|             observations = dataframe.iloc[window.index] | ||||
|             observations['current_profit'] = current_profit | ||||
|             observations['current_profit_pct'] = current_profit | ||||
|             observations['position'] = market_side | ||||
|             observations['trade_duration'] = trade_duration | ||||
|             res, _ = model.predict(observations, deterministic=True) | ||||
| @@ -306,7 +309,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): | ||||
|         return | ||||
|  | ||||
|  | ||||
| def make_env(MyRLEnv: Base5ActionRLEnv, env_id: str, rank: int, | ||||
| def make_env(MyRLEnv: BaseEnvironment, env_id: str, rank: int, | ||||
|              seed: int, train_df: DataFrame, price: DataFrame, | ||||
|              reward_params: Dict[str, int], window_size: int, monitor: bool = False, | ||||
|              config: Dict[str, Any] = {}) -> Callable: | ||||
|   | ||||
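With the signature change above, make_env accepts any BaseEnvironment subclass and returns an env factory suitable for stable-baselines3 vectorized wrappers. A usage sketch based on that signature and the SubprocVecEnv call visible in ReinforcementLearner_multiproc further down; train_df, prices_train, reward_params, window_size, user_config and num_cpu are placeholders for values the model class already holds:

```python
# Sketch only: builds one env factory per worker and wraps them in a
# SubprocVecEnv, mirroring the call in ReinforcementLearner_multiproc.
from stable_baselines3.common.vec_env import SubprocVecEnv

from freqtrade.freqai.RL.BaseReinforcementLearningModel import make_env

env_fns = [
    make_env(MyRLEnv, "train_env", rank, 1, train_df, prices_train,
             reward_params, window_size, monitor=True, config=user_config)
    for rank in range(num_cpu)
]
train_env = SubprocVecEnv(env_fns)
```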
| @@ -1,19 +1,20 @@ | ||||
| import logging | ||||
| import torch as th | ||||
| from pathlib import Path | ||||
| from typing import Any, Dict, List, Optional, Tuple, Type, Union | ||||
| from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel | ||||
|  | ||||
| import gym | ||||
| import torch as th | ||||
| from stable_baselines3 import DQN | ||||
| from stable_baselines3.common.buffers import ReplayBuffer | ||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen | ||||
| from pathlib import Path | ||||
| from stable_baselines3.dqn.policies import (CnnPolicy, DQNPolicy, MlpPolicy, | ||||
|                                             QNetwork) | ||||
| from torch import nn | ||||
| import gym | ||||
| from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor, | ||||
|                                                    FlattenExtractor) | ||||
| from stable_baselines3.common.type_aliases import GymEnv, Schedule | ||||
| from stable_baselines3.common.policies import BasePolicy | ||||
| from stable_baselines3.common.torch_layers import BaseFeaturesExtractor, FlattenExtractor | ||||
| from stable_baselines3.common.type_aliases import GymEnv, Schedule | ||||
| from stable_baselines3.dqn.policies import CnnPolicy, DQNPolicy, MlpPolicy, QNetwork | ||||
| from torch import nn | ||||
|  | ||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen | ||||
| from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|   | ||||
| @@ -7,7 +7,7 @@ import time | ||||
| from abc import ABC, abstractmethod | ||||
| from pathlib import Path | ||||
| from threading import Lock | ||||
| from typing import Any, Dict, Tuple, Optional | ||||
| from typing import Any, Dict, Optional, Tuple | ||||
|  | ||||
| import numpy as np | ||||
| import pandas as pd | ||||
|   | ||||
| @@ -1,15 +1,14 @@ | ||||
| import logging | ||||
| from pathlib import Path | ||||
| from typing import Any, Dict | ||||
|  | ||||
| import torch as th | ||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen | ||||
| from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Positions | ||||
| from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel | ||||
| from pathlib import Path | ||||
| # from pandas import DataFrame | ||||
| # from stable_baselines3.common.callbacks import EvalCallback | ||||
| # from stable_baselines3.common.monitor import Monitor | ||||
| import numpy as np | ||||
| import torch as th | ||||
|  | ||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen | ||||
| from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions | ||||
| from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| @@ -53,7 +52,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel): | ||||
|  | ||||
|         return model | ||||
|  | ||||
|     class MyRLEnv(BaseReinforcementLearningModel.MyRLEnv): | ||||
|     class MyRLEnv(Base5ActionRLEnv): | ||||
|         """ | ||||
|         User can override any function in BaseRLEnv and gym.Env. Here the user | ||||
|         sets a custom reward based on profit and trade duration. | ||||
|   | ||||
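ReinforcementLearner.MyRLEnv now subclasses Base5ActionRLEnv directly rather than a nested environment on the model class. The reward actually shipped with the learner is not shown in this hunk, so the override below is only a hypothetical illustration of the pattern the docstring describes (a custom reward built from profit and trade duration):

```python
# Hypothetical reward override; the concrete logic in freqtrade's
# ReinforcementLearner.MyRLEnv is not visible in this hunk.
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions


class MyRLEnv(Base5ActionRLEnv):

    def calculate_reward(self, action: int) -> float:
        # Strongly discourage invalid actions (e.g. exiting without a position).
        if not self._is_valid(action):
            return -2.

        pnl = self.get_unrealized_profit()

        # Reward closing a long position, scaled by the configured risk/reward.
        if action == Actions.Long_exit.value and self._position == Positions.Long:
            return float(pnl * self.rr)

        # Mild penalty for sitting in any open trade, growing with duration.
        if self._position in (Positions.Long, Positions.Short):
            return -0.01 * self.get_trade_duration()

        return 0.
```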
| @@ -1,15 +1,16 @@ | ||||
| import logging | ||||
| from pathlib import Path | ||||
| from typing import Any, Dict  # , Tuple | ||||
|  | ||||
| # import numpy.typing as npt | ||||
| import torch as th | ||||
| from stable_baselines3.common.callbacks import EvalCallback | ||||
| from stable_baselines3.common.vec_env import SubprocVecEnv | ||||
|  | ||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen | ||||
| from freqtrade.freqai.RL.BaseReinforcementLearningModel import (BaseReinforcementLearningModel, | ||||
|                                                                 make_env) | ||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen | ||||
|  | ||||
| from pathlib import Path | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| @@ -26,7 +27,7 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel): | ||||
|  | ||||
|         # model arch | ||||
|         policy_kwargs = dict(activation_fn=th.nn.ReLU, | ||||
|                              net_arch=[256, 256]) | ||||
|                              net_arch=[256, 256, 128]) | ||||
|  | ||||
|         if dk.pair not in self.dd.model_dictionary or not self.continual_learning: | ||||
|             model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, | ||||
| @@ -64,9 +65,9 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel): | ||||
|         test_df = data_dictionary["test_features"] | ||||
|  | ||||
|         env_id = "train_env" | ||||
|         num_cpu = int(self.freqai_info["rl_config"]["thread_count"] / 2) | ||||
|         num_cpu = int(self.freqai_info["rl_config"]["thread_count"]) | ||||
|         self.train_env = SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1, train_df, prices_train, | ||||
|                                         self.reward_params, self.CONV_WIDTH, | ||||
|                                         self.reward_params, self.CONV_WIDTH, monitor=True, | ||||
|                                         config=self.config) for i | ||||
|                                         in range(num_cpu)]) | ||||
|  | ||||
|   | ||||
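The hunks in this commit reference several configuration keys: thread_count now maps one-to-one to SubprocVecEnv workers, monitor=True wraps each training env, max_training_drawdown_pct feeds the new max_drawdown check, and stake_amount controls compounding. An illustrative config fragment collecting those keys; the values are examples, not recommendations:

```python
# Example freqtrade config fragment (values illustrative) showing the keys
# touched by this commit.
config = {
    "stake_amount": "unlimited",          # enables compound_trades in BaseEnvironment
    "freqai": {
        "rl_config": {
            "thread_count": 4,            # now used directly as the worker count
            "model_type": "PPO",          # example value for self.model_type
            "max_training_drawdown_pct": 0.8,
            "model_reward_parameters": {  # passed to the env as reward_kwargs
                "rr": 1,
                "profit_aim": 0.025,
            },
        },
    },
}
```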