From 7766350c1558ae257cc540a22dadc7aefcafe384 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sun, 28 Aug 2022 19:21:57 +0200
Subject: [PATCH] refactor environment inheritance tree to accommodate flexible action types/counts. fix bug in train profit handling

---
 freqtrade/freqai/RL/Base4ActionRLEnv.py       | 234 +--------------
 freqtrade/freqai/RL/Base5ActionRLEnv.py       | 187 +-----------
 freqtrade/freqai/RL/BaseEnvironment.py        | 270 ++++++++++++++++++
 .../RL/BaseReinforcementLearningModel.py      |  35 +--
 .../RL/ReinforcementLearnerCustomAgent.py     |  23 +-
 freqtrade/freqai/freqai_interface.py          |   2 +-
 .../prediction_models/ReinforcementLearner.py |  17 +-
 .../ReinforcementLearner_multiproc.py         |  11 +-
 8 files changed, 339 insertions(+), 440 deletions(-)
 create mode 100644 freqtrade/freqai/RL/BaseEnvironment.py

diff --git a/freqtrade/freqai/RL/Base4ActionRLEnv.py b/freqtrade/freqai/RL/Base4ActionRLEnv.py
index 478507639..ef5b1c107 100644
--- a/freqtrade/freqai/RL/Base4ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base4ActionRLEnv.py
@@ -1,14 +1,11 @@
 import logging
 from enum import Enum
-from typing import Optional
 
-import gym
-import numpy as np
 from gym import spaces
-from gym.utils import seeding
-from pandas import DataFrame
-import pandas as pd
-from abc import abstractmethod
+
+from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions
+
 
 logger = logging.getLogger(__name__)
 
@@ -19,95 +16,13 @@ class Actions(Enum):
     Short_enter = 3
 
-
-class Positions(Enum):
-    Short = 0
-    Long = 1
-    Neutral = 0.5
-
-    def opposite(self):
-        return Positions.Short if self == Positions.Long else Positions.Long
-
-
-def mean_over_std(x):
-    std = np.std(x, ddof=1)
-    mean = np.mean(x)
-    return mean / std if std > 0 else 0
-
-
-class Base4ActionRLEnv(gym.Env):
+class Base4ActionRLEnv(BaseEnvironment):
     """
-    Base class for a 5 action environment
+    Base class for a 4 action environment
     """
-    metadata = {'render.modes': ['human']}
 
-    def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
-                 reward_kwargs: dict = {}, window_size=10, starting_point=True,
-                 id: str = 'baseenv-1', seed: int = 1, config: dict = {}):
-
-        self.rl_config = config['freqai']['rl_config']
-        self.id = id
-        self.seed(seed)
-        self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
-
-    def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,
-                  reward_kwargs: dict, starting_point=True):
-        self.df = df
-        self.signal_features = self.df
-        self.prices = prices
-        self.window_size = window_size
-        self.starting_point = starting_point
-        self.rr = reward_kwargs["rr"]
-        self.profit_aim = reward_kwargs["profit_aim"]
-
-        self.fee = 0.0015
-
-        # # spaces
-        self.shape = (window_size, self.signal_features.shape[1] + 3)
+    def set_action_space(self):
         self.action_space = spaces.Discrete(len(Actions))
-        self.observation_space = spaces.Box(
-            low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)
-
-        # episode
-        self._start_tick: int = self.window_size
-        self._end_tick: int = len(self.prices) - 1
-        self._done: bool = False
-        self._current_tick: int = self._start_tick
-        self._last_trade_tick: Optional[int] = None
-        self._position = Positions.Neutral
-        self._position_history: list = [None]
-        self.total_reward: float = 0
-        self._total_profit: float = 1
-        self.history: dict = {}
-        self.trade_history: list = []
-
-    def seed(self, seed: int = 1):
-        self.np_random, seed = seeding.np_random(seed)
-        return [seed]
-
-    def reset(self):
-
-        self._done = False
-
-        if self.starting_point is True:
-            self._position_history = 
(self._start_tick * [None]) + [self._position] - else: - self._position_history = (self.window_size * [None]) + [self._position] - - self._current_tick = self._start_tick - self._last_trade_tick = None - self._position = Positions.Neutral - - self.total_reward = 0. - self._total_profit = 1. # unit - self.history = {} - self.trade_history = [] - self.portfolio_log_returns = np.zeros(len(self.prices)) - - self._profits = [(self._start_tick, 1)] - self.close_trade_profit = [] - - return self._get_observation() def step(self, action: int): self._done = False @@ -181,43 +96,6 @@ class Base4ActionRLEnv(gym.Env): return observation, step_reward, self._done, info - def _get_observation(self): - features_window = self.signal_features[( - self._current_tick - self.window_size):self._current_tick] - features_and_state = DataFrame(np.zeros((len(features_window), 3)), - columns=['current_profit_pct', 'position', 'trade_duration'], - index=features_window.index) - - features_and_state['current_profit_pct'] = self.get_unrealized_profit() - features_and_state['position'] = self._position.value - features_and_state['trade_duration'] = self.get_trade_duration() - features_and_state = pd.concat([features_window, features_and_state], axis=1) - return features_and_state - - def get_trade_duration(self): - if self._last_trade_tick is None: - return 0 - else: - return self._current_tick - self._last_trade_tick - - def get_unrealized_profit(self): - - if self._last_trade_tick is None: - return 0. - - if self._position == Positions.Neutral: - return 0. - elif self._position == Positions.Short: - current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open) - last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open) - return (last_trade_price - current_price) / last_trade_price - elif self._position == Positions.Long: - current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open) - last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open) - return (current_price - last_trade_price) / last_trade_price - else: - return 0. 
- def is_tradesignal(self, action: int): # trade signal """ @@ -228,7 +106,7 @@ class Base4ActionRLEnv(gym.Env): (action == Actions.Neutral.value and self._position == Positions.Short) or (action == Actions.Neutral.value and self._position == Positions.Long) or (action == Actions.Short_enter.value and self._position == Positions.Short) or - (action == Actions.Short_enter.value and self._position == Positions.Long) or + (action == Actions.Short_enter.value and self._position == Positions.Long) or (action == Actions.Exit.value and self._position == Positions.Neutral) or (action == Actions.Long_enter.value and self._position == Positions.Long) or (action == Actions.Long_enter.value and self._position == Positions.Short)) @@ -240,7 +118,7 @@ class Base4ActionRLEnv(gym.Env): e.g.: agent wants a Actions.Long_exit while it is in a Positions.short """ # Agent should only try to exit if it is in position - if action in (Actions.Exit.value): + if action == Actions.Exit.value: if self._position not in (Positions.Short, Positions.Long): return False @@ -250,97 +128,3 @@ class Base4ActionRLEnv(gym.Env): return False return True - - def _is_trade(self, action: Actions): - return ((action == Actions.Long_enter.value and self._position == Positions.Neutral) or - (action == Actions.Short_enter.value and self._position == Positions.Neutral)) - - def is_hold(self, action): - return ((action == Actions.Short_enter.value and self._position == Positions.Short) or - (action == Actions.Long_enter.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Short) or - (action == Actions.Neutral.value and self._position == Positions.Neutral)) - - def add_entry_fee(self, price): - return price * (1 + self.fee) - - def add_exit_fee(self, price): - return price / (1 + self.fee) - - def _update_history(self, info): - if not self.history: - self.history = {key: [] for key in info.keys()} - - for key, value in info.items(): - self.history[key].append(value) - - def get_sharpe_ratio(self): - return mean_over_std(self.get_portfolio_log_returns()) - - @abstractmethod - def calculate_reward(self, action): - """ - Reward is created by BaseReinforcementLearningModel and can - be inherited/edited by the user made ReinforcementLearner file. - """ - - return 0. - - def _update_profit(self, action): - if self._is_trade(action) or self._done: - pnl = self.get_unrealized_profit() - - if self._position in (Positions.Long, Positions.Short): - self._total_profit *= (1 + pnl) - self._profits.append((self._current_tick, self._total_profit)) - self.close_trade_profit.append(pnl) - - def most_recent_return(self, action: int): - """ - Calculate the tick to tick return if in a trade. - Return is generated from rising prices in Long - and falling prices in Short positions. - The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. 
- """ - # Long positions - if self._position == Positions.Long: - current_price = self.prices.iloc[self._current_tick].open - previous_price = self.prices.iloc[self._current_tick - 1].open - - if (self._position_history[self._current_tick - 1] == Positions.Short - or self._position_history[self._current_tick - 1] == Positions.Neutral): - previous_price = self.add_entry_fee(previous_price) - - return np.log(current_price) - np.log(previous_price) - - # Short positions - if self._position == Positions.Short: - current_price = self.prices.iloc[self._current_tick].open - previous_price = self.prices.iloc[self._current_tick - 1].open - if (self._position_history[self._current_tick - 1] == Positions.Long - or self._position_history[self._current_tick - 1] == Positions.Neutral): - previous_price = self.add_exit_fee(previous_price) - - return np.log(previous_price) - np.log(current_price) - - return 0 - - def get_portfolio_log_returns(self): - return self.portfolio_log_returns[1:self._current_tick + 1] - - def update_portfolio_log_returns(self, action): - self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) - - def current_price(self) -> float: - return self.prices.iloc[self._current_tick].open - - def prev_price(self) -> float: - return self.prices.iloc[self._current_tick - 1].open - - def sharpe_ratio(self): - if len(self.close_trade_profit) == 0: - return 0. - returns = np.array(self.close_trade_profit) - reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) - return reward diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py index b93d6e6ff..e0a38f9d1 100644 --- a/freqtrade/freqai/RL/Base5ActionRLEnv.py +++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py @@ -1,14 +1,14 @@ import logging from enum import Enum -from typing import Optional -import gym import numpy as np -from gym import spaces -from gym.utils import seeding -from pandas import DataFrame import pandas as pd -from abc import abstractmethod +from gym import spaces +from pandas import DataFrame + +from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions + + logger = logging.getLogger(__name__) @@ -20,70 +20,19 @@ class Actions(Enum): Short_exit = 4 -class Positions(Enum): - Short = 0 - Long = 1 - Neutral = 0.5 - - def opposite(self): - return Positions.Short if self == Positions.Long else Positions.Long - - def mean_over_std(x): std = np.std(x, ddof=1) mean = np.mean(x) return mean / std if std > 0 else 0 -class Base5ActionRLEnv(gym.Env): +class Base5ActionRLEnv(BaseEnvironment): """ Base class for a 5 action environment """ - metadata = {'render.modes': ['human']} - def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), - reward_kwargs: dict = {}, window_size=10, starting_point=True, - id: str = 'baseenv-1', seed: int = 1, config: dict = {}): - - self.rl_config = config['freqai']['rl_config'] - self.id = id - self.seed(seed) - self.reset_env(df, prices, window_size, reward_kwargs, starting_point) - - def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int, - reward_kwargs: dict, starting_point=True): - self.df = df - self.signal_features = self.df - self.prices = prices - self.window_size = window_size - self.starting_point = starting_point - self.rr = reward_kwargs["rr"] - self.profit_aim = reward_kwargs["profit_aim"] - - self.fee = 0.0015 - - # # spaces - self.shape = (window_size, self.signal_features.shape[1] + 3) + def set_action_space(self): self.action_space = spaces.Discrete(len(Actions)) - 
self.observation_space = spaces.Box( - low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) - - # episode - self._start_tick: int = self.window_size - self._end_tick: int = len(self.prices) - 1 - self._done: bool = False - self._current_tick: int = self._start_tick - self._last_trade_tick: Optional[int] = None - self._position = Positions.Neutral - self._position_history: list = [None] - self.total_reward: float = 0 - self._total_profit: float = 1 - self.history: dict = {} - self.trade_history: list = [] - - def seed(self, seed: int = 1): - self.np_random, seed = seeding.np_random(seed) - return [seed] def reset(self): @@ -106,6 +55,7 @@ class Base5ActionRLEnv(gym.Env): self._profits = [(self._start_tick, 1)] self.close_trade_profit = [] + self._total_unrealized_profit = 1 return self._get_observation() @@ -118,7 +68,7 @@ class Base5ActionRLEnv(gym.Env): self.update_portfolio_log_returns(action) - self._update_profit(action) + self._update_unrealized_total_profit() step_reward = self.calculate_reward(action) self.total_reward += step_reward @@ -148,10 +98,12 @@ class Base5ActionRLEnv(gym.Env): trade_type = "short" self._last_trade_tick = self._current_tick elif action == Actions.Long_exit.value: + self._update_total_profit() self._position = Positions.Neutral trade_type = "neutral" self._last_trade_tick = None elif action == Actions.Short_exit.value: + self._update_total_profit() self._position = Positions.Neutral trade_type = "neutral" self._last_trade_tick = None @@ -163,7 +115,8 @@ class Base5ActionRLEnv(gym.Env): {'price': self.current_price(), 'index': self._current_tick, 'type': trade_type}) - if self._total_profit < 1 - self.rl_config.get('max_training_drawdown_pct', 0.8): + if (self._total_profit < self.max_drawdown or + self._total_unrealized_profit < self.max_drawdown): self._done = True self._position_history.append(self._position) @@ -200,24 +153,6 @@ class Base5ActionRLEnv(gym.Env): else: return self._current_tick - self._last_trade_tick - def get_unrealized_profit(self): - - if self._last_trade_tick is None: - return 0. - - if self._position == Positions.Neutral: - return 0. - elif self._position == Positions.Short: - current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open) - last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open) - return (last_trade_price - current_price) / last_trade_price - elif self._position == Positions.Long: - current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open) - last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open) - return (current_price - last_trade_price) / last_trade_price - else: - return 0. 
- def is_tradesignal(self, action: int): # trade signal """ @@ -253,97 +188,3 @@ class Base5ActionRLEnv(gym.Env): return False return True - - def _is_trade(self, action: Actions): - return ((action == Actions.Long_enter.value and self._position == Positions.Neutral) or - (action == Actions.Short_enter.value and self._position == Positions.Neutral)) - - def is_hold(self, action): - return ((action == Actions.Short_enter.value and self._position == Positions.Short) or - (action == Actions.Long_enter.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Short) or - (action == Actions.Neutral.value and self._position == Positions.Neutral)) - - def add_entry_fee(self, price): - return price * (1 + self.fee) - - def add_exit_fee(self, price): - return price / (1 + self.fee) - - def _update_history(self, info): - if not self.history: - self.history = {key: [] for key in info.keys()} - - for key, value in info.items(): - self.history[key].append(value) - - def get_sharpe_ratio(self): - return mean_over_std(self.get_portfolio_log_returns()) - - @abstractmethod - def calculate_reward(self, action): - """ - Reward is created by BaseReinforcementLearningModel and can - be inherited/edited by the user made ReinforcementLearner file. - """ - - return 0. - - def _update_profit(self, action): - if self._is_trade(action) or self._done: - pnl = self.get_unrealized_profit() - - if self._position in (Positions.Long, Positions.Short): - self._total_profit *= (1 + pnl) - self._profits.append((self._current_tick, self._total_profit)) - self.close_trade_profit.append(pnl) - - def most_recent_return(self, action: int): - """ - Calculate the tick to tick return if in a trade. - Return is generated from rising prices in Long - and falling prices in Short positions. - The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. - """ - # Long positions - if self._position == Positions.Long: - current_price = self.prices.iloc[self._current_tick].open - previous_price = self.prices.iloc[self._current_tick - 1].open - - if (self._position_history[self._current_tick - 1] == Positions.Short - or self._position_history[self._current_tick - 1] == Positions.Neutral): - previous_price = self.add_entry_fee(previous_price) - - return np.log(current_price) - np.log(previous_price) - - # Short positions - if self._position == Positions.Short: - current_price = self.prices.iloc[self._current_tick].open - previous_price = self.prices.iloc[self._current_tick - 1].open - if (self._position_history[self._current_tick - 1] == Positions.Long - or self._position_history[self._current_tick - 1] == Positions.Neutral): - previous_price = self.add_exit_fee(previous_price) - - return np.log(previous_price) - np.log(current_price) - - return 0 - - def get_portfolio_log_returns(self): - return self.portfolio_log_returns[1:self._current_tick + 1] - - def update_portfolio_log_returns(self, action): - self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) - - def current_price(self) -> float: - return self.prices.iloc[self._current_tick].open - - def prev_price(self) -> float: - return self.prices.iloc[self._current_tick - 1].open - - def sharpe_ratio(self): - if len(self.close_trade_profit) == 0: - return 0. - returns = np.array(self.close_trade_profit) - reward = (np.mean(returns) - 0. 
+ 1e-9) / (np.std(returns) + 1e-9)
-        return reward
diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
new file mode 100644
index 000000000..bba3c4a1b
--- /dev/null
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -0,0 +1,270 @@
+import logging
+from abc import abstractmethod
+from enum import Enum
+from typing import Optional
+
+import gym
+import numpy as np
+import pandas as pd
+from gym import spaces
+from gym.utils import seeding
+from pandas import DataFrame
+
+
+logger = logging.getLogger(__name__)
+
+
+class Positions(Enum):
+    Short = 0
+    Long = 1
+    Neutral = 0.5
+
+    def opposite(self):
+        return Positions.Short if self == Positions.Long else Positions.Long
+
+
+class BaseEnvironment(gym.Env):
+    """
+    Base class for environments. This class is agnostic to action count.
+    Inherited classes customize this to include varying action counts/types;
+    see RL/Base5ActionRLEnv.py and RL/Base4ActionRLEnv.py.
+    """
+
+    def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
+                 reward_kwargs: dict = {}, window_size=10, starting_point=True,
+                 id: str = 'baseenv-1', seed: int = 1, config: dict = {}):
+
+        self.rl_config = config['freqai']['rl_config']
+        self.id = id
+        self.seed(seed)
+        self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
+        self.max_drawdown = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8)
+        self.compound_trades = config['stake_amount'] == 'unlimited'
+
+    def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,
+                  reward_kwargs: dict, starting_point=True):
+        self.df = df
+        self.signal_features = self.df
+        self.prices = prices
+        self.window_size = window_size
+        self.starting_point = starting_point
+        self.rr = reward_kwargs["rr"]
+        self.profit_aim = reward_kwargs["profit_aim"]
+
+        self.fee = 0.0015
+
+        # # spaces
+        self.shape = (window_size, self.signal_features.shape[1] + 3)
+        self.set_action_space()
+        self.observation_space = spaces.Box(
+            low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)
+
+        # episode
+        self._start_tick: int = self.window_size
+        self._end_tick: int = len(self.prices) - 1
+        self._done: bool = False
+        self._current_tick: int = self._start_tick
+        self._last_trade_tick: Optional[int] = None
+        self._position = Positions.Neutral
+        self._position_history: list = [None]
+        self.total_reward: float = 0
+        self._total_profit: float = 1
+        self._total_unrealized_profit: float = 1
+        self.history: dict = {}
+        self.trade_history: list = []
+
+    @abstractmethod
+    def set_action_space(self):
+        """
+        Unique to the environment action count. Must be inherited.
+        """
+
+    def seed(self, seed: int = 1):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def reset(self):
+
+        self._done = False
+
+        if self.starting_point is True:
+            self._position_history = (self._start_tick * [None]) + [self._position]
+        else:
+            self._position_history = (self.window_size * [None]) + [self._position]
+
+        self._current_tick = self._start_tick
+        self._last_trade_tick = None
+        self._position = Positions.Neutral
+
+        self.total_reward = 0.
+        self._total_profit = 1.  # unit
+        self.history = {}
+        self.trade_history = []
+        self.portfolio_log_returns = np.zeros(len(self.prices))
+
+        self._profits = [(self._start_tick, 1)]
+        self.close_trade_profit = []
+        self._total_unrealized_profit = 1
+
+        return self._get_observation()
+
+    @abstractmethod
+    def step(self, action: int):
+        """
+        Step depends on action types; this must be inherited.
+        """
+        return
+
+    def _get_observation(self):
+        """
+        This may or may not be independent of action types; the user can
+        override it in their custom "MyRLEnv"
+        """
+        features_window = self.signal_features[(
+            self._current_tick - self.window_size):self._current_tick]
+        features_and_state = DataFrame(np.zeros((len(features_window), 3)),
+                                       columns=['current_profit_pct', 'position', 'trade_duration'],
+                                       index=features_window.index)
+
+        features_and_state['current_profit_pct'] = self.get_unrealized_profit()
+        features_and_state['position'] = self._position.value
+        features_and_state['trade_duration'] = self.get_trade_duration()
+        features_and_state = pd.concat([features_window, features_and_state], axis=1)
+        return features_and_state
+
+    def get_trade_duration(self):
+        if self._last_trade_tick is None:
+            return 0
+        else:
+            return self._current_tick - self._last_trade_tick
+
+    def get_unrealized_profit(self):
+
+        if self._last_trade_tick is None:
+            return 0.
+
+        if self._position == Positions.Neutral:
+            return 0.
+        elif self._position == Positions.Short:
+            current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
+            last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
+            return (last_trade_price - current_price) / last_trade_price
+        elif self._position == Positions.Long:
+            current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
+            last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
+            return (current_price - last_trade_price) / last_trade_price
+        else:
+            return 0.
+
+    @abstractmethod
+    def is_tradesignal(self, action: int):
+        # trade signal
+        """
+        Determine if the signal is a trade signal. This is
+        unique to the actions in the environment, and therefore must be
+        inherited.
+        """
+        return
+
+    def _is_valid(self, action: int):
+        # trade signal
+        """
+        Determine if the signal is valid. This is
+        unique to the actions in the environment, and therefore must be
+        inherited.
+        """
+        return
+
+    def add_entry_fee(self, price):
+        return price * (1 + self.fee)
+
+    def add_exit_fee(self, price):
+        return price / (1 + self.fee)
+
+    def _update_history(self, info):
+        if not self.history:
+            self.history = {key: [] for key in info.keys()}
+
+        for key, value in info.items():
+            self.history[key].append(value)
+
+    @abstractmethod
+    def calculate_reward(self, action):
+        """
+        Reward is created by BaseReinforcementLearningModel and can
+        be inherited/edited by the user-made ReinforcementLearner file.
+        """
+
+        return 0.
+
+    def _update_unrealized_total_profit(self):
+        """
+        Update the unrealized total profit in case of episode end.
+        """
+        if self._position in (Positions.Long, Positions.Short):
+            pnl = self.get_unrealized_profit()
+            if self.compound_trades:
+                # assumes unit stake and compounding
+                unrl_profit = self._total_profit * (1 + pnl)
+            else:
+                # assumes unit stake and no compounding
+                unrl_profit = self._total_profit + pnl
+            self._total_unrealized_profit = unrl_profit
+
+    def _update_total_profit(self):
+        pnl = self.get_unrealized_profit()
+        if self.compound_trades:
+            # assumes unit stake and compounding
+            self._total_profit = self._total_profit * (1 + pnl)
+        else:
+            # assumes unit stake and no compounding
+            self._total_profit += pnl
+
+    def most_recent_return(self, action: int):
+        """
+        Calculate the tick to tick return if in a trade.
+        Return is generated from rising prices in Long
+        and falling prices in Short positions.
+        The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
+ """ + # Long positions + if self._position == Positions.Long: + current_price = self.prices.iloc[self._current_tick].open + previous_price = self.prices.iloc[self._current_tick - 1].open + + if (self._position_history[self._current_tick - 1] == Positions.Short + or self._position_history[self._current_tick - 1] == Positions.Neutral): + previous_price = self.add_entry_fee(previous_price) + + return np.log(current_price) - np.log(previous_price) + + # Short positions + if self._position == Positions.Short: + current_price = self.prices.iloc[self._current_tick].open + previous_price = self.prices.iloc[self._current_tick - 1].open + if (self._position_history[self._current_tick - 1] == Positions.Long + or self._position_history[self._current_tick - 1] == Positions.Neutral): + previous_price = self.add_exit_fee(previous_price) + + return np.log(previous_price) - np.log(current_price) + + return 0 + + def get_portfolio_log_returns(self): + return self.portfolio_log_returns[1:self._current_tick + 1] + + def update_portfolio_log_returns(self, action): + self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) + + def current_price(self) -> float: + return self.prices.iloc[self._current_tick].open + + def prev_price(self) -> float: + return self.prices.iloc[self._current_tick - 1].open + + def sharpe_ratio(self): + if len(self.close_trade_profit) == 0: + return 0. + returns = np.array(self.close_trade_profit) + reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) + return reward diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 5a7ae4372..77db9c655 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -1,25 +1,28 @@ import logging -from typing import Any, Dict, Tuple +from abc import abstractmethod +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Callable, Dict, Tuple +import gym import numpy as np import numpy.typing as npt import pandas as pd +import torch as th +import torch.multiprocessing from pandas import DataFrame -from abc import abstractmethod +from stable_baselines3.common.callbacks import EvalCallback +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.utils import set_random_seed + from freqtrade.exceptions import OperationalException from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.freqai_interface import IFreqaiModel -from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions +from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv +from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions from freqtrade.persistence import Trade -import torch.multiprocessing -from stable_baselines3.common.callbacks import EvalCallback -from stable_baselines3.common.monitor import Monitor -import torch as th -from typing import Callable -from datetime import datetime, timezone -from stable_baselines3.common.utils import set_random_seed -import gym -from pathlib import Path + + logger = logging.getLogger(__name__) torch.multiprocessing.set_sharing_strategy('file_system') @@ -37,8 +40,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): super().__init__(config=kwargs['config']) th.set_num_threads(self.freqai_info['rl_config'].get('thread_count', 4)) self.reward_params = self.freqai_info['rl_config']['model_reward_parameters'] - 
self.train_env: Base5ActionRLEnv = None - self.eval_env: Base5ActionRLEnv = None + self.train_env: BaseEnvironment = None + self.eval_env: BaseEnvironment = None self.eval_callback: EvalCallback = None self.model_type = self.freqai_info['rl_config']['model_type'] self.rl_config = self.freqai_info['rl_config'] @@ -194,7 +197,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): def _predict(window): market_side, current_profit, trade_duration = self.get_state_info(dk.pair) observations = dataframe.iloc[window.index] - observations['current_profit'] = current_profit + observations['current_profit_pct'] = current_profit observations['position'] = market_side observations['trade_duration'] = trade_duration res, _ = model.predict(observations, deterministic=True) @@ -306,7 +309,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): return -def make_env(MyRLEnv: Base5ActionRLEnv, env_id: str, rank: int, +def make_env(MyRLEnv: BaseEnvironment, env_id: str, rank: int, seed: int, train_df: DataFrame, price: DataFrame, reward_params: Dict[str, int], window_size: int, monitor: bool = False, config: Dict[str, Any] = {}) -> Callable: diff --git a/freqtrade/freqai/RL/ReinforcementLearnerCustomAgent.py b/freqtrade/freqai/RL/ReinforcementLearnerCustomAgent.py index fcd813ce6..4ad95c214 100644 --- a/freqtrade/freqai/RL/ReinforcementLearnerCustomAgent.py +++ b/freqtrade/freqai/RL/ReinforcementLearnerCustomAgent.py @@ -1,19 +1,20 @@ import logging -import torch as th +from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Type, Union -from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel + +import gym +import torch as th from stable_baselines3 import DQN from stable_baselines3.common.buffers import ReplayBuffer -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from pathlib import Path -from stable_baselines3.dqn.policies import (CnnPolicy, DQNPolicy, MlpPolicy, - QNetwork) -from torch import nn -import gym -from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor, - FlattenExtractor) -from stable_baselines3.common.type_aliases import GymEnv, Schedule from stable_baselines3.common.policies import BasePolicy +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor, FlattenExtractor +from stable_baselines3.common.type_aliases import GymEnv, Schedule +from stable_baselines3.dqn.policies import CnnPolicy, DQNPolicy, MlpPolicy, QNetwork +from torch import nn + +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel + logger = logging.getLogger(__name__) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 21b79e003..b3367f9de 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -7,7 +7,7 @@ import time from abc import ABC, abstractmethod from pathlib import Path from threading import Lock -from typing import Any, Dict, Tuple, Optional +from typing import Any, Dict, Optional, Tuple import numpy as np import pandas as pd diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py index a72a56e20..0e156d28e 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py @@ -1,15 +1,14 @@ import logging +from pathlib import Path from typing import Any, Dict -import torch as th -from 
freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Positions -from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel -from pathlib import Path -# from pandas import DataFrame -# from stable_baselines3.common.callbacks import EvalCallback -# from stable_baselines3.common.monitor import Monitor import numpy as np +import torch as th + +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions +from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel + logger = logging.getLogger(__name__) @@ -53,7 +52,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel): return model - class MyRLEnv(BaseReinforcementLearningModel.MyRLEnv): + class MyRLEnv(Base5ActionRLEnv): """ User can override any function in BaseRLEnv and gym.Env. Here the user sets a custom reward based on profit and trade duration. diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py index f301da981..9f6a66729 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py @@ -1,15 +1,16 @@ import logging +from pathlib import Path from typing import Any, Dict # , Tuple # import numpy.typing as npt import torch as th from stable_baselines3.common.callbacks import EvalCallback from stable_baselines3.common.vec_env import SubprocVecEnv + +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.RL.BaseReinforcementLearningModel import (BaseReinforcementLearningModel, make_env) -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from pathlib import Path logger = logging.getLogger(__name__) @@ -26,7 +27,7 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel): # model arch policy_kwargs = dict(activation_fn=th.nn.ReLU, - net_arch=[256, 256]) + net_arch=[256, 256, 128]) if dk.pair not in self.dd.model_dictionary or not self.continual_learning: model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, @@ -64,9 +65,9 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel): test_df = data_dictionary["test_features"] env_id = "train_env" - num_cpu = int(self.freqai_info["rl_config"]["thread_count"] / 2) + num_cpu = int(self.freqai_info["rl_config"]["thread_count"]) self.train_env = SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1, train_df, prices_train, - self.reward_params, self.CONV_WIDTH, + self.reward_params, self.CONV_WIDTH, monitor=True, config=self.config) for i in range(num_cpu)])
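
To illustrate the intent of the refactored inheritance tree, here is a minimal user-side sketch of an environment with a different action count. It is not part of the patch: MyActions, MyThreeActionEnv and the toy reward are hypothetical, while BaseEnvironment, Positions, set_action_space() and calculate_reward() come from the new freqtrade/freqai/RL/BaseEnvironment.py above. A real subclass would also implement step() and is_tradesignal(), as Base4ActionRLEnv and Base5ActionRLEnv do.

# Illustrative sketch only, not part of the patch.
from enum import Enum

from gym import spaces

from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions


class MyActions(Enum):
    Neutral = 0
    Long_enter = 1
    Exit = 2


class MyThreeActionEnv(BaseEnvironment):
    """Hypothetical long-only environment with three actions."""

    def set_action_space(self):
        # Called by reset_env(); only the action count differs from the base class.
        self.action_space = spaces.Discrete(len(MyActions))

    def calculate_reward(self, action: int) -> float:
        # Toy reward: pay out the unrealized profit when exiting a long, else 0.
        if action == MyActions.Exit.value and self._position == Positions.Long:
            return float(self.get_unrealized_profit())
        return 0.

Because reset_env() calls set_action_space() before building the observation space, the base class never needs to know how many actions a subclass defines.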