From 4fc0edb8b7d16b817ed9e36f88c5f1309b37b554 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 10 Feb 2023 14:45:50 +0100 Subject: [PATCH 1/3] add pair to environment for access inside calculate_reward --- docs/freqai-reinforcement-learning.md | 18 +++++++--- freqtrade/freqai/RL/BaseEnvironment.py | 33 ++++++++++--------- .../RL/BaseReinforcementLearningModel.py | 18 +++++++--- .../ReinforcementLearner_multiproc.py | 2 +- setup.cfg | 2 ++ 5 files changed, 48 insertions(+), 25 deletions(-) diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md index 5c9733403..58eb638b8 100644 --- a/docs/freqai-reinforcement-learning.md +++ b/docs/freqai-reinforcement-learning.md @@ -175,10 +175,20 @@ As you begin to modify the strategy and the prediction model, you will quickly r pnl = self.get_unrealized_profit() factor = 100 - # reward agent for entering trades - if action in (Actions.Long_enter.value, Actions.Short_enter.value) \ - and self._position == Positions.Neutral: - return 25 + + # you can use feature values from dataframe + rsi_now = self.df[f"%-rsi-period-10_shift-1_{self.pair}_" + f"{self.config['timeframe']}"].iloc[self._current_tick] + + # reward agent for entering trades + if (action in (Actions.Long_enter.value, Actions.Short_enter.value) + and self._position == Positions.Neutral): + if rsi_now < 40: + factor = 40 / rsi_now + else: + factor = 1 + return 25 * factor + # discourage agent from not entering trades if action == Actions.Neutral.value and self._position == Positions.Neutral: return -1 diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py index ef1c02a3b..3d9527d4f 100644 --- a/freqtrade/freqai/RL/BaseEnvironment.py +++ b/freqtrade/freqai/RL/BaseEnvironment.py @@ -45,7 +45,7 @@ class BaseEnvironment(gym.Env): def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), reward_kwargs: dict = {}, window_size=10, starting_point=True, id: str = 'baseenv-1', seed: int = 1, config: dict = {}, live: bool = False, - fee: float = 0.0015, can_short: bool = False): + fee: float = 0.0015, can_short: bool = False, pair: str = ""): """ Initializes the training/eval environment. :param df: dataframe of features @@ -60,12 +60,13 @@ class BaseEnvironment(gym.Env): :param fee: The fee to use for environmental interactions. 
:param can_short: Whether or not the environment can short """ - self.config = config - self.rl_config = config['freqai']['rl_config'] - self.add_state_info = self.rl_config.get('add_state_info', False) - self.id = id - self.max_drawdown = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8) - self.compound_trades = config['stake_amount'] == 'unlimited' + self.config: dict = config + self.rl_config: dict = config['freqai']['rl_config'] + self.add_state_info: bool = self.rl_config.get('add_state_info', False) + self.id: str = id + self.max_drawdown: float = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8) + self.compound_trades: bool = config['stake_amount'] == 'unlimited' + self.pair: str = pair if self.config.get('fee', None) is not None: self.fee = self.config['fee'] else: @@ -74,8 +75,8 @@ class BaseEnvironment(gym.Env): # set here to default 5Ac, but all children envs can override this self.actions: Type[Enum] = BaseActions self.tensorboard_metrics: dict = {} - self.can_short = can_short - self.live = live + self.can_short: bool = can_short + self.live: bool = live if not self.live and self.add_state_info: self.add_state_info = False logger.warning("add_state_info is not available in backtesting. Deactivating.") @@ -93,13 +94,13 @@ class BaseEnvironment(gym.Env): :param reward_kwargs: extra config settings assigned by user in `rl_config` :param starting_point: start at edge of window or not """ - self.df = df - self.signal_features = self.df - self.prices = prices - self.window_size = window_size - self.starting_point = starting_point - self.rr = reward_kwargs["rr"] - self.profit_aim = reward_kwargs["profit_aim"] + self.df: DataFrame = df + self.signal_features: DataFrame = self.df + self.prices: DataFrame = prices + self.window_size: int = window_size + self.starting_point: bool = starting_point + self.rr: float = reward_kwargs["rr"] + self.profit_aim: float = reward_kwargs["profit_aim"] # # spaces if self.add_state_info: diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 3a4d0d0e6..13882660e 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -143,7 +143,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] - env_info = self.pack_env_dict() + env_info = self.pack_env_dict(dk.pair) self.train_env = self.MyRLEnv(df=train_df, prices=prices_train, @@ -158,7 +158,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): actions = self.train_env.get_actions() self.tensorboard_callback = TensorboardCallback(verbose=1, actions=actions) - def pack_env_dict(self) -> Dict[str, Any]: + def pack_env_dict(self, pair: str) -> Dict[str, Any]: """ Create dictionary of environment arguments """ @@ -166,7 +166,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): "reward_kwargs": self.reward_params, "config": self.config, "live": self.live, - "can_short": self.can_short} + "can_short": self.can_short, + "pair": pair} if self.data_provider: env_info["fee"] = self.data_provider._exchange \ .get_fee(symbol=self.data_provider.current_whitelist()[0]) # type: ignore @@ -363,10 +364,19 @@ class BaseReinforcementLearningModel(IFreqaiModel): pnl = self.get_unrealized_profit() factor = 100. 
+ # you can use feature values from dataframe + rsi_now = self.df[f"%-rsi-period-10_shift-1_{self.pair}_" + f"{self.config['timeframe']}"].iloc[self._current_tick] + # reward agent for entering trades if (action in (Actions.Long_enter.value, Actions.Short_enter.value) and self._position == Positions.Neutral): - return 25 + if rsi_now < 40: + factor = 40 / rsi_now + else: + factor = 1 + return 25 * factor + # discourage agent from not entering trades if action == Actions.Neutral.value and self._position == Positions.Neutral: return -1 diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py index a9be87b0b..9ee035c95 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py @@ -34,7 +34,7 @@ class ReinforcementLearner_multiproc(ReinforcementLearner): train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] - env_info = self.pack_env_dict() + env_info = self.pack_env_dict(dk.pair) env_id = "train_env" self.train_env = SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1, diff --git a/setup.cfg b/setup.cfg index 60ec8a75f..8a63004a7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,3 +49,5 @@ exclude = __pycache__, .eggs, user_data, +per-file-ignores = + freqtrade/freqai/RL/BaseReinforcementLearningModel.py:C901 From 154b6711b3725db753232b9e9659ff03f4bc07d9 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 10 Feb 2023 15:26:17 +0100 Subject: [PATCH 2/3] use function level noqa ignore --- freqtrade/freqai/RL/BaseReinforcementLearningModel.py | 2 +- setup.cfg | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 13882660e..8987c35fe 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -348,7 +348,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): sets a custom reward based on profit and trade duration. """ - def calculate_reward(self, action: int) -> float: + def calculate_reward(self, action: int) -> float: # noqa: C901 """ An example reward function. This is the one function that users will likely wish to inject their own creativity into. 
diff --git a/setup.cfg b/setup.cfg index 8a63004a7..60ec8a75f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,5 +49,3 @@ exclude = __pycache__, .eggs, user_data, -per-file-ignores = - freqtrade/freqai/RL/BaseReinforcementLearningModel.py:C901 From 8873a565eec61806d039d853b2e4077671b600ce Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 10 Feb 2023 15:48:18 +0100 Subject: [PATCH 3/3] expose raw features to the environment for use in calculate_reward --- docs/freqai-reinforcement-learning.md | 2 +- freqtrade/freqai/RL/BaseEnvironment.py | 7 ++++--- freqtrade/freqai/RL/BaseReinforcementLearningModel.py | 10 +++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md index 58eb638b8..7e4110b0b 100644 --- a/docs/freqai-reinforcement-learning.md +++ b/docs/freqai-reinforcement-learning.md @@ -177,7 +177,7 @@ As you begin to modify the strategy and the prediction model, you will quickly r factor = 100 # you can use feature values from dataframe - rsi_now = self.df[f"%-rsi-period-10_shift-1_{self.pair}_" + rsi_now = self.raw_features[f"%-rsi-period-10_shift-1_{self.pair}_" f"{self.config['timeframe']}"].iloc[self._current_tick] # reward agent for entering trades diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py index 3d9527d4f..7a4467bf7 100644 --- a/freqtrade/freqai/RL/BaseEnvironment.py +++ b/freqtrade/freqai/RL/BaseEnvironment.py @@ -45,7 +45,8 @@ class BaseEnvironment(gym.Env): def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), reward_kwargs: dict = {}, window_size=10, starting_point=True, id: str = 'baseenv-1', seed: int = 1, config: dict = {}, live: bool = False, - fee: float = 0.0015, can_short: bool = False, pair: str = ""): + fee: float = 0.0015, can_short: bool = False, pair: str = "", + df_raw: DataFrame = DataFrame()): """ Initializes the training/eval environment. 
:param df: dataframe of features @@ -67,6 +68,7 @@ class BaseEnvironment(gym.Env): self.max_drawdown: float = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8) self.compound_trades: bool = config['stake_amount'] == 'unlimited' self.pair: str = pair + self.raw_features: DataFrame = df_raw if self.config.get('fee', None) is not None: self.fee = self.config['fee'] else: @@ -94,8 +96,7 @@ class BaseEnvironment(gym.Env): :param reward_kwargs: extra config settings assigned by user in `rl_config` :param starting_point: start at edge of window or not """ - self.df: DataFrame = df - self.signal_features: DataFrame = self.df + self.signal_features: DataFrame = df self.prices: DataFrame = prices self.window_size: int = window_size self.starting_point: bool = starting_point diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 8987c35fe..a8ef69394 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -1,3 +1,4 @@ +import copy import importlib import logging from abc import abstractmethod @@ -50,6 +51,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): self.eval_callback: Optional[EvalCallback] = None self.model_type = self.freqai_info['rl_config']['model_type'] self.rl_config = self.freqai_info['rl_config'] + self.df_raw: DataFrame = DataFrame() self.continual_learning = self.freqai_info.get('continual_learning', False) if self.model_type in SB3_MODELS: import_str = 'stable_baselines3' @@ -107,6 +109,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): data_dictionary: Dict[str, Any] = dk.make_train_test_datasets( features_filtered, labels_filtered) + self.df_raw = copy.deepcopy(data_dictionary["train_features"]) dk.fit_labels() # FIXME useless for now, but just satiating append methods # normalize all data based on train_dataset only @@ -167,7 +170,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): "config": self.config, "live": self.live, "can_short": self.can_short, - "pair": pair} + "pair": pair, + "df_raw": self.df_raw} if self.data_provider: env_info["fee"] = self.data_provider._exchange \ .get_fee(symbol=self.data_provider.current_whitelist()[0]) # type: ignore @@ -365,8 +369,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): factor = 100. # you can use feature values from dataframe - rsi_now = self.df[f"%-rsi-period-10_shift-1_{self.pair}_" - f"{self.config['timeframe']}"].iloc[self._current_tick] + rsi_now = self.raw_features[f"%-rsi-period-10_shift-1_{self.pair}_" + f"{self.config['timeframe']}"].iloc[self._current_tick] # reward agent for entering trades if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
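
Taken together, the three patches let a user-defined environment read the traded pair (`self.pair`) and the raw, un-normalized feature values (`self.raw_features`) while shaping the reward. Below is a minimal sketch of how a user's prediction model might consume these new attributes, following the pattern from the patched documentation example; the `MyCoolRLModel` class name, the RSI column name, and the reward constants are illustrative assumptions rather than part of this patch series.

```python
from freqtrade.freqai.prediction_models.ReinforcementLearner import ReinforcementLearner
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions


class MyCoolRLModel(ReinforcementLearner):
    """
    Hypothetical user model: the class and file layout are illustrative; only the
    attributes used inside calculate_reward (pair, raw_features, config) come from
    this patch series.
    """

    class MyRLEnv(Base5ActionRLEnv):

        def calculate_reward(self, action: int) -> float:
            # penalize invalid actions first, as in the stock documentation example
            if not self._is_valid(action):
                return -2

            # raw (un-normalized) feature values exposed by patch 3/3; the column
            # name follows the documentation example and assumes the strategy
            # actually produced this RSI feature
            rsi_now = self.raw_features[
                f"%-rsi-period-10_shift-1_{self.pair}_{self.config['timeframe']}"
            ].iloc[self._current_tick]

            # reward entries more strongly when RSI is depressed
            if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
                    and self._position == Positions.Neutral):
                factor = 40 / rsi_now if rsi_now < 40 else 1
                return 25 * factor

            # discourage the agent from sitting idle with no position
            if action == Actions.Neutral.value and self._position == Positions.Neutral:
                return -1

            return 0.
```

Note that `raw_features` is populated from `self.df_raw`, which patch 3/3 copies from the training features before normalization, so a threshold such as `rsi_now < 40` operates on real indicator values rather than normalized ones; `signal_features` continues to hold the normalized data fed to the agent's observations.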