From 4fc0edb8b7d16b817ed9e36f88c5f1309b37b554 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 10 Feb 2023 14:45:50 +0100 Subject: [PATCH 1/3] add pair to environment for access inside calculate_reward --- docs/freqai-reinforcement-learning.md | 18 +++++++--- freqtrade/freqai/RL/BaseEnvironment.py | 33 ++++++++++--------- .../RL/BaseReinforcementLearningModel.py | 18 +++++++--- .../ReinforcementLearner_multiproc.py | 2 +- setup.cfg | 2 ++ 5 files changed, 48 insertions(+), 25 deletions(-) diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md index 5c9733403..58eb638b8 100644 --- a/docs/freqai-reinforcement-learning.md +++ b/docs/freqai-reinforcement-learning.md @@ -175,10 +175,20 @@ As you begin to modify the strategy and the prediction model, you will quickly r pnl = self.get_unrealized_profit() factor = 100 - # reward agent for entering trades - if action in (Actions.Long_enter.value, Actions.Short_enter.value) \ - and self._position == Positions.Neutral: - return 25 + + # you can use feature values from dataframe + rsi_now = self.df[f"%-rsi-period-10_shift-1_{self.pair}_" + f"{self.config['timeframe']}"].iloc[self._current_tick] + + # reward agent for entering trades + if (action in (Actions.Long_enter.value, Actions.Short_enter.value) + and self._position == Positions.Neutral): + if rsi_now < 40: + factor = 40 / rsi_now + else: + factor = 1 + return 25 * factor + # discourage agent from not entering trades if action == Actions.Neutral.value and self._position == Positions.Neutral: return -1 diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py index ef1c02a3b..3d9527d4f 100644 --- a/freqtrade/freqai/RL/BaseEnvironment.py +++ b/freqtrade/freqai/RL/BaseEnvironment.py @@ -45,7 +45,7 @@ class BaseEnvironment(gym.Env): def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), reward_kwargs: dict = {}, window_size=10, starting_point=True, id: str = 'baseenv-1', seed: int = 1, config: dict = {}, live: bool = False, - fee: float = 0.0015, can_short: bool = False): + fee: float = 0.0015, can_short: bool = False, pair: str = ""): """ Initializes the training/eval environment. :param df: dataframe of features @@ -60,12 +60,13 @@ class BaseEnvironment(gym.Env): :param fee: The fee to use for environmental interactions. 
:param can_short: Whether or not the environment can short """ - self.config = config - self.rl_config = config['freqai']['rl_config'] - self.add_state_info = self.rl_config.get('add_state_info', False) - self.id = id - self.max_drawdown = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8) - self.compound_trades = config['stake_amount'] == 'unlimited' + self.config: dict = config + self.rl_config: dict = config['freqai']['rl_config'] + self.add_state_info: bool = self.rl_config.get('add_state_info', False) + self.id: str = id + self.max_drawdown: float = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8) + self.compound_trades: bool = config['stake_amount'] == 'unlimited' + self.pair: str = pair if self.config.get('fee', None) is not None: self.fee = self.config['fee'] else: @@ -74,8 +75,8 @@ class BaseEnvironment(gym.Env): # set here to default 5Ac, but all children envs can override this self.actions: Type[Enum] = BaseActions self.tensorboard_metrics: dict = {} - self.can_short = can_short - self.live = live + self.can_short: bool = can_short + self.live: bool = live if not self.live and self.add_state_info: self.add_state_info = False logger.warning("add_state_info is not available in backtesting. Deactivating.") @@ -93,13 +94,13 @@ class BaseEnvironment(gym.Env): :param reward_kwargs: extra config settings assigned by user in `rl_config` :param starting_point: start at edge of window or not """ - self.df = df - self.signal_features = self.df - self.prices = prices - self.window_size = window_size - self.starting_point = starting_point - self.rr = reward_kwargs["rr"] - self.profit_aim = reward_kwargs["profit_aim"] + self.df: DataFrame = df + self.signal_features: DataFrame = self.df + self.prices: DataFrame = prices + self.window_size: int = window_size + self.starting_point: bool = starting_point + self.rr: float = reward_kwargs["rr"] + self.profit_aim: float = reward_kwargs["profit_aim"] # # spaces if self.add_state_info: diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 3a4d0d0e6..13882660e 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -143,7 +143,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] - env_info = self.pack_env_dict() + env_info = self.pack_env_dict(dk.pair) self.train_env = self.MyRLEnv(df=train_df, prices=prices_train, @@ -158,7 +158,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): actions = self.train_env.get_actions() self.tensorboard_callback = TensorboardCallback(verbose=1, actions=actions) - def pack_env_dict(self) -> Dict[str, Any]: + def pack_env_dict(self, pair: str) -> Dict[str, Any]: """ Create dictionary of environment arguments """ @@ -166,7 +166,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): "reward_kwargs": self.reward_params, "config": self.config, "live": self.live, - "can_short": self.can_short} + "can_short": self.can_short, + "pair": pair} if self.data_provider: env_info["fee"] = self.data_provider._exchange \ .get_fee(symbol=self.data_provider.current_whitelist()[0]) # type: ignore @@ -363,10 +364,19 @@ class BaseReinforcementLearningModel(IFreqaiModel): pnl = self.get_unrealized_profit() factor = 100. 
+ # you can use feature values from dataframe + rsi_now = self.df[f"%-rsi-period-10_shift-1_{self.pair}_" + f"{self.config['timeframe']}"].iloc[self._current_tick] + # reward agent for entering trades if (action in (Actions.Long_enter.value, Actions.Short_enter.value) and self._position == Positions.Neutral): - return 25 + if rsi_now < 40: + factor = 40 / rsi_now + else: + factor = 1 + return 25 * factor + # discourage agent from not entering trades if action == Actions.Neutral.value and self._position == Positions.Neutral: return -1 diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py index a9be87b0b..9ee035c95 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py @@ -34,7 +34,7 @@ class ReinforcementLearner_multiproc(ReinforcementLearner): train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] - env_info = self.pack_env_dict() + env_info = self.pack_env_dict(dk.pair) env_id = "train_env" self.train_env = SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1, diff --git a/setup.cfg b/setup.cfg index 60ec8a75f..8a63004a7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,3 +49,5 @@ exclude = __pycache__, .eggs, user_data, +per-file-ignores = + freqtrade/freqai/RL/BaseReinforcementLearningModel.py:C901 From 154b6711b3725db753232b9e9659ff03f4bc07d9 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 10 Feb 2023 15:26:17 +0100 Subject: [PATCH 2/3] use function level noqa ignore --- freqtrade/freqai/RL/BaseReinforcementLearningModel.py | 2 +- setup.cfg | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 13882660e..8987c35fe 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -348,7 +348,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): sets a custom reward based on profit and trade duration. """ - def calculate_reward(self, action: int) -> float: + def calculate_reward(self, action: int) -> float: # noqa: C901 """ An example reward function. This is the one function that users will likely wish to inject their own creativity into. 
diff --git a/setup.cfg b/setup.cfg index 8a63004a7..60ec8a75f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,5 +49,3 @@ exclude = __pycache__, .eggs, user_data, -per-file-ignores = - freqtrade/freqai/RL/BaseReinforcementLearningModel.py:C901 From 8873a565eec61806d039d853b2e4077671b600ce Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 10 Feb 2023 15:48:18 +0100 Subject: [PATCH 3/3] expose raw features to the environment for use in calculate_reward --- docs/freqai-reinforcement-learning.md | 2 +- freqtrade/freqai/RL/BaseEnvironment.py | 7 ++++--- freqtrade/freqai/RL/BaseReinforcementLearningModel.py | 10 +++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md index 58eb638b8..7e4110b0b 100644 --- a/docs/freqai-reinforcement-learning.md +++ b/docs/freqai-reinforcement-learning.md @@ -177,7 +177,7 @@ As you begin to modify the strategy and the prediction model, you will quickly r factor = 100 # you can use feature values from dataframe - rsi_now = self.df[f"%-rsi-period-10_shift-1_{self.pair}_" + rsi_now = self.raw_features[f"%-rsi-period-10_shift-1_{self.pair}_" f"{self.config['timeframe']}"].iloc[self._current_tick] # reward agent for entering trades diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py index 3d9527d4f..7a4467bf7 100644 --- a/freqtrade/freqai/RL/BaseEnvironment.py +++ b/freqtrade/freqai/RL/BaseEnvironment.py @@ -45,7 +45,8 @@ class BaseEnvironment(gym.Env): def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), reward_kwargs: dict = {}, window_size=10, starting_point=True, id: str = 'baseenv-1', seed: int = 1, config: dict = {}, live: bool = False, - fee: float = 0.0015, can_short: bool = False, pair: str = ""): + fee: float = 0.0015, can_short: bool = False, pair: str = "", + df_raw: DataFrame = DataFrame()): """ Initializes the training/eval environment. 
:param df: dataframe of features @@ -67,6 +68,7 @@ class BaseEnvironment(gym.Env): self.max_drawdown: float = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8) self.compound_trades: bool = config['stake_amount'] == 'unlimited' self.pair: str = pair + self.raw_features: DataFrame = df_raw if self.config.get('fee', None) is not None: self.fee = self.config['fee'] else: @@ -94,8 +96,7 @@ class BaseEnvironment(gym.Env): :param reward_kwargs: extra config settings assigned by user in `rl_config` :param starting_point: start at edge of window or not """ - self.df: DataFrame = df - self.signal_features: DataFrame = self.df + self.signal_features: DataFrame = df self.prices: DataFrame = prices self.window_size: int = window_size self.starting_point: bool = starting_point diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 8987c35fe..a8ef69394 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -1,3 +1,4 @@ +import copy import importlib import logging from abc import abstractmethod @@ -50,6 +51,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): self.eval_callback: Optional[EvalCallback] = None self.model_type = self.freqai_info['rl_config']['model_type'] self.rl_config = self.freqai_info['rl_config'] + self.df_raw: DataFrame = DataFrame() self.continual_learning = self.freqai_info.get('continual_learning', False) if self.model_type in SB3_MODELS: import_str = 'stable_baselines3' @@ -107,6 +109,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): data_dictionary: Dict[str, Any] = dk.make_train_test_datasets( features_filtered, labels_filtered) + self.df_raw = copy.deepcopy(data_dictionary["train_features"]) dk.fit_labels() # FIXME useless for now, but just satiating append methods # normalize all data based on train_dataset only @@ -167,7 +170,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): "config": self.config, "live": self.live, "can_short": self.can_short, - "pair": pair} + "pair": pair, + "df_raw": self.df_raw} if self.data_provider: env_info["fee"] = self.data_provider._exchange \ .get_fee(symbol=self.data_provider.current_whitelist()[0]) # type: ignore @@ -365,8 +369,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): factor = 100. # you can use feature values from dataframe - rsi_now = self.df[f"%-rsi-period-10_shift-1_{self.pair}_" - f"{self.config['timeframe']}"].iloc[self._current_tick] + rsi_now = self.raw_features[f"%-rsi-period-10_shift-1_{self.pair}_" + f"{self.config['timeframe']}"].iloc[self._current_tick] # reward agent for entering trades if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
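
Taken together, the three patches let a user-defined environment read the traded pair (`self.pair`) and the raw, un-normalized feature values (`self.raw_features`) while shaping the reward. Below is a minimal sketch of how a user's prediction model might consume these new attributes, following the pattern from the patched documentation example; the `MyCoolRLModel` class name, the RSI column name, and the reward constants are illustrative assumptions rather than part of this patch series.

```python
from freqtrade.freqai.prediction_models.ReinforcementLearner import ReinforcementLearner
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions


class MyCoolRLModel(ReinforcementLearner):
    """
    Hypothetical user model: the class and file layout are illustrative; only the
    attributes used inside calculate_reward (pair, raw_features, config) come from
    this patch series.
    """

    class MyRLEnv(Base5ActionRLEnv):

        def calculate_reward(self, action: int) -> float:
            # penalize invalid actions first, as in the stock documentation example
            if not self._is_valid(action):
                return -2

            # raw (un-normalized) feature values exposed by patch 3/3; the column
            # name follows the documentation example and assumes the strategy
            # actually produced this RSI feature
            rsi_now = self.raw_features[
                f"%-rsi-period-10_shift-1_{self.pair}_{self.config['timeframe']}"
            ].iloc[self._current_tick]

            # reward entries more strongly when RSI is depressed
            if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
                    and self._position == Positions.Neutral):
                factor = 40 / rsi_now if rsi_now < 40 else 1
                return 25 * factor

            # discourage the agent from sitting idle with no position
            if action == Actions.Neutral.value and self._position == Positions.Neutral:
                return -1

            return 0.
```

Note that `raw_features` is populated from `self.df_raw`, which patch 3/3 copies from the training features before normalization, so a threshold such as `rsi_now < 40` operates on real indicator values rather than normalized ones; `signal_features` continues to hold the normalized data fed to the agent's observations.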