diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py
index 84a82c5de..2b1c4f975 100644
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -62,7 +62,7 @@ class Base5ActionRLEnv(gym.Env):
         self.fee = 0.0015

         # # spaces
-        self.shape = (window_size, self.signal_features.shape[1] + 2)
+        self.shape = (window_size, self.signal_features.shape[1] + 3)
         self.action_space = spaces.Discrete(len(Actions))
         self.observation_space = spaces.Box(
             low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)
@@ -184,15 +184,22 @@ class Base5ActionRLEnv(gym.Env):
     def _get_observation(self):
         features_window = self.signal_features[(
             self._current_tick - self.window_size):self._current_tick]
-        features_and_state = DataFrame(np.zeros((len(features_window), 2)),
-                                       columns=['current_profit_pct', 'position'],
+        features_and_state = DataFrame(np.zeros((len(features_window), 3)),
+                                       columns=['current_profit_pct', 'position', 'trade_duration'],
                                        index=features_window.index)

         features_and_state['current_profit_pct'] = self.get_unrealized_profit()
         features_and_state['position'] = self._position.value
+        features_and_state['trade_duration'] = self.get_trade_duration()
         features_and_state = pd.concat([features_window, features_and_state], axis=1)
         return features_and_state

+    def get_trade_duration(self):
+        if self._last_trade_tick is None:
+            return 0
+        else:
+            return self._current_tick - self._last_trade_tick
+
     def get_unrealized_profit(self):

         if self._last_trade_tick is None:
diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
index 360cbf9d4..6a15b96f9 100644
--- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
+++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
@@ -16,6 +16,7 @@ from stable_baselines3.common.callbacks import EvalCallback
 from stable_baselines3.common.monitor import Monitor
 import torch as th
 from typing import Callable
+from datetime import datetime, timezone
 from stable_baselines3.common.utils import set_random_seed
 import gym
 logger = logging.getLogger(__name__)
@@ -140,23 +141,27 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         open_trades = Trade.get_trades_proxy(is_open=True)
         market_side = 0.5
         current_profit = 0
+        trade_duration = 0
         for trade in open_trades:
             if trade.pair == pair:
-                current_value = self.strategy.dp._exchange.get_rate(pair, refresh=False)  # , side="buy", is_short=True)
+                current_value = self.strategy.dp._exchange.get_rate(
+                    pair, refresh=False, side="exit", is_short=trade.is_short)
                 openrate = trade.open_rate
+                now = datetime.now(timezone.utc).timestamp()
+                trade_duration = (now - trade.open_date.timestamp()) / self.base_tf_seconds
                 if 'long' in trade.enter_tag:
                     market_side = 1
                     current_profit = (current_value - openrate) / openrate
                 else:
                     market_side = 0
-                    current_profit = (openrate - current_value ) / openrate
+                    current_profit = (openrate - current_value) / openrate

-        total_profit = 0
-        closed_trades = Trade.get_trades_proxy(pair=pair, is_open=False)
-        for trade in closed_trades:
-            total_profit += trade.close_profit
+        # total_profit = 0
+        # closed_trades = Trade.get_trades_proxy(pair=pair, is_open=False)
+        # for trade in closed_trades:
+        #     total_profit += trade.close_profit

-        return market_side, current_profit, total_profit
+        return market_side, current_profit, int(trade_duration)

     def predict(
         self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
@@ -192,10 +197,11 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         output = pd.DataFrame(np.zeros(len(dataframe)), columns=dk.label_list)

         def _predict(window):
-            market_side, current_profit, total_profit = self.get_state_info(dk.pair)
+            market_side, current_profit, trade_duration = self.get_state_info(dk.pair)
             observations = dataframe.iloc[window.index]
             observations['current_profit'] = current_profit
             observations['position'] = market_side
+            observations['trade_duration'] = trade_duration
             res, _ = model.predict(observations, deterministic=True)
             return res
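
Note on the observation layout (commentary, not part of the patch): the `+ 2` -> `+ 3` change in
self.shape and the third state column added in _get_observation() must stay in sync, or the Box
observation space will reject the observations. A minimal self-contained sketch of that invariant;
the concrete values for window_size, signal_features, and the tick counters below are stand-ins
for illustration, not taken from the codebase:

    import numpy as np
    import pandas as pd

    window_size, n_features = 10, 5
    signal_features = pd.DataFrame(np.random.randn(100, n_features))

    # Stand-ins for self._current_tick / self._last_trade_tick.
    current_tick, last_trade_tick = 50, 42
    features_window = signal_features[current_tick - window_size:current_tick]

    # Mirrors _get_observation() after this patch: three state columns.
    state = pd.DataFrame(np.zeros((len(features_window), 3)),
                         columns=['current_profit_pct', 'position', 'trade_duration'],
                         index=features_window.index)
    state['trade_duration'] = current_tick - last_trade_tick  # get_trade_duration()

    obs = pd.concat([features_window, state], axis=1)
    assert obs.shape == (window_size, n_features + 3)  # matches the new self.shape

On the get_state_info() side, dividing by self.base_tf_seconds expresses the live trade duration
in candle units so that it is comparable with the env's tick-based get_trade_duration(): for
example, a trade open for two hours on a 5m timeframe gives 7200 / 300 = 24 candles, and the
int() cast matches the integer tick difference used in training.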