From 64d4a52a5615ff9d5ddc2be693d8a79c002d0c9f Mon Sep 17 00:00:00 2001
From: richardjozsa <richard.jozsa93@gmail.com>
Date: Sun, 27 Nov 2022 20:43:50 +0100
Subject: [PATCH 1/3] Improve the RL learning process

Improve the RL learning process by selecting a random starting point for the agent at the beginning of each episode. This prevents the agent from learning only on one fixed period of time, which improves the quality of the model.
---
 freqtrade/freqai/RL/BaseEnvironment.py | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
index 3332e5a18..5d881ba32 100644
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -9,6 +9,7 @@ import pandas as pd
 from gym import spaces
 from gym.utils import seeding
 from pandas import DataFrame
+import random
 
 from freqtrade.data.dataprovider import DataProvider
 
@@ -121,6 +122,9 @@ class BaseEnvironment(gym.Env):
         self._done = False
 
         if self.starting_point is True:
+            length_of_data = int(self._end_tick/4)
+            start_tick = random.randint(self.window_size+1, length_of_data)
+            self._start_tick = start_tick
             self._position_history = (self._start_tick * [None]) + [self._position]
         else:
             self._position_history = (self.window_size * [None]) + [self._position]

From 7fd6bc526e38537a8595abcbe562af6ac6f53729 Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Sun, 27 Nov 2022 21:03:13 +0100
Subject: [PATCH 2/3] add randomize_starting_position to the rl_config

---
 docs/freqai-parameter-table.md         | 1 +
 freqtrade/constants.py                 | 1 +
 freqtrade/freqai/RL/BaseEnvironment.py | 7 ++++---
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md
index 02426ec13..f2a52a9b8 100644
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -82,6 +82,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 | `model_reward_parameters` | Parameters used inside the customizable `calculate_reward()` function in `ReinforcementLearner.py` <br> **Datatype:** int.
 | `add_state_info` | Tell FreqAI to include state information in the feature set for training and inferencing. The current state variables include trade duration, current profit, trade position. This is only available in dry/live runs, and is automatically switched to false for backtesting. <br> **Datatype:** bool. <br> Default: `False`.
 | `net_arch` | Network architecture which is well described in [`stable_baselines3` doc](https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html#examples). In summary: `[<shared layers>, dict(vf=[<non-shared value network layers>], pi=[<non-shared policy network layers>])]`. By default this is set to `[128, 128]`, which defines 2 shared hidden layers with 128 units each.
+| `randomize_starting_position` | Randomize the starting point of each episode to avoid overfitting. <br> **Datatype:** bool. <br> Default: `False`.
 
 ### Additional parameters
 
diff --git a/freqtrade/constants.py b/freqtrade/constants.py
index 878c38929..d869b89f6 100644
--- a/freqtrade/constants.py
+++ b/freqtrade/constants.py
@@ -591,6 +591,7 @@ CONF_SCHEMA = {
                         "model_type": {"type": "string", "default": "PPO"},
                         "policy_type": {"type": "string", "default": "MlpPolicy"},
                         "net_arch": {"type": "array", "default": [128, 128]},
+                        "randomize_starting_position": {"type": "boolean", "default": False},
                         "model_reward_parameters": {
                             "type": "object",
                             "properties": {
diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
index 5d881ba32..8f940dd1b 100644
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -122,9 +122,10 @@ class BaseEnvironment(gym.Env):
         self._done = False
 
         if self.starting_point is True:
-            length_of_data = int(self._end_tick/4)
-            start_tick = random.randint(self.window_size+1, length_of_data)
-            self._start_tick = start_tick
+            if self.rl_config.get('randomize_starting_position', False):
+                length_of_data = int(self._end_tick / 4)
+                start_tick = random.randint(self.window_size + 1, length_of_data)
+                self._start_tick = start_tick
             self._position_history = (self._start_tick * [None]) + [self._position]
         else:
             self._position_history = (self.window_size * [None]) + [self._position]

From 56518def42fab1fd3d89f12bcda281a1eff11ef7 Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Sun, 27 Nov 2022 21:06:01 +0100
Subject: [PATCH 3/3] isort

---
 freqtrade/freqai/RL/BaseEnvironment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/freqtrade/freqai/RL/BaseEnvironment.py b/freqtrade/freqai/RL/BaseEnvironment.py
index 8f940dd1b..66bdb8435 100644
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@@ -1,4 +1,5 @@
 import logging
+import random
 from abc import abstractmethod
 from enum import Enum
 from typing import Optional
@@ -9,7 +10,6 @@ import pandas as pd
 from gym import spaces
 from gym.utils import seeding
 from pandas import DataFrame
-import random
 
 from freqtrade.data.dataprovider import DataProvider