From aaaa5a5f64dd4b1dec7d81fa0f1e7e2ede11f963 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 26 Nov 2022 13:44:03 +0100
Subject: [PATCH] add documentation for net_arch, other small changes

---
 docs/freqai-parameter-table.md          |  1 +
 docs/freqai-reinforcement-learning.md   |  2 +-
 freqtrade/constants.py                  |  1 +
 freqtrade/freqai/RL/Base5ActionRLEnv.py | 31 -------------------------
 4 files changed, 3 insertions(+), 32 deletions(-)

diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md
index 084c9118c..02426ec13 100644
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -81,6 +81,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 | `cpu_count` | Number of threads/cpus to dedicate to the Reinforcement Learning training process (depending on if `ReinforcementLearning_multiproc` is selected or not). Recommended to leave this untouched, by default, this value is set to the total number of physical cores minus 1. <br> **Datatype:** int.
 | `model_reward_parameters` | Parameters used inside the customizable `calculate_reward()` function in `ReinforcementLearner.py` <br> **Datatype:** int.
 | `add_state_info` | Tell FreqAI to include state information in the feature set for training and inferencing. The current state variables include trade duration, current profit, trade position. This is only available in dry/live runs, and is automatically switched to false for backtesting. <br> **Datatype:** bool. <br> Default: `False`.
+| `net_arch` | Network architecture which is well described in [`stable_baselines3` doc](https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html#examples). In summary: `[<shared layers>, dict(vf=[<non-shared value network layers>], pi=[<non-shared policy network layers>])]`. By default this is set to `[128, 128]`, which defines 2 shared hidden layers with 128 units each.
 
 ### Additional parameters
 
diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md
index 6bcba96ff..241ccc3e2 100644
--- a/docs/freqai-reinforcement-learning.md
+++ b/docs/freqai-reinforcement-learning.md
@@ -34,7 +34,7 @@ Setting up and running a Reinforcement Learning model is the same as running a R
 freqtrade trade --freqaimodel ReinforcementLearner --strategy MyRLStrategy --config config.json
 ```
 
-where `ReinforcementLearner` will use the templated `ReinforcementLearner` from `freqai/prediction_models/ReinforcementLearner`. The strategy, on the other hand, follows the same base [feature engineering](freqai-feature-engineering.md) with `populate_any_indicators` as a typical Regressor:
+where `ReinforcementLearner` will use the templated `ReinforcementLearner` from `freqai/prediction_models/ReinforcementLearner` (or a custom user-defined one located in `user_data/freqaimodels`). The strategy, on the other hand, follows the same base [feature engineering](freqai-feature-engineering.md) with `populate_any_indicators` as a typical Regressor:
 
 ```python
     def populate_any_indicators(
diff --git a/freqtrade/constants.py b/freqtrade/constants.py
index ba43e1328..3d7dbb13e 100644
--- a/freqtrade/constants.py
+++ b/freqtrade/constants.py
@@ -590,6 +590,7 @@ CONF_SCHEMA = {
                     "cpu_count": {"type": "integer", "default": 1},
                     "model_type": {"type": "string", "default": "PPO"},
                     "policy_type": {"type": "string", "default": "MlpPolicy"},
+                    "net_arch": {"type": "array", "default": [128, 128]},
                     "model_reward_parameters": {
                         "type": "object",
                         "properties": {
diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py
index 61abb8031..ee43ac868 100644
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -26,31 +26,6 @@ class Base5ActionRLEnv(BaseEnvironment):
     def set_action_space(self):
         self.action_space = spaces.Discrete(len(Actions))
 
-    def reset(self):
-
-        self._done = False
-
-        if self.starting_point is True:
-            self._position_history = (self._start_tick * [None]) + [self._position]
-        else:
-            self._position_history = (self.window_size * [None]) + [self._position]
-
-        self._current_tick = self._start_tick
-        self._last_trade_tick = None
-        self._position = Positions.Neutral
-
-        self.total_reward = 0.
-        self._total_profit = 1.  # unit
-        self.history = {}
-        self.trade_history = []
-        self.portfolio_log_returns = np.zeros(len(self.prices))
-
-        self._profits = [(self._start_tick, 1)]
-        self.close_trade_profit = []
-        self._total_unrealized_profit = 1
-
-        return self._get_observation()
-
     def step(self, action: int):
         """
         Logic for a single step (incrementing one candle in time)
@@ -135,12 +110,6 @@ class Base5ActionRLEnv(BaseEnvironment):
 
         return observation, step_reward, self._done, info
 
-    def get_trade_duration(self):
-        if self._last_trade_tick is None:
-            return 0
-        else:
-            return self._current_tick - self._last_trade_tick
-
     def is_tradesignal(self, action: int) -> bool:
         """
         Determine if the signal is a trade signal
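As a brief illustration of what the documented `net_arch` option controls, the sketch below shows how a `[128, 128]` value is typically handed to a `stable_baselines3` model through `policy_kwargs`. This is a minimal, self-contained example assuming `stable_baselines3` and `gymnasium` are installed; the toy `CartPole-v1` environment and the inline `rl_config` dict are illustrative stand-ins, not FreqAI's actual trading environment or the exact wiring inside `ReinforcementLearner.py`.

```python
# Minimal sketch (assumes stable_baselines3 and gymnasium are installed).
# A net_arch of [128, 128] requests two hidden layers of 128 units each,
# forwarded to the model via policy_kwargs.
import gymnasium as gym
from stable_baselines3 import PPO

# Illustrative stand-in for the relevant keys of the freqai "rl_config" section.
rl_config = {
    "model_type": "PPO",
    "policy_type": "MlpPolicy",
    "net_arch": [128, 128],
}

env = gym.make("CartPole-v1")  # toy environment standing in for the trading env
model = PPO(
    rl_config["policy_type"],
    env,
    policy_kwargs={"net_arch": rl_config["net_arch"]},
    verbose=0,
)
model.learn(total_timesteps=1_000)  # short run just to exercise the architecture
```

A user who wants deeper or split policy/value networks would change only the `net_arch` entry in the config, following the `stable_baselines3` custom-policy documentation linked in the parameter table.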