add documentation for net_arch, other small changes

2022-11-26 13:44:03 +01:00
parent 7ebc8ee169
commit aaaa5a5f64
4 changed files with 3 additions and 32 deletions
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -81,6 +81,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 | `cpu_count` | Number of threads/cpus to dedicate to the Reinforcement Learning training process (depending on if `ReinforcementLearning_multiproc` is selected or not). Recommended to leave this untouched, by default, this value is set to the total number of physical cores minus 1. <br> **Datatype:** int. 
 | `model_reward_parameters` | Parameters used inside the customizable `calculate_reward()` function in `ReinforcementLearner.py` <br> **Datatype:** int.
 | `add_state_info` | Tell FreqAI to include state information in the feature set for training and inferencing. The current state variables include trade duration, current profit, trade position. This is only available in dry/live runs, and is automatically switched to false for backtesting. <br> **Datatype:** bool. <br> Default: `False`.
+| `net_arch` | Network architecture, described in detail in the [`stable_baselines3` documentation](https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html#examples). In summary: `[<shared layers>, dict(vf=[<non-shared value network layers>], pi=[<non-shared policy network layers>])]`. By default this is set to `[128, 128]`, which defines 2 shared hidden layers with 128 units each. <br> **Datatype:** list.

 ### Additional parameters

--- a/docs/freqai-reinforcement-learning.md
+++ b/docs/freqai-reinforcement-learning.md
@@ -34,7 +34,7 @@ Setting up and running a Reinforcement Learning model is the same as running a R
 freqtrade trade --freqaimodel ReinforcementLearner --strategy MyRLStrategy --config config.json
 ```

-where `ReinforcementLearner` will use the templated `ReinforcementLearner` from `freqai/prediction_models/ReinforcementLearner`. The strategy, on the other hand, follows the same base [feature engineering](freqai-feature-engineering.md) with `populate_any_indicators` as a typical Regressor:
+where `ReinforcementLearner` will use the templated `ReinforcementLearner` from `freqai/prediction_models/ReinforcementLearner` (or a custom user-defined one located in `user_data/freqaimodels`). The strategy, on the other hand, follows the same base [feature engineering](freqai-feature-engineering.md) with `populate_any_indicators` as a typical Regressor:

 ```python
    def populate_any_indicators(
--- a/freqtrade/constants.py
+++ b/freqtrade/constants.py
@@ -590,6 +590,7 @@ CONF_SCHEMA = {
                        "cpu_count": {"type": "integer", "default": 1},
                        "model_type": {"type": "string", "default": "PPO"},
                        "policy_type": {"type": "string", "default": "MlpPolicy"},
+                        "net_arch": {"type": "array", "default": [128, 128]},
                        "model_reward_parameters": {
                            "type": "object",
                            "properties": {
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -26,31 +26,6 @@ class Base5ActionRLEnv(BaseEnvironment):
    def set_action_space(self):
        self.action_space = spaces.Discrete(len(Actions))

-    def reset(self):
-
-        self._done = False
-
-        if self.starting_point is True:
-            self._position_history = (self._start_tick * [None]) + [self._position]
-        else:
-            self._position_history = (self.window_size * [None]) + [self._position]
-
-        self._current_tick = self._start_tick
-        self._last_trade_tick = None
-        self._position = Positions.Neutral
-
-        self.total_reward = 0.
-        self._total_profit = 1.  # unit
-        self.history = {}
-        self.trade_history = []
-        self.portfolio_log_returns = np.zeros(len(self.prices))
-
-        self._profits = [(self._start_tick, 1)]
-        self.close_trade_profit = []
-        self._total_unrealized_profit = 1
-
-        return self._get_observation()
-
    def step(self, action: int):
        """
        Logic for a single step (incrementing one candle in time)
@@ -135,12 +110,6 @@ class Base5ActionRLEnv(BaseEnvironment):

        return observation, step_reward, self._done, info

-    def get_trade_duration(self):
-        if self._last_trade_tick is None:
-            return 0
-        else:
-            return self._current_tick - self._last_trade_tick
-
    def is_tradesignal(self, action: int) -> bool:
        """
        Determine if the signal is a trade signal