add documentation for net_arch, other small changes

robcaulk 2022-11-26 13:44:03 +01:00
parent 7ebc8ee169
commit aaaa5a5f64
4 changed files with 3 additions and 32 deletions

View File

@@ -81,6 +81,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
| `cpu_count` | Number of threads/CPUs to dedicate to the Reinforcement Learning training process (depending on whether `ReinforcementLearning_multiproc` is selected or not). Recommended to leave this untouched; by default, this value is set to the total number of physical cores minus 1. <br> **Datatype:** int.
| `model_reward_parameters` | Parameters used inside the customizable `calculate_reward()` function in `ReinforcementLearner.py`. <br> **Datatype:** dictionary.
| `add_state_info` | Tell FreqAI to include state information in the feature set for training and inferencing. The current state variables include trade duration, current profit, and trade position. This is only available in dry/live runs and is automatically switched to `False` for backtesting. <br> **Datatype:** bool. <br> Default: `False`.
| `net_arch` | Network architecture, described in detail in the [`stable_baselines3` documentation](https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html#examples). In summary: `[<shared layers>, dict(vf=[<non-shared value network layers>], pi=[<non-shared policy network layers>])]`. The default, `[128, 128]`, defines 2 shared hidden layers of 128 units each (see the sketch below the table). <br> **Datatype:** list. <br> Default: `[128, 128]`.
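As a point of reference, the following is a minimal sketch (not taken from the documentation or codebase) of where `net_arch` and the other parameters above sit in the `rl_config` section of the FreqAI configuration, written as a Python dict that mirrors the JSON config file:

```python
# Minimal sketch, assuming the standard "rl_config" section of the FreqAI config;
# only keys discussed in the table above are shown.
rl_config = {
    "model_type": "PPO",         # default per the schema
    "policy_type": "MlpPolicy",  # default per the schema
    "add_state_info": False,     # dry/live only; forced to False for backtesting
    "net_arch": [128, 128],      # two shared hidden layers of 128 units each
}
```

A split architecture such as `[128, dict(vf=[64], pi=[64])]` would keep one shared 128-unit layer and give the value (`vf`) and policy (`pi`) heads their own 64-unit layers, following the `stable_baselines3` convention linked above.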
### Additional parameters

View File

@@ -34,7 +34,7 @@ Setting up and running a Reinforcement Learning model is the same as running a R
freqtrade trade --freqaimodel ReinforcementLearner --strategy MyRLStrategy --config config.json
```
where `ReinforcementLearner` will use the templated `ReinforcementLearner` from `freqai/prediction_models/ReinforcementLearner`. The strategy, on the other hand, follows the same base [feature engineering](freqai-feature-engineering.md) with `populate_any_indicators` as a typical Regressor:
where `ReinforcementLearner` will use the templated `ReinforcementLearner` from `freqai/prediction_models/ReinforcementLearner` (or a custom, user-defined model located in `user_data/freqaimodels`). The strategy, on the other hand, follows the same base [feature engineering](freqai-feature-engineering.md) with `populate_any_indicators` as a typical Regressor:
```python
def populate_any_indicators(

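For orientation, here is a rough, hypothetical sketch of such a custom user-defined model placed in `user_data/freqaimodels/`. It assumes the usual pattern of subclassing the templated `ReinforcementLearner` and overriding the customizable `calculate_reward()` mentioned in the parameter table; the import paths and the environment attributes used in the reward are assumptions taken from the `Base5ActionRLEnv` code shown later in this commit.

```python
# Hypothetical user_data/freqaimodels/MyCustomRLModel.py -- illustration only.
# Import paths are assumed; adjust them to your freqtrade version.
from freqtrade.freqai.prediction_models.ReinforcementLearner import ReinforcementLearner
from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv


class MyCustomRLModel(ReinforcementLearner):
    """Selected at the command line with `--freqaimodel MyCustomRLModel`."""

    class MyRLEnv(Base5ActionRLEnv):
        def calculate_reward(self, action: int) -> float:
            # Illustrative reward only: mildly penalize long trade durations,
            # using tick attributes visible in Base5ActionRLEnv below.
            if self._last_trade_tick is None:
                return 0.
            duration = self._current_tick - self._last_trade_tick
            return -0.01 * float(duration)
```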
View File

@@ -590,6 +590,7 @@ CONF_SCHEMA = {
    "cpu_count": {"type": "integer", "default": 1},
    "model_type": {"type": "string", "default": "PPO"},
    "policy_type": {"type": "string", "default": "MlpPolicy"},
    "net_arch": {"type": "list", "default": [128, 128]},
    "model_reward_parameters": {
        "type": "object",
        "properties": {

View File

@@ -26,31 +26,6 @@ class Base5ActionRLEnv(BaseEnvironment):
    def set_action_space(self):
        self.action_space = spaces.Discrete(len(Actions))

    def reset(self):
        self._done = False

        if self.starting_point is True:
            self._position_history = (self._start_tick * [None]) + [self._position]
        else:
            self._position_history = (self.window_size * [None]) + [self._position]

        self._current_tick = self._start_tick
        self._last_trade_tick = None
        self._position = Positions.Neutral
        self.total_reward = 0.
        self._total_profit = 1.  # unit
        self.history = {}
        self.trade_history = []
        self.portfolio_log_returns = np.zeros(len(self.prices))
        self._profits = [(self._start_tick, 1)]
        self.close_trade_profit = []
        self._total_unrealized_profit = 1

        return self._get_observation()

    def step(self, action: int):
        """
        Logic for a single step (incrementing one candle in time)
@@ -135,12 +110,6 @@ class Base5ActionRLEnv(BaseEnvironment):
        return observation, step_reward, self._done, info

    def get_trade_duration(self):
        if self._last_trade_tick is None:
            return 0
        else:
            return self._current_tick - self._last_trade_tick

    def is_tradesignal(self, action: int) -> bool:
        """
        Determine if the signal is a trade signal
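To make the `reset()`/`step()` contract above concrete, here is a minimal usage sketch following the standard gym loop; `env` is an assumed, already-constructed `Base5ActionRLEnv` instance (constructor arguments omitted):

```python
# Sketch only: drive an already-constructed environment through one episode,
# using the reset()/step() signatures visible in this diff.
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # a trained agent would choose the action
    obs, reward, done, info = env.step(action)
```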