add documentation for net_arch, other small changes
parent 7ebc8ee169
commit aaaa5a5f64
@@ -81,6 +81,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 | `cpu_count` | Number of threads/CPUs to dedicate to the Reinforcement Learning training process (depending on whether `ReinforcementLearning_multiproc` is selected or not). It is recommended to leave this untouched; by default, this value is set to the total number of physical cores minus 1. <br> **Datatype:** int.
 | `model_reward_parameters` | Parameters used inside the customizable `calculate_reward()` function in `ReinforcementLearner.py`. <br> **Datatype:** int.
 | `add_state_info` | Tell FreqAI to include state information in the feature set for training and inferencing. The current state variables include trade duration, current profit, and trade position. This is only available in dry/live runs, and is automatically switched to false for backtesting. <br> **Datatype:** bool. <br> Default: `False`.
+| `net_arch` | Network architecture, which is well described in the [`stable_baselines3` docs](https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html#examples). In summary: `[<shared layers>, dict(vf=[<non-shared value network layers>], pi=[<non-shared policy network layers>])]`. By default this is set to `[128, 128]`, which defines 2 shared hidden layers with 128 units each.

 ### Additional parameters

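To make the `[<shared layers>, dict(vf=..., pi=...)]` summary in the new `net_arch` row concrete, here is a minimal sketch of how such an architecture is handed to a `stable_baselines3` model through `policy_kwargs`. The `PPO`/`MlpPolicy` choice and the CartPole stand-in environment are illustrative assumptions, and the exact accepted `net_arch` forms depend on the installed `stable_baselines3` version:

```python
import gym
from stable_baselines3 import PPO

# Stand-in environment purely for illustration; FreqAI supplies its own
# trading environment in practice.
env = gym.make("CartPole-v1")

# Default shape from the table above: two shared hidden layers of 128 units.
shared = dict(net_arch=[128, 128])

# Split shape: one shared 128-unit layer, then separate value (vf) and
# policy (pi) heads, mirroring `[<shared>, dict(vf=[...], pi=[...])]`.
split = dict(net_arch=[128, dict(vf=[64, 64], pi=[64, 64])])

# Pass `split` instead of `shared` to try the non-shared heads.
model = PPO("MlpPolicy", env, policy_kwargs=shared, verbose=0)
model.learn(total_timesteps=1_000)
```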
@@ -34,7 +34,7 @@ Setting up and running a Reinforcement Learning model is the same as running a R
 freqtrade trade --freqaimodel ReinforcementLearner --strategy MyRLStrategy --config config.json
 ```

-where `ReinforcementLearner` will use the templated `ReinforcementLearner` from `freqai/prediction_models/ReinforcementLearner`. The strategy, on the other hand, follows the same base [feature engineering](freqai-feature-engineering.md) with `populate_any_indicators` as a typical Regressor:
+where `ReinforcementLearner` will use the templated `ReinforcementLearner` from `freqai/prediction_models/ReinforcementLearner` (or a custom user defined one located in `user_data/freqaimodels`). The strategy, on the other hand, follows the same base [feature engineering](freqai-feature-engineering.md) with `populate_any_indicators` as a typical Regressor:

 ```python
 def populate_any_indicators(
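Because the changed line above now points out that a custom user defined model can live in `user_data/freqaimodels`, here is a rough sketch of what such a file might look like. The file/class names (`MyCoolRLModel`, the nested `MyRLEnv`), the config attribute lookup, and the reward key `exit_bonus` are hypothetical, and the reward logic is a toy; the shipped `ReinforcementLearner` template remains the reference for a realistic `calculate_reward()`:

```python
# user_data/freqaimodels/MyCoolRLModel.py  (hypothetical file name)
from freqtrade.freqai.prediction_models.ReinforcementLearner import ReinforcementLearner
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv


class MyCoolRLModel(ReinforcementLearner):
    """Custom RL model, selectable with `--freqaimodel MyCoolRLModel`."""

    class MyRLEnv(Base5ActionRLEnv):
        def calculate_reward(self, action: int) -> float:
            # Values documented as `model_reward_parameters` are expected to be
            # reachable from the environment's config (attribute name assumed).
            params = getattr(self, "rl_config", {}).get("model_reward_parameters", {})
            exit_bonus = params.get("exit_bonus", 1.0)  # hypothetical reward key
            # Toy logic only: favour closing positions, mildly penalise everything else.
            if action in (Actions.Long_exit.value, Actions.Short_exit.value):
                return float(exit_bonus)
            return -0.01
```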
@@ -590,6 +590,7 @@ CONF_SCHEMA = {
 "cpu_count": {"type": "integer", "default": 1},
 "model_type": {"type": "string", "default": "PPO"},
 "policy_type": {"type": "string", "default": "MlpPolicy"},
+"net_arch": {"type": "list", "default": [128, 128]},
 "model_reward_parameters": {
 "type": "object",
 "properties": {
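For orientation, the schema keys in this hunk map onto an `rl_config` block of the user configuration (presumably nested under the `freqai` section). The fragment below is a sketch written as a Python dict; the values, and the reward keys under `model_reward_parameters`, are illustrative rather than a fixed set:

```python
# Illustrative `rl_config` fragment matching the schema keys above.
rl_config = {
    "cpu_count": 4,
    "model_type": "PPO",
    "policy_type": "MlpPolicy",
    "net_arch": [128, 128],
    "model_reward_parameters": {"rr": 1, "profit_aim": 0.025},
}
```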
@@ -26,31 +26,6 @@ class Base5ActionRLEnv(BaseEnvironment):
     def set_action_space(self):
         self.action_space = spaces.Discrete(len(Actions))

-    def reset(self):
-
-        self._done = False
-
-        if self.starting_point is True:
-            self._position_history = (self._start_tick * [None]) + [self._position]
-        else:
-            self._position_history = (self.window_size * [None]) + [self._position]
-
-        self._current_tick = self._start_tick
-        self._last_trade_tick = None
-        self._position = Positions.Neutral
-
-        self.total_reward = 0.
-        self._total_profit = 1.  # unit
-        self.history = {}
-        self.trade_history = []
-        self.portfolio_log_returns = np.zeros(len(self.prices))
-
-        self._profits = [(self._start_tick, 1)]
-        self.close_trade_profit = []
-        self._total_unrealized_profit = 1
-
-        return self._get_observation()
-
     def step(self, action: int):
         """
         Logic for a single step (incrementing one candle in time)
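For a quick sense of the gym-style `reset()`/`step()` contract visible in this hunk, here is a minimal episode loop. The helper is illustrative only (in practice `stable_baselines3` drives the environment during training, not a hand-written loop), and it assumes an already-constructed environment instance:

```python
def run_random_episode(env) -> float:
    """Roll one episode through a gym-style trading env such as Base5ActionRLEnv."""
    obs = env.reset()                                # initial observation window
    done, total_reward = False, 0.0
    while not done:
        action = env.action_space.sample()           # random action, purely illustrative
        obs, reward, done, info = env.step(action)   # advance one candle
        total_reward += reward
    return total_reward
```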
@@ -135,12 +110,6 @@ class Base5ActionRLEnv(BaseEnvironment):

         return observation, step_reward, self._done, info

-    def get_trade_duration(self):
-        if self._last_trade_tick is None:
-            return 0
-        else:
-            return self._current_tick - self._last_trade_tick
-
     def is_tradesignal(self, action: int) -> bool:
         """
         Determine if the signal is a trade signal