add randomize_starting_position to the rl_config
This commit is contained in:
parent
64d4a52a56
commit
7fd6bc526e
@ -82,6 +82,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
|
||||
| `model_reward_parameters` | Parameters used inside the customizable `calculate_reward()` function in `ReinforcementLearner.py` <br> **Datatype:** int.
|
||||
| `add_state_info` | Tell FreqAI to include state information in the feature set for training and inferencing. The current state variables include trade duration, current profit, trade position. This is only available in dry/live runs, and is automatically switched to false for backtesting. <br> **Datatype:** bool. <br> Default: `False`.
|
||||
| `net_arch` | Network architecture which is well described in [`stable_baselines3` doc](https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html#examples). In summary: `[<shared layers>, dict(vf=[<non-shared value network layers>], pi=[<non-shared policy network layers>])]`. By default this is set to `[128, 128]`, which defines 2 shared hidden layers with 128 units each.
|
||||
| `randomize_starting_position` | Randomize the starting point of each episode to avoid overfitting. <br> **Datatype:** bool. <br> Default: `False`.
|
||||
|
||||
### Additional parameters
|
||||
|
||||
|
@ -591,6 +591,7 @@ CONF_SCHEMA = {
|
||||
"model_type": {"type": "string", "default": "PPO"},
|
||||
"policy_type": {"type": "string", "default": "MlpPolicy"},
|
||||
"net_arch": {"type": "array", "default": [128, 128]},
|
||||
"randomize_startinng_position": {"type": "boolean", "default": False},
|
||||
"model_reward_parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -122,8 +122,9 @@ class BaseEnvironment(gym.Env):
|
||||
self._done = False
|
||||
|
||||
if self.starting_point is True:
|
||||
length_of_data = int(self._end_tick/4)
|
||||
start_tick = random.randint(self.window_size+1, length_of_data)
|
||||
if self.rl_config.get('randomize_starting_position', False):
|
||||
length_of_data = int(self._end_tick / 4)
|
||||
start_tick = random.randint(self.window_size + 1, length_of_data)
|
||||
self._start_tick = start_tick
|
||||
self._position_history = (self._start_tick * [None]) + [self._position]
|
||||
else:
|
||||
|
Loading…
Reference in New Issue
Block a user