Improve the training by randomly setting the starting point
This improves any kind of RL training: the starting point is sampled at random from within the first third of the data, which ensures that the model does not optimize its policy or Q-values for one fixed timerange only.
parent bf4d5b432a
commit 926f870fb5
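To make the change concrete outside the diff below, here is a minimal, self-contained sketch of the technique, not the actual freqtrade BaseEnvironment: a gym-style reset() that samples the episode start tick from the first third of the price series. The names prices, window_size, and starting_point mirror the diff; the RandomStartEnv class and everything around it is illustrative only.

import random

import numpy as np


class RandomStartEnv:
    """Minimal sketch of an environment with a randomized episode start."""

    def __init__(self, prices: np.ndarray, window_size: int, starting_point: bool = True):
        self.prices = prices
        self.window_size = window_size
        self.starting_point = starting_point
        self._start_tick = window_size  # fixed default when randomization is off

    def reset(self) -> np.ndarray:
        if self.starting_point:
            # Sample the start from the first third of the data, so every episode
            # still has most of the remaining timerange ahead of it to train on.
            # Assumes len(prices) > 3 * (window_size + 1).
            length_of_data = int(len(self.prices) / 3)
            self._start_tick = random.randint(self.window_size + 1, length_of_data)
        self._current_tick = self._start_tick
        # First observation: the price window ending at the start tick.
        return self.prices[self._current_tick - self.window_size:self._current_tick]


env = RandomStartEnv(prices=np.random.rand(3000), window_size=50)
obs = env.reset()  # a different start tick on every reset() call

Because each reset() draws a fresh start tick, successive episodes cover different slices of the timerange, which is exactly what keeps the policy or Q-values from specializing on one fixed window.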
@@ -2,7 +2,8 @@ import logging
 from abc import abstractmethod
 from enum import Enum
 from typing import Optional
 
+import random
 import gym
 import numpy as np
 import pandas as pd
@@ -121,6 +121,9 @@ class BaseEnvironment(gym.Env):
         self._done = False
 
         if self.starting_point is True:
+            length_of_data = int(len(self.prices)/3)  # 1/3 looks okay for both short and long training timeranges
+            start_tick = random.randint(self.window_size+1, length_of_data)
+            self._start_tick = start_tick
             self._position_history = (self._start_tick * [None]) + [self._position]
         else:
             self._position_history = (self.window_size * [None]) + [self._position]
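One assumption baked into the new code, rather than anything the commit itself states: random.randint(a, b) requires a <= b, so on a timerange so short that int(len(self.prices)/3) <= self.window_size + 1 the call raises ValueError. A hypothetical guard, not part of the commit, could clamp the upper bound so reset() degrades to a deterministic start just past the first window instead of failing:

# Hypothetical guard, not in the commit: fall back to a fixed start
# when the timerange is too short to randomize.
length_of_data = max(int(len(self.prices) / 3), self.window_size + 1)
start_tick = random.randint(self.window_size + 1, length_of_data)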