Improve the training by randomly setting the starting point

This is an improvement for all kinds of RL training: the starting point is set randomly within the first third of the data, which ensures that the model does not overfit the policy or Q-values to one fixed, chosen timerange.
This commit is contained in:
richardjozsa 2022-11-15 00:16:04 +01:00
parent bf4d5b432a
commit 926f870fb5

View File

@ -2,7 +2,7 @@ import logging
from abc import abstractmethod
from enum import Enum
from typing import Optional
import random
import gym
import numpy as np
import pandas as pd
@ -121,6 +121,9 @@ class BaseEnvironment(gym.Env):
self._done = False
if self.starting_point is True:
length_of_data = int(len(self.prices)/3) # 1/3 looks okay in any case of short or long training timerange
start_tick = random.randint(self.window_size+1, length_of_data)
self._start_tick = start_tick
self._position_history = (self._start_tick * [None]) + [self._position]
else:
self._position_history = (self.window_size * [None]) + [self._position]