expose environment reward parameters to the user config
This commit is contained in:
parent
d88a0dbf82
commit
29f0e01c4a
@ -92,7 +92,8 @@
|
|||||||
"policy_type": "MlpPolicy",
|
"policy_type": "MlpPolicy",
|
||||||
"model_reward_parameters": {
|
"model_reward_parameters": {
|
||||||
"rr": 1,
|
"rr": 1,
|
||||||
"profit_aim": 0.02
|
"profit_aim": 0.02,
|
||||||
|
"win_reward_factor": 2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -42,9 +42,10 @@ class Base5ActionRLEnv(gym.Env):
|
|||||||
|
|
||||||
def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
|
def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
|
||||||
reward_kwargs: dict = {}, window_size=10, starting_point=True,
|
reward_kwargs: dict = {}, window_size=10, starting_point=True,
|
||||||
id: str = 'baseenv-1', seed: int = 1):
|
id: str = 'baseenv-1', seed: int = 1, config: dict = {}):
|
||||||
assert df.ndim == 2
|
assert df.ndim == 2
|
||||||
|
|
||||||
|
self.rl_config = config['freqai']['rl_config']
|
||||||
self.id = id
|
self.id = id
|
||||||
self.seed(seed)
|
self.seed(seed)
|
||||||
self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
|
self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
|
||||||
@ -268,7 +269,7 @@ class Base5ActionRLEnv(gym.Env):
|
|||||||
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
|
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
|
||||||
factor = 1
|
factor = 1
|
||||||
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
||||||
factor = 2
|
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||||
return float((np.log(current_price) - np.log(last_trade_price)) * factor)
|
return float((np.log(current_price) - np.log(last_trade_price)) * factor)
|
||||||
|
|
||||||
# close short
|
# close short
|
||||||
@ -277,7 +278,7 @@ class Base5ActionRLEnv(gym.Env):
|
|||||||
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
|
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
|
||||||
factor = 1
|
factor = 1
|
||||||
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
||||||
factor = 2
|
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||||
return float(np.log(last_trade_price) - np.log(current_price) * factor)
|
return float(np.log(last_trade_price) - np.log(current_price) * factor)
|
||||||
|
|
||||||
return 0.
|
return 0.
|
||||||
|
@ -110,10 +110,10 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
|||||||
# environments
|
# environments
|
||||||
if not self.train_env:
|
if not self.train_env:
|
||||||
self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
|
self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
|
||||||
reward_kwargs=self.reward_params)
|
reward_kwargs=self.reward_params, config=self.config)
|
||||||
self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test,
|
self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test,
|
||||||
window_size=self.CONV_WIDTH,
|
window_size=self.CONV_WIDTH,
|
||||||
reward_kwargs=self.reward_params), ".")
|
reward_kwargs=self.reward_params, config=self.config), ".")
|
||||||
self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
|
self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
|
||||||
render=False, eval_freq=eval_freq,
|
render=False, eval_freq=eval_freq,
|
||||||
best_model_save_path=dk.data_path)
|
best_model_save_path=dk.data_path)
|
||||||
@ -239,7 +239,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
|
|||||||
|
|
||||||
|
|
||||||
def make_env(env_id: str, rank: int, seed: int, train_df, price,
|
def make_env(env_id: str, rank: int, seed: int, train_df, price,
|
||||||
reward_params, window_size, monitor=False) -> Callable:
|
reward_params, window_size, monitor=False, config={}) -> Callable:
|
||||||
"""
|
"""
|
||||||
Utility function for multiprocessed env.
|
Utility function for multiprocessed env.
|
||||||
|
|
||||||
@ -252,7 +252,7 @@ def make_env(env_id: str, rank: int, seed: int, train_df, price,
|
|||||||
def _init() -> gym.Env:
|
def _init() -> gym.Env:
|
||||||
|
|
||||||
env = MyRLEnv(df=train_df, prices=price, window_size=window_size,
|
env = MyRLEnv(df=train_df, prices=price, window_size=window_size,
|
||||||
reward_kwargs=reward_params, id=env_id, seed=seed + rank)
|
reward_kwargs=reward_params, id=env_id, seed=seed + rank, config=config)
|
||||||
if monitor:
|
if monitor:
|
||||||
env = Monitor(env, ".")
|
env = Monitor(env, ".")
|
||||||
return env
|
return env
|
||||||
@ -277,16 +277,16 @@ class MyRLEnv(Base5ActionRLEnv):
|
|||||||
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
|
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
|
||||||
factor = 1
|
factor = 1
|
||||||
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
||||||
factor = 2
|
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||||
return float((np.log(current_price) - np.log(last_trade_price)) * factor)
|
return float((np.log(current_price) - np.log(last_trade_price)) * factor)
|
||||||
|
|
||||||
# close short
|
# close short
|
||||||
if action == Actions.Short_exit.value and self._position == Positions.Short:
|
if action == Actions.Short_exit.value and self._position == Positions.Short:
|
||||||
last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
|
last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
|
||||||
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
|
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
|
||||||
factor = 1
|
factor = 1
|
||||||
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
||||||
factor = 2
|
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||||
return float(np.log(last_trade_price) - np.log(current_price) * factor)
|
return float(np.log(last_trade_price) - np.log(current_price) * factor)
|
||||||
|
|
||||||
return 0.
|
return 0.
|
||||||
|
@ -57,26 +57,20 @@ class MyRLEnv(Base5ActionRLEnv):
|
|||||||
|
|
||||||
# close long
|
# close long
|
||||||
if action == Actions.Long_exit.value and self._position == Positions.Long:
|
if action == Actions.Long_exit.value and self._position == Positions.Long:
|
||||||
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
|
last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
|
||||||
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
|
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
|
||||||
return float(np.log(current_price) - np.log(last_trade_price))
|
factor = 1
|
||||||
|
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
||||||
if action == Actions.Long_exit.value and self._position == Positions.Long:
|
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||||
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
return float((np.log(current_price) - np.log(last_trade_price)) * factor)
|
||||||
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
|
|
||||||
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
|
|
||||||
return float((np.log(current_price) - np.log(last_trade_price)) * 2)
|
|
||||||
|
|
||||||
# close short
|
# close short
|
||||||
if action == Actions.Short_exit.value and self._position == Positions.Short:
|
if action == Actions.Short_exit.value and self._position == Positions.Short:
|
||||||
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
|
last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
|
||||||
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
|
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
|
||||||
return float(np.log(last_trade_price) - np.log(current_price))
|
factor = 1
|
||||||
|
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
||||||
if action == Actions.Short_exit.value and self._position == Positions.Short:
|
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||||
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
|
return float(np.log(last_trade_price) - np.log(current_price) * factor)
|
||||||
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
|
|
||||||
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
|
|
||||||
return float((np.log(last_trade_price) - np.log(current_price)) * 2)
|
|
||||||
|
|
||||||
return 0.
|
return 0.
|
||||||
|
@ -62,12 +62,12 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel):
|
|||||||
env_id = "train_env"
|
env_id = "train_env"
|
||||||
num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2)
|
num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2)
|
||||||
self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
|
self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
|
||||||
self.reward_params, self.CONV_WIDTH) for i
|
self.reward_params, self.CONV_WIDTH, config=self.config) for i
|
||||||
in range(num_cpu)])
|
in range(num_cpu)])
|
||||||
|
|
||||||
eval_env_id = 'eval_env'
|
eval_env_id = 'eval_env'
|
||||||
self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
|
self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
|
||||||
self.reward_params, self.CONV_WIDTH, monitor=True) for i
|
self.reward_params, self.CONV_WIDTH, monitor=True, config=self.config) for i
|
||||||
in range(num_cpu)])
|
in range(num_cpu)])
|
||||||
self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
|
self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
|
||||||
render=False, eval_freq=eval_freq,
|
render=False, eval_freq=eval_freq,
|
||||||
|
Loading…
Reference in New Issue
Block a user