fix generic reward, add time duration to reward

robcaulk 2022-08-23 14:58:38 +02:00
parent 280a1dc3f8
commit b26ed7dea4
5 changed files with 43 additions and 45 deletions

View File

@@ -88,6 +88,7 @@
             "train_cycles": 10,
             "eval_cycles": 3,
             "thread_count": 4,
+            "max_trade_duration_candles": 100,
             "model_type": "PPO",
             "policy_type": "MlpPolicy",
             "model_reward_parameters": {

View File

@@ -8,6 +8,7 @@ from gym import spaces
 from gym.utils import seeding
 from pandas import DataFrame
 import pandas as pd
+from abc import abstractmethod


 logger = logging.getLogger(__name__)
@@ -265,28 +266,12 @@ class Base5ActionRLEnv(gym.Env):
     def get_sharpe_ratio(self):
         return mean_over_std(self.get_portfolio_log_returns())

+    @abstractmethod
     def calculate_reward(self, action):
-        if self._last_trade_tick is None:
-            return 0.
-
-        # close long
-        if action == Actions.Long_exit.value and self._position == Positions.Long:
-            last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
-            factor = 1
-            if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-            return float((np.log(current_price) - np.log(last_trade_price)) * factor)
-
-        # close short
-        if action == Actions.Short_exit.value and self._position == Positions.Short:
-            last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
-            factor = 1
-            if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-            return float(np.log(last_trade_price) - np.log(current_price) * factor)
+        """
+        Reward is created by BaseReinforcementLearningModel and can
+        be inherited/edited by the user made ReinforcementLearner file.
+        """
         return 0.
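With calculate_reward now marked @abstractmethod, Base5ActionRLEnv only defines the interface and each concrete environment supplies its own reward. A minimal sketch of such an override (illustrative only; the class name is hypothetical, the imports and attributes come from the base class shown in this commit):

from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions


class MinimalRewardEnv(Base5ActionRLEnv):
    """Sketch: the simplest reward that satisfies the abstract contract."""

    def calculate_reward(self, action):
        if self._last_trade_tick is None:
            return 0.
        # Reward only the exit actions, using the raw unrealized profit.
        if action == Actions.Long_exit.value and self._position == Positions.Long:
            return float(self.get_unrealized_profit())
        if action == Actions.Short_exit.value and self._position == Positions.Short:
            return float(self.get_unrealized_profit())
        return 0.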

View File

@@ -270,7 +270,7 @@ def make_env(env_id: str, rank: int, seed: int, train_df, price,
 class MyRLEnv(Base5ActionRLEnv):
     """
     User can override any function in BaseRLEnv and gym.Env. Here the user
-    Adds 5 actions.
+    sets a custom reward based on profit and trade duration.
     """

     def calculate_reward(self, action):
@@ -278,22 +278,27 @@ class MyRLEnv(Base5ActionRLEnv):
         if self._last_trade_tick is None:
             return 0.

+        pnl = self.get_unrealized_profit()
+        max_trade_duration = self.rl_config['max_trade_duration_candles']
+        trade_duration = self._current_tick - self._last_trade_tick
+
+        factor = 1
+        if trade_duration <= max_trade_duration:
+            factor *= 1.5
+        elif trade_duration > max_trade_duration:
+            factor *= 0.5
+
         # close long
         if action == Actions.Long_exit.value and self._position == Positions.Long:
-            last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
-            factor = 1
             if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-            return float((np.log(current_price) - np.log(last_trade_price)) * factor)
+                factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+            return float(pnl * factor)

         # close short
         if action == Actions.Short_exit.value and self._position == Positions.Short:
-            last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
             factor = 1
             if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-            return float(np.log(last_trade_price) - np.log(current_price) * factor)
+                factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+            return float(pnl * factor)

         return 0.
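The duration factor is the new piece here: unrealized profit is scaled up by 1.5 when the trade closes within max_trade_duration_candles, down by 0.5 when it overstays, and a winning exit past profit_aim * rr is additionally multiplied by win_reward_factor. A standalone worked example with illustrative numbers (the helper name and values are not from the commit):

# Standalone sketch of the scaling applied in calculate_reward above.
def duration_scaled_reward(pnl, trade_duration, max_trade_duration=100,
                           win=False, win_reward_factor=2):
    factor = 1.5 if trade_duration <= max_trade_duration else 0.5
    if win:
        factor *= win_reward_factor
    return float(pnl * factor)


print(duration_scaled_reward(0.03, 40))             # fast exit:   0.03 * 1.5 = 0.045
print(duration_scaled_reward(0.03, 150))            # slow exit:   0.03 * 0.5 = 0.015
print(duration_scaled_reward(0.03, 40, win=True))   # fast winner: 0.03 * 3.0 = 0.09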

View File

@@ -3,7 +3,6 @@ from typing import Any, Dict  # , Tuple
 # import numpy.typing as npt
 import torch as th
-import numpy as np

 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions
 from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
@@ -47,30 +46,36 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
 class MyRLEnv(Base5ActionRLEnv):
     """
-    User can modify any part of the environment by overriding base
-    functions
+    User can override any function in BaseRLEnv and gym.Env. Here the user
+    sets a custom reward based on profit and trade duration.
     """

     def calculate_reward(self, action):
         if self._last_trade_tick is None:
             return 0.

+        pnl = self.get_unrealized_profit()
+        max_trade_duration = self.rl_config['max_trade_duration_candles']
+        trade_duration = self._current_tick - self._last_trade_tick
+
+        factor = 1
+        if trade_duration <= max_trade_duration:
+            factor *= 1.5
+        elif trade_duration > max_trade_duration:
+            factor *= 0.5
+
         # close long
         if action == Actions.Long_exit.value and self._position == Positions.Long:
-            last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
-            factor = 1
             if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-            return float((np.log(current_price) - np.log(last_trade_price)) * factor)
+                factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+            return float(pnl * factor)

         # close short
         if action == Actions.Short_exit.value and self._position == Positions.Short:
-            last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
             factor = 1
             if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-            return float(np.log(last_trade_price) - np.log(current_price) * factor)
+                factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+            return float(pnl * factor)

         return 0.
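This copy mirrors the environment in BaseReinforcementLearningModel. The key change in both is factor *= win_reward_factor instead of factor = win_reward_factor, so the win bonus now stacks with the duration scaling rather than overwriting it (as committed, the short-exit branch still resets factor to 1 first, so the stacking takes effect on the long-exit path). A tiny illustration of the two operators with assumed numbers:

# Assumed values, for illustration only.
duration_factor = 1.5      # trade closed within max_trade_duration_candles
win_reward_factor = 2

old_factor = duration_factor
old_factor = win_reward_factor    # plain assignment discards the 1.5 -> factor is 2
new_factor = duration_factor
new_factor *= win_reward_factor   # in-place multiply keeps it        -> factor is 3.0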

View File

@@ -62,12 +62,14 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel):
         env_id = "train_env"
         num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2)
         self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
-                                                 self.reward_params, self.CONV_WIDTH, config=self.config) for i
+                                                 self.reward_params, self.CONV_WIDTH,
+                                                 config=self.config) for i
                                         in range(num_cpu)])

         eval_env_id = 'eval_env'
         self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
-                                                self.reward_params, self.CONV_WIDTH, monitor=True, config=self.config) for i
+                                                self.reward_params, self.CONV_WIDTH, monitor=True,
+                                                config=self.config) for i
                                        in range(num_cpu)])
         self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
                                           render=False, eval_freq=eval_freq,
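The multiproc change only re-wraps the make_env arguments so config=self.config sits on its own line. For context, a minimal sketch of the factory pattern SubprocVecEnv expects: a list of zero-argument callables, one per worker process. The environment id, seeding, and gym usage below are placeholders and not the project's make_env (gym/stable-baselines3 API as of 2022):

import gym
from stable_baselines3.common.vec_env import SubprocVecEnv


def make_env_sketch(env_id: str, rank: int, seed: int):
    # Return a zero-argument callable that builds one env instance per worker.
    def _init():
        env = gym.make(env_id)     # stand-in for constructing the project's MyRLEnv
        env.seed(seed + rank)      # offset the seed per worker
        return env
    return _init


if __name__ == "__main__":
    num_cpu = 4
    train_env = SubprocVecEnv([make_env_sketch("CartPole-v1", i, 1) for i in range(num_cpu)])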