From 5d4e5e69fe44aa9dedb9dcfdf43adfe240d9832b Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Thu, 18 Aug 2022 13:02:47 +0200
Subject: [PATCH] reinforce training with state info, reinforce prediction
 with state info, restructure config to accommodate all parameters from any
 user imported model type. Set 5Act to default env on TDQN. Clean example
 config.

---
 config_examples/config_freqai-rl.example.json | 39 ++++-----
 freqtrade/freqai/RL/Base3ActionRLEnv.py       |  4 +-
 freqtrade/freqai/RL/Base5ActionRLEnv.py       | 17 +++-
 .../RL/BaseReinforcementLearningModel.py      | 44 +++-------
 .../ReinforcementLearningPPO.py               | 12 ++-
 .../ReinforcementLearningPPO_multiproc.py     | 21 ++---
 .../ReinforcementLearningTDQN.py              | 83 ++++++------------
 .../ReinforcementLearningTDQN_multiproc.py    | 86 ++++++-------------
 8 files changed, 114 insertions(+), 192 deletions(-)

diff --git a/config_examples/config_freqai-rl.example.json b/config_examples/config_freqai-rl.example.json
index 053c1a08e..1f12cbc6c 100644
--- a/config_examples/config_freqai-rl.example.json
+++ b/config_examples/config_freqai-rl.example.json
@@ -8,7 +8,7 @@
     "tradable_balance_ratio": 1,
     "fiat_display_currency": "USD",
     "dry_run": true,
-    "timeframe": "3m",
+    "timeframe": "5m",
     "dataformat_ohlcv": "json",
     "dry_run_wallet": 12000,
     "cancel_open_orders_on_exit": true,
@@ -35,7 +35,6 @@
     },
     "entry_pricing": {
         "price_side": "same",
-        "purge_old_models": true,
         "use_order_book": true,
         "order_book_top": 1,
         "price_last_balance": 0.0,
@@ -56,10 +55,8 @@
     ],
     "freqai": {
         "enabled": true,
-        "startup_candles": 1000,
-        "model_save_type": "stable_baselines_ppo",
+        "model_save_type": "stable_baselines_dqn",
         "conv_width": 10,
-        "follow_mode": false,
         "purge_old_models": true,
         "train_period_days": 10,
         "backtest_period_days": 2,
@@ -71,13 +68,9 @@
                 "ETH/USDT"
             ],
             "include_timeframes": [
-                "3m",
-                "15m"
+                "5m",
+                "30m"
             ],
-            "include_shifted_candles": 0,
-            "weight_factor": 0.9,
-            "principal_component_analysis": false,
-            "use_SVM_to_remove_outliers": false,
             "indicator_max_period_candles": 10,
             "indicator_periods_candles": [5, 10]
         },
@@ -86,16 +79,22 @@
             "random_state": 1,
             "shuffle": false
         },
-        "model_training_parameters": {
-            "ent_coef": 0.005,
-            "learning_rate": 0.000025,
-            "batch_size": 256,
-            "eval_cycles" : 5,
-            "train_cycles" : 15
+        "model_training_parameters": {
+            "learning_rate": 0.00025,
+            "gamma": 0.9,
+            "target_update_interval": 5000,
+            "buffer_size": 50000,
+            "exploration_initial_eps":1,
+            "exploration_final_eps": 0.1,
+            "verbose": 1
         },
-        "model_reward_parameters": {
-            "rr": 1,
-            "profit_aim": 0.01
+        "rl_config": {
+            "train_cycles": 15,
+            "eval_cycles": 5,
+            "model_reward_parameters": {
+                "rr": 1,
+                "profit_aim": 0.02
+            }
         }
     },
     "bot_name": "RL_test",
diff --git a/freqtrade/freqai/RL/Base3ActionRLEnv.py b/freqtrade/freqai/RL/Base3ActionRLEnv.py
index bf7b2fc7b..9d17b982d 100644
--- a/freqtrade/freqai/RL/Base3ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base3ActionRLEnv.py
@@ -6,6 +6,7 @@ import gym
 import numpy as np
 from gym import spaces
 from gym.utils import seeding
+from pandas import DataFrame
 
 logger = logging.getLogger(__name__)
 
@@ -35,7 +36,8 @@ class Base3ActionRLEnv(gym.Env):
 
     metadata = {'render.modes': ['human']}
 
-    def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True,
+    def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
+                 reward_kwargs: dict = {}, window_size=10, starting_point=True,
                  id: str = 'baseenv-1', seed: int = 1):
         assert df.ndim == 2
 
diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py
index 5f817f14e..d7ceb5ff3 100644
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -6,6 +6,7 @@ import gym
 import numpy as np
 from gym import spaces
 from gym.utils import seeding
+from pandas import DataFrame
 
 logger = logging.getLogger(__name__)
 
@@ -39,7 +40,8 @@
     """
     metadata = {'render.modes': ['human']}
 
-    def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True,
+    def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
+                 reward_kwargs: dict = {}, window_size=10, starting_point=True,
                  id: str = 'baseenv-1', seed: int = 1):
         assert df.ndim == 2
 
@@ -56,7 +58,7 @@ class Base5ActionRLEnv(gym.Env):
         self.fee = 0.0015
 
         # # spaces
-        self.shape = (window_size, self.signal_features.shape[1])
+        self.shape = (window_size, self.signal_features.shape[1] + 2)
         self.action_space = spaces.Discrete(len(Actions))
         self.observation_space = spaces.Box(
             low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)
@@ -161,19 +163,26 @@ class Base5ActionRLEnv(gym.Env):
            self._done = True
 
        self._position_history.append(self._position)
-        observation = self._get_observation()
+
        info = dict(
            tick=self._current_tick,
            total_reward=self.total_reward,
            total_profit=self._total_profit,
            position=self._position.value
        )
+
+        observation = self._get_observation()
+
        self._update_history(info)
 
        return observation, step_reward, self._done, info
 
    def _get_observation(self):
-        return self.signal_features[(self._current_tick - self.window_size):self._current_tick]
+        features_and_state = self.signal_features[(
+            self._current_tick - self.window_size):self._current_tick]
+        features_and_state['current_profit_pct'] = self.get_unrealized_profit()
+        features_and_state['position'] = self._position.value
+        return features_and_state
 
    def get_unrealized_profit(self):
diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
index 78feea6d1..395b2a1a6 100644
--- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
+++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
@@ -13,7 +13,7 @@ from freqtrade.persistence import Trade
 import torch.multiprocessing
 import torch as th
 
 logger = logging.getLogger(__name__)
-th.set_num_threads(8)
+
 torch.multiprocessing.set_sharing_strategy('file_system')
 
@@ -22,6 +22,11 @@ class BaseReinforcementLearningModel(IFreqaiModel):
     User created Reinforcement Learning Model prediction model.
""" + def __init__(self, **kwargs): + super().__init__(config=kwargs['config']) + th.set_num_threads(self.freqai_info.get('data_kitchen_thread_count', 4)) + self.reward_params = self.freqai_info['rl_config']['model_reward_parameters'] + def train( self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen ) -> Any: @@ -62,12 +67,6 @@ class BaseReinforcementLearningModel(IFreqaiModel): model = self.fit_rl(data_dictionary, pair, dk, prices_train, prices_test) - if pair not in self.dd.historic_predictions: - self.set_initial_historic_predictions( - data_dictionary['train_features'], model, dk, pair) - - self.dd.save_historic_predictions_to_disk() - logger.info(f"--------------------done training {pair}--------------------") return model @@ -127,7 +126,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): # optional additional data cleaning/analysis self.data_cleaning_predict(dk, filtered_dataframe) - pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model) + pred_df = self.rl_model_predict( + dk.data_dictionary["prediction_features"], dk, self.model) pred_df.fillna(0, inplace=True) return (pred_df, dk.do_predict) @@ -135,10 +135,13 @@ class BaseReinforcementLearningModel(IFreqaiModel): def rl_model_predict(self, dataframe: DataFrame, dk: FreqaiDataKitchen, model: Any) -> DataFrame: - output = pd.DataFrame(np.full((len(dataframe), 1), 2), columns=dk.label_list) + output = pd.DataFrame(np.zeros(len(dataframe)), columns=dk.label_list) def _predict(window): + market_side, current_profit, total_profit = self.get_state_info(dk.pair) observations = dataframe.iloc[window.index] + observations['current_profit'] = current_profit + observations['position'] = market_side res, _ = model.predict(observations, deterministic=True) return res @@ -174,29 +177,6 @@ class BaseReinforcementLearningModel(IFreqaiModel): return prices_train, prices_test - def set_initial_historic_predictions( - self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str - ) -> None: - - pred_df = self.rl_model_predict(df, dk, model) - pred_df.fillna(0, inplace=True) - self.dd.historic_predictions[pair] = pred_df - hist_preds_df = self.dd.historic_predictions[pair] - - for label in hist_preds_df.columns: - if hist_preds_df[label].dtype == object: - continue - hist_preds_df[f'{label}_mean'] = 0 - hist_preds_df[f'{label}_std'] = 0 - - hist_preds_df['do_predict'] = 0 - - if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0: - hist_preds_df['DI_values'] = 0 - - for return_str in dk.data['extra_returns_per_train']: - hist_preds_df[return_str] = 0 - # TODO take care of this appendage. Right now it needs to be called because FreqAI enforces it. # But FreqaiRL needs more objects passed to fit() (like DK) and we dont want to go refactor # all the other existing fit() functions to include dk argument. 
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py
index b437ea8aa..5dc7735d3 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py
@@ -24,18 +24,16 @@ class ReinforcementLearningPPO(BaseReinforcementLearningModel):
     def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,
                prices_train: DataFrame, prices_test: DataFrame):
 
-        agent_params = self.freqai_info['model_training_parameters']
-        reward_params = self.freqai_info['model_reward_parameters']
         train_df = data_dictionary["train_features"]
         test_df = data_dictionary["test_features"]
-        eval_freq = agent_params.get("eval_cycles", 4) * len(test_df)
-        total_timesteps = agent_params["train_cycles"] * len(train_df)
+        eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df)
+        total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
 
         # environments
         train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
-                            reward_kwargs=reward_params)
+                            reward_kwargs=self.reward_params)
         eval = MyRLEnv(df=test_df, prices=prices_test,
-                       window_size=self.CONV_WIDTH, reward_kwargs=reward_params)
+                       window_size=self.CONV_WIDTH, reward_kwargs=self.reward_params)
         eval_env = Monitor(eval, ".")
 
         path = dk.data_path
@@ -49,7 +47,7 @@ class ReinforcementLearningPPO(BaseReinforcementLearningModel):
 
         model = PPO('MlpPolicy', train_env, policy_kwargs=policy_kwargs,
                     tensorboard_log=f"{path}/ppo/tensorboard/",
                     learning_rate=0.00025,
-                    gamma=0.9, verbose=1
+                    **self.freqai_info['model_training_parameters']
                     )
 
         model.learn(
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py
index b1c5f316f..337e94607 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py
@@ -51,23 +51,20 @@ class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel):
     def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,
                prices_train: DataFrame, prices_test: DataFrame):
 
-        agent_params = self.freqai_info['model_training_parameters']
-        reward_params = self.freqai_info['model_reward_parameters']
         train_df = data_dictionary["train_features"]
         test_df = data_dictionary["test_features"]
-        eval_freq = agent_params.get("eval_cycles", 4) * len(test_df)
-        total_timesteps = agent_params["train_cycles"] * len(train_df)
-        learning_rate = agent_params["learning_rate"]
+        eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df)
+        total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
 
         env_id = "train_env"
-        th.set_num_threads(dk.thread_count)
         num_cpu = int(dk.thread_count / 2)
-        train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, reward_params,
-                                            self.CONV_WIDTH) for i in range(num_cpu)])
+        train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
+                                            self.reward_params, self.CONV_WIDTH) for i in range(num_cpu)])
 
         eval_env_id = 'eval_env'
-        eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, reward_params,
-                                           self.CONV_WIDTH, monitor=True) for i in range(num_cpu)])
+        eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
+                                           self.reward_params, self.CONV_WIDTH, monitor=True) for i in
+                                           range(num_cpu)])
 
         path = dk.data_path
         eval_callback = EvalCallback(eval_env, best_model_save_path=f"{path}/",
@@ -80,9 +77,7 @@ class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel):
 
         model = PPO('MlpPolicy', train_env, policy_kwargs=policy_kwargs,
                     tensorboard_log=f"{path}/ppo/tensorboard/",
-                    learning_rate=learning_rate,
-                    gamma=0.9,
-                    verbose=1
+                    **self.freqai_info['model_training_parameters']
                     )
 
         model.learn(
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py
index a60bc1fa1..3a57142cf 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py
@@ -3,8 +3,7 @@ from typing import Any, Dict  # Optional
 import torch as th
 from stable_baselines3.common.callbacks import EvalCallback
 from stable_baselines3.common.monitor import Monitor
-# from stable_baselines3.common.vec_env import SubprocVecEnv
-from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions
+from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions
 from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
 from freqtrade.freqai.RL.TDQNagent import TDQN
 from stable_baselines3 import DQN
@@ -25,18 +24,16 @@ class ReinforcementLearningTDQN(BaseReinforcementLearningModel):
     def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,
                prices_train: DataFrame, prices_test: DataFrame):
 
-        agent_params = self.freqai_info['model_training_parameters']
-        reward_params = self.freqai_info['model_reward_parameters']
         train_df = data_dictionary["train_features"]
         test_df = data_dictionary["test_features"]
-        eval_freq = agent_params["eval_cycles"] * len(test_df)
-        total_timesteps = agent_params["train_cycles"] * len(train_df)
+        eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df)
+        total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
 
         # environments
         train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
-                            reward_kwargs=reward_params)
+                            reward_kwargs=self.reward_params)
         eval = MyRLEnv(df=test_df, prices=prices_test,
-                       window_size=self.CONV_WIDTH, reward_kwargs=reward_params)
+                       window_size=self.CONV_WIDTH, reward_kwargs=self.reward_params)
         eval_env = Monitor(eval, ".")
         eval_env.reset()
 
@@ -50,12 +47,10 @@ class ReinforcementLearningTDQN(BaseReinforcementLearningModel):
                              net_arch=[256, 256, 128])
 
         model = TDQN('TMultiInputPolicy', train_env,
-                     policy_kwargs=policy_kwargs,
                      tensorboard_log=f"{path}/tdqn/tensorboard/",
-                     learning_rate=0.00025, gamma=0.9,
-                     target_update_interval=5000, buffer_size=50000,
-                     exploration_initial_eps=1, exploration_final_eps=0.1,
-                     replay_buffer_class=ReplayBuffer
+                     policy_kwargs=policy_kwargs,
+                     replay_buffer_class=ReplayBuffer,
+                     **self.freqai_info['model_training_parameters']
                      )
 
         model.learn(
@@ -70,9 +65,11 @@ class ReinforcementLearningTDQN(BaseReinforcementLearningModel):
         return best_model
 
 
-class MyRLEnv(Base3ActionRLEnv):
+# User can inherit and customize 5 action environment
+class MyRLEnv(Base5ActionRLEnv):
     """
-    User can override any function in BaseRLEnv and gym.Env
+    User can override any function in BaseRLEnv and gym.Env. Here the user
+    Adds 5 actions.
     """
 
     def calculate_reward(self, action):
@@ -81,55 +78,27 @@ class MyRLEnv(Base3ActionRLEnv):
 
         if self._last_trade_tick is None:
             return 0.
 
         # close long
-        if (action == Actions.Short.value or
-                action == Actions.Neutral.value) and self._position == Positions.Long:
+        if action == Actions.Long_sell.value and self._position == Positions.Long:
             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
             return float(np.log(current_price) - np.log(last_trade_price))
 
+        if action == Actions.Long_sell.value and self._position == Positions.Long:
+            if self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
+                current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
+                return float((np.log(current_price) - np.log(last_trade_price)) * 2)
+
         # close short
-        if (action == Actions.Long.value or
-                action == Actions.Neutral.value) and self._position == Positions.Short:
+        if action == Actions.Short_buy.value and self._position == Positions.Short:
             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
             return float(np.log(last_trade_price) - np.log(current_price))
 
+        if action == Actions.Short_buy.value and self._position == Positions.Short:
+            if self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
+                current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
+                return float((np.log(last_trade_price) - np.log(current_price)) * 2)
+
         return 0.
-
-# User can inherit and customize 5 action environment
-# class MyRLEnv(Base5ActionRLEnv):
-#     """
-#     User can override any function in BaseRLEnv and gym.Env. Here the user
-#     Adds 5 actions.
-#     """
-
-#     def calculate_reward(self, action):
-
-#         if self._last_trade_tick is None:
-#             return 0.
-
-#         # close long
-#         if action == Actions.Long_sell.value and self._position == Positions.Long:
-#             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
-#             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
-#             return float(np.log(current_price) - np.log(last_trade_price))
-
-#         if action == Actions.Long_sell.value and self._position == Positions.Long:
-#             if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-#                 last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
-#                 current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
-#                 return float((np.log(current_price) - np.log(last_trade_price)) * 2)
-
-#         # close short
-#         if action == Actions.Short_buy.value and self._position == Positions.Short:
-#             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
-#             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
-#             return float(np.log(last_trade_price) - np.log(current_price))
-
-#         if action == Actions.Short_buy.value and self._position == Positions.Short:
-#             if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-#                 last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
-#                 current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
-#                 return float((np.log(last_trade_price) - np.log(current_price)) * 2)
-
-#         return 0.
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py
index 51e3c07c4..bf9e03b7f 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py
@@ -10,7 +10,7 @@ from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.vec_env import SubprocVecEnv
 from stable_baselines3.common.utils import set_random_seed
 from stable_baselines3 import DQN
-from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions
+from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions
 from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
 from freqtrade.freqai.RL.TDQNagent import TDQN
 from stable_baselines3.common.buffers import ReplayBuffer
@@ -50,22 +50,20 @@ class ReinforcementLearningTDQN_multiproc(BaseReinforcementLearningModel):
     def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,
                prices_train: DataFrame, prices_test: DataFrame):
 
-        agent_params = self.freqai_info['model_training_parameters']
-        reward_params = self.freqai_info['model_reward_parameters']
         train_df = data_dictionary["train_features"]
         test_df = data_dictionary["test_features"]
-        eval_freq = agent_params["eval_cycles"] * len(test_df)
-        total_timesteps = agent_params["train_cycles"] * len(train_df)
-        learning_rate = agent_params["learning_rate"]
+        eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df)
+        total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
 
         env_id = "train_env"
         num_cpu = int(dk.thread_count / 2)
-        train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, reward_params,
-                                            self.CONV_WIDTH) for i in range(num_cpu)])
+        train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
+                                            self.reward_params, self.CONV_WIDTH) for i in range(num_cpu)])
 
         eval_env_id = 'eval_env'
-        eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, reward_params,
-                                           self.CONV_WIDTH, monitor=True) for i in range(num_cpu)])
+        eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
+                                           self.reward_params, self.CONV_WIDTH, monitor=True) for i in
+                                           range(num_cpu)])
 
         path = dk.data_path
         stop_train_callback = StopTrainingOnNoModelImprovement(
@@ -91,10 +89,8 @@ class ReinforcementLearningTDQN_multiproc(BaseReinforcementLearningModel):
         model = TDQN('TMultiInputPolicy', train_env,
                      policy_kwargs=policy_kwargs,
                      tensorboard_log=f"{path}/tdqn/tensorboard/",
-                     learning_rate=learning_rate, gamma=0.9,
-                     target_update_interval=5000, buffer_size=50000,
-                     exploration_initial_eps=1, exploration_final_eps=0.1,
-                     replay_buffer_class=ReplayBuffer
+                     replay_buffer_class=ReplayBuffer,
+                     **self.freqai_info['model_training_parameters']
                      )
 
         model.learn(
@@ -109,9 +105,11 @@ class ReinforcementLearningTDQN_multiproc(BaseReinforcementLearningModel):
         return best_model
 
 
-class MyRLEnv(Base3ActionRLEnv):
+# User can inherit and customize 5 action environment
+class MyRLEnv(Base5ActionRLEnv):
     """
-    User can override any function in BaseRLEnv and gym.Env
+    User can override any function in BaseRLEnv and gym.Env. Here the user
+    Adds 5 actions.
     """
 
     def calculate_reward(self, action):
@@ -120,55 +118,27 @@ class MyRLEnv(Base3ActionRLEnv):
 
         if self._last_trade_tick is None:
             return 0.
 
         # close long
-        if (action == Actions.Short.value or
-                action == Actions.Neutral.value) and self._position == Positions.Long:
+        if action == Actions.Long_sell.value and self._position == Positions.Long:
             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
             return float(np.log(current_price) - np.log(last_trade_price))
 
+        if action == Actions.Long_sell.value and self._position == Positions.Long:
+            if self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
+                current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
+                return float((np.log(current_price) - np.log(last_trade_price)) * 2)
+
         # close short
-        if (action == Actions.Long.value or
-                action == Actions.Neutral.value) and self._position == Positions.Short:
+        if action == Actions.Short_buy.value and self._position == Positions.Short:
             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
             return float(np.log(last_trade_price) - np.log(current_price))
 
+        if action == Actions.Short_buy.value and self._position == Positions.Short:
+            if self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
+                current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
+                return float((np.log(last_trade_price) - np.log(current_price)) * 2)
+
         return 0.
-
-# User can inherit and customize 5 action environment
-# class MyRLEnv(Base5ActionRLEnv):
-#     """
-#     User can override any function in BaseRLEnv and gym.Env. Here the user
-#     Adds 5 actions.
-#     """
-
-#     def calculate_reward(self, action):
-
-#         if self._last_trade_tick is None:
-#             return 0.
-
-#         # close long
-#         if action == Actions.Long_sell.value and self._position == Positions.Long:
-#             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
-#             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
-#             return float(np.log(current_price) - np.log(last_trade_price))
-
-#         if action == Actions.Long_sell.value and self._position == Positions.Long:
-#             if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-#                 last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
-#                 current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
-#                 return float((np.log(current_price) - np.log(last_trade_price)) * 2)
-
-#         # close short
-#         if action == Actions.Short_buy.value and self._position == Positions.Short:
-#             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
-#             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
-#             return float(np.log(last_trade_price) - np.log(current_price))
-
-#         if action == Actions.Short_buy.value and self._position == Positions.Short:
-#             if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-#                 last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
-#                 current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
-#                 return float((np.log(last_trade_price) - np.log(current_price)) * 2)
-
-#         return 0.
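
Illustrative sketch (not part of the diff above): how the restructured "freqai" config section is consumed after this change. Everything under "model_training_parameters" is forwarded untouched to the chosen stable-baselines3 model via **kwargs, while the RL-specific settings now live under "rl_config". The make_model() stub and the placeholder dataframe lengths below are hypothetical and only echo what the real constructors would receive.

# Sketch, assuming the example config above; make_model() stands in for PPO(...)/TDQN(...).
freqai_info = {
    "model_training_parameters": {   # anything the SB3 model constructor accepts
        "learning_rate": 0.00025,
        "gamma": 0.9,
        "verbose": 1,
    },
    "rl_config": {                   # consumed by the FreqAI RL wrapper itself
        "train_cycles": 15,
        "eval_cycles": 5,
        "model_reward_parameters": {"rr": 1, "profit_aim": 0.02},
    },
}


def make_model(policy, **model_kwargs):
    # stand-in for the SB3 constructor; it only echoes the kwargs it would receive
    return policy, model_kwargs


train_len, test_len = 1000, 200      # placeholder dataframe lengths
total_timesteps = freqai_info["rl_config"]["train_cycles"] * train_len
eval_freq = freqai_info["rl_config"]["eval_cycles"] * test_len
reward_params = freqai_info["rl_config"]["model_reward_parameters"]

print(make_model("MlpPolicy", **freqai_info["model_training_parameters"]))
print(total_timesteps, eval_freq, reward_params)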
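
A second sketch, showing why Base5ActionRLEnv.shape grows to (window_size, n_features + 2): the observation is the usual feature window with the unrealized profit and the current position appended as two extra columns, mirroring _get_observation() in the diff. The random features and hard-coded state values are placeholders; .copy() is used here only to keep the sketch side-effect free, whereas the patch assigns to the slice directly.

# Sketch of the state-augmented observation built by _get_observation().
import numpy as np
import pandas as pd

window_size, n_features = 10, 4
signal_features = pd.DataFrame(np.random.rand(100, n_features),
                               columns=[f"feat_{i}" for i in range(n_features)])

current_tick = 50
unrealized_profit = 0.012            # placeholder for self.get_unrealized_profit()
position_value = 1                   # placeholder for self._position.value

observation = signal_features.iloc[current_tick - window_size:current_tick].copy()
observation["current_profit_pct"] = unrealized_profit
observation["position"] = position_value

assert observation.shape == (window_size, n_features + 2)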
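
Finally, a sketch of the prediction path: rl_model_predict() now attaches the live trade state to every rolling window before calling model.predict(), so the prediction-time columns match what the environment saw during training. get_state_info() is defined elsewhere in the RL base model and is stubbed out here, as are the loaded model and the label column name.

# Sketch of the state-aware prediction loop; StubModel, get_state_info() and the
# "action" label are hypothetical stand-ins for the real FreqAI objects.
import numpy as np
import pandas as pd


class StubModel:
    def predict(self, obs, deterministic=True):
        return 0, None               # always return the "neutral" action


def get_state_info(pair):
    return 0, 0.0, 0.0               # market_side, current_profit, total_profit


def rl_model_predict(dataframe, window_size, model, pair):
    output = pd.DataFrame(np.zeros(len(dataframe)), columns=["action"])

    def _predict(window):
        market_side, current_profit, _total_profit = get_state_info(pair)
        observations = dataframe.iloc[window.index].copy()
        observations["current_profit"] = current_profit
        observations["position"] = market_side
        res, _ = model.predict(observations, deterministic=True)
        return res

    # rows before the first full window stay NaN; callers fillna(0) afterwards
    return output.rolling(window=window_size).apply(_predict)


features = pd.DataFrame(np.random.rand(50, 3), columns=["a", "b", "c"])
print(rl_model_predict(features, 10, StubModel(), "BTC/USDT").tail())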