diff --git a/config_examples/config_freqai-rl.example.json b/config_examples/config_freqai-rl.example.json index 053c1a08e..1f12cbc6c 100644 --- a/config_examples/config_freqai-rl.example.json +++ b/config_examples/config_freqai-rl.example.json @@ -8,7 +8,7 @@ "tradable_balance_ratio": 1, "fiat_display_currency": "USD", "dry_run": true, - "timeframe": "3m", + "timeframe": "5m", "dataformat_ohlcv": "json", "dry_run_wallet": 12000, "cancel_open_orders_on_exit": true, @@ -35,7 +35,6 @@ }, "entry_pricing": { "price_side": "same", - "purge_old_models": true, "use_order_book": true, "order_book_top": 1, "price_last_balance": 0.0, @@ -56,10 +55,8 @@ ], "freqai": { "enabled": true, - "startup_candles": 1000, - "model_save_type": "stable_baselines_ppo", + "model_save_type": "stable_baselines_dqn", "conv_width": 10, - "follow_mode": false, "purge_old_models": true, "train_period_days": 10, "backtest_period_days": 2, @@ -71,13 +68,9 @@ "ETH/USDT" ], "include_timeframes": [ - "3m", - "15m" + "5m", + "30m" ], - "include_shifted_candles": 0, - "weight_factor": 0.9, - "principal_component_analysis": false, - "use_SVM_to_remove_outliers": false, "indicator_max_period_candles": 10, "indicator_periods_candles": [5, 10] }, @@ -86,16 +79,22 @@ "random_state": 1, "shuffle": false }, - "model_training_parameters": { - "ent_coef": 0.005, - "learning_rate": 0.000025, - "batch_size": 256, - "eval_cycles" : 5, - "train_cycles" : 15 + "model_training_parameters": { + "learning_rate": 0.00025, + "gamma": 0.9, + "target_update_interval": 5000, + "buffer_size": 50000, + "exploration_initial_eps":1, + "exploration_final_eps": 0.1, + "verbose": 1 }, - "model_reward_parameters": { - "rr": 1, - "profit_aim": 0.01 + "rl_config": { + "train_cycles": 15, + "eval_cycles": 5, + "model_reward_parameters": { + "rr": 1, + "profit_aim": 0.02 + } } }, "bot_name": "RL_test", diff --git a/freqtrade/freqai/RL/Base3ActionRLEnv.py b/freqtrade/freqai/RL/Base3ActionRLEnv.py index bf7b2fc7b..9d17b982d 100644 --- a/freqtrade/freqai/RL/Base3ActionRLEnv.py +++ b/freqtrade/freqai/RL/Base3ActionRLEnv.py @@ -6,6 +6,7 @@ import gym import numpy as np from gym import spaces from gym.utils import seeding +from pandas import DataFrame logger = logging.getLogger(__name__) @@ -35,7 +36,8 @@ class Base3ActionRLEnv(gym.Env): metadata = {'render.modes': ['human']} - def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, + def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), + reward_kwargs: dict = {}, window_size=10, starting_point=True, id: str = 'baseenv-1', seed: int = 1): assert df.ndim == 2 diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py index 5f817f14e..d7ceb5ff3 100644 --- a/freqtrade/freqai/RL/Base5ActionRLEnv.py +++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py @@ -6,6 +6,7 @@ import gym import numpy as np from gym import spaces from gym.utils import seeding +from pandas import DataFrame logger = logging.getLogger(__name__) @@ -39,7 +40,8 @@ class Base5ActionRLEnv(gym.Env): """ metadata = {'render.modes': ['human']} - def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, + def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), + reward_kwargs: dict = {}, window_size=10, starting_point=True, id: str = 'baseenv-1', seed: int = 1): assert df.ndim == 2 @@ -56,7 +58,7 @@ class Base5ActionRLEnv(gym.Env): self.fee = 0.0015 # # spaces - self.shape = (window_size, self.signal_features.shape[1]) + 
self.shape = (window_size, self.signal_features.shape[1] + 2) self.action_space = spaces.Discrete(len(Actions)) self.observation_space = spaces.Box( low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) @@ -161,19 +163,26 @@ class Base5ActionRLEnv(gym.Env): self._done = True self._position_history.append(self._position) - observation = self._get_observation() + info = dict( tick=self._current_tick, total_reward=self.total_reward, total_profit=self._total_profit, position=self._position.value ) + + observation = self._get_observation() + self._update_history(info) return observation, step_reward, self._done, info def _get_observation(self): - return self.signal_features[(self._current_tick - self.window_size):self._current_tick] + features_and_state = self.signal_features[( + self._current_tick - self.window_size):self._current_tick] + features_and_state['current_profit_pct'] = self.get_unrealized_profit() + features_and_state['position'] = self._position.value + return features_and_state def get_unrealized_profit(self): diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 78feea6d1..395b2a1a6 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -13,7 +13,7 @@ from freqtrade.persistence import Trade import torch.multiprocessing import torch as th logger = logging.getLogger(__name__) -th.set_num_threads(8) + torch.multiprocessing.set_sharing_strategy('file_system') @@ -22,6 +22,11 @@ class BaseReinforcementLearningModel(IFreqaiModel): User created Reinforcement Learning Model prediction model. """ + def __init__(self, **kwargs): + super().__init__(config=kwargs['config']) + th.set_num_threads(self.freqai_info.get('data_kitchen_thread_count', 4)) + self.reward_params = self.freqai_info['rl_config']['model_reward_parameters'] + def train( self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen ) -> Any: @@ -62,12 +67,6 @@ class BaseReinforcementLearningModel(IFreqaiModel): model = self.fit_rl(data_dictionary, pair, dk, prices_train, prices_test) - if pair not in self.dd.historic_predictions: - self.set_initial_historic_predictions( - data_dictionary['train_features'], model, dk, pair) - - self.dd.save_historic_predictions_to_disk() - logger.info(f"--------------------done training {pair}--------------------") return model @@ -127,7 +126,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): # optional additional data cleaning/analysis self.data_cleaning_predict(dk, filtered_dataframe) - pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model) + pred_df = self.rl_model_predict( + dk.data_dictionary["prediction_features"], dk, self.model) pred_df.fillna(0, inplace=True) return (pred_df, dk.do_predict) @@ -135,10 +135,13 @@ class BaseReinforcementLearningModel(IFreqaiModel): def rl_model_predict(self, dataframe: DataFrame, dk: FreqaiDataKitchen, model: Any) -> DataFrame: - output = pd.DataFrame(np.full((len(dataframe), 1), 2), columns=dk.label_list) + output = pd.DataFrame(np.zeros(len(dataframe)), columns=dk.label_list) def _predict(window): + market_side, current_profit, total_profit = self.get_state_info(dk.pair) observations = dataframe.iloc[window.index] + observations['current_profit'] = current_profit + observations['position'] = market_side res, _ = model.predict(observations, deterministic=True) return res @@ -174,29 +177,6 @@ class BaseReinforcementLearningModel(IFreqaiModel): 
return prices_train, prices_test - def set_initial_historic_predictions( - self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str - ) -> None: - - pred_df = self.rl_model_predict(df, dk, model) - pred_df.fillna(0, inplace=True) - self.dd.historic_predictions[pair] = pred_df - hist_preds_df = self.dd.historic_predictions[pair] - - for label in hist_preds_df.columns: - if hist_preds_df[label].dtype == object: - continue - hist_preds_df[f'{label}_mean'] = 0 - hist_preds_df[f'{label}_std'] = 0 - - hist_preds_df['do_predict'] = 0 - - if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0: - hist_preds_df['DI_values'] = 0 - - for return_str in dk.data['extra_returns_per_train']: - hist_preds_df[return_str] = 0 - # TODO take care of this appendage. Right now it needs to be called because FreqAI enforces it. # But FreqaiRL needs more objects passed to fit() (like DK) and we dont want to go refactor # all the other existing fit() functions to include dk argument. For now we instantiate and diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py index b437ea8aa..5dc7735d3 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py @@ -24,18 +24,16 @@ class ReinforcementLearningPPO(BaseReinforcementLearningModel): def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen, prices_train: DataFrame, prices_test: DataFrame): - agent_params = self.freqai_info['model_training_parameters'] - reward_params = self.freqai_info['model_reward_parameters'] train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] - eval_freq = agent_params.get("eval_cycles", 4) * len(test_df) - total_timesteps = agent_params["train_cycles"] * len(train_df) + eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) + total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) # environments train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH, - reward_kwargs=reward_params) + reward_kwargs=self.reward_params) eval = MyRLEnv(df=test_df, prices=prices_test, - window_size=self.CONV_WIDTH, reward_kwargs=reward_params) + window_size=self.CONV_WIDTH, reward_kwargs=self.reward_params) eval_env = Monitor(eval, ".") path = dk.data_path @@ -49,7 +47,7 @@ class ReinforcementLearningPPO(BaseReinforcementLearningModel): model = PPO('MlpPolicy', train_env, policy_kwargs=policy_kwargs, tensorboard_log=f"{path}/ppo/tensorboard/", learning_rate=0.00025, - gamma=0.9, verbose=1 + **self.freqai_info['model_training_parameters'] ) model.learn( diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py index b1c5f316f..337e94607 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py @@ -51,23 +51,20 @@ class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel): def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen, prices_train: DataFrame, prices_test: DataFrame): - agent_params = self.freqai_info['model_training_parameters'] - reward_params = self.freqai_info['model_reward_parameters'] train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] - eval_freq = 
agent_params.get("eval_cycles", 4) * len(test_df) - total_timesteps = agent_params["train_cycles"] * len(train_df) - learning_rate = agent_params["learning_rate"] + eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) + total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) env_id = "train_env" - th.set_num_threads(dk.thread_count) num_cpu = int(dk.thread_count / 2) - train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, reward_params, - self.CONV_WIDTH) for i in range(num_cpu)]) + train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, + self.reward_params, self.CONV_WIDTH) for i in range(num_cpu)]) eval_env_id = 'eval_env' - eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, reward_params, - self.CONV_WIDTH, monitor=True) for i in range(num_cpu)]) + eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, + self.reward_params, self.CONV_WIDTH, monitor=True) for i in + range(num_cpu)]) path = dk.data_path eval_callback = EvalCallback(eval_env, best_model_save_path=f"{path}/", @@ -80,9 +77,7 @@ class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel): model = PPO('MlpPolicy', train_env, policy_kwargs=policy_kwargs, tensorboard_log=f"{path}/ppo/tensorboard/", - learning_rate=learning_rate, - gamma=0.9, - verbose=1 + **self.freqai_info['model_training_parameters'] ) model.learn( diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py index a60bc1fa1..3a57142cf 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py @@ -3,8 +3,7 @@ from typing import Any, Dict # Optional import torch as th from stable_baselines3.common.callbacks import EvalCallback from stable_baselines3.common.monitor import Monitor -# from stable_baselines3.common.vec_env import SubprocVecEnv -from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions +from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel from freqtrade.freqai.RL.TDQNagent import TDQN from stable_baselines3 import DQN @@ -25,18 +24,16 @@ class ReinforcementLearningTDQN(BaseReinforcementLearningModel): def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen, prices_train: DataFrame, prices_test: DataFrame): - agent_params = self.freqai_info['model_training_parameters'] - reward_params = self.freqai_info['model_reward_parameters'] train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] - eval_freq = agent_params["eval_cycles"] * len(test_df) - total_timesteps = agent_params["train_cycles"] * len(train_df) + eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) + total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) # environments train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH, - reward_kwargs=reward_params) + reward_kwargs=self.reward_params) eval = MyRLEnv(df=test_df, prices=prices_test, - window_size=self.CONV_WIDTH, reward_kwargs=reward_params) + window_size=self.CONV_WIDTH, reward_kwargs=self.reward_params) eval_env = Monitor(eval, ".") eval_env.reset() @@ -50,12 +47,10 @@ class ReinforcementLearningTDQN(BaseReinforcementLearningModel): net_arch=[256, 256, 
128]) model = TDQN('TMultiInputPolicy', train_env, - policy_kwargs=policy_kwargs, tensorboard_log=f"{path}/tdqn/tensorboard/", - learning_rate=0.00025, gamma=0.9, - target_update_interval=5000, buffer_size=50000, - exploration_initial_eps=1, exploration_final_eps=0.1, - replay_buffer_class=ReplayBuffer + policy_kwargs=policy_kwargs, + replay_buffer_class=ReplayBuffer, + **self.freqai_info['model_training_parameters'] ) model.learn( @@ -70,9 +65,11 @@ class ReinforcementLearningTDQN(BaseReinforcementLearningModel): return best_model -class MyRLEnv(Base3ActionRLEnv): +# User can inherit and customize 5 action environment +class MyRLEnv(Base5ActionRLEnv): """ - User can override any function in BaseRLEnv and gym.Env + User can override any function in BaseRLEnv and gym.Env. Here the user + Adds 5 actions. """ def calculate_reward(self, action): @@ -81,55 +78,27 @@ class MyRLEnv(Base3ActionRLEnv): return 0. # close long - if (action == Actions.Short.value or - action == Actions.Neutral.value) and self._position == Positions.Long: + if action == Actions.Long_sell.value and self._position == Positions.Long: last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) return float(np.log(current_price) - np.log(last_trade_price)) + if action == Actions.Long_sell.value and self._position == Positions.Long: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + return float((np.log(current_price) - np.log(last_trade_price)) * 2) + # close short - if (action == Actions.Long.value or - action == Actions.Neutral.value) and self._position == Positions.Short: + if action == Actions.Short_buy.value and self._position == Positions.Short: last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) return float(np.log(last_trade_price) - np.log(current_price)) + if action == Actions.Short_buy.value and self._position == Positions.Short: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + return float((np.log(last_trade_price) - np.log(current_price)) * 2) + return 0. - -# User can inherit and customize 5 action environment -# class MyRLEnv(Base5ActionRLEnv): -# """ -# User can override any function in BaseRLEnv and gym.Env. Here the user -# Adds 5 actions. -# """ - -# def calculate_reward(self, action): - -# if self._last_trade_tick is None: -# return 0. 
- -# # close long -# if action == Actions.Long_sell.value and self._position == Positions.Long: -# last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) -# current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) -# return float(np.log(current_price) - np.log(last_trade_price)) - -# if action == Actions.Long_sell.value and self._position == Positions.Long: -# if self.close_trade_profit[-1] > self.profit_aim * self.rr: -# last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) -# current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) -# return float((np.log(current_price) - np.log(last_trade_price)) * 2) - -# # close short -# if action == Actions.Short_buy.value and self._position == Positions.Short: -# last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) -# current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) -# return float(np.log(last_trade_price) - np.log(current_price)) - -# if action == Actions.Short_buy.value and self._position == Positions.Short: -# if self.close_trade_profit[-1] > self.profit_aim * self.rr: -# last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) -# current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) -# return float((np.log(last_trade_price) - np.log(current_price)) * 2) - -# return 0. diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py index 51e3c07c4..bf9e03b7f 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py @@ -10,7 +10,7 @@ from stable_baselines3.common.monitor import Monitor from stable_baselines3.common.vec_env import SubprocVecEnv from stable_baselines3.common.utils import set_random_seed from stable_baselines3 import DQN -from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions +from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel from freqtrade.freqai.RL.TDQNagent import TDQN from stable_baselines3.common.buffers import ReplayBuffer @@ -50,22 +50,20 @@ class ReinforcementLearningTDQN_multiproc(BaseReinforcementLearningModel): def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen, prices_train: DataFrame, prices_test: DataFrame): - agent_params = self.freqai_info['model_training_parameters'] - reward_params = self.freqai_info['model_reward_parameters'] train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] - eval_freq = agent_params["eval_cycles"] * len(test_df) - total_timesteps = agent_params["train_cycles"] * len(train_df) - learning_rate = agent_params["learning_rate"] + eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) + total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) env_id = "train_env" num_cpu = int(dk.thread_count / 2) - train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, reward_params, - self.CONV_WIDTH) for i in range(num_cpu)]) + train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, + self.reward_params, self.CONV_WIDTH) for i in range(num_cpu)]) eval_env_id = 'eval_env' - eval_env = 
SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, reward_params, - self.CONV_WIDTH, monitor=True) for i in range(num_cpu)]) + eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, + self.reward_params, self.CONV_WIDTH, monitor=True) for i in + range(num_cpu)]) path = dk.data_path stop_train_callback = StopTrainingOnNoModelImprovement( @@ -91,10 +89,8 @@ class ReinforcementLearningTDQN_multiproc(BaseReinforcementLearningModel): model = TDQN('TMultiInputPolicy', train_env, policy_kwargs=policy_kwargs, tensorboard_log=f"{path}/tdqn/tensorboard/", - learning_rate=learning_rate, gamma=0.9, - target_update_interval=5000, buffer_size=50000, - exploration_initial_eps=1, exploration_final_eps=0.1, - replay_buffer_class=ReplayBuffer + replay_buffer_class=ReplayBuffer, + **self.freqai_info['model_training_parameters'] ) model.learn( @@ -109,9 +105,11 @@ class ReinforcementLearningTDQN_multiproc(BaseReinforcementLearningModel): return best_model -class MyRLEnv(Base3ActionRLEnv): +# User can inherit and customize 5 action environment +class MyRLEnv(Base5ActionRLEnv): """ - User can override any function in BaseRLEnv and gym.Env + User can override any function in BaseRLEnv and gym.Env. Here the user + Adds 5 actions. """ def calculate_reward(self, action): @@ -120,55 +118,27 @@ class MyRLEnv(Base3ActionRLEnv): return 0. # close long - if (action == Actions.Short.value or - action == Actions.Neutral.value) and self._position == Positions.Long: + if action == Actions.Long_sell.value and self._position == Positions.Long: last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) return float(np.log(current_price) - np.log(last_trade_price)) + if action == Actions.Long_sell.value and self._position == Positions.Long: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + return float((np.log(current_price) - np.log(last_trade_price)) * 2) + # close short - if (action == Actions.Long.value or - action == Actions.Neutral.value) and self._position == Positions.Short: + if action == Actions.Short_buy.value and self._position == Positions.Short: last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) return float(np.log(last_trade_price) - np.log(current_price)) + if action == Actions.Short_buy.value and self._position == Positions.Short: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + return float((np.log(last_trade_price) - np.log(current_price)) * 2) + return 0. - -# User can inherit and customize 5 action environment -# class MyRLEnv(Base5ActionRLEnv): -# """ -# User can override any function in BaseRLEnv and gym.Env. Here the user -# Adds 5 actions. -# """ - -# def calculate_reward(self, action): - -# if self._last_trade_tick is None: -# return 0. 
- -# # close long -# if action == Actions.Long_sell.value and self._position == Positions.Long: -# last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) -# current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) -# return float(np.log(current_price) - np.log(last_trade_price)) - -# if action == Actions.Long_sell.value and self._position == Positions.Long: -# if self.close_trade_profit[-1] > self.profit_aim * self.rr: -# last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) -# current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) -# return float((np.log(current_price) - np.log(last_trade_price)) * 2) - -# # close short -# if action == Actions.Short_buy.value and self._position == Positions.Short: -# last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) -# current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) -# return float(np.log(last_trade_price) - np.log(current_price)) - -# if action == Actions.Short_buy.value and self._position == Positions.Short: -# if self.close_trade_profit[-1] > self.profit_aim * self.rr: -# last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) -# current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) -# return float((np.log(last_trade_price) - np.log(current_price)) * 2) - -# return 0.
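
Note on the config refactor in `config_freqai-rl.example.json` and the `fit_rl` implementations above: the agent hyperparameters move out of hard-coded constructor arguments and into `model_training_parameters`, which is now splatted directly into the stable-baselines3 agent, while the new `rl_config` block carries the FreqAI-side settings (`train_cycles`, `eval_cycles`, `model_reward_parameters`). A minimal sketch of how these sections map onto a training run, using placeholder dataframe lengths:

```python
# Sketch only: shows how the new config sections are consumed, assuming a
# freqai_info dict shaped like config_freqai-rl.example.json above.
freqai_info = {
    "rl_config": {
        "train_cycles": 15,
        "eval_cycles": 5,
        "model_reward_parameters": {"rr": 1, "profit_aim": 0.02},
    },
    "model_training_parameters": {
        "learning_rate": 0.00025,
        "gamma": 0.9,
        "target_update_interval": 5000,
        "buffer_size": 50000,
        "exploration_initial_eps": 1,
        "exploration_final_eps": 0.1,
        "verbose": 1,
    },
}

n_train, n_test = 5000, 1000              # stand-ins for len(train_df), len(test_df)
rl_config = freqai_info["rl_config"]

# one "cycle" is one pass over the respective dataframe
total_timesteps = rl_config["train_cycles"] * n_train   # 75000
eval_freq = rl_config["eval_cycles"] * n_test           # 5000

# stored as self.reward_params in BaseReinforcementLearningModel.__init__
# and forwarded to the environment as reward_kwargs
reward_params = rl_config["model_reward_parameters"]

# everything in model_training_parameters goes straight to the agent, e.g.
#   DQN('MlpPolicy', train_env, **freqai_info['model_training_parameters'])
print(total_timesteps, eval_freq, reward_params)
```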
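The `Base5ActionRLEnv` change widens the observation from a pure feature window to a window plus two state columns, `current_profit_pct` and `position`, which is why `self.shape` grows to `signal_features.shape[1] + 2`. The predict path in `rl_model_predict` appends the same state via `get_state_info`, though under the column name `current_profit`. A small self-contained sketch of the resulting observation, with hypothetical feature names:

```python
import numpy as np
import pandas as pd

# Sketch of the widened observation; column names mirror
# Base5ActionRLEnv._get_observation(), feature names are hypothetical.
window_size = 3
signal_features = pd.DataFrame({
    "%-rsi": np.random.rand(10),
    "%-mfi": np.random.rand(10),
})

current_tick = 7
unrealized_profit = 0.012     # stand-in for self.get_unrealized_profit()
position_value = 1            # stand-in for self._position.value

obs = signal_features.iloc[current_tick - window_size:current_tick].copy()
obs["current_profit_pct"] = unrealized_profit
obs["position"] = position_value

# matches self.shape = (window_size, self.signal_features.shape[1] + 2)
assert obs.shape == (window_size, signal_features.shape[1] + 2)
print(obs)
```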
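For readers unfamiliar with the `*_multiproc` variants: `make_env` (defined in those files but not shown in these hunks) follows the standard stable-baselines3 factory pattern consumed by `SubprocVecEnv`. The sketch below illustrates that pattern under stated assumptions; the `env_class` parameter is added here only so the snippet stands alone and is not part of the actual signature in the diff.

```python
from typing import Callable

from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.utils import set_random_seed


def make_env(env_class, env_id: str, rank: int, seed: int, df, prices,
             reward_params: dict, window_size: int, monitor: bool = False) -> Callable:
    """Return a no-argument environment factory for SubprocVecEnv.

    env_class would be MyRLEnv in the files above; passing it in explicitly
    is an assumption made so this sketch is self-contained.
    """
    def _init():
        env = env_class(df=df, prices=prices, reward_kwargs=reward_params,
                        window_size=window_size, id=env_id, seed=seed + rank)
        if monitor:
            env = Monitor(env, ".")
        return env

    set_random_seed(seed)
    return _init


# Usage (assuming MyRLEnv, train_df, prices_train and num_cpu are available):
#   train_env = SubprocVecEnv([make_env(MyRLEnv, "train_env", i, 1, train_df,
#                                       prices_train, reward_params, 10)
#                              for i in range(num_cpu)])
```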