From 3eb897c2f8c89e07f81fbd8675b97a3f7bddab91 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 20 Aug 2022 16:35:29 +0200
Subject: [PATCH] reuse callback, allow user to access all stable_baselines3 agents via config

---
 config_examples/config_freqai-rl.example.json |   9 +-
 freqtrade/freqai/RL/Base5ActionRLEnv.py       |  69 +++-----
 .../RL/BaseReinforcementLearningModel.py      |  66 +++++---
 freqtrade/freqai/data_drawer.py               |  11 +-
 .../prediction_models/ReinforcementLearner.py |  82 ++++++++++
 .../ReinforcementLearnerCustomAgent.py}       |  62 ++++++--
 .../ReinforcementLearner_multiproc.py         |  84 ++++++++++
 .../ReinforcementLearningPPO.py               | 104 ------------
 .../ReinforcementLearningPPO_multiproc.py     | 132 ----------------
 .../ReinforcementLearningTDQN.py              | 115 --------------
 .../ReinforcementLearningTDQN_multiproc.py    | 148 ------------------
 11 files changed, 295 insertions(+), 587 deletions(-)
 create mode 100644 freqtrade/freqai/prediction_models/ReinforcementLearner.py
 rename freqtrade/freqai/{RL/TDQNagent.py => prediction_models/ReinforcementLearnerCustomAgent.py} (81%)
 create mode 100644 freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
 delete mode 100644 freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py
 delete mode 100644 freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py
 delete mode 100644 freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py
 delete mode 100644 freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py

diff --git a/config_examples/config_freqai-rl.example.json b/config_examples/config_freqai-rl.example.json
index 1af872552..fa08cdd60 100644
--- a/config_examples/config_freqai-rl.example.json
+++ b/config_examples/config_freqai-rl.example.json
@@ -55,7 +55,7 @@
     ],
     "freqai": {
         "enabled": true,
-        "model_save_type": "stable_baselines_dqn",
+        "model_save_type": "stable_baselines",
         "conv_width": 10,
         "purge_old_models": true,
         "train_period_days": 10,
@@ -85,8 +85,11 @@
             "verbose": 1
         },
         "rl_config": {
-            "train_cycles": 15,
-            "eval_cycles": 5,
+            "train_cycles": 10,
+            "eval_cycles": 3,
+            "thread_count": 4,
+            "model_type": "PPO",
+            "policy_type": "MlpPolicy",
             "model_reward_parameters": {
                 "rr": 1,
                 "profit_aim": 0.02
diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py
index 4c946a5b2..7d3cbffbe 100644
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -266,59 +266,28 @@ class Base5ActionRLEnv(gym.Env):
 
         # close long
         if action == Actions.Long_exit.value and self._position == Positions.Long:
-            if len(self.close_trade_profit):
-                # aim x2 rw
-                if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                    last_trade_price = self.add_buy_fee(
-                        self.prices.iloc[self._last_trade_tick].open)
-                    current_price = self.add_sell_fee(
-                        self.prices.iloc[self._current_tick].open)
-                    return float((np.log(current_price) - np.log(last_trade_price)) * 2)
-                # less than aim x1 rw
-                elif self.close_trade_profit[-1] < self.profit_aim * self.rr:
-                    last_trade_price = self.add_buy_fee(
-                        self.prices.iloc[self._last_trade_tick].open
-                    )
-                    current_price = self.add_sell_fee(
-                        self.prices.iloc[self._current_tick].open
-                    )
-                    return float(np.log(current_price) - np.log(last_trade_price))
-                # # less than RR SL x2 neg rw
-                # elif self.close_trade_profit[-1] < (self.profit_aim * -1):
-                #     last_trade_price = self.add_buy_fee(
-                #         self.prices.iloc[self._last_trade_tick].open)
-                #     current_price = self.add_sell_fee(
-                #         self.prices.iloc[self._current_tick].open)
-                #     return
float((np.log(current_price) - np.log(last_trade_price)) * 2) * -1 + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + return float(np.log(current_price) - np.log(last_trade_price)) + + if action == Actions.Long_exit.value and self._position == Positions.Long: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + return float((np.log(current_price) - np.log(last_trade_price)) * 2) # close short if action == Actions.Short_exit.value and self._position == Positions.Short: - if len(self.close_trade_profit): - # aim x2 rw - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_sell_fee( - self.prices.iloc[self._last_trade_tick].open - ) - current_price = self.add_buy_fee( - self.prices.iloc[self._current_tick].open - ) - return float((np.log(last_trade_price) - np.log(current_price)) * 2) - # less than aim x1 rw - elif self.close_trade_profit[-1] < self.profit_aim * self.rr: - last_trade_price = self.add_sell_fee( - self.prices.iloc[self._last_trade_tick].open - ) - current_price = self.add_buy_fee( - self.prices.iloc[self._current_tick].open - ) - return float(np.log(last_trade_price) - np.log(current_price)) - # # less than RR SL x2 neg rw - # elif self.close_trade_profit[-1] > self.profit_aim * self.rr: - # last_trade_price = self.add_sell_fee( - # self.prices.iloc[self._last_trade_tick].open) - # current_price = self.add_buy_fee( - # self.prices.iloc[self._current_tick].open) - # return float((np.log(last_trade_price) - np.log(current_price)) * 2) * -1 + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + return float(np.log(last_trade_price) - np.log(current_price)) + + if action == Actions.Short_exit.value and self._position == Positions.Short: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + return float((np.log(last_trade_price) - np.log(current_price)) * 2) + return 0. 
def _update_profit(self, action): diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index 9c7b1e4b4..9cada2bf0 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -11,8 +11,12 @@ from freqtrade.freqai.freqai_interface import IFreqaiModel from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions from freqtrade.persistence import Trade import torch.multiprocessing +from stable_baselines3.common.callbacks import EvalCallback from stable_baselines3.common.monitor import Monitor import torch as th +from typing import Callable +from stable_baselines3.common.utils import set_random_seed +import gym logger = logging.getLogger(__name__) torch.multiprocessing.set_sharing_strategy('file_system') @@ -25,9 +29,15 @@ class BaseReinforcementLearningModel(IFreqaiModel): def __init__(self, **kwargs): super().__init__(config=kwargs['config']) - th.set_num_threads(self.freqai_info.get('data_kitchen_thread_count', 4)) + th.set_num_threads(self.freqai_info['rl_config'].get('thread_count', 4)) self.reward_params = self.freqai_info['rl_config']['model_reward_parameters'] self.train_env: Base5ActionRLEnv = None + self.eval_env: Base5ActionRLEnv = None + self.eval_callback: EvalCallback = None + mod = __import__('stable_baselines3', fromlist=[ + self.freqai_info['rl_config']['model_type']]) + self.MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type']) + self.policy_type = self.freqai_info['rl_config']['policy_type'] def train( self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen @@ -67,7 +77,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): ) logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') - self.set_train_and_eval_environments(data_dictionary, prices_train, prices_test) + self.set_train_and_eval_environments(data_dictionary, prices_train, prices_test, dk) model = self.fit_rl(data_dictionary, dk) @@ -75,13 +85,13 @@ class BaseReinforcementLearningModel(IFreqaiModel): return model - def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test): + def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test, dk): """ - User overrides this in their prediction model if they are custom a MyRLEnv. 
Othwerwise - leaving this will default to Base5ActEnv + User overrides this as shown here if they are using a custom MyRLEnv """ train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] + eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) # environments if not self.train_env: @@ -90,11 +100,17 @@ class BaseReinforcementLearningModel(IFreqaiModel): self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test, window_size=self.CONV_WIDTH, reward_kwargs=self.reward_params), ".") + self.eval_callback = EvalCallback(self.eval_env, deterministic=True, + render=False, eval_freq=eval_freq, + best_model_save_path=dk.data_path) else: - self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) - self.eval_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) self.train_env.reset() self.eval_env.reset() + self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) + self.eval_env.reset_env(test_df, prices_test, self.CONV_WIDTH, self.reward_params) + self.eval_callback.__init__(self.eval_env, deterministic=True, + render=False, eval_freq=eval_freq, + best_model_save_path=dk.data_path) @abstractmethod def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): @@ -206,16 +222,28 @@ class BaseReinforcementLearningModel(IFreqaiModel): # all the other existing fit() functions to include dk argument. For now we instantiate and # leave it. def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any: - """ - Most regressors use the same function names and arguments e.g. user - can drop in LGBMRegressor in place of CatBoostRegressor and all data - management will be properly handled by Freqai. - :param data_dictionary: Dict = the dictionary constructed by DataHandler to hold - all the training and test data/labels. - """ - return +def make_env(env_id: str, rank: int, seed: int, train_df, price, + reward_params, window_size, monitor=False) -> Callable: + """ + Utility function for multiprocessed env. + + :param env_id: (str) the environment ID + :param num_env: (int) the number of environment you wish to have in subprocesses + :param seed: (int) the inital seed for RNG + :param rank: (int) index of the subprocess + :return: (Callable) + """ + def _init() -> gym.Env: + + env = MyRLEnv(df=train_df, prices=price, window_size=window_size, + reward_kwargs=reward_params, id=env_id, seed=seed + rank) + if monitor: + env = Monitor(env, ".") + return env + set_random_seed(seed) + return _init class MyRLEnv(Base5ActionRLEnv): """ @@ -229,24 +257,24 @@ class MyRLEnv(Base5ActionRLEnv): return 0. 
# close long - if action == Actions.Long_sell.value and self._position == Positions.Long: + if action == Actions.Long_exit.value and self._position == Positions.Long: last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) return float(np.log(current_price) - np.log(last_trade_price)) - if action == Actions.Long_sell.value and self._position == Positions.Long: + if action == Actions.Long_exit.value and self._position == Positions.Long: if self.close_trade_profit[-1] > self.profit_aim * self.rr: last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) return float((np.log(current_price) - np.log(last_trade_price)) * 2) # close short - if action == Actions.Short_buy.value and self._position == Positions.Short: + if action == Actions.Short_exit.value and self._position == Positions.Short: last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) return float(np.log(last_trade_price) - np.log(current_price)) - if action == Actions.Short_buy.value and self._position == Positions.Short: + if action == Actions.Short_exit.value and self._position == Positions.Short: if self.close_trade_profit[-1] > self.profit_aim * self.rr: last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 9603fb9ab..c37973551 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -471,12 +471,11 @@ class FreqaiDataDrawer: elif model_type == 'keras': from tensorflow import keras model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5") - elif model_type == 'stable_baselines_ppo': - from stable_baselines3.ppo.ppo import PPO - model = PPO.load(dk.data_path / f"{dk.model_filename}_model") - elif model_type == 'stable_baselines_dqn': - from stable_baselines3 import DQN - model = DQN.load(dk.data_path / f"{dk.model_filename}_model") + elif model_type == 'stable_baselines': + mod = __import__('stable_baselines3', fromlist=[ + self.freqai_info['rl_config']['model_type']]) + MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type']) + model = MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model") if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file(): dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib") diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py new file mode 100644 index 000000000..2faa6eb3a --- /dev/null +++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py @@ -0,0 +1,82 @@ +import logging +from typing import Any, Dict # , Tuple + +# import numpy.typing as npt +import torch as th +import numpy as np +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions +from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel +from pathlib import Path + +logger = logging.getLogger(__name__) + + +class ReinforcementLearner(BaseReinforcementLearningModel): + """ + User created Reinforcement Learning Model prediction model. 
+ """ + + def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): + + train_df = data_dictionary["train_features"] + total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) + + policy_kwargs = dict(activation_fn=th.nn.ReLU, + net_arch=[256, 256, 128]) + + model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, + tensorboard_log=Path(dk.data_path / "tensorboard"), + **self.freqai_info['model_training_parameters'] + ) + + model.learn( + total_timesteps=int(total_timesteps), + callback=self.eval_callback + ) + + if Path(dk.data_path / "best_model.zip").is_file(): + logger.info('Callback found a best model.') + best_model = self.MODELCLASS.load(dk.data_path / "best_model") + return best_model + + logger.info('Couldnt find best model, using final model instead.') + + return model + + +class MyRLEnv(Base5ActionRLEnv): + """ + User can modify any part of the environment by overriding base + functions + """ + def calculate_reward(self, action): + + if self._last_trade_tick is None: + return 0. + + # close long + if action == Actions.Long_exit.value and self._position == Positions.Long: + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + return float(np.log(current_price) - np.log(last_trade_price)) + + if action == Actions.Long_exit.value and self._position == Positions.Long: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + return float((np.log(current_price) - np.log(last_trade_price)) * 2) + + # close short + if action == Actions.Short_exit.value and self._position == Positions.Short: + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + return float(np.log(last_trade_price) - np.log(current_price)) + + if action == Actions.Short_exit.value and self._position == Positions.Short: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + return float((np.log(last_trade_price) - np.log(current_price)) * 2) + + return 0. 
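
Note on the calculate_reward() added above: its first Long_exit/Short_exit branch returns unconditionally, so the duplicated branch that doubles the reward when close_trade_profit[-1] > profit_aim * rr can never run. The following is a minimal sketch, not part of the patch, of one possible ordering that keeps both behaviours; it reuses only names already present in the new file (np, Actions, Positions, Base5ActionRLEnv) and guards the profit lookup the way the deleted code did.

import numpy as np

from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions


class MyRLEnv(Base5ActionRLEnv):
    """Sketch: same log-return reward as above, but the 2x bonus branch is reachable."""

    def calculate_reward(self, action):
        if self._last_trade_tick is None:
            return 0.

        # double the reward when the closed trade beat profit_aim * rr,
        # mirroring the intent of the duplicated branches above
        factor = 1.
        if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
            factor = 2.

        # close long
        if action == Actions.Long_exit.value and self._position == Positions.Long:
            last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
            current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
            return float(np.log(current_price) - np.log(last_trade_price)) * factor

        # close short
        if action == Actions.Short_exit.value and self._position == Positions.Short:
            last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
            current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
            return float(np.log(last_trade_price) - np.log(current_price)) * factor

        return 0.
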
diff --git a/freqtrade/freqai/RL/TDQNagent.py b/freqtrade/freqai/prediction_models/ReinforcementLearnerCustomAgent.py similarity index 81% rename from freqtrade/freqai/RL/TDQNagent.py rename to freqtrade/freqai/prediction_models/ReinforcementLearnerCustomAgent.py index 584f6a8ef..bb16b612b 100644 --- a/freqtrade/freqai/RL/TDQNagent.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearnerCustomAgent.py @@ -1,17 +1,59 @@ -from typing import Any, Dict, List, Optional, Tuple, Type, Union - -import gym -import torch +import logging import torch as th +from typing import Any, Dict, List, Optional, Tuple, Type, Union +from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel from stable_baselines3 import DQN from stable_baselines3.common.buffers import ReplayBuffer -from stable_baselines3.common.policies import BasePolicy -from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor, - FlattenExtractor) -from stable_baselines3.common.type_aliases import GymEnv, Schedule +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from pathlib import Path from stable_baselines3.dqn.policies import (CnnPolicy, DQNPolicy, MlpPolicy, QNetwork) from torch import nn +import gym +from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor, + FlattenExtractor) +from stable_baselines3.common.type_aliases import GymEnv, Schedule +from stable_baselines3.common.policies import BasePolicy + +logger = logging.getLogger(__name__) + + +class ReinforcementLearnerCustomAgent(BaseReinforcementLearningModel): + """ + User can customize agent by defining the class and using it directly. + Here the example is "TDQN" + """ + + def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): + + train_df = data_dictionary["train_features"] + total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) + + policy_kwargs = dict(activation_fn=th.nn.ReLU, + net_arch=[256, 256, 128]) + + # TDQN is a custom agent defined below + model = TDQN(self.policy_type, self.train_env, + tensorboard_log=Path(dk.data_path / "tensorboard"), + policy_kwargs=policy_kwargs, + **self.freqai_info['model_training_parameters'] + ) + + model.learn( + total_timesteps=int(total_timesteps), + callback=self.eval_callback + ) + + if Path(dk.data_path / "best_model.zip").is_file(): + logger.info('Callback found a best model.') + best_model = self.MODELCLASS.load(dk.data_path / "best_model") + return best_model + + logger.info('Couldnt find best model, using final model instead.') + + return model + +# User creates their custom agent and networks as shown below def create_mlp_( @@ -72,7 +114,7 @@ class TDQNetwork(QNetwork): def init_weights(self, m): if type(m) == nn.Linear: - torch.nn.init.kaiming_uniform_(m.weight) + th.nn.init.kaiming_uniform_(m.weight) class TDQNPolicy(DQNPolicy): @@ -175,7 +217,7 @@ class TDQN(DQN): exploration_initial_eps: float = 1.0, exploration_final_eps: float = 0.05, max_grad_norm: float = 10, - tensorboard_log: Optional[str] = None, + tensorboard_log: Optional[Path] = None, create_eval_env: bool = False, policy_kwargs: Optional[Dict[str, Any]] = None, verbose: int = 1, diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py new file mode 100644 index 000000000..1854bb1a5 --- /dev/null +++ b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py @@ -0,0 +1,84 @@ +import logging +from typing import Any, Dict # , Tuple + 
+# import numpy.typing as npt +import torch as th +from stable_baselines3.common.callbacks import EvalCallback +from stable_baselines3.common.vec_env import SubprocVecEnv +from freqtrade.freqai.RL.BaseReinforcementLearningModel import (BaseReinforcementLearningModel, + make_env) +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen + +from pathlib import Path + +logger = logging.getLogger(__name__) + + +class ReinforcementLearner_multiproc(BaseReinforcementLearningModel): + """ + User created Reinforcement Learning Model prediction model. + """ + + def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): + + train_df = data_dictionary["train_features"] + total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) + + # model arch + policy_kwargs = dict(activation_fn=th.nn.ReLU, + net_arch=[512, 512, 512]) + + model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, + tensorboard_log=Path(dk.data_path / "tensorboard"), + **self.freqai_info['model_training_parameters'] + ) + + model.learn( + total_timesteps=int(total_timesteps), + callback=self.eval_callback + ) + + if Path(dk.data_path / "best_model.zip").is_file(): + logger.info('Callback found a best model.') + best_model = self.MODELCLASS.load(dk.data_path / "best_model") + return best_model + + logger.info('Couldnt find best model, using final model instead.') + + return model + + def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test, dk): + """ + If user has particular environment configuration needs, they can do that by + overriding this function. In the present case, the user wants to setup training + environments for multiple workers. + """ + train_df = data_dictionary["train_features"] + test_df = data_dictionary["test_features"] + eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) + + # environments + if not self.train_env: + env_id = "train_env" + num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2) + self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, + self.reward_params, self.CONV_WIDTH) for i + in range(num_cpu)]) + + eval_env_id = 'eval_env' + self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, + self.reward_params, self.CONV_WIDTH, monitor=True) for i + in range(num_cpu)]) + self.eval_callback = EvalCallback(self.eval_env, deterministic=True, + render=False, eval_freq=eval_freq, + best_model_save_path=dk.data_path) + else: + self.train_env.env_method('reset') + self.eval_env.env_method('reset') + self.train_env.env_method('reset_env', train_df, prices_train, + self.CONV_WIDTH, self.reward_params) + self.eval_env.env_method('reset_env', train_df, prices_train, + self.CONV_WIDTH, self.reward_params) + self.eval_callback.__init__(self.eval_env, deterministic=True, + render=False, eval_freq=eval_freq, + best_model_save_path=dk.data_path) diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py deleted file mode 100644 index 993ac263b..000000000 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py +++ /dev/null @@ -1,104 +0,0 @@ -import gc -import logging -from typing import Any, Dict # , Tuple - -import numpy as np -# import numpy.typing as npt -import torch as th -from stable_baselines3 import PPO -from stable_baselines3.common.callbacks import EvalCallback -from stable_baselines3.common.monitor import Monitor - -from 
freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.RL.Base3ActionRLEnv import Actions, Base3ActionRLEnv, Positions -from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel - - -logger = logging.getLogger(__name__) - - -class ReinforcementLearningPPO(BaseReinforcementLearningModel): - """ - User created Reinforcement Learning Model prediction model. - """ - - def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): - - train_df = data_dictionary["train_features"] - test_df = data_dictionary["test_features"] - eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) - total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) - - path = dk.data_path - eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/", - log_path=f"{path}/ppo/logs/", eval_freq=int(eval_freq), - deterministic=True, render=False) - - # model arch - policy_kwargs = dict(activation_fn=th.nn.ReLU, - net_arch=[256, 256, 128]) - - model = PPO('MlpPolicy', self.train_env, policy_kwargs=policy_kwargs, - tensorboard_log=f"{path}/ppo/tensorboard/", - **self.freqai_info['model_training_parameters'] - ) - - model.learn( - total_timesteps=int(total_timesteps), - callback=eval_callback - ) - - del model - best_model = PPO.load(dk.data_path / "best_model") - - print('Training finished!') - gc.collect() - - return best_model - - def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test): - """ - User overrides this as shown here if they are using a custom MyRLEnv - """ - train_df = data_dictionary["train_features"] - test_df = data_dictionary["test_features"] - - # environments - if not self.train_env: - self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH, - reward_kwargs=self.reward_params) - self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test, - window_size=self.CONV_WIDTH, - reward_kwargs=self.reward_params), ".") - else: - self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) - self.eval_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) - self.train_env.reset() - self.eval_env.reset() - - -class MyRLEnv(Base3ActionRLEnv): - """ - User can override any function in BaseRLEnv and gym.Env - """ - - def calculate_reward(self, action): - - if self._last_trade_tick is None: - return 0. - - # close long - if (action == Actions.Short.value or - action == Actions.Neutral.value) and self._position == Positions.Long: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(current_price) - np.log(last_trade_price)) - - # close short - if (action == Actions.Long.value or - action == Actions.Neutral.value) and self._position == Positions.Short: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(last_trade_price) - np.log(current_price)) - - return 0. 
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py deleted file mode 100644 index 5fa24a599..000000000 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py +++ /dev/null @@ -1,132 +0,0 @@ -import logging -from typing import Any, Dict # , Tuple - -import numpy as np -# import numpy.typing as npt -import torch as th -from stable_baselines3.common.monitor import Monitor -from typing import Callable -from stable_baselines3 import PPO -from stable_baselines3.common.callbacks import EvalCallback -from stable_baselines3.common.vec_env import SubprocVecEnv -from stable_baselines3.common.utils import set_random_seed -from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions -from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -import gym - -logger = logging.getLogger(__name__) - - -def make_env(env_id: str, rank: int, seed: int, train_df, price, - reward_params, window_size, monitor=False) -> Callable: - """ - Utility function for multiprocessed env. - - :param env_id: (str) the environment ID - :param num_env: (int) the number of environment you wish to have in subprocesses - :param seed: (int) the inital seed for RNG - :param rank: (int) index of the subprocess - :return: (Callable) - """ - def _init() -> gym.Env: - - env = MyRLEnv(df=train_df, prices=price, window_size=window_size, - reward_kwargs=reward_params, id=env_id, seed=seed + rank) - if monitor: - env = Monitor(env, ".") - return env - set_random_seed(seed) - return _init - - -class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel): - """ - User created Reinforcement Learning Model prediction model. - """ - - def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): - - train_df = data_dictionary["train_features"] - test_df = data_dictionary["test_features"] - eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) - total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) - - path = dk.data_path - eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/", - log_path=f"{path}/ppo/logs/", eval_freq=int(eval_freq), - deterministic=True, render=False) - - # model arch - policy_kwargs = dict(activation_fn=th.nn.ReLU, - net_arch=[512, 512, 512]) - - model = PPO('MlpPolicy', self.train_env, policy_kwargs=policy_kwargs, - tensorboard_log=f"{path}/ppo/tensorboard/", - **self.freqai_info['model_training_parameters'] - ) - - model.learn( - total_timesteps=int(total_timesteps), - callback=eval_callback - ) - - best_model = PPO.load(dk.data_path / "best_model") - print('Training finished!') - - return best_model - - def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test): - """ - User overrides this in their prediction model if they are custom a MyRLEnv. 
Othwerwise - leaving this will default to Base5ActEnv - """ - train_df = data_dictionary["train_features"] - test_df = data_dictionary["test_features"] - - # environments - if not self.train_env: - env_id = "train_env" - num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2) - self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, - self.reward_params, self.CONV_WIDTH) for i - in range(num_cpu)]) - - eval_env_id = 'eval_env' - self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, - self.reward_params, self.CONV_WIDTH, monitor=True) for i - in range(num_cpu)]) - else: - self.train_env.env_method('reset_env', train_df, prices_train, - self.CONV_WIDTH, self.reward_params) - self.eval_env.env_method('reset_env', train_df, prices_train, - self.CONV_WIDTH, self.reward_params) - self.train_env.env_method('reset') - self.eval_env.env_method('reset') - - -class MyRLEnv(Base3ActionRLEnv): - """ - User can override any function in BaseRLEnv and gym.Env - """ - - def calculate_reward(self, action): - - if self._last_trade_tick is None: - return 0. - - # close long - if (action == Actions.Short.value or - action == Actions.Neutral.value) and self._position == Positions.Long: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(current_price) - np.log(last_trade_price)) - - # close short - if (action == Actions.Long.value or - action == Actions.Neutral.value) and self._position == Positions.Short: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(last_trade_price) - np.log(current_price)) - - return 0. diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py deleted file mode 100644 index 3c4ac6bdb..000000000 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py +++ /dev/null @@ -1,115 +0,0 @@ -import logging -from typing import Any, Dict # Optional -import torch as th -from stable_baselines3.common.callbacks import EvalCallback -from stable_baselines3.common.monitor import Monitor -from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions -from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel -from freqtrade.freqai.RL.TDQNagent import TDQN -from stable_baselines3 import DQN -from stable_baselines3.common.buffers import ReplayBuffer -import numpy as np -import gc -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen - -logger = logging.getLogger(__name__) - - -class ReinforcementLearningTDQN(BaseReinforcementLearningModel): - """ - User created Reinforcement Learning Model prediction model. 
- """ - - def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): - - train_df = data_dictionary["train_features"] - test_df = data_dictionary["test_features"] - eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) - total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) - - path = dk.data_path - eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/", - log_path=f"{path}/tdqn/logs/", eval_freq=int(eval_freq), - deterministic=True, render=False) - - # model arch - policy_kwargs = dict(activation_fn=th.nn.ReLU, - net_arch=[256, 256, 128]) - - model = TDQN('TMultiInputPolicy', self.train_env, - tensorboard_log=f"{path}/tdqn/tensorboard/", - policy_kwargs=policy_kwargs, - replay_buffer_class=ReplayBuffer, - **self.freqai_info['model_training_parameters'] - ) - - model.learn( - total_timesteps=int(total_timesteps), - callback=eval_callback - ) - - del model - best_model = DQN.load(dk.data_path / "best_model") - - print('Training finished!') - gc.collect() - return best_model - - def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test): - """ - User overrides this as shown here if they are using a custom MyRLEnv - """ - train_df = data_dictionary["train_features"] - test_df = data_dictionary["test_features"] - - # environments - if not self.train_env: - self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH, - reward_kwargs=self.reward_params) - self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test, - window_size=self.CONV_WIDTH, - reward_kwargs=self.reward_params), ".") - else: - self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) - self.eval_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) - self.train_env.reset() - self.eval_env.reset() - - -# User can inherit and customize 5 action environment -class MyRLEnv(Base5ActionRLEnv): - """ - User can override any function in BaseRLEnv and gym.Env. Here the user - Adds 5 actions. - """ - - def calculate_reward(self, action): - - if self._last_trade_tick is None: - return 0. 
- - # close long - if action == Actions.Long_sell.value and self._position == Positions.Long: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(current_price) - np.log(last_trade_price)) - - if action == Actions.Long_sell.value and self._position == Positions.Long: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float((np.log(current_price) - np.log(last_trade_price)) * 2) - - # close short - if action == Actions.Short_buy.value and self._position == Positions.Short: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(last_trade_price) - np.log(current_price)) - - if action == Actions.Short_buy.value and self._position == Positions.Short: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float((np.log(last_trade_price) - np.log(current_price)) * 2) - - return 0. diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py deleted file mode 100644 index 8634fd958..000000000 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN_multiproc.py +++ /dev/null @@ -1,148 +0,0 @@ -import logging -from typing import Any, Dict # Optional -import torch as th -import numpy as np -import gym -from typing import Callable -from stable_baselines3.common.callbacks import EvalCallback -# EvalCallback , StopTrainingOnNoModelImprovement, StopTrainingOnRewardThreshold -from stable_baselines3.common.monitor import Monitor -from stable_baselines3.common.vec_env import SubprocVecEnv -from stable_baselines3.common.utils import set_random_seed -from stable_baselines3 import DQN -from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions -from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel -from freqtrade.freqai.RL.TDQNagent import TDQN -from stable_baselines3.common.buffers import ReplayBuffer -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen - -logger = logging.getLogger(__name__) - - -def make_env(env_id: str, rank: int, seed: int, train_df, price, - reward_params, window_size, monitor=False) -> Callable: - """ - Utility function for multiprocessed env. - - :param env_id: (str) the environment ID - :param num_env: (int) the number of environment you wish to have in subprocesses - :param seed: (int) the inital seed for RNG - :param rank: (int) index of the subprocess - :return: (Callable) - """ - def _init() -> gym.Env: - - env = MyRLEnv(df=train_df, prices=price, window_size=window_size, - reward_kwargs=reward_params, id=env_id, seed=seed + rank) - if monitor: - env = Monitor(env, ".") - return env - set_random_seed(seed) - return _init - - -class ReinforcementLearningTDQN_multiproc(BaseReinforcementLearningModel): - """ - User created Reinforcement Learning Model prediction model. 
- """ - - def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): - - train_df = data_dictionary["train_features"] - test_df = data_dictionary["test_features"] - eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) - total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) - - path = dk.data_path - - eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/", - log_path=f"{path}/tdqn/logs/", eval_freq=int(eval_freq), - deterministic=True, render=False) - # model arch - policy_kwargs = dict(activation_fn=th.nn.ReLU, - net_arch=[512, 512, 512]) - - model = TDQN('TMultiInputPolicy', self.train_env, - policy_kwargs=policy_kwargs, - tensorboard_log=f"{path}/tdqn/tensorboard/", - replay_buffer_class=ReplayBuffer, - **self.freqai_info['model_training_parameters'] - ) - - model.learn( - total_timesteps=int(total_timesteps), - callback=eval_callback - ) - - best_model = DQN.load(dk.data_path / "best_model.zip") - print('Training finished!') - - return best_model - - def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test): - """ - User overrides this in their prediction model if they are custom a MyRLEnv. Othwerwise - leaving this will default to Base5ActEnv - """ - train_df = data_dictionary["train_features"] - test_df = data_dictionary["test_features"] - - # environments - if not self.train_env: - env_id = "train_env" - num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2) - self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, - self.reward_params, self.CONV_WIDTH) for i - in range(num_cpu)]) - - eval_env_id = 'eval_env' - self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, - self.reward_params, self.CONV_WIDTH, monitor=True) for i - in range(num_cpu)]) - else: - self.train_env.env_method('reset_env', train_df, prices_train, - self.CONV_WIDTH, self.reward_params) - self.eval_env.env_method('reset_env', train_df, prices_train, - self.CONV_WIDTH, self.reward_params) - self.train_env.env_method('reset') - self.eval_env.env_method('reset') - -# User can inherit and customize 5 action environment - - -class MyRLEnv(Base5ActionRLEnv): - """ - User can override any function in BaseRLEnv and gym.Env. Here the user - Adds 5 actions. - """ - - def calculate_reward(self, action): - - if self._last_trade_tick is None: - return 0. 
- - # close long - if action == Actions.Long_sell.value and self._position == Positions.Long: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(current_price) - np.log(last_trade_price)) - - if action == Actions.Long_sell.value and self._position == Positions.Long: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float((np.log(current_price) - np.log(last_trade_price)) * 2) - - # close short - if action == Actions.Short_buy.value and self._position == Positions.Short: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(last_trade_price) - np.log(current_price)) - - if action == Actions.Short_buy.value and self._position == Positions.Short: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float((np.log(last_trade_price) - np.log(current_price)) * 2) - - return 0.
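
For reference, a minimal sketch (not part of the patch) of how the new rl_config keys introduced above are intended to be consumed: model_type names any agent class exported by stable_baselines3 and policy_type names its policy, resolved with the same __import__/getattr pattern the patch adds to BaseReinforcementLearningModel.__init__() and FreqaiDataDrawer. The freqai_info dict below is a hypothetical stand-in for the real "freqai" section of the configuration.

from stable_baselines3.common.base_class import BaseAlgorithm

# hypothetical stand-in for the "freqai" section of the config example shown above
freqai_info = {
    "rl_config": {
        "train_cycles": 10,
        "eval_cycles": 3,
        "thread_count": 4,
        "model_type": "PPO",         # any class exported by stable_baselines3, e.g. "DQN", "A2C"
        "policy_type": "MlpPolicy",
        "model_reward_parameters": {"rr": 1, "profit_aim": 0.02},
    }
}

# same dynamic lookup the patch uses to turn the config string into an agent class
model_type = freqai_info["rl_config"]["model_type"]
mod = __import__("stable_baselines3", fromlist=[model_type])
MODELCLASS = getattr(mod, model_type)

assert issubclass(MODELCLASS, BaseAlgorithm)
print(f"Selected agent: {MODELCLASS.__name__} with policy {freqai_info['rl_config']['policy_type']}")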