reuse callback, allow user to access all stable_baselines3 agents via config
		| @@ -55,7 +55,7 @@ | |||||||
|     ], |     ], | ||||||
|     "freqai": { |     "freqai": { | ||||||
|         "enabled": true, |         "enabled": true, | ||||||
|         "model_save_type": "stable_baselines_dqn", |         "model_save_type": "stable_baselines", | ||||||
|         "conv_width": 10, |         "conv_width": 10, | ||||||
|         "purge_old_models": true, |         "purge_old_models": true, | ||||||
|         "train_period_days": 10, |         "train_period_days": 10, | ||||||
| @@ -85,8 +85,11 @@ | |||||||
|             "verbose": 1 |             "verbose": 1 | ||||||
|         }, |         }, | ||||||
|         "rl_config": { |         "rl_config": { | ||||||
|             "train_cycles": 15, |             "train_cycles": 10, | ||||||
|             "eval_cycles": 5, |             "eval_cycles": 3, | ||||||
|  |             "thread_count": 4, | ||||||
|  |             "model_type": "PPO", | ||||||
|  |             "policy_type": "MlpPolicy", | ||||||
|             "model_reward_parameters": { |             "model_reward_parameters": { | ||||||
|                 "rr": 1, |                 "rr": 1, | ||||||
|                 "profit_aim": 0.02 |                 "profit_aim": 0.02 | ||||||
|   | |||||||
| @@ -266,59 +266,28 @@ class Base5ActionRLEnv(gym.Env): | |||||||
|  |  | ||||||
|         # close long |         # close long | ||||||
|         if action == Actions.Long_exit.value and self._position == Positions.Long: |         if action == Actions.Long_exit.value and self._position == Positions.Long: | ||||||
|             if len(self.close_trade_profit): |             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|                 # aim x2 rw |             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) | ||||||
|                 if self.close_trade_profit[-1] > self.profit_aim * self.rr: |  | ||||||
|                     last_trade_price = self.add_buy_fee( |  | ||||||
|                         self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|                     current_price = self.add_sell_fee( |  | ||||||
|                         self.prices.iloc[self._current_tick].open) |  | ||||||
|                     return float((np.log(current_price) - np.log(last_trade_price)) * 2) |  | ||||||
|                 # less than aim x1 rw |  | ||||||
|                 elif self.close_trade_profit[-1] < self.profit_aim * self.rr: |  | ||||||
|                     last_trade_price = self.add_buy_fee( |  | ||||||
|                         self.prices.iloc[self._last_trade_tick].open |  | ||||||
|                     ) |  | ||||||
|                     current_price = self.add_sell_fee( |  | ||||||
|                         self.prices.iloc[self._current_tick].open |  | ||||||
|                     ) |  | ||||||
|             return float(np.log(current_price) - np.log(last_trade_price)) |             return float(np.log(current_price) - np.log(last_trade_price)) | ||||||
|                 # # less than RR SL x2 neg rw |  | ||||||
|                 # elif self.close_trade_profit[-1] < (self.profit_aim * -1): |         if action == Actions.Long_exit.value and self._position == Positions.Long: | ||||||
|                 #     last_trade_price = self.add_buy_fee( |             if self.close_trade_profit[-1] > self.profit_aim * self.rr: | ||||||
|                 #         self.prices.iloc[self._last_trade_tick].open) |                 last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|                 #     current_price = self.add_sell_fee( |                 current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) | ||||||
|                 #         self.prices.iloc[self._current_tick].open) |                 return float((np.log(current_price) - np.log(last_trade_price)) * 2) | ||||||
|                 #     return float((np.log(current_price) - np.log(last_trade_price)) * 2) * -1 |  | ||||||
|  |  | ||||||
|         # close short |         # close short | ||||||
|         if action == Actions.Short_exit.value and self._position == Positions.Short: |         if action == Actions.Short_exit.value and self._position == Positions.Short: | ||||||
|             if len(self.close_trade_profit): |             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|                 # aim x2 rw |             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) | ||||||
|                 if self.close_trade_profit[-1] > self.profit_aim * self.rr: |  | ||||||
|                     last_trade_price = self.add_sell_fee( |  | ||||||
|                         self.prices.iloc[self._last_trade_tick].open |  | ||||||
|                     ) |  | ||||||
|                     current_price = self.add_buy_fee( |  | ||||||
|                         self.prices.iloc[self._current_tick].open |  | ||||||
|                     ) |  | ||||||
|                     return float((np.log(last_trade_price) - np.log(current_price)) * 2) |  | ||||||
|                 # less than aim x1 rw |  | ||||||
|                 elif self.close_trade_profit[-1] < self.profit_aim * self.rr: |  | ||||||
|                     last_trade_price = self.add_sell_fee( |  | ||||||
|                         self.prices.iloc[self._last_trade_tick].open |  | ||||||
|                     ) |  | ||||||
|                     current_price = self.add_buy_fee( |  | ||||||
|                         self.prices.iloc[self._current_tick].open |  | ||||||
|                     ) |  | ||||||
|             return float(np.log(last_trade_price) - np.log(current_price)) |             return float(np.log(last_trade_price) - np.log(current_price)) | ||||||
|                 # # less than RR SL x2 neg rw |  | ||||||
|                 # elif self.close_trade_profit[-1] > self.profit_aim * self.rr: |         if action == Actions.Short_exit.value and self._position == Positions.Short: | ||||||
|                 #     last_trade_price = self.add_sell_fee( |             if self.close_trade_profit[-1] > self.profit_aim * self.rr: | ||||||
|                 #         self.prices.iloc[self._last_trade_tick].open) |                 last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|                 #     current_price = self.add_buy_fee( |                 current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) | ||||||
|                 #         self.prices.iloc[self._current_tick].open) |                 return float((np.log(last_trade_price) - np.log(current_price)) * 2) | ||||||
|                 #     return float((np.log(last_trade_price) - np.log(current_price)) * 2) * -1 |  | ||||||
|         return 0. |         return 0. | ||||||
|  |  | ||||||
|     def _update_profit(self, action): |     def _update_profit(self, action): | ||||||
|   | |||||||
| @@ -11,8 +11,12 @@ from freqtrade.freqai.freqai_interface import IFreqaiModel | |||||||
| from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions | from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions | ||||||
| from freqtrade.persistence import Trade | from freqtrade.persistence import Trade | ||||||
| import torch.multiprocessing | import torch.multiprocessing | ||||||
|  | from stable_baselines3.common.callbacks import EvalCallback | ||||||
| from stable_baselines3.common.monitor import Monitor | from stable_baselines3.common.monitor import Monitor | ||||||
| import torch as th | import torch as th | ||||||
|  | from typing import Callable | ||||||
|  | from stable_baselines3.common.utils import set_random_seed | ||||||
|  | import gym | ||||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||
|  |  | ||||||
| torch.multiprocessing.set_sharing_strategy('file_system') | torch.multiprocessing.set_sharing_strategy('file_system') | ||||||
| @@ -25,9 +29,15 @@ class BaseReinforcementLearningModel(IFreqaiModel): | |||||||
|  |  | ||||||
|     def __init__(self, **kwargs): |     def __init__(self, **kwargs): | ||||||
|         super().__init__(config=kwargs['config']) |         super().__init__(config=kwargs['config']) | ||||||
|         th.set_num_threads(self.freqai_info.get('data_kitchen_thread_count', 4)) |         th.set_num_threads(self.freqai_info['rl_config'].get('thread_count', 4)) | ||||||
|         self.reward_params = self.freqai_info['rl_config']['model_reward_parameters'] |         self.reward_params = self.freqai_info['rl_config']['model_reward_parameters'] | ||||||
|         self.train_env: Base5ActionRLEnv = None |         self.train_env: Base5ActionRLEnv = None | ||||||
|  |         self.eval_env: Base5ActionRLEnv = None | ||||||
|  |         self.eval_callback: EvalCallback = None | ||||||
|  |         mod = __import__('stable_baselines3', fromlist=[ | ||||||
|  |                          self.freqai_info['rl_config']['model_type']]) | ||||||
|  |         self.MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type']) | ||||||
|  |         self.policy_type = self.freqai_info['rl_config']['policy_type'] | ||||||
|  |  | ||||||
|     def train( |     def train( | ||||||
|         self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen |         self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen | ||||||
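Note on the __init__ change above: resolving the agent class via __import__/getattr is what lets any stable_baselines3 algorithm (PPO, A2C, DQN, ...) be selected purely from rl_config. A minimal standalone sketch of the same lookup, assuming the PPO/MlpPolicy values from the config hunk above and using CartPole only as a stand-in for the FreqAI trading environment:

import importlib

rl_config = {"model_type": "PPO", "policy_type": "MlpPolicy"}  # mirrors the example config above

# importlib.import_module is equivalent to the __import__('stable_baselines3', fromlist=[...]) call in the diff
sb3 = importlib.import_module("stable_baselines3")
ModelClass = getattr(sb3, rl_config["model_type"])

# "CartPole-v1" is a placeholder environment; FreqAI passes its Base5ActionRLEnv instead
model = ModelClass(rl_config["policy_type"], "CartPole-v1", verbose=0)
model.learn(total_timesteps=1_000)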
| @@ -67,7 +77,7 @@ class BaseReinforcementLearningModel(IFreqaiModel): | |||||||
|         ) |         ) | ||||||
|         logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') |         logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') | ||||||
|  |  | ||||||
|         self.set_train_and_eval_environments(data_dictionary, prices_train, prices_test) |         self.set_train_and_eval_environments(data_dictionary, prices_train, prices_test, dk) | ||||||
|  |  | ||||||
|         model = self.fit_rl(data_dictionary, dk) |         model = self.fit_rl(data_dictionary, dk) | ||||||
|  |  | ||||||
| @@ -75,13 +85,13 @@ class BaseReinforcementLearningModel(IFreqaiModel): | |||||||
|  |  | ||||||
|         return model |         return model | ||||||
|  |  | ||||||
|     def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test): |     def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test, dk): | ||||||
|         """ |         """ | ||||||
|         User overrides this in their prediction model if they are custom a MyRLEnv. Othwerwise |         User overrides this as shown here if they are using a custom MyRLEnv | ||||||
|         leaving this will default to Base5ActEnv |  | ||||||
|         """ |         """ | ||||||
|         train_df = data_dictionary["train_features"] |         train_df = data_dictionary["train_features"] | ||||||
|         test_df = data_dictionary["test_features"] |         test_df = data_dictionary["test_features"] | ||||||
|  |         eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) | ||||||
|  |  | ||||||
|         # environments |         # environments | ||||||
|         if not self.train_env: |         if not self.train_env: | ||||||
| @@ -90,11 +100,17 @@ class BaseReinforcementLearningModel(IFreqaiModel): | |||||||
|             self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test, |             self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test, | ||||||
|                                     window_size=self.CONV_WIDTH, |                                     window_size=self.CONV_WIDTH, | ||||||
|                                     reward_kwargs=self.reward_params), ".") |                                     reward_kwargs=self.reward_params), ".") | ||||||
|  |             self.eval_callback = EvalCallback(self.eval_env, deterministic=True, | ||||||
|  |                                               render=False, eval_freq=eval_freq, | ||||||
|  |                                               best_model_save_path=dk.data_path) | ||||||
|         else: |         else: | ||||||
|             self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) |  | ||||||
|             self.eval_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) |  | ||||||
|             self.train_env.reset() |             self.train_env.reset() | ||||||
|             self.eval_env.reset() |             self.eval_env.reset() | ||||||
|  |             self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) | ||||||
|  |             self.eval_env.reset_env(test_df, prices_test, self.CONV_WIDTH, self.reward_params) | ||||||
|  |             self.eval_callback.__init__(self.eval_env, deterministic=True, | ||||||
|  |                                         render=False, eval_freq=eval_freq, | ||||||
|  |                                         best_model_save_path=dk.data_path) | ||||||
|  |  | ||||||
|     @abstractmethod |     @abstractmethod | ||||||
|     def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): |     def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): | ||||||
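The "reuse callback" part of this commit is the else-branch above: the EvalCallback is created once and, on subsequent retrainings, re-pointed at the refreshed eval environment by calling __init__ on the existing object instead of constructing a new callback. A rough standalone sketch of that pattern (CartPole and the local save path are stand-ins for the FreqAI environment and dk.data_path):

import gym
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor

eval_env = Monitor(gym.make("CartPole-v1"), ".")
eval_callback = EvalCallback(eval_env, deterministic=True, render=False,
                             eval_freq=500, best_model_save_path="./outputs")

model = PPO("MlpPolicy", gym.make("CartPole-v1"), verbose=0)
model.learn(total_timesteps=1_000, callback=eval_callback)

# on the next training window, the same callback object is re-initialised in place
eval_callback.__init__(eval_env, deterministic=True, render=False,
                       eval_freq=500, best_model_save_path="./outputs")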
| @@ -206,16 +222,28 @@ class BaseReinforcementLearningModel(IFreqaiModel): | |||||||
|     # all the other existing fit() functions to include dk argument. For now we instantiate and |     # all the other existing fit() functions to include dk argument. For now we instantiate and | ||||||
|     # leave it. |     # leave it. | ||||||
|     def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any: |     def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any: | ||||||
|         """ |  | ||||||
|         Most regressors use the same function names and arguments e.g. user |  | ||||||
|         can drop in LGBMRegressor in place of CatBoostRegressor and all data |  | ||||||
|         management will be properly handled by Freqai. |  | ||||||
|         :param data_dictionary: Dict = the dictionary constructed by DataHandler to hold |  | ||||||
|                                 all the training and test data/labels. |  | ||||||
|         """ |  | ||||||
|  |  | ||||||
|         return |         return | ||||||
|  |  | ||||||
|  | def make_env(env_id: str, rank: int, seed: int, train_df, price, | ||||||
|  |              reward_params, window_size, monitor=False) -> Callable: | ||||||
|  |     """ | ||||||
|  |     Utility function for multiprocessed env. | ||||||
|  |  | ||||||
|  |     :param env_id: (str) the environment ID | ||||||
|  |     :param num_env: (int) the number of environments you wish to have in subprocesses | ||||||
|  |     :param seed: (int) the initial seed for RNG | ||||||
|  |     :param rank: (int) index of the subprocess | ||||||
|  |     :return: (Callable) | ||||||
|  |     """ | ||||||
|  |     def _init() -> gym.Env: | ||||||
|  |  | ||||||
|  |         env = MyRLEnv(df=train_df, prices=price, window_size=window_size, | ||||||
|  |                       reward_kwargs=reward_params, id=env_id, seed=seed + rank) | ||||||
|  |         if monitor: | ||||||
|  |             env = Monitor(env, ".") | ||||||
|  |         return env | ||||||
|  |     set_random_seed(seed) | ||||||
|  |     return _init | ||||||
|  |  | ||||||
| class MyRLEnv(Base5ActionRLEnv): | class MyRLEnv(Base5ActionRLEnv): | ||||||
|     """ |     """ | ||||||
| @@ -229,24 +257,24 @@ class MyRLEnv(Base5ActionRLEnv): | |||||||
|             return 0. |             return 0. | ||||||
|  |  | ||||||
|         # close long |         # close long | ||||||
|         if action == Actions.Long_sell.value and self._position == Positions.Long: |         if action == Actions.Long_exit.value and self._position == Positions.Long: | ||||||
|             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) |             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) |             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) | ||||||
|             return float(np.log(current_price) - np.log(last_trade_price)) |             return float(np.log(current_price) - np.log(last_trade_price)) | ||||||
|  |  | ||||||
|         if action == Actions.Long_sell.value and self._position == Positions.Long: |         if action == Actions.Long_exit.value and self._position == Positions.Long: | ||||||
|             if self.close_trade_profit[-1] > self.profit_aim * self.rr: |             if self.close_trade_profit[-1] > self.profit_aim * self.rr: | ||||||
|                 last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) |                 last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|                 current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) |                 current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) | ||||||
|                 return float((np.log(current_price) - np.log(last_trade_price)) * 2) |                 return float((np.log(current_price) - np.log(last_trade_price)) * 2) | ||||||
|  |  | ||||||
|         # close short |         # close short | ||||||
|         if action == Actions.Short_buy.value and self._position == Positions.Short: |         if action == Actions.Short_exit.value and self._position == Positions.Short: | ||||||
|             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) |             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) |             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) | ||||||
|             return float(np.log(last_trade_price) - np.log(current_price)) |             return float(np.log(last_trade_price) - np.log(current_price)) | ||||||
|  |  | ||||||
|         if action == Actions.Short_buy.value and self._position == Positions.Short: |         if action == Actions.Short_exit.value and self._position == Positions.Short: | ||||||
|             if self.close_trade_profit[-1] > self.profit_aim * self.rr: |             if self.close_trade_profit[-1] > self.profit_aim * self.rr: | ||||||
|                 last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) |                 last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|                 current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) |                 current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) | ||||||
|   | |||||||
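The make_env helper added above returns a thunk that each worker process calls to build its own environment; it is consumed by SubprocVecEnv in the multiprocessing learner further down. A generic, self-contained sketch of that pattern, with a plain gym environment standing in for MyRLEnv:

import gym
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import SubprocVecEnv

def make_env(env_id: str, rank: int, seed: int = 0):
    def _init() -> gym.Env:
        # each subprocess builds and seeds its own environment instance
        env = gym.make(env_id)
        env.seed(seed + rank)  # gym < 0.26 seeding API, as used by this codebase
        return env
    set_random_seed(seed)
    return _init

if __name__ == "__main__":
    num_cpu = 4  # hypothetical worker count
    train_env = SubprocVecEnv([make_env("CartPole-v1", i) for i in range(num_cpu)])
    obs = train_env.reset()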
| @@ -471,12 +471,11 @@ class FreqaiDataDrawer: | |||||||
|         elif model_type == 'keras': |         elif model_type == 'keras': | ||||||
|             from tensorflow import keras |             from tensorflow import keras | ||||||
|             model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5") |             model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5") | ||||||
|         elif model_type == 'stable_baselines_ppo': |         elif model_type == 'stable_baselines': | ||||||
|             from stable_baselines3.ppo.ppo import PPO |             mod = __import__('stable_baselines3', fromlist=[ | ||||||
|             model = PPO.load(dk.data_path / f"{dk.model_filename}_model") |                              self.freqai_info['rl_config']['model_type']]) | ||||||
|         elif model_type == 'stable_baselines_dqn': |             MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type']) | ||||||
|             from stable_baselines3 import DQN |             model = MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model") | ||||||
|             model = DQN.load(dk.data_path / f"{dk.model_filename}_model") |  | ||||||
|  |  | ||||||
|         if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file(): |         if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file(): | ||||||
|             dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib") |             dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib") | ||||||
|   | |||||||
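With model_save_type collapsed to a single "stable_baselines" value, FreqaiDataDrawer resolves the class to load from rl_config["model_type"] rather than hard-coding PPO or DQN. A small save/load round trip in the same style (the file name is hypothetical):

from stable_baselines3 import PPO

model = PPO("MlpPolicy", "CartPole-v1", verbose=0)
model.save("example_model")  # writes example_model.zip

model_type = "PPO"  # would come from self.freqai_info['rl_config']['model_type']
mod = __import__("stable_baselines3", fromlist=[model_type])
ModelClass = getattr(mod, model_type)
reloaded = ModelClass.load("example_model")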
freqtrade/freqai/prediction_models/ReinforcementLearner.py (new file, 82 lines)
							| @@ -0,0 +1,82 @@ | |||||||
|  | import logging | ||||||
|  | from typing import Any, Dict  # , Tuple | ||||||
|  |  | ||||||
|  | # import numpy.typing as npt | ||||||
|  | import torch as th | ||||||
|  | import numpy as np | ||||||
|  | from freqtrade.freqai.data_kitchen import FreqaiDataKitchen | ||||||
|  | from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions | ||||||
|  | from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel | ||||||
|  | from pathlib import Path | ||||||
|  |  | ||||||
|  | logger = logging.getLogger(__name__) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ReinforcementLearner(BaseReinforcementLearningModel): | ||||||
|  |     """ | ||||||
|  |     User created Reinforcement Learning Model prediction model. | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): | ||||||
|  |  | ||||||
|  |         train_df = data_dictionary["train_features"] | ||||||
|  |         total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) | ||||||
|  |  | ||||||
|  |         policy_kwargs = dict(activation_fn=th.nn.ReLU, | ||||||
|  |                              net_arch=[256, 256, 128]) | ||||||
|  |  | ||||||
|  |         model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, | ||||||
|  |                                 tensorboard_log=Path(dk.data_path / "tensorboard"), | ||||||
|  |                                 **self.freqai_info['model_training_parameters'] | ||||||
|  |                                 ) | ||||||
|  |  | ||||||
|  |         model.learn( | ||||||
|  |             total_timesteps=int(total_timesteps), | ||||||
|  |             callback=self.eval_callback | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         if Path(dk.data_path / "best_model.zip").is_file(): | ||||||
|  |             logger.info('Callback found a best model.') | ||||||
|  |             best_model = self.MODELCLASS.load(dk.data_path / "best_model") | ||||||
|  |             return best_model | ||||||
|  |  | ||||||
|  |         logger.info('Could not find best model, using final model instead.') | ||||||
|  |  | ||||||
|  |         return model | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class MyRLEnv(Base5ActionRLEnv): | ||||||
|  |     """ | ||||||
|  |     User can modify any part of the environment by overriding base | ||||||
|  |     functions | ||||||
|  |     """ | ||||||
|  |     def calculate_reward(self, action): | ||||||
|  |  | ||||||
|  |         if self._last_trade_tick is None: | ||||||
|  |             return 0. | ||||||
|  |  | ||||||
|  |         # close long | ||||||
|  |         if action == Actions.Long_exit.value and self._position == Positions.Long: | ||||||
|  |             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|  |             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) | ||||||
|  |             return float(np.log(current_price) - np.log(last_trade_price)) | ||||||
|  |  | ||||||
|  |         if action == Actions.Long_exit.value and self._position == Positions.Long: | ||||||
|  |             if self.close_trade_profit[-1] > self.profit_aim * self.rr: | ||||||
|  |                 last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|  |                 current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) | ||||||
|  |                 return float((np.log(current_price) - np.log(last_trade_price)) * 2) | ||||||
|  |  | ||||||
|  |         # close short | ||||||
|  |         if action == Actions.Short_exit.value and self._position == Positions.Short: | ||||||
|  |             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|  |             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) | ||||||
|  |             return float(np.log(last_trade_price) - np.log(current_price)) | ||||||
|  |  | ||||||
|  |         if action == Actions.Short_exit.value and self._position == Positions.Short: | ||||||
|  |             if self.close_trade_profit[-1] > self.profit_aim * self.rr: | ||||||
|  |                 last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) | ||||||
|  |                 current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) | ||||||
|  |                 return float((np.log(last_trade_price) - np.log(current_price)) * 2) | ||||||
|  |  | ||||||
|  |         return 0. | ||||||
| @@ -1,17 +1,59 @@ | |||||||
| from typing import Any, Dict, List, Optional, Tuple, Type, Union | import logging | ||||||
|  |  | ||||||
| import gym |  | ||||||
| import torch |  | ||||||
| import torch as th | import torch as th | ||||||
|  | from typing import Any, Dict, List, Optional, Tuple, Type, Union | ||||||
|  | from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel | ||||||
| from stable_baselines3 import DQN | from stable_baselines3 import DQN | ||||||
| from stable_baselines3.common.buffers import ReplayBuffer | from stable_baselines3.common.buffers import ReplayBuffer | ||||||
| from stable_baselines3.common.policies import BasePolicy | from freqtrade.freqai.data_kitchen import FreqaiDataKitchen | ||||||
| from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor, | from pathlib import Path | ||||||
|                                                    FlattenExtractor) |  | ||||||
| from stable_baselines3.common.type_aliases import GymEnv, Schedule |  | ||||||
| from stable_baselines3.dqn.policies import (CnnPolicy, DQNPolicy, MlpPolicy, | from stable_baselines3.dqn.policies import (CnnPolicy, DQNPolicy, MlpPolicy, | ||||||
|                                             QNetwork) |                                             QNetwork) | ||||||
| from torch import nn | from torch import nn | ||||||
|  | import gym | ||||||
|  | from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor, | ||||||
|  |                                                    FlattenExtractor) | ||||||
|  | from stable_baselines3.common.type_aliases import GymEnv, Schedule | ||||||
|  | from stable_baselines3.common.policies import BasePolicy | ||||||
|  |  | ||||||
|  | logger = logging.getLogger(__name__) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ReinforcementLearnerCustomAgent(BaseReinforcementLearningModel): | ||||||
|  |     """ | ||||||
|  |     User can customize agent by defining the class and using it directly. | ||||||
|  |     Here the example is "TDQN" | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): | ||||||
|  |  | ||||||
|  |         train_df = data_dictionary["train_features"] | ||||||
|  |         total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) | ||||||
|  |  | ||||||
|  |         policy_kwargs = dict(activation_fn=th.nn.ReLU, | ||||||
|  |                              net_arch=[256, 256, 128]) | ||||||
|  |  | ||||||
|  |         # TDQN is a custom agent defined below | ||||||
|  |         model = TDQN(self.policy_type, self.train_env, | ||||||
|  |                      tensorboard_log=Path(dk.data_path / "tensorboard"), | ||||||
|  |                      policy_kwargs=policy_kwargs, | ||||||
|  |                      **self.freqai_info['model_training_parameters'] | ||||||
|  |                      ) | ||||||
|  |  | ||||||
|  |         model.learn( | ||||||
|  |             total_timesteps=int(total_timesteps), | ||||||
|  |             callback=self.eval_callback | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         if Path(dk.data_path / "best_model.zip").is_file(): | ||||||
|  |             logger.info('Callback found a best model.') | ||||||
|  |             best_model = self.MODELCLASS.load(dk.data_path / "best_model") | ||||||
|  |             return best_model | ||||||
|  |  | ||||||
|  |         logger.info('Could not find best model, using final model instead.') | ||||||
|  |  | ||||||
|  |         return model | ||||||
|  |  | ||||||
|  | # User creates their custom agent and networks as shown below | ||||||
|  |  | ||||||
|  |  | ||||||
| def create_mlp_( | def create_mlp_( | ||||||
| @@ -72,7 +114,7 @@ class TDQNetwork(QNetwork): | |||||||
|  |  | ||||||
|     def init_weights(self, m): |     def init_weights(self, m): | ||||||
|         if type(m) == nn.Linear: |         if type(m) == nn.Linear: | ||||||
|             torch.nn.init.kaiming_uniform_(m.weight) |             th.nn.init.kaiming_uniform_(m.weight) | ||||||
|  |  | ||||||
|  |  | ||||||
| class TDQNPolicy(DQNPolicy): | class TDQNPolicy(DQNPolicy): | ||||||
| @@ -175,7 +217,7 @@ class TDQN(DQN): | |||||||
|         exploration_initial_eps: float = 1.0, |         exploration_initial_eps: float = 1.0, | ||||||
|         exploration_final_eps: float = 0.05, |         exploration_final_eps: float = 0.05, | ||||||
|         max_grad_norm: float = 10, |         max_grad_norm: float = 10, | ||||||
|         tensorboard_log: Optional[str] = None, |         tensorboard_log: Optional[Path] = None, | ||||||
|         create_eval_env: bool = False, |         create_eval_env: bool = False, | ||||||
|         policy_kwargs: Optional[Dict[str, Any]] = None, |         policy_kwargs: Optional[Dict[str, Any]] = None, | ||||||
|         verbose: int = 1, |         verbose: int = 1, | ||||||
| @@ -0,0 +1,84 @@ | |||||||
|  | import logging | ||||||
|  | from typing import Any, Dict  # , Tuple | ||||||
|  |  | ||||||
|  | # import numpy.typing as npt | ||||||
|  | import torch as th | ||||||
|  | from stable_baselines3.common.callbacks import EvalCallback | ||||||
|  | from stable_baselines3.common.vec_env import SubprocVecEnv | ||||||
|  | from freqtrade.freqai.RL.BaseReinforcementLearningModel import (BaseReinforcementLearningModel, | ||||||
|  |                                                                 make_env) | ||||||
|  | from freqtrade.freqai.data_kitchen import FreqaiDataKitchen | ||||||
|  |  | ||||||
|  | from pathlib import Path | ||||||
|  |  | ||||||
|  | logger = logging.getLogger(__name__) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ReinforcementLearner_multiproc(BaseReinforcementLearningModel): | ||||||
|  |     """ | ||||||
|  |     User created Reinforcement Learning Model prediction model. | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): | ||||||
|  |  | ||||||
|  |         train_df = data_dictionary["train_features"] | ||||||
|  |         total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) | ||||||
|  |  | ||||||
|  |         # model arch | ||||||
|  |         policy_kwargs = dict(activation_fn=th.nn.ReLU, | ||||||
|  |                              net_arch=[512, 512, 512]) | ||||||
|  |  | ||||||
|  |         model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, | ||||||
|  |                                 tensorboard_log=Path(dk.data_path / "tensorboard"), | ||||||
|  |                                 **self.freqai_info['model_training_parameters'] | ||||||
|  |                                 ) | ||||||
|  |  | ||||||
|  |         model.learn( | ||||||
|  |             total_timesteps=int(total_timesteps), | ||||||
|  |             callback=self.eval_callback | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         if Path(dk.data_path / "best_model.zip").is_file(): | ||||||
|  |             logger.info('Callback found a best model.') | ||||||
|  |             best_model = self.MODELCLASS.load(dk.data_path / "best_model") | ||||||
|  |             return best_model | ||||||
|  |  | ||||||
|  |         logger.info('Could not find best model, using final model instead.') | ||||||
|  |  | ||||||
|  |         return model | ||||||
|  |  | ||||||
|  |     def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test, dk): | ||||||
|  |         """ | ||||||
|  |         If user has particular environment configuration needs, they can do that by | ||||||
|  |         overriding this function. In the present case, the user wants to setup training | ||||||
|  |         environments for multiple workers. | ||||||
|  |         """ | ||||||
|  |         train_df = data_dictionary["train_features"] | ||||||
|  |         test_df = data_dictionary["test_features"] | ||||||
|  |         eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) | ||||||
|  |  | ||||||
|  |         # environments | ||||||
|  |         if not self.train_env: | ||||||
|  |             env_id = "train_env" | ||||||
|  |             num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2) | ||||||
|  |             self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, | ||||||
|  |                                             self.reward_params, self.CONV_WIDTH) for i | ||||||
|  |                                             in range(num_cpu)]) | ||||||
|  |  | ||||||
|  |             eval_env_id = 'eval_env' | ||||||
|  |             self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, | ||||||
|  |                                            self.reward_params, self.CONV_WIDTH, monitor=True) for i | ||||||
|  |                                            in range(num_cpu)]) | ||||||
|  |             self.eval_callback = EvalCallback(self.eval_env, deterministic=True, | ||||||
|  |                                               render=False, eval_freq=eval_freq, | ||||||
|  |                                               best_model_save_path=dk.data_path) | ||||||
|  |         else: | ||||||
|  |             self.train_env.env_method('reset') | ||||||
|  |             self.eval_env.env_method('reset') | ||||||
|  |             self.train_env.env_method('reset_env', train_df, prices_train, | ||||||
|  |                                       self.CONV_WIDTH, self.reward_params) | ||||||
|  |             self.eval_env.env_method('reset_env', test_df, prices_test, | ||||||
|  |                                      self.CONV_WIDTH, self.reward_params) | ||||||
|  |             self.eval_callback.__init__(self.eval_env, deterministic=True, | ||||||
|  |                                         render=False, eval_freq=eval_freq, | ||||||
|  |                                         best_model_save_path=dk.data_path) | ||||||
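Because SubprocVecEnv keeps the environments in worker processes, the multiprocessing override above cannot call reset()/reset_env() directly and routes them through env_method(), which forwards a method call to every sub-environment. A tiny illustration using DummyVecEnv so no subprocess setup is required:

import gym
from stable_baselines3.common.vec_env import DummyVecEnv

vec_env = DummyVecEnv([lambda: gym.make("CartPole-v1") for _ in range(2)])
vec_env.env_method("reset")      # calls reset() on every wrapped environment
vec_env.env_method("seed", 42)   # same mechanism for any method name plus arguments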
| @@ -1,104 +0,0 @@ | |||||||
| import gc |  | ||||||
| import logging |  | ||||||
| from typing import Any, Dict  # , Tuple |  | ||||||
|  |  | ||||||
| import numpy as np |  | ||||||
| # import numpy.typing as npt |  | ||||||
| import torch as th |  | ||||||
| from stable_baselines3 import PPO |  | ||||||
| from stable_baselines3.common.callbacks import EvalCallback |  | ||||||
| from stable_baselines3.common.monitor import Monitor |  | ||||||
|  |  | ||||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen |  | ||||||
| from freqtrade.freqai.RL.Base3ActionRLEnv import Actions, Base3ActionRLEnv, Positions |  | ||||||
| from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel |  | ||||||
|  |  | ||||||
|  |  | ||||||
| logger = logging.getLogger(__name__) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class ReinforcementLearningPPO(BaseReinforcementLearningModel): |  | ||||||
|     """ |  | ||||||
|     User created Reinforcement Learning Model prediction model. |  | ||||||
|     """ |  | ||||||
|  |  | ||||||
|     def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): |  | ||||||
|  |  | ||||||
|         train_df = data_dictionary["train_features"] |  | ||||||
|         test_df = data_dictionary["test_features"] |  | ||||||
|         eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) |  | ||||||
|         total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) |  | ||||||
|  |  | ||||||
|         path = dk.data_path |  | ||||||
|         eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/", |  | ||||||
|                                      log_path=f"{path}/ppo/logs/", eval_freq=int(eval_freq), |  | ||||||
|                                      deterministic=True, render=False) |  | ||||||
|  |  | ||||||
|         # model arch |  | ||||||
|         policy_kwargs = dict(activation_fn=th.nn.ReLU, |  | ||||||
|                              net_arch=[256, 256, 128]) |  | ||||||
|  |  | ||||||
|         model = PPO('MlpPolicy', self.train_env, policy_kwargs=policy_kwargs, |  | ||||||
|                     tensorboard_log=f"{path}/ppo/tensorboard/", |  | ||||||
|                     **self.freqai_info['model_training_parameters'] |  | ||||||
|                     ) |  | ||||||
|  |  | ||||||
|         model.learn( |  | ||||||
|             total_timesteps=int(total_timesteps), |  | ||||||
|             callback=eval_callback |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|         del model |  | ||||||
|         best_model = PPO.load(dk.data_path / "best_model") |  | ||||||
|  |  | ||||||
|         print('Training finished!') |  | ||||||
|         gc.collect() |  | ||||||
|  |  | ||||||
|         return best_model |  | ||||||
|  |  | ||||||
|     def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test): |  | ||||||
|         """ |  | ||||||
|         User overrides this as shown here if they are using a custom MyRLEnv |  | ||||||
|         """ |  | ||||||
|         train_df = data_dictionary["train_features"] |  | ||||||
|         test_df = data_dictionary["test_features"] |  | ||||||
|  |  | ||||||
|         # environments |  | ||||||
|         if not self.train_env: |  | ||||||
|             self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH, |  | ||||||
|                                      reward_kwargs=self.reward_params) |  | ||||||
|             self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test, |  | ||||||
|                                     window_size=self.CONV_WIDTH, |  | ||||||
|                                     reward_kwargs=self.reward_params), ".") |  | ||||||
|         else: |  | ||||||
|             self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) |  | ||||||
|             self.eval_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) |  | ||||||
|             self.train_env.reset() |  | ||||||
|             self.eval_env.reset() |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class MyRLEnv(Base3ActionRLEnv): |  | ||||||
|     """ |  | ||||||
|     User can override any function in BaseRLEnv and gym.Env |  | ||||||
|     """ |  | ||||||
|  |  | ||||||
|     def calculate_reward(self, action): |  | ||||||
|  |  | ||||||
|         if self._last_trade_tick is None: |  | ||||||
|             return 0. |  | ||||||
|  |  | ||||||
|         # close long |  | ||||||
|         if (action == Actions.Short.value or |  | ||||||
|                 action == Actions.Neutral.value) and self._position == Positions.Long: |  | ||||||
|             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|             return float(np.log(current_price) - np.log(last_trade_price)) |  | ||||||
|  |  | ||||||
|         # close short |  | ||||||
|         if (action == Actions.Long.value or |  | ||||||
|                 action == Actions.Neutral.value) and self._position == Positions.Short: |  | ||||||
|             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|             return float(np.log(last_trade_price) - np.log(current_price)) |  | ||||||
|  |  | ||||||
|         return 0. |  | ||||||
| @@ -1,132 +0,0 @@ | |||||||
| import logging |  | ||||||
| from typing import Any, Dict  # , Tuple |  | ||||||
|  |  | ||||||
| import numpy as np |  | ||||||
| # import numpy.typing as npt |  | ||||||
| import torch as th |  | ||||||
| from stable_baselines3.common.monitor import Monitor |  | ||||||
| from typing import Callable |  | ||||||
| from stable_baselines3 import PPO |  | ||||||
| from stable_baselines3.common.callbacks import EvalCallback |  | ||||||
| from stable_baselines3.common.vec_env import SubprocVecEnv |  | ||||||
| from stable_baselines3.common.utils import set_random_seed |  | ||||||
| from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions |  | ||||||
| from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel |  | ||||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen |  | ||||||
| import gym |  | ||||||
|  |  | ||||||
| logger = logging.getLogger(__name__) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def make_env(env_id: str, rank: int, seed: int, train_df, price, |  | ||||||
|              reward_params, window_size, monitor=False) -> Callable: |  | ||||||
|     """ |  | ||||||
|     Utility function for multiprocessed env. |  | ||||||
|  |  | ||||||
|     :param env_id: (str) the environment ID |  | ||||||
|     :param num_env: (int) the number of environment you wish to have in subprocesses |  | ||||||
|     :param seed: (int) the inital seed for RNG |  | ||||||
|     :param rank: (int) index of the subprocess |  | ||||||
|     :return: (Callable) |  | ||||||
|     """ |  | ||||||
|     def _init() -> gym.Env: |  | ||||||
|  |  | ||||||
|         env = MyRLEnv(df=train_df, prices=price, window_size=window_size, |  | ||||||
|                       reward_kwargs=reward_params, id=env_id, seed=seed + rank) |  | ||||||
|         if monitor: |  | ||||||
|             env = Monitor(env, ".") |  | ||||||
|         return env |  | ||||||
|     set_random_seed(seed) |  | ||||||
|     return _init |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel): |  | ||||||
|     """ |  | ||||||
|     User created Reinforcement Learning Model prediction model. |  | ||||||
|     """ |  | ||||||
|  |  | ||||||
|     def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): |  | ||||||
|  |  | ||||||
|         train_df = data_dictionary["train_features"] |  | ||||||
|         test_df = data_dictionary["test_features"] |  | ||||||
|         eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) |  | ||||||
|         total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) |  | ||||||
|  |  | ||||||
|         path = dk.data_path |  | ||||||
|         eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/", |  | ||||||
|                                      log_path=f"{path}/ppo/logs/", eval_freq=int(eval_freq), |  | ||||||
|                                      deterministic=True, render=False) |  | ||||||
|  |  | ||||||
|         # model arch |  | ||||||
|         policy_kwargs = dict(activation_fn=th.nn.ReLU, |  | ||||||
|                              net_arch=[512, 512, 512]) |  | ||||||
|  |  | ||||||
|         model = PPO('MlpPolicy', self.train_env, policy_kwargs=policy_kwargs, |  | ||||||
|                     tensorboard_log=f"{path}/ppo/tensorboard/", |  | ||||||
|                     **self.freqai_info['model_training_parameters'] |  | ||||||
|                     ) |  | ||||||
|  |  | ||||||
|         model.learn( |  | ||||||
|             total_timesteps=int(total_timesteps), |  | ||||||
|             callback=eval_callback |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|         best_model = PPO.load(dk.data_path / "best_model") |  | ||||||
|         print('Training finished!') |  | ||||||
|  |  | ||||||
|         return best_model |  | ||||||
|  |  | ||||||
|     def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test): |  | ||||||
|         """ |  | ||||||
|         User overrides this in their prediction model if they are custom a MyRLEnv. Othwerwise |  | ||||||
|         leaving this will default to Base5ActEnv |  | ||||||
|         """ |  | ||||||
|         train_df = data_dictionary["train_features"] |  | ||||||
|         test_df = data_dictionary["test_features"] |  | ||||||
|  |  | ||||||
|         # environments |  | ||||||
|         if not self.train_env: |  | ||||||
|             env_id = "train_env" |  | ||||||
|             num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2) |  | ||||||
|             self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, |  | ||||||
|                                             self.reward_params, self.CONV_WIDTH) for i |  | ||||||
|                                             in range(num_cpu)]) |  | ||||||
|  |  | ||||||
|             eval_env_id = 'eval_env' |  | ||||||
|             self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, |  | ||||||
|                                            self.reward_params, self.CONV_WIDTH, monitor=True) for i |  | ||||||
|                                            in range(num_cpu)]) |  | ||||||
|         else: |  | ||||||
|             self.train_env.env_method('reset_env', train_df, prices_train, |  | ||||||
|                                       self.CONV_WIDTH, self.reward_params) |  | ||||||
|             self.eval_env.env_method('reset_env', train_df, prices_train, |  | ||||||
|                                      self.CONV_WIDTH, self.reward_params) |  | ||||||
|             self.train_env.env_method('reset') |  | ||||||
|             self.eval_env.env_method('reset') |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class MyRLEnv(Base3ActionRLEnv): |  | ||||||
|     """ |  | ||||||
|     User can override any function in BaseRLEnv and gym.Env |  | ||||||
|     """ |  | ||||||
|  |  | ||||||
|     def calculate_reward(self, action): |  | ||||||
|  |  | ||||||
|         if self._last_trade_tick is None: |  | ||||||
|             return 0. |  | ||||||
|  |  | ||||||
|         # close long |  | ||||||
|         if (action == Actions.Short.value or |  | ||||||
|                 action == Actions.Neutral.value) and self._position == Positions.Long: |  | ||||||
|             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|             return float(np.log(current_price) - np.log(last_trade_price)) |  | ||||||
|  |  | ||||||
|         # close short |  | ||||||
|         if (action == Actions.Long.value or |  | ||||||
|                 action == Actions.Neutral.value) and self._position == Positions.Short: |  | ||||||
|             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|             return float(np.log(last_trade_price) - np.log(current_price)) |  | ||||||
|  |  | ||||||
|         return 0. |  | ||||||
| @@ -1,115 +0,0 @@ | |||||||
| import logging |  | ||||||
| from typing import Any, Dict  # Optional |  | ||||||
| import torch as th |  | ||||||
| from stable_baselines3.common.callbacks import EvalCallback |  | ||||||
| from stable_baselines3.common.monitor import Monitor |  | ||||||
| from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions |  | ||||||
| from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel |  | ||||||
| from freqtrade.freqai.RL.TDQNagent import TDQN |  | ||||||
| from stable_baselines3 import DQN |  | ||||||
| from stable_baselines3.common.buffers import ReplayBuffer |  | ||||||
| import numpy as np |  | ||||||
| import gc |  | ||||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen |  | ||||||
|  |  | ||||||
| logger = logging.getLogger(__name__) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class ReinforcementLearningTDQN(BaseReinforcementLearningModel): |  | ||||||
|     """ |  | ||||||
|     User created Reinforcement Learning Model prediction model. |  | ||||||
|     """ |  | ||||||
|  |  | ||||||
|     def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): |  | ||||||
|  |  | ||||||
|         train_df = data_dictionary["train_features"] |  | ||||||
|         test_df = data_dictionary["test_features"] |  | ||||||
|         eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) |  | ||||||
|         total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) |  | ||||||
|  |  | ||||||
|         path = dk.data_path |  | ||||||
|         eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/", |  | ||||||
|                                      log_path=f"{path}/tdqn/logs/", eval_freq=int(eval_freq), |  | ||||||
|                                      deterministic=True, render=False) |  | ||||||
|  |  | ||||||
|         # model arch |  | ||||||
|         policy_kwargs = dict(activation_fn=th.nn.ReLU, |  | ||||||
|                              net_arch=[256, 256, 128]) |  | ||||||
|  |  | ||||||
|         model = TDQN('TMultiInputPolicy', self.train_env, |  | ||||||
|                      tensorboard_log=f"{path}/tdqn/tensorboard/", |  | ||||||
|                      policy_kwargs=policy_kwargs, |  | ||||||
|                      replay_buffer_class=ReplayBuffer, |  | ||||||
|                      **self.freqai_info['model_training_parameters'] |  | ||||||
|                      ) |  | ||||||
|  |  | ||||||
|         model.learn( |  | ||||||
|             total_timesteps=int(total_timesteps), |  | ||||||
|             callback=eval_callback |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|         del model |  | ||||||
|         best_model = DQN.load(dk.data_path / "best_model") |  | ||||||
|  |  | ||||||
|         print('Training finished!') |  | ||||||
|         gc.collect() |  | ||||||
|         return best_model |  | ||||||
|  |  | ||||||
|     def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test): |  | ||||||
|         """ |  | ||||||
|         User overrides this as shown here if they are using a custom MyRLEnv |  | ||||||
|         """ |  | ||||||
|         train_df = data_dictionary["train_features"] |  | ||||||
|         test_df = data_dictionary["test_features"] |  | ||||||
|  |  | ||||||
|         # environments |  | ||||||
|         if not self.train_env: |  | ||||||
|             self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH, |  | ||||||
|                                      reward_kwargs=self.reward_params) |  | ||||||
|             self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test, |  | ||||||
|                                     window_size=self.CONV_WIDTH, |  | ||||||
|                                     reward_kwargs=self.reward_params), ".") |  | ||||||
|         else: |  | ||||||
|             self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) |  | ||||||
|             self.eval_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) |  | ||||||
|             self.train_env.reset() |  | ||||||
|             self.eval_env.reset() |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # User can inherit and customize 5 action environment |  | ||||||
| class MyRLEnv(Base5ActionRLEnv): |  | ||||||
|     """ |  | ||||||
|     User can override any function in BaseRLEnv and gym.Env. Here the user |  | ||||||
|     Adds 5 actions. |  | ||||||
|     """ |  | ||||||
|  |  | ||||||
|     def calculate_reward(self, action): |  | ||||||
|  |  | ||||||
|         if self._last_trade_tick is None: |  | ||||||
|             return 0. |  | ||||||
|  |  | ||||||
|         # close long |  | ||||||
|         if action == Actions.Long_sell.value and self._position == Positions.Long: |  | ||||||
|             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|             return float(np.log(current_price) - np.log(last_trade_price)) |  | ||||||
|  |  | ||||||
|         if action == Actions.Long_sell.value and self._position == Positions.Long: |  | ||||||
|             if self.close_trade_profit[-1] > self.profit_aim * self.rr: |  | ||||||
|                 last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|                 current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|                 return float((np.log(current_price) - np.log(last_trade_price)) * 2) |  | ||||||
|  |  | ||||||
|         # close short |  | ||||||
|         if action == Actions.Short_buy.value and self._position == Positions.Short: |  | ||||||
|             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|             return float(np.log(last_trade_price) - np.log(current_price)) |  | ||||||
|  |  | ||||||
|         if action == Actions.Short_buy.value and self._position == Positions.Short: |  | ||||||
|             if self.close_trade_profit[-1] > self.profit_aim * self.rr: |  | ||||||
|                 last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|                 current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|                 return float((np.log(last_trade_price) - np.log(current_price)) * 2) |  | ||||||
|  |  | ||||||
|         return 0. |  | ||||||
| @@ -1,148 +0,0 @@ | |||||||
| import logging |  | ||||||
| from typing import Any, Dict  # Optional |  | ||||||
| import torch as th |  | ||||||
| import numpy as np |  | ||||||
| import gym |  | ||||||
| from typing import Callable |  | ||||||
| from stable_baselines3.common.callbacks import EvalCallback |  | ||||||
| # EvalCallback , StopTrainingOnNoModelImprovement, StopTrainingOnRewardThreshold |  | ||||||
| from stable_baselines3.common.monitor import Monitor |  | ||||||
| from stable_baselines3.common.vec_env import SubprocVecEnv |  | ||||||
| from stable_baselines3.common.utils import set_random_seed |  | ||||||
| from stable_baselines3 import DQN |  | ||||||
| from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions |  | ||||||
| from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel |  | ||||||
| from freqtrade.freqai.RL.TDQNagent import TDQN |  | ||||||
| from stable_baselines3.common.buffers import ReplayBuffer |  | ||||||
| from freqtrade.freqai.data_kitchen import FreqaiDataKitchen |  | ||||||
|  |  | ||||||
| logger = logging.getLogger(__name__) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def make_env(env_id: str, rank: int, seed: int, train_df, price, |  | ||||||
|              reward_params, window_size, monitor=False) -> Callable: |  | ||||||
|     """ |  | ||||||
|     Utility function for multiprocessed env. |  | ||||||
|  |  | ||||||
|     :param env_id: (str) the environment ID |  | ||||||
|     :param num_env: (int) the number of environment you wish to have in subprocesses |  | ||||||
|     :param seed: (int) the inital seed for RNG |  | ||||||
|     :param rank: (int) index of the subprocess |  | ||||||
|     :return: (Callable) |  | ||||||
|     """ |  | ||||||
|     def _init() -> gym.Env: |  | ||||||
|  |  | ||||||
|         env = MyRLEnv(df=train_df, prices=price, window_size=window_size, |  | ||||||
|                       reward_kwargs=reward_params, id=env_id, seed=seed + rank) |  | ||||||
|         if monitor: |  | ||||||
|             env = Monitor(env, ".") |  | ||||||
|         return env |  | ||||||
|     set_random_seed(seed) |  | ||||||
|     return _init |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class ReinforcementLearningTDQN_multiproc(BaseReinforcementLearningModel): |  | ||||||
|     """ |  | ||||||
|     User created Reinforcement Learning Model prediction model. |  | ||||||
|     """ |  | ||||||
|  |  | ||||||
|     def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): |  | ||||||
|  |  | ||||||
|         train_df = data_dictionary["train_features"] |  | ||||||
|         test_df = data_dictionary["test_features"] |  | ||||||
|         eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df) |  | ||||||
|         total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) |  | ||||||
|  |  | ||||||
|         path = dk.data_path |  | ||||||
|  |  | ||||||
|         eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/", |  | ||||||
|                                      log_path=f"{path}/tdqn/logs/", eval_freq=int(eval_freq), |  | ||||||
|                                      deterministic=True, render=False) |  | ||||||
|         # model arch |  | ||||||
|         policy_kwargs = dict(activation_fn=th.nn.ReLU, |  | ||||||
|                              net_arch=[512, 512, 512]) |  | ||||||
|  |  | ||||||
|         model = TDQN('TMultiInputPolicy', self.train_env, |  | ||||||
|                      policy_kwargs=policy_kwargs, |  | ||||||
|                      tensorboard_log=f"{path}/tdqn/tensorboard/", |  | ||||||
|                      replay_buffer_class=ReplayBuffer, |  | ||||||
|                      **self.freqai_info['model_training_parameters'] |  | ||||||
|                      ) |  | ||||||
|  |  | ||||||
|         model.learn( |  | ||||||
|             total_timesteps=int(total_timesteps), |  | ||||||
|             callback=eval_callback |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|         best_model = DQN.load(dk.data_path / "best_model.zip") |  | ||||||
|         print('Training finished!') |  | ||||||
|  |  | ||||||
|         return best_model |  | ||||||
|  |  | ||||||
|     def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test): |  | ||||||
|         """ |  | ||||||
|         User overrides this in their prediction model if they are custom a MyRLEnv. Othwerwise |  | ||||||
|         leaving this will default to Base5ActEnv |  | ||||||
|         """ |  | ||||||
|         train_df = data_dictionary["train_features"] |  | ||||||
|         test_df = data_dictionary["test_features"] |  | ||||||
|  |  | ||||||
|         # environments |  | ||||||
|         if not self.train_env: |  | ||||||
|             env_id = "train_env" |  | ||||||
|             num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2) |  | ||||||
|             self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, |  | ||||||
|                                             self.reward_params, self.CONV_WIDTH) for i |  | ||||||
|                                             in range(num_cpu)]) |  | ||||||
|  |  | ||||||
|             eval_env_id = 'eval_env' |  | ||||||
|             self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, |  | ||||||
|                                            self.reward_params, self.CONV_WIDTH, monitor=True) for i |  | ||||||
|                                            in range(num_cpu)]) |  | ||||||
|         else: |  | ||||||
|             self.train_env.env_method('reset_env', train_df, prices_train, |  | ||||||
|                                       self.CONV_WIDTH, self.reward_params) |  | ||||||
|             self.eval_env.env_method('reset_env', train_df, prices_train, |  | ||||||
|                                      self.CONV_WIDTH, self.reward_params) |  | ||||||
|             self.train_env.env_method('reset') |  | ||||||
|             self.eval_env.env_method('reset') |  | ||||||
|  |  | ||||||
| # User can inherit and customize 5 action environment |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class MyRLEnv(Base5ActionRLEnv): |  | ||||||
|     """ |  | ||||||
|     User can override any function in BaseRLEnv and gym.Env. Here the user |  | ||||||
|     Adds 5 actions. |  | ||||||
|     """ |  | ||||||
|  |  | ||||||
|     def calculate_reward(self, action): |  | ||||||
|  |  | ||||||
|         if self._last_trade_tick is None: |  | ||||||
|             return 0. |  | ||||||
|  |  | ||||||
|         # close long |  | ||||||
|         if action == Actions.Long_sell.value and self._position == Positions.Long: |  | ||||||
|             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|             return float(np.log(current_price) - np.log(last_trade_price)) |  | ||||||
|  |  | ||||||
|         if action == Actions.Long_sell.value and self._position == Positions.Long: |  | ||||||
|             if self.close_trade_profit[-1] > self.profit_aim * self.rr: |  | ||||||
|                 last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|                 current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|                 return float((np.log(current_price) - np.log(last_trade_price)) * 2) |  | ||||||
|  |  | ||||||
|         # close short |  | ||||||
|         if action == Actions.Short_buy.value and self._position == Positions.Short: |  | ||||||
|             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|             return float(np.log(last_trade_price) - np.log(current_price)) |  | ||||||
|  |  | ||||||
|         if action == Actions.Short_buy.value and self._position == Positions.Short: |  | ||||||
|             if self.close_trade_profit[-1] > self.profit_aim * self.rr: |  | ||||||
|                 last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) |  | ||||||
|                 current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) |  | ||||||
|                 return float((np.log(last_trade_price) - np.log(current_price)) * 2) |  | ||||||
|  |  | ||||||
|         return 0. |  | ||||||