# common library
import numpy as np
from stable_baselines3 import A2C
from stable_baselines3 import DDPG
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3 import TD3
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise

# from stable_baselines3.common.vec_env import DummyVecEnv

from freqtrade.freqai.prediction_models.RL import config

# from meta.env_stock_trading.env_stock_trading import StockTradingEnv

# RL models from stable-baselines
MODELS = {"a2c": A2C, "ddpg": DDPG, "td3": TD3, "sac": SAC, "ppo": PPO}

MODEL_KWARGS = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in MODELS.keys()}

NOISE = {
    "normal": NormalActionNoise,
    "ornstein_uhlenbeck": OrnsteinUhlenbeckActionNoise,
}


class TensorboardCallback(BaseCallback):
    """
    Custom callback for plotting additional values in tensorboard.
    """

    def __init__(self, verbose=0):
        super(TensorboardCallback, self).__init__(verbose)

    def _on_step(self) -> bool:
        try:
            self.logger.record(key="train/reward", value=self.locals["rewards"][0])
        except BaseException:
            self.logger.record(key="train/reward", value=self.locals["reward"][0])
        return True


class RLPrediction_agent:
    """Provides implementations for DRL algorithms

    Based on:
    https://github.com/AI4Finance-Foundation/FinRL-Meta/blob/master/agents/stablebaselines3_models.py

    Attributes
    ----------
    env: gym environment class
        user-defined class

    Methods
    -------
    get_model()
        setup DRL algorithms
    train_model()
        train DRL algorithms in a train dataset and output the trained model
    DRL_prediction()
        make a prediction in a test dataset and get results
    """

    def __init__(self, env):
        self.env = env

    def get_model(
        self,
        model_name,
        policy="MlpPolicy",
        policy_kwargs=None,
        model_kwargs=None,
        verbose=1,
        seed=None,
    ):
        if model_name not in MODELS:
            raise NotImplementedError(f"Model '{model_name}' is not supported")

        if model_kwargs is None:
            model_kwargs = MODEL_KWARGS[model_name]

        if "action_noise" in model_kwargs:
            # replace the noise name string with an instantiated noise object
            n_actions = self.env.action_space.shape[-1]
            model_kwargs["action_noise"] = NOISE[model_kwargs["action_noise"]](
                mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)
            )
        print(model_kwargs)
        model = MODELS[model_name](
            policy=policy,
            env=self.env,
            tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{model_name}",
            verbose=verbose,
            policy_kwargs=policy_kwargs,
            seed=seed,
            **model_kwargs,
        )
        return model

    def train_model(self, model, tb_log_name, total_timesteps=5000):
        model = model.learn(
            total_timesteps=total_timesteps,
            tb_log_name=tb_log_name,
            callback=TensorboardCallback(),
        )
        return model

    @staticmethod
    def DRL_prediction(model, environment):
        """make a prediction"""
        test_env, test_obs = environment.get_sb_env()
        account_memory = []
        actions_memory = []
        test_env.reset()
        for i in range(len(environment.df.index.unique())):
            action, _states = model.predict(test_obs)
            # account_memory = test_env.env_method(method_name="save_asset_memory")
            # actions_memory = test_env.env_method(method_name="save_action_memory")
            test_obs, rewards, dones, info = test_env.step(action)
            if i == (len(environment.df.index.unique()) - 2):
                account_memory = test_env.env_method(method_name="save_asset_memory")
                actions_memory = test_env.env_method(method_name="save_action_memory")
            if dones[0]:
                print("hit end!")
                break
        return account_memory[0], actions_memory[0]

    @staticmethod
    def DRL_prediction_load_from_file(model_name, environment, cwd):
        if model_name not in MODELS:
            raise NotImplementedError(f"Model '{model_name}' is not supported")
        try:
            # load agent
            model = MODELS[model_name].load(cwd)
            print("Successfully loaded model", cwd)
        except BaseException:
            raise ValueError("Failed to load agent!")

        # test on the testing env
        state = environment.reset()
        episode_returns = list()  # the cumulative_return / initial_account
        episode_total_assets = list()
        episode_total_assets.append(environment.initial_total_asset)
        done = False
        while not done:
            action = model.predict(state)[0]
            state, reward, done, _ = environment.step(action)

            total_asset = (
                environment.cash
                + (environment.price_array[environment.time] * environment.stocks).sum()
            )
            episode_total_assets.append(total_asset)
            episode_return = total_asset / environment.initial_total_asset
            episode_returns.append(episode_return)

        print("episode_return", episode_return)
        print("Test Finished!")
        return episode_total_assets
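

if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Minimal usage sketch (illustration only, not part of freqtrade).
    # It shows how get_model()/train_model() are wired together, assuming
    # stable-baselines3 >= 2.0 with gymnasium and tensorboard installed,
    # and that the imported `config` module provides TENSORBOARD_LOG_DIR
    # and the *_PARAMS dictionaries. `DummyTradingEnv` is a hypothetical
    # stand-in for a real trading environment, not a freqtrade class.
    # ------------------------------------------------------------------
    import gymnasium as gym
    from gymnasium import spaces

    class DummyTradingEnv(gym.Env):
        """Tiny continuous-action environment used only to exercise the agent."""

        def __init__(self):
            super().__init__()
            self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float32)
            self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)
            self._step_count = 0

        def reset(self, *, seed=None, options=None):
            super().reset(seed=seed)
            self._step_count = 0
            return self.observation_space.sample(), {}

        def step(self, action):
            self._step_count += 1
            obs = self.observation_space.sample()
            reward = float(-np.abs(action).sum())  # dummy reward: penalise large actions
            terminated = self._step_count >= 100
            return obs, reward, terminated, False, {}

    agent = RLPrediction_agent(DummyTradingEnv())
    # Pass explicit model_kwargs so the sketch does not depend on config.PPO_PARAMS values.
    model = agent.get_model("ppo", model_kwargs={"n_steps": 64, "batch_size": 64})
    agent.train_model(model, tb_log_name="ppo_demo", total_timesteps=128)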