stable/freqtrade/freqai/prediction_models/RL/RLPrediction_agent.py

163 lines
5.3 KiB
Python
Raw Normal View History

# common library
import numpy as np
from stable_baselines3 import A2C
from stable_baselines3 import DDPG
from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3 import TD3
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
# from stable_baselines3.common.vec_env import DummyVecEnv
from freqtrade.freqai.prediction_models.RL import config
# from meta.env_stock_trading.env_stock_trading import StockTradingEnv
# RL models from stable-baselines
MODELS = {"a2c": A2C, "ddpg": DDPG, "td3": TD3, "sac": SAC, "ppo": PPO}
MODEL_KWARGS = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in MODELS.keys()}
NOISE = {
"normal": NormalActionNoise,
"ornstein_uhlenbeck": OrnsteinUhlenbeckActionNoise,
}
class TensorboardCallback(BaseCallback):
"""
Custom callback for plotting additional values in tensorboard.
"""
def __init__(self, verbose=0):
super(TensorboardCallback, self).__init__(verbose)
def _on_step(self) -> bool:
try:
self.logger.record(key="train/reward", value=self.locals["rewards"][0])
except BaseException:
self.logger.record(key="train/reward", value=self.locals["reward"][0])
return True
class RLPrediction_agent:
"""Provides implementations for DRL algorithms
Based on:
https://github.com/AI4Finance-Foundation/FinRL-Meta/blob/master/agents/stablebaselines3_models.py
Attributes
----------
env: gym environment class
user-defined class
Methods
-------
get_model()
setup DRL algorithms
train_model()
train DRL algorithms in a train dataset
and output the trained model
DRL_prediction()
make a prediction in a test dataset and get results
"""
def __init__(self, env):
self.env = env
def get_model(
self,
model_name,
policy="MlpPolicy",
policy_kwargs=None,
model_kwargs=None,
verbose=1,
seed=None,
):
if model_name not in MODELS:
raise NotImplementedError("NotImplementedError")
if model_kwargs is None:
model_kwargs = MODEL_KWARGS[model_name]
if "action_noise" in model_kwargs:
n_actions = self.env.action_space.shape[-1]
model_kwargs["action_noise"] = NOISE[model_kwargs["action_noise"]](
mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)
)
print(model_kwargs)
model = MODELS[model_name](
policy=policy,
env=self.env,
tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{model_name}",
verbose=verbose,
policy_kwargs=policy_kwargs,
seed=seed,
**model_kwargs,
)
return model
def train_model(self, model, tb_log_name, total_timesteps=5000):
model = model.learn(
total_timesteps=total_timesteps,
tb_log_name=tb_log_name,
callback=TensorboardCallback(),
)
return model
@staticmethod
def DRL_prediction(model, environment):
test_env, test_obs = environment.get_sb_env()
"""make a prediction"""
account_memory = []
actions_memory = []
test_env.reset()
for i in range(len(environment.df.index.unique())):
action, _states = model.predict(test_obs)
# account_memory = test_env.env_method(method_name="save_asset_memory")
# actions_memory = test_env.env_method(method_name="save_action_memory")
test_obs, rewards, dones, info = test_env.step(action)
if i == (len(environment.df.index.unique()) - 2):
account_memory = test_env.env_method(method_name="save_asset_memory")
actions_memory = test_env.env_method(method_name="save_action_memory")
if dones[0]:
print("hit end!")
break
return account_memory[0], actions_memory[0]
@staticmethod
def DRL_prediction_load_from_file(model_name, environment, cwd):
if model_name not in MODELS:
raise NotImplementedError("NotImplementedError")
try:
# load agent
model = MODELS[model_name].load(cwd)
print("Successfully load model", cwd)
except BaseException:
raise ValueError("Fail to load agent!")
# test on the testing env
state = environment.reset()
episode_returns = list() # the cumulative_return / initial_account
episode_total_assets = list()
episode_total_assets.append(environment.initial_total_asset)
done = False
while not done:
action = model.predict(state)[0]
state, reward, done, _ = environment.step(action)
total_asset = (
environment.cash
+ (environment.price_array[environment.time] * environment.stocks).sum()
)
episode_total_assets.append(total_asset)
episode_return = total_asset / environment.initial_total_asset
episode_returns.append(episode_return)
print("episode_return", episode_return)
print("Test Finished!")
return episode_total_assets