reuse callback, allow user to access all stable_baselines3 agents via config

robcaulk 2022-08-20 16:35:29 +02:00
parent 4b9499e321
commit 3eb897c2f8
11 changed files with 295 additions and 587 deletions

View File

@@ -55,7 +55,7 @@
     ],
     "freqai": {
         "enabled": true,
-        "model_save_type": "stable_baselines_dqn",
+        "model_save_type": "stable_baselines",
         "conv_width": 10,
         "purge_old_models": true,
         "train_period_days": 10,
@@ -85,8 +85,11 @@
             "verbose": 1
         },
         "rl_config": {
-            "train_cycles": 15,
-            "eval_cycles": 5,
+            "train_cycles": 10,
+            "eval_cycles": 3,
+            "thread_count": 4,
+            "model_type": "PPO",
+            "policy_type": "MlpPolicy",
             "model_reward_parameters": {
                 "rr": 1,
                 "profit_aim": 0.02

View File

@@ -266,59 +266,28 @@ class Base5ActionRLEnv(gym.Env):
         # close long
         if action == Actions.Long_exit.value and self._position == Positions.Long:
-            if len(self.close_trade_profit):
-                # aim x2 rw
-                if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                    last_trade_price = self.add_buy_fee(
-                        self.prices.iloc[self._last_trade_tick].open)
-                    current_price = self.add_sell_fee(
-                        self.prices.iloc[self._current_tick].open)
-                    return float((np.log(current_price) - np.log(last_trade_price)) * 2)
-                # less than aim x1 rw
-                elif self.close_trade_profit[-1] < self.profit_aim * self.rr:
-                    last_trade_price = self.add_buy_fee(
-                        self.prices.iloc[self._last_trade_tick].open
-                    )
-                    current_price = self.add_sell_fee(
-                        self.prices.iloc[self._current_tick].open
-                    )
-                    return float(np.log(current_price) - np.log(last_trade_price))
-                # # less than RR SL x2 neg rw
-                # elif self.close_trade_profit[-1] < (self.profit_aim * -1):
-                #     last_trade_price = self.add_buy_fee(
-                #         self.prices.iloc[self._last_trade_tick].open)
-                #     current_price = self.add_sell_fee(
-                #         self.prices.iloc[self._current_tick].open)
-                #     return float((np.log(current_price) - np.log(last_trade_price)) * 2) * -1
+            last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
+            current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
+            return float(np.log(current_price) - np.log(last_trade_price))
+
+        if action == Actions.Long_exit.value and self._position == Positions.Long:
+            if self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
+                current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
+                return float((np.log(current_price) - np.log(last_trade_price)) * 2)

         # close short
         if action == Actions.Short_exit.value and self._position == Positions.Short:
-            if len(self.close_trade_profit):
-                # aim x2 rw
-                if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                    last_trade_price = self.add_sell_fee(
-                        self.prices.iloc[self._last_trade_tick].open
-                    )
-                    current_price = self.add_buy_fee(
-                        self.prices.iloc[self._current_tick].open
-                    )
-                    return float((np.log(last_trade_price) - np.log(current_price)) * 2)
-                # less than aim x1 rw
-                elif self.close_trade_profit[-1] < self.profit_aim * self.rr:
-                    last_trade_price = self.add_sell_fee(
-                        self.prices.iloc[self._last_trade_tick].open
-                    )
-                    current_price = self.add_buy_fee(
-                        self.prices.iloc[self._current_tick].open
-                    )
-                    return float(np.log(last_trade_price) - np.log(current_price))
-                # # less than RR SL x2 neg rw
-                # elif self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                #     last_trade_price = self.add_sell_fee(
-                #         self.prices.iloc[self._last_trade_tick].open)
-                #     current_price = self.add_buy_fee(
-                #         self.prices.iloc[self._current_tick].open)
-                #     return float((np.log(last_trade_price) - np.log(current_price)) * 2) * -1
+            last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
+            current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
+            return float(np.log(last_trade_price) - np.log(current_price))
+
+        if action == Actions.Short_exit.value and self._position == Positions.Short:
+            if self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
+                current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
+                return float((np.log(last_trade_price) - np.log(current_price)) * 2)

         return 0.

     def _update_profit(self, action):

View File

@@ -11,8 +11,12 @@ from freqtrade.freqai.freqai_interface import IFreqaiModel
 from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions
 from freqtrade.persistence import Trade
 import torch.multiprocessing
+from stable_baselines3.common.callbacks import EvalCallback
 from stable_baselines3.common.monitor import Monitor
 import torch as th
+from typing import Callable
+from stable_baselines3.common.utils import set_random_seed
+import gym

 logger = logging.getLogger(__name__)

 torch.multiprocessing.set_sharing_strategy('file_system')
@@ -25,9 +29,15 @@ class BaseReinforcementLearningModel(IFreqaiModel):

     def __init__(self, **kwargs):
         super().__init__(config=kwargs['config'])
-        th.set_num_threads(self.freqai_info.get('data_kitchen_thread_count', 4))
+        th.set_num_threads(self.freqai_info['rl_config'].get('thread_count', 4))
         self.reward_params = self.freqai_info['rl_config']['model_reward_parameters']
         self.train_env: Base5ActionRLEnv = None
+        self.eval_env: Base5ActionRLEnv = None
+        self.eval_callback: EvalCallback = None
+        mod = __import__('stable_baselines3', fromlist=[
+            self.freqai_info['rl_config']['model_type']])
+        self.MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type'])
+        self.policy_type = self.freqai_info['rl_config']['policy_type']

     def train(
         self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
@@ -67,7 +77,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         )

         logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')

-        self.set_train_and_eval_environments(data_dictionary, prices_train, prices_test)
+        self.set_train_and_eval_environments(data_dictionary, prices_train, prices_test, dk)

         model = self.fit_rl(data_dictionary, dk)
@@ -75,13 +85,13 @@ class BaseReinforcementLearningModel(IFreqaiModel):

         return model

-    def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test):
+    def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test, dk):
         """
-        User overrides this in their prediction model if they are custom a MyRLEnv. Othwerwise
-        leaving this will default to Base5ActEnv
+        User overrides this as shown here if they are using a custom MyRLEnv
         """
         train_df = data_dictionary["train_features"]
         test_df = data_dictionary["test_features"]
+        eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df)

         # environments
         if not self.train_env:
@@ -90,11 +100,17 @@ class BaseReinforcementLearningModel(IFreqaiModel):
             self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test,
                                             window_size=self.CONV_WIDTH,
                                             reward_kwargs=self.reward_params), ".")
+            self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
+                                              render=False, eval_freq=eval_freq,
+                                              best_model_save_path=dk.data_path)
         else:
-            self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params)
-            self.eval_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params)
             self.train_env.reset()
             self.eval_env.reset()
+            self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params)
+            self.eval_env.reset_env(test_df, prices_test, self.CONV_WIDTH, self.reward_params)
+            self.eval_callback.__init__(self.eval_env, deterministic=True,
+                                        render=False, eval_freq=eval_freq,
+                                        best_model_save_path=dk.data_path)

     @abstractmethod
     def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen):
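
The callback is now kept across retrainings and re-pointed at the refreshed evaluation environment instead of being rebuilt every cycle, which is the "reuse callback" part of this commit. A minimal sketch of that pattern in isolation; the helper name get_eval_callback is illustrative, not part of the commit:

from stable_baselines3.common.callbacks import EvalCallback

def get_eval_callback(existing, eval_env, eval_freq, save_path):
    # First retraining: build the callback once.
    if existing is None:
        return EvalCallback(eval_env, deterministic=True, render=False,
                            eval_freq=eval_freq, best_model_save_path=save_path)
    # Later retrainings: same object, re-initialised against the new eval env and path.
    existing.__init__(eval_env, deterministic=True, render=False,
                      eval_freq=eval_freq, best_model_save_path=save_path)
    return existing
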
@@ -206,16 +222,28 @@ class BaseReinforcementLearningModel(IFreqaiModel):
     # all the other existing fit() functions to include dk argument. For now we instantiate and
     # leave it.
     def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
-        """
-        Most regressors use the same function names and arguments e.g. user
-        can drop in LGBMRegressor in place of CatBoostRegressor and all data
-        management will be properly handled by Freqai.
-        :param data_dictionary: Dict = the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
-        """

         return

+
+def make_env(env_id: str, rank: int, seed: int, train_df, price,
+             reward_params, window_size, monitor=False) -> Callable:
+    """
+    Utility function for multiprocessed env.
+
+    :param env_id: (str) the environment ID
+    :param num_env: (int) the number of environment you wish to have in subprocesses
+    :param seed: (int) the inital seed for RNG
+    :param rank: (int) index of the subprocess
+    :return: (Callable)
+    """
+    def _init() -> gym.Env:
+
+        env = MyRLEnv(df=train_df, prices=price, window_size=window_size,
+                      reward_kwargs=reward_params, id=env_id, seed=seed + rank)
+        if monitor:
+            env = Monitor(env, ".")
+        return env
+    set_random_seed(seed)
+    return _init
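
make_env returns a zero-argument factory rather than an environment because SubprocVecEnv pickles the factory and lets every worker process build its own instance. A runnable mini-example of the same pattern, with a stock gym CartPole environment standing in for MyRLEnv purely for illustration:

import gym
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import SubprocVecEnv

def make_cartpole(rank: int, seed: int = 1):
    # Factory mirroring make_env above; CartPole stands in for MyRLEnv.
    def _init() -> gym.Env:
        return gym.make("CartPole-v1")
    set_random_seed(seed + rank)
    return _init

if __name__ == "__main__":
    # One sub-process per factory, the same way the multiproc model builds train_env.
    vec_env = SubprocVecEnv([make_cartpole(i) for i in range(4)])
    vec_env.reset()
    vec_env.close()
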
 class MyRLEnv(Base5ActionRLEnv):
     """
@@ -229,24 +257,24 @@
             return 0.

         # close long
-        if action == Actions.Long_sell.value and self._position == Positions.Long:
+        if action == Actions.Long_exit.value and self._position == Positions.Long:
             last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
             current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
             return float(np.log(current_price) - np.log(last_trade_price))

-        if action == Actions.Long_sell.value and self._position == Positions.Long:
+        if action == Actions.Long_exit.value and self._position == Positions.Long:
             if self.close_trade_profit[-1] > self.profit_aim * self.rr:
                 last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
                 current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
                 return float((np.log(current_price) - np.log(last_trade_price)) * 2)

         # close short
-        if action == Actions.Short_buy.value and self._position == Positions.Short:
+        if action == Actions.Short_exit.value and self._position == Positions.Short:
             last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
             current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
             return float(np.log(last_trade_price) - np.log(current_price))

-        if action == Actions.Short_buy.value and self._position == Positions.Short:
+        if action == Actions.Short_exit.value and self._position == Positions.Short:
             if self.close_trade_profit[-1] > self.profit_aim * self.rr:
                 last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
                 current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)

View File

@@ -471,12 +471,11 @@ class FreqaiDataDrawer:
         elif model_type == 'keras':
             from tensorflow import keras
             model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5")
-        elif model_type == 'stable_baselines_ppo':
-            from stable_baselines3.ppo.ppo import PPO
-            model = PPO.load(dk.data_path / f"{dk.model_filename}_model")
-        elif model_type == 'stable_baselines_dqn':
-            from stable_baselines3 import DQN
-            model = DQN.load(dk.data_path / f"{dk.model_filename}_model")
+        elif model_type == 'stable_baselines':
+            mod = __import__('stable_baselines3', fromlist=[
+                self.freqai_info['rl_config']['model_type']])
+            MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type'])
+            model = MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")

         if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file():
             dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib")
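
A single generic 'stable_baselines' save type works because every stable_baselines3 algorithm exposes the same save()/load() pair, so the class resolved from the config can reload whatever was written to disk. A small runnable round-trip as illustration (CartPole and the file name are placeholders, not part of the commit):

import stable_baselines3 as sb3

model_type = "PPO"                      # would come from rl_config in freqai
ModelClass = getattr(sb3, model_type)

model = ModelClass("MlpPolicy", "CartPole-v1", verbose=0)
model.save("example_model")             # writes example_model.zip
reloaded = ModelClass.load("example_model")
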

View File

@@ -0,0 +1,82 @@
import logging
from typing import Any, Dict # , Tuple
# import numpy.typing as npt
import torch as th
import numpy as np
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
from pathlib import Path
logger = logging.getLogger(__name__)
class ReinforcementLearner(BaseReinforcementLearningModel):
"""
User created Reinforcement Learning Model prediction model.
"""
def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen):
train_df = data_dictionary["train_features"]
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=[256, 256, 128])
model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs,
tensorboard_log=Path(dk.data_path / "tensorboard"),
**self.freqai_info['model_training_parameters']
)
model.learn(
total_timesteps=int(total_timesteps),
callback=self.eval_callback
)
if Path(dk.data_path / "best_model.zip").is_file():
logger.info('Callback found a best model.')
best_model = self.MODELCLASS.load(dk.data_path / "best_model")
return best_model
logger.info('Couldnt find best model, using final model instead.')
return model
class MyRLEnv(Base5ActionRLEnv):
"""
User can modify any part of the environment by overriding base
functions
"""
def calculate_reward(self, action):
if self._last_trade_tick is None:
return 0.
# close long
if action == Actions.Long_exit.value and self._position == Positions.Long:
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(current_price) - np.log(last_trade_price))
if action == Actions.Long_exit.value and self._position == Positions.Long:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
return float((np.log(current_price) - np.log(last_trade_price)) * 2)
# close short
if action == Actions.Short_exit.value and self._position == Positions.Short:
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(last_trade_price) - np.log(current_price))
if action == Actions.Short_exit.value and self._position == Positions.Short:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
return float((np.log(last_trade_price) - np.log(current_price)) * 2)
return 0.
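
In fit_rl above, everything under model_training_parameters is forwarded verbatim as keyword arguments to the chosen agent's constructor, so the keys must match the selected algorithm. A hedged illustration with standard stable_baselines3 constructor arguments; the values are examples only, not recommendations from the commit:

# Illustrative model_training_parameters blocks for two possible model_type choices.
ppo_training_parameters = {
    "learning_rate": 3e-4,
    "gamma": 0.9,
    "verbose": 1,
}

dqn_training_parameters = {
    "learning_rate": 1e-4,
    "buffer_size": 100_000,
    "learning_starts": 1000,
    "verbose": 1,
}

# e.g. model = PPO("MlpPolicy", train_env, **ppo_training_parameters)
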

View File

@@ -1,17 +1,59 @@
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+import logging

-import gym
-import torch
 import torch as th
+from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
 from stable_baselines3 import DQN
 from stable_baselines3.common.buffers import ReplayBuffer
-from stable_baselines3.common.policies import BasePolicy
-from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor,
-                                                   FlattenExtractor)
-from stable_baselines3.common.type_aliases import GymEnv, Schedule
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+from pathlib import Path
 from stable_baselines3.dqn.policies import (CnnPolicy, DQNPolicy, MlpPolicy,
                                             QNetwork)
 from torch import nn
+import gym
+from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor,
+                                                   FlattenExtractor)
+from stable_baselines3.common.type_aliases import GymEnv, Schedule
+from stable_baselines3.common.policies import BasePolicy
+
+logger = logging.getLogger(__name__)
class ReinforcementLearnerCustomAgent(BaseReinforcementLearningModel):
"""
User can customize agent by defining the class and using it directly.
Here the example is "TDQN"
"""
def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen):
train_df = data_dictionary["train_features"]
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=[256, 256, 128])
# TDQN is a custom agent defined below
model = TDQN(self.policy_type, self.train_env,
tensorboard_log=Path(dk.data_path / "tensorboard"),
policy_kwargs=policy_kwargs,
**self.freqai_info['model_training_parameters']
)
model.learn(
total_timesteps=int(total_timesteps),
callback=self.eval_callback
)
if Path(dk.data_path / "best_model.zip").is_file():
logger.info('Callback found a best model.')
best_model = self.MODELCLASS.load(dk.data_path / "best_model")
return best_model
logger.info('Couldnt find best model, using final model instead.')
return model
# User creates their custom agent and networks as shown below
def create_mlp_( def create_mlp_(
@@ -72,7 +114,7 @@ class TDQNetwork(QNetwork):

     def init_weights(self, m):
         if type(m) == nn.Linear:
-            torch.nn.init.kaiming_uniform_(m.weight)
+            th.nn.init.kaiming_uniform_(m.weight)


 class TDQNPolicy(DQNPolicy):
@@ -175,7 +217,7 @@ class TDQN(DQN):
         exploration_initial_eps: float = 1.0,
         exploration_final_eps: float = 0.05,
         max_grad_norm: float = 10,
-        tensorboard_log: Optional[str] = None,
+        tensorboard_log: Optional[Path] = None,
         create_eval_env: bool = False,
         policy_kwargs: Optional[Dict[str, Any]] = None,
         verbose: int = 1,

View File

@@ -0,0 +1,84 @@
import logging
from typing import Any, Dict # , Tuple
# import numpy.typing as npt
import torch as th
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import SubprocVecEnv
from freqtrade.freqai.RL.BaseReinforcementLearningModel import (BaseReinforcementLearningModel,
make_env)
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from pathlib import Path
logger = logging.getLogger(__name__)
class ReinforcementLearner_multiproc(BaseReinforcementLearningModel):
"""
User created Reinforcement Learning Model prediction model.
"""
def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen):
train_df = data_dictionary["train_features"]
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
# model arch
policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=[512, 512, 512])
model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs,
tensorboard_log=Path(dk.data_path / "tensorboard"),
**self.freqai_info['model_training_parameters']
)
model.learn(
total_timesteps=int(total_timesteps),
callback=self.eval_callback
)
if Path(dk.data_path / "best_model.zip").is_file():
logger.info('Callback found a best model.')
best_model = self.MODELCLASS.load(dk.data_path / "best_model")
return best_model
logger.info('Couldnt find best model, using final model instead.')
return model
def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test, dk):
"""
If user has particular environment configuration needs, they can do that by
overriding this function. In the present case, the user wants to setup training
environments for multiple workers.
"""
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df)
# environments
if not self.train_env:
env_id = "train_env"
num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2)
self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
self.reward_params, self.CONV_WIDTH) for i
in range(num_cpu)])
eval_env_id = 'eval_env'
self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
self.reward_params, self.CONV_WIDTH, monitor=True) for i
in range(num_cpu)])
self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
render=False, eval_freq=eval_freq,
best_model_save_path=dk.data_path)
else:
self.train_env.env_method('reset')
self.eval_env.env_method('reset')
self.train_env.env_method('reset_env', train_df, prices_train,
self.CONV_WIDTH, self.reward_params)
self.eval_env.env_method('reset_env', train_df, prices_train,
self.CONV_WIDTH, self.reward_params)
self.eval_callback.__init__(self.eval_env, deterministic=True,
render=False, eval_freq=eval_freq,
best_model_save_path=dk.data_path)
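
Because SubprocVecEnv hides the workers behind a single handle, the retraining branch above uses env_method to broadcast reset_env and reset to every sub-environment. A tiny single-process illustration of that call; DummyVecEnv and CartPole are stand-ins so the snippet runs on its own and are not part of the commit:

import gym
from stable_baselines3.common.vec_env import DummyVecEnv

vec_env = DummyVecEnv([lambda: gym.make("CartPole-v1") for _ in range(2)])
# env_method forwards the named method call to every wrapped environment and
# returns one result per worker; the multiproc model uses it for 'reset_env'.
results = vec_env.env_method("reset")
vec_env.close()
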

View File

@@ -1,104 +0,0 @@
import gc
import logging
from typing import Any, Dict # , Tuple
import numpy as np
# import numpy.typing as npt
import torch as th
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.RL.Base3ActionRLEnv import Actions, Base3ActionRLEnv, Positions
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
logger = logging.getLogger(__name__)
class ReinforcementLearningPPO(BaseReinforcementLearningModel):
"""
User created Reinforcement Learning Model prediction model.
"""
def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen):
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df)
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
path = dk.data_path
eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/",
log_path=f"{path}/ppo/logs/", eval_freq=int(eval_freq),
deterministic=True, render=False)
# model arch
policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=[256, 256, 128])
model = PPO('MlpPolicy', self.train_env, policy_kwargs=policy_kwargs,
tensorboard_log=f"{path}/ppo/tensorboard/",
**self.freqai_info['model_training_parameters']
)
model.learn(
total_timesteps=int(total_timesteps),
callback=eval_callback
)
del model
best_model = PPO.load(dk.data_path / "best_model")
print('Training finished!')
gc.collect()
return best_model
def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test):
"""
User overrides this as shown here if they are using a custom MyRLEnv
"""
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
# environments
if not self.train_env:
self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
reward_kwargs=self.reward_params)
self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test,
window_size=self.CONV_WIDTH,
reward_kwargs=self.reward_params), ".")
else:
self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params)
self.eval_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params)
self.train_env.reset()
self.eval_env.reset()
class MyRLEnv(Base3ActionRLEnv):
"""
User can override any function in BaseRLEnv and gym.Env
"""
def calculate_reward(self, action):
if self._last_trade_tick is None:
return 0.
# close long
if (action == Actions.Short.value or
action == Actions.Neutral.value) and self._position == Positions.Long:
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(current_price) - np.log(last_trade_price))
# close short
if (action == Actions.Long.value or
action == Actions.Neutral.value) and self._position == Positions.Short:
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(last_trade_price) - np.log(current_price))
return 0.

View File

@@ -1,132 +0,0 @@
import logging
from typing import Any, Dict # , Tuple
import numpy as np
# import numpy.typing as npt
import torch as th
from stable_baselines3.common.monitor import Monitor
from typing import Callable
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
import gym
logger = logging.getLogger(__name__)
def make_env(env_id: str, rank: int, seed: int, train_df, price,
reward_params, window_size, monitor=False) -> Callable:
"""
Utility function for multiprocessed env.
:param env_id: (str) the environment ID
:param num_env: (int) the number of environment you wish to have in subprocesses
:param seed: (int) the inital seed for RNG
:param rank: (int) index of the subprocess
:return: (Callable)
"""
def _init() -> gym.Env:
env = MyRLEnv(df=train_df, prices=price, window_size=window_size,
reward_kwargs=reward_params, id=env_id, seed=seed + rank)
if monitor:
env = Monitor(env, ".")
return env
set_random_seed(seed)
return _init
class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel):
"""
User created Reinforcement Learning Model prediction model.
"""
def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen):
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df)
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
path = dk.data_path
eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/",
log_path=f"{path}/ppo/logs/", eval_freq=int(eval_freq),
deterministic=True, render=False)
# model arch
policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=[512, 512, 512])
model = PPO('MlpPolicy', self.train_env, policy_kwargs=policy_kwargs,
tensorboard_log=f"{path}/ppo/tensorboard/",
**self.freqai_info['model_training_parameters']
)
model.learn(
total_timesteps=int(total_timesteps),
callback=eval_callback
)
best_model = PPO.load(dk.data_path / "best_model")
print('Training finished!')
return best_model
def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test):
"""
User overrides this in their prediction model if they are custom a MyRLEnv. Othwerwise
leaving this will default to Base5ActEnv
"""
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
# environments
if not self.train_env:
env_id = "train_env"
num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2)
self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
self.reward_params, self.CONV_WIDTH) for i
in range(num_cpu)])
eval_env_id = 'eval_env'
self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
self.reward_params, self.CONV_WIDTH, monitor=True) for i
in range(num_cpu)])
else:
self.train_env.env_method('reset_env', train_df, prices_train,
self.CONV_WIDTH, self.reward_params)
self.eval_env.env_method('reset_env', train_df, prices_train,
self.CONV_WIDTH, self.reward_params)
self.train_env.env_method('reset')
self.eval_env.env_method('reset')
class MyRLEnv(Base3ActionRLEnv):
"""
User can override any function in BaseRLEnv and gym.Env
"""
def calculate_reward(self, action):
if self._last_trade_tick is None:
return 0.
# close long
if (action == Actions.Short.value or
action == Actions.Neutral.value) and self._position == Positions.Long:
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(current_price) - np.log(last_trade_price))
# close short
if (action == Actions.Long.value or
action == Actions.Neutral.value) and self._position == Positions.Short:
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(last_trade_price) - np.log(current_price))
return 0.

View File

@@ -1,115 +0,0 @@
import logging
from typing import Any, Dict # Optional
import torch as th
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
from freqtrade.freqai.RL.TDQNagent import TDQN
from stable_baselines3 import DQN
from stable_baselines3.common.buffers import ReplayBuffer
import numpy as np
import gc
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class ReinforcementLearningTDQN(BaseReinforcementLearningModel):
"""
User created Reinforcement Learning Model prediction model.
"""
def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen):
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df)
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
path = dk.data_path
eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/",
log_path=f"{path}/tdqn/logs/", eval_freq=int(eval_freq),
deterministic=True, render=False)
# model arch
policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=[256, 256, 128])
model = TDQN('TMultiInputPolicy', self.train_env,
tensorboard_log=f"{path}/tdqn/tensorboard/",
policy_kwargs=policy_kwargs,
replay_buffer_class=ReplayBuffer,
**self.freqai_info['model_training_parameters']
)
model.learn(
total_timesteps=int(total_timesteps),
callback=eval_callback
)
del model
best_model = DQN.load(dk.data_path / "best_model")
print('Training finished!')
gc.collect()
return best_model
def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test):
"""
User overrides this as shown here if they are using a custom MyRLEnv
"""
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
# environments
if not self.train_env:
self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
reward_kwargs=self.reward_params)
self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test,
window_size=self.CONV_WIDTH,
reward_kwargs=self.reward_params), ".")
else:
self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params)
self.eval_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params)
self.train_env.reset()
self.eval_env.reset()
# User can inherit and customize 5 action environment
class MyRLEnv(Base5ActionRLEnv):
"""
User can override any function in BaseRLEnv and gym.Env. Here the user
Adds 5 actions.
"""
def calculate_reward(self, action):
if self._last_trade_tick is None:
return 0.
# close long
if action == Actions.Long_sell.value and self._position == Positions.Long:
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(current_price) - np.log(last_trade_price))
if action == Actions.Long_sell.value and self._position == Positions.Long:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
return float((np.log(current_price) - np.log(last_trade_price)) * 2)
# close short
if action == Actions.Short_buy.value and self._position == Positions.Short:
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(last_trade_price) - np.log(current_price))
if action == Actions.Short_buy.value and self._position == Positions.Short:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
return float((np.log(last_trade_price) - np.log(current_price)) * 2)
return 0.

View File

@@ -1,148 +0,0 @@
import logging
from typing import Any, Dict # Optional
import torch as th
import numpy as np
import gym
from typing import Callable
from stable_baselines3.common.callbacks import EvalCallback
# EvalCallback , StopTrainingOnNoModelImprovement, StopTrainingOnRewardThreshold
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import DQN
from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
from freqtrade.freqai.RL.TDQNagent import TDQN
from stable_baselines3.common.buffers import ReplayBuffer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
def make_env(env_id: str, rank: int, seed: int, train_df, price,
reward_params, window_size, monitor=False) -> Callable:
"""
Utility function for multiprocessed env.
:param env_id: (str) the environment ID
:param num_env: (int) the number of environment you wish to have in subprocesses
:param seed: (int) the inital seed for RNG
:param rank: (int) index of the subprocess
:return: (Callable)
"""
def _init() -> gym.Env:
env = MyRLEnv(df=train_df, prices=price, window_size=window_size,
reward_kwargs=reward_params, id=env_id, seed=seed + rank)
if monitor:
env = Monitor(env, ".")
return env
set_random_seed(seed)
return _init
class ReinforcementLearningTDQN_multiproc(BaseReinforcementLearningModel):
"""
User created Reinforcement Learning Model prediction model.
"""
def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen):
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
eval_freq = self.freqai_info["rl_config"]["eval_cycles"] * len(test_df)
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
path = dk.data_path
eval_callback = EvalCallback(self.eval_env, best_model_save_path=f"{path}/",
log_path=f"{path}/tdqn/logs/", eval_freq=int(eval_freq),
deterministic=True, render=False)
# model arch
policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=[512, 512, 512])
model = TDQN('TMultiInputPolicy', self.train_env,
policy_kwargs=policy_kwargs,
tensorboard_log=f"{path}/tdqn/tensorboard/",
replay_buffer_class=ReplayBuffer,
**self.freqai_info['model_training_parameters']
)
model.learn(
total_timesteps=int(total_timesteps),
callback=eval_callback
)
best_model = DQN.load(dk.data_path / "best_model.zip")
print('Training finished!')
return best_model
def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test):
"""
User overrides this in their prediction model if they are custom a MyRLEnv. Othwerwise
leaving this will default to Base5ActEnv
"""
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
# environments
if not self.train_env:
env_id = "train_env"
num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2)
self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
self.reward_params, self.CONV_WIDTH) for i
in range(num_cpu)])
eval_env_id = 'eval_env'
self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
self.reward_params, self.CONV_WIDTH, monitor=True) for i
in range(num_cpu)])
else:
self.train_env.env_method('reset_env', train_df, prices_train,
self.CONV_WIDTH, self.reward_params)
self.eval_env.env_method('reset_env', train_df, prices_train,
self.CONV_WIDTH, self.reward_params)
self.train_env.env_method('reset')
self.eval_env.env_method('reset')
# User can inherit and customize 5 action environment
class MyRLEnv(Base5ActionRLEnv):
"""
User can override any function in BaseRLEnv and gym.Env. Here the user
Adds 5 actions.
"""
def calculate_reward(self, action):
if self._last_trade_tick is None:
return 0.
# close long
if action == Actions.Long_sell.value and self._position == Positions.Long:
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(current_price) - np.log(last_trade_price))
if action == Actions.Long_sell.value and self._position == Positions.Long:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
return float((np.log(current_price) - np.log(last_trade_price)) * 2)
# close short
if action == Actions.Short_buy.value and self._position == Positions.Short:
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(last_trade_price) - np.log(current_price))
if action == Actions.Short_buy.value and self._position == Positions.Short:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
return float((np.log(last_trade_price) - np.log(current_price)) * 2)
return 0.