diff --git a/config_examples/config_freqai-rl.example.json b/config_examples/config_freqai-rl.example.json
index e8852a0cf..dc7c62e4a 100644
--- a/config_examples/config_freqai-rl.example.json
+++ b/config_examples/config_freqai-rl.example.json
@@ -58,6 +58,7 @@
         "model_save_type": "stable_baselines",
         "conv_width": 4,
         "purge_old_models": true,
+        "limit_ram_usage": false,
         "train_period_days": 5,
         "backtest_period_days": 2,
         "identifier": "unique-id",
diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
index 7a524ba87..5a7ae4372 100644
--- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
+++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
@@ -19,6 +19,7 @@ from typing import Callable
 from datetime import datetime, timezone
 from stable_baselines3.common.utils import set_random_seed
 import gym
+from pathlib import Path
 
 logger = logging.getLogger(__name__)
 torch.multiprocessing.set_sharing_strategy('file_system')
@@ -110,9 +111,9 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         train_df = data_dictionary["train_features"]
         test_df = data_dictionary["test_features"]
 
-        self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
-                                 reward_kwargs=self.reward_params, config=self.config)
-        self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test,
+        self.train_env = self.MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
+                                      reward_kwargs=self.reward_params, config=self.config)
+        self.eval_env = Monitor(self.MyRLEnv(df=test_df, prices=prices_test,
                                 window_size=self.CONV_WIDTH,
                                 reward_kwargs=self.reward_params, config=self.config))
         self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
                                           render=False, eval_freq=len(train_df),
                                           best_model_save_path=str(dk.data_path))
@@ -126,7 +127,6 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         go in here. Abstract method, so this function must be overridden by
         user class.
         """
-        return
 
     def get_state_info(self, pair: str):
@@ -232,6 +232,72 @@ class BaseReinforcementLearningModel(IFreqaiModel):
 
         return prices_train, prices_test
 
+    def load_model_from_disk(self, dk: FreqaiDataKitchen) -> Any:
+        """
+        Can be used by the user if they are trying to limit_ram_usage *and*
+        perform continual learning.
+        For now, this is unused.
+        """
+        exists = Path(dk.data_path / f"{dk.model_filename}_model").is_file()
+        if exists:
+            model = self.MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
+        else:
+            logger.info('No model file on disk to continue learning from.')
+            model = None
+        return model
+
+    # Nested class which can be overridden by user to customize further
+    class MyRLEnv(Base5ActionRLEnv):
+        """
+        User can override any function in BaseRLEnv and gym.Env. Here the user
+        sets a custom reward based on profit and trade duration.
+        """
+
+        def calculate_reward(self, action):
+
+            # first, penalize if the action is not valid
+            if not self._is_valid(action):
+                return -2
+
+            pnl = self.get_unrealized_profit()
+            rew = np.sign(pnl) * (pnl + 1)
+            factor = 100
+
+            # reward agent for entering trades
+            if action in (Actions.Long_enter.value, Actions.Short_enter.value) \
+                    and self._position == Positions.Neutral:
+                return 25
+            # discourage agent from not entering trades
+            if action == Actions.Neutral.value and self._position == Positions.Neutral:
+                return -1
+
+            max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
+            trade_duration = self._current_tick - self._last_trade_tick
+
+            if trade_duration <= max_trade_duration:
+                factor *= 1.5
+            elif trade_duration > max_trade_duration:
+                factor *= 0.5
+
+            # discourage sitting in position
+            if self._position in (Positions.Short, Positions.Long) and \
+               action == Actions.Neutral.value:
+                return -1 * trade_duration / max_trade_duration
+
+            # close long
+            if action == Actions.Long_exit.value and self._position == Positions.Long:
+                if pnl > self.profit_aim * self.rr:
+                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+                return float(rew * factor)
+
+            # close short
+            if action == Actions.Short_exit.value and self._position == Positions.Short:
+                if pnl > self.profit_aim * self.rr:
+                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+                return float(rew * factor)
+
+            return 0.
+
     # TODO take care of this appendage. Right now it needs to be called because FreqAI enforces it.
     # But FreqaiRL needs more objects passed to fit() (like DK) and we dont want to go refactor
     # all the other existing fit() functions to include dk argument. For now we instantiate and
@@ -240,7 +306,8 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         return
 
 
-def make_env(env_id: str, rank: int, seed: int, train_df: DataFrame, price: DataFrame,
+def make_env(MyRLEnv: Base5ActionRLEnv, env_id: str, rank: int,
+             seed: int, train_df: DataFrame, price: DataFrame,
              reward_params: Dict[str, int], window_size: int, monitor: bool = False,
              config: Dict[str, Any] = {}) -> Callable:
     """
@@ -252,6 +319,7 @@ def make_env(env_id: str, rank: int, seed: int, train_df: DataFrame, price: Data
     :param rank: (int) index of the subprocess
     :return: (Callable)
     """
+
     def _init() -> gym.Env:
 
         env = MyRLEnv(df=train_df, prices=price, window_size=window_size,
@@ -261,54 +329,3 @@ def make_env(env_id: str, rank: int, seed: int, train_df: DataFrame, price: Data
         return env
     set_random_seed(seed)
     return _init
-
-
-class MyRLEnv(Base5ActionRLEnv):
-    """
-    User can override any function in BaseRLEnv and gym.Env. Here the user
-    sets a custom reward based on profit and trade duration.
-    """
-
-    def calculate_reward(self, action):
-
-        # first, penalize if the action is not valid
-        if not self._is_valid(action):
-            return -2
-
-        pnl = self.get_unrealized_profit()
-        rew = np.sign(pnl) * (pnl + 1)
-        factor = 100
-
-        # reward agent for entering trades
-        if action in (Actions.Long_enter.value, Actions.Short_enter.value) \
-            and self._position == Positions.Neutral:
-            return 25
-        # discourage agent from not entering trades
-        if action == Actions.Neutral.value and self._position == Positions.Neutral:
-            return -1
-
-        max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
-        trade_duration = self._current_tick - self._last_trade_tick
-
-        if trade_duration <= max_trade_duration:
-            factor *= 1.5
-        elif trade_duration > max_trade_duration:
-            factor *= 0.5
-
-        # discourage sitting in position
-        if self._position in (Positions.Short, Positions.Long) and action == Actions.Neutral.value:
-            return -1 * trade_duration / max_trade_duration
-
-        # close long
-        if action == Actions.Long_exit.value and self._position == Positions.Long:
-            if pnl > self.profit_aim * self.rr:
-                factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-            return float(rew * factor)
-
-        # close short
-        if action == Actions.Short_exit.value and self._position == Positions.Short:
-            if pnl > self.profit_aim * self.rr:
-                factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-            return float(rew * factor)
-
-        return 0.
diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py
index ae3e92f5e..64a5502ad 100644
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -90,6 +90,7 @@ class FreqaiDataDrawer:
         self.empty_pair_dict: pair_info = {
                 "model_filename": "", "trained_timestamp": 0,
                 "priority": 1, "first": True, "data_path": "", "extras": {}}
+        self.limit_ram_use = self.freqai_info.get('limit_ram_usage', False)
 
     def load_drawer_from_disk(self):
         """
@@ -423,8 +424,8 @@ class FreqaiDataDrawer:
                 dk.pca, open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "wb")
             )
 
-        # if self.live:
-        self.model_dictionary[coin] = model
+        if not self.limit_ram_use:
+            self.model_dictionary[coin] = model
         self.pair_dict[coin]["model_filename"] = dk.model_filename
         self.pair_dict[coin]["data_path"] = str(dk.data_path)
         self.save_drawer_to_disk()
@@ -464,7 +465,7 @@ class FreqaiDataDrawer:
         model_type = self.freqai_info.get('model_save_type', 'joblib')
 
         # try to access model in memory instead of loading object from disk to save time
-        if dk.live and coin in self.model_dictionary:
+        if dk.live and coin in self.model_dictionary and not self.limit_ram_use:
             model = self.model_dictionary[coin]
         elif model_type == 'joblib':
             model = load(dk.data_path / f"{dk.model_filename}_model.joblib")
@@ -486,7 +487,7 @@ class FreqaiDataDrawer:
             )
 
         # load it into ram if it was loaded from disk
-        if coin not in self.model_dictionary:
+        if coin not in self.model_dictionary and not self.limit_ram_use:
             self.model_dictionary[coin] = model
 
         if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
index 36cc821e4..a72a56e20 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
@@ -3,12 +3,12 @@ from typing import Any, Dict
 
 import torch as th
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
-from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions
+from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Positions
 from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
 from pathlib import Path
-from pandas import DataFrame
-from stable_baselines3.common.callbacks import EvalCallback
-from stable_baselines3.common.monitor import Monitor
+# from pandas import DataFrame
+# from stable_baselines3.common.callbacks import EvalCallback
+# from stable_baselines3.common.monitor import Monitor
 import numpy as np
 
 logger = logging.getLogger(__name__)
@@ -53,71 +53,53 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
 
         return model
 
-    def set_train_and_eval_environments(self, data_dictionary: Dict[str, DataFrame],
-                                        prices_train: DataFrame, prices_test: DataFrame,
-                                        dk: FreqaiDataKitchen):
+    class MyRLEnv(BaseReinforcementLearningModel.MyRLEnv):
         """
-        User can override this if they are using a custom MyRLEnv
+        User can override any function in BaseRLEnv and gym.Env. Here the user
+        sets a custom reward based on profit and trade duration.
         """
-        train_df = data_dictionary["train_features"]
-        test_df = data_dictionary["test_features"]
 
-        self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
-                                 reward_kwargs=self.reward_params, config=self.config)
-        self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test,
-                                        window_size=self.CONV_WIDTH,
-                                        reward_kwargs=self.reward_params, config=self.config))
-        self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
-                                          render=False, eval_freq=len(train_df),
-                                          best_model_save_path=str(dk.data_path))
+        def calculate_reward(self, action):
 
+            # first, penalize if the action is not valid
+            if not self._is_valid(action):
+                return -2
 
-class MyRLEnv(Base5ActionRLEnv):
-    """
-    User can override any function in BaseRLEnv and gym.Env. Here the user
-    sets a custom reward based on profit and trade duration.
-    """
+            pnl = self.get_unrealized_profit()
+            rew = np.sign(pnl) * (pnl + 1)
+            factor = 100
 
-    def calculate_reward(self, action):
+            # reward agent for entering trades
+            if action in (Actions.Long_enter.value, Actions.Short_enter.value) \
+                    and self._position == Positions.Neutral:
+                return 25
+            # discourage agent from not entering trades
+            if action == Actions.Neutral.value and self._position == Positions.Neutral:
+                return -1
 
-        # first, penalize if the action is not valid
-        if not self._is_valid(action):
-            return -2
+            max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
+            trade_duration = self._current_tick - self._last_trade_tick
 
-        pnl = self.get_unrealized_profit()
-        rew = np.sign(pnl) * (pnl + 1)
-        factor = 100
+            if trade_duration <= max_trade_duration:
+                factor *= 1.5
+            elif trade_duration > max_trade_duration:
+                factor *= 0.5
 
-        # reward agent for entering trades
-        if action in (Actions.Long_enter.value, Actions.Short_enter.value) \
-            and self._position == Positions.Neutral:
-            return 25
-        # discourage agent from not entering trades
-        if action == Actions.Neutral.value and self._position == Positions.Neutral:
-            return -1
+            # discourage sitting in position
+            if self._position in (Positions.Short, Positions.Long) and \
+               action == Actions.Neutral.value:
+                return -1 * trade_duration / max_trade_duration
 
-        max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
-        trade_duration = self._current_tick - self._last_trade_tick
+            # close long
+            if action == Actions.Long_exit.value and self._position == Positions.Long:
+                if pnl > self.profit_aim * self.rr:
+                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+                return float(rew * factor)
 
-        if trade_duration <= max_trade_duration:
-            factor *= 1.5
-        elif trade_duration > max_trade_duration:
-            factor *= 0.5
+            # close short
+            if action == Actions.Short_exit.value and self._position == Positions.Short:
+                if pnl > self.profit_aim * self.rr:
+                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+                return float(rew * factor)
 
-        # discourage sitting in position
-        if self._position in (Positions.Short, Positions.Long) and action == Actions.Neutral.value:
-            return -1 * trade_duration / max_trade_duration
-
-        # close long
-        if action == Actions.Long_exit.value and self._position == Positions.Long:
-            if pnl > self.profit_aim * self.rr:
-                factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-            return float(rew * factor)
-
-        # close short
-        if action == Actions.Short_exit.value and self._position == Positions.Short:
-            if pnl > self.profit_aim * self.rr:
-                factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-            return float(rew * factor)
-
-        return 0.
+            return 0.
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
index 18a843b6d..f301da981 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
@@ -34,7 +34,7 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel):
                                     **self.freqai_info['model_training_parameters']
                                     )
         else:
-            logger.info('Continual training activated - starting training from previously '
+            logger.info('Continual learning activated - starting training from previously '
                         'trained agent.')
             model = self.dd.model_dictionary[dk.pair]
             model.tensorboard_log = Path(dk.data_path / "tensorboard")
@@ -65,13 +65,14 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel):
         env_id = "train_env"
         num_cpu = int(self.freqai_info["rl_config"]["thread_count"] / 2)
 
-        self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
+        self.train_env = SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1, train_df, prices_train,
                                                  self.reward_params, self.CONV_WIDTH,
                                                  config=self.config) for i
                                         in range(num_cpu)])
 
         eval_env_id = 'eval_env'
-        self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
+        self.eval_env = SubprocVecEnv([make_env(self.MyRLEnv, eval_env_id, i, 1,
+                                                test_df, prices_test,
                                                 self.reward_params, self.CONV_WIDTH, monitor=True,
                                                 config=self.config) for i
                                        in range(num_cpu)])
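
Not part of the patch: a minimal usage sketch of the nested-class hook the diff introduces, assuming a hypothetical user model named MyCoolRLModel placed under user_data/freqaimodels/ (the class name and location are illustrative, not defined by this change). Instead of editing the module-level MyRLEnv that the patch removes, a user subclasses ReinforcementLearner and overrides the nested MyRLEnv.calculate_reward; since set_train_and_eval_environments() and make_env() now receive self.MyRLEnv, both the single-process and the SubprocVecEnv code paths pick the override up automatically.

# Hypothetical user model -- a sketch, not part of this patch.
from freqtrade.freqai.prediction_models.ReinforcementLearner import ReinforcementLearner
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Positions


class MyCoolRLModel(ReinforcementLearner):
    """Customizes only the reward; the fit/training logic is inherited unchanged."""

    class MyRLEnv(ReinforcementLearner.MyRLEnv):
        def calculate_reward(self, action):
            # keep the invalid-action penalty used by the parent environment
            if not self._is_valid(action):
                return -2
            # example tweak: flat bonus for entering a trade from neutral,
            # everything else defers to the parent reward logic
            if action in (Actions.Long_enter.value, Actions.Short_enter.value) \
                    and self._position == Positions.Neutral:
                return 25
            return super().calculate_reward(action)

The new "limit_ram_usage" flag shown in the config example defaults to false; when set to true, FreqaiDataDrawer skips caching models in model_dictionary and reloads them from disk instead.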