From 91683e1dcaa0b966b813d5e418834136828b0cad Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 15 Aug 2022 10:26:44 +0200 Subject: [PATCH] restructure RL so that user can customize environment --- freqtrade/freqai/RL/BaseRLEnv.py | 318 +++++++++ .../RL/BaseReinforcementLearningModel.py | 230 ++++++ .../TDQNagent.py} | 29 +- freqtrade/freqai/{rl => RL}/__init__.py | 0 .../RL/RLPrediction_agent.py | 139 ---- .../RL/RLPrediction_env_TDQN_3ac.py | 513 ------------- .../RL/RLPrediction_env_TDQN_5ac.py | 671 ------------------ .../freqai/prediction_models/RL/config.py | 37 - .../prediction_models/RLPredictionModel.py | 253 ------- .../ReinforcementLearning.py | 273 ------- .../ReinforcementLearningPPO.py | 155 ++++ .../ReinforcementLearningTDQN.py | 168 +++++ freqtrade/freqai/rl/BaseRLAgent.py | 0 13 files changed, 882 insertions(+), 1904 deletions(-) create mode 100644 freqtrade/freqai/RL/BaseRLEnv.py create mode 100644 freqtrade/freqai/RL/BaseReinforcementLearningModel.py rename freqtrade/freqai/{prediction_models/RL/RLPrediction_agent_TDQN.py => RL/TDQNagent.py} (93%) rename freqtrade/freqai/{rl => RL}/__init__.py (100%) delete mode 100644 freqtrade/freqai/prediction_models/RL/RLPrediction_agent.py delete mode 100644 freqtrade/freqai/prediction_models/RL/RLPrediction_env_TDQN_3ac.py delete mode 100644 freqtrade/freqai/prediction_models/RL/RLPrediction_env_TDQN_5ac.py delete mode 100644 freqtrade/freqai/prediction_models/RL/config.py delete mode 100644 freqtrade/freqai/prediction_models/RLPredictionModel.py delete mode 100644 freqtrade/freqai/prediction_models/ReinforcementLearning.py create mode 100644 freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py create mode 100644 freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py delete mode 100644 freqtrade/freqai/rl/BaseRLAgent.py diff --git a/freqtrade/freqai/RL/BaseRLEnv.py b/freqtrade/freqai/RL/BaseRLEnv.py new file mode 100644 index 000000000..607262acd --- /dev/null +++ b/freqtrade/freqai/RL/BaseRLEnv.py @@ -0,0 +1,318 @@ +import logging +from enum import Enum +# from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union + +import gym +import numpy as np +from gym import spaces +from gym.utils import seeding + +logger = logging.getLogger(__name__) + + +class Actions(Enum): + Short = 0 + Long = 1 + Neutral = 2 + + +class Positions(Enum): + Short = 0 + Long = 1 + Neutral = 0.5 + + def opposite(self): + return Positions.Short if self == Positions.Long else Positions.Long + + +def mean_over_std(x): + std = np.std(x, ddof=1) + mean = np.mean(x) + return mean / std if std > 0 else 0 + + +class BaseRLEnv(gym.Env): + + metadata = {'render.modes': ['human']} + + def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, ): + assert df.ndim == 2 + + self.seed() + self.df = df + self.signal_features = self.df + self.prices = prices + self.window_size = window_size + self.starting_point = starting_point + self.rr = reward_kwargs["rr"] + self.profit_aim = reward_kwargs["profit_aim"] + + self.fee = 0.0015 + + # # spaces + self.shape = (window_size, self.signal_features.shape[1]) + self.action_space = spaces.Discrete(len(Actions)) + self.observation_space = spaces.Box( + low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) + + # episode + self._start_tick = self.window_size + self._end_tick = len(self.prices) - 1 + self._done = None + self._current_tick = None + self._last_trade_tick = None + self._position = Positions.Neutral + self._position_history = None + self.total_reward = None 
+ self._total_profit = None + self._first_rendering = None + self.history = None + self.trade_history = [] + + self.r_t_change = 0. + + self.returns_report = [] + + def seed(self, seed: int = 1): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def reset(self): + + self._done = False + + if self.starting_point is True: + self._position_history = (self._start_tick * [None]) + [self._position] + else: + self._position_history = (self.window_size * [None]) + [self._position] + + self._current_tick = self._start_tick + self._last_trade_tick = None + self._position = Positions.Neutral + + self.total_reward = 0. + self._total_profit = 1. # unit + self._first_rendering = True + self.history = {} + self.trade_history = [] + self.portfolio_log_returns = np.zeros(len(self.prices)) + + self._profits = [(self._start_tick, 1)] + self.close_trade_profit = [] + self.r_t_change = 0. + + self.returns_report = [] + + return self._get_observation() + + def step(self, action: int): + self._done = False + self._current_tick += 1 + + if self._current_tick == self._end_tick: + self._done = True + + self.update_portfolio_log_returns(action) + + self._update_profit(action) + step_reward = self.calculate_reward(action) + self.total_reward += step_reward + + trade_type = None + if self.is_tradesignal(action): # exclude 3 case not trade + # Update position + """ + Action: Neutral, position: Long -> Close Long + Action: Neutral, position: Short -> Close Short + + Action: Long, position: Neutral -> Open Long + Action: Long, position: Short -> Close Short and Open Long + + Action: Short, position: Neutral -> Open Short + Action: Short, position: Long -> Close Long and Open Short + """ + + if action == Actions.Neutral.value: + self._position = Positions.Neutral + trade_type = "neutral" + elif action == Actions.Long.value: + self._position = Positions.Long + trade_type = "long" + elif action == Actions.Short.value: + self._position = Positions.Short + trade_type = "short" + else: + print("case not defined") + + # Update last trade tick + self._last_trade_tick = self._current_tick + + if trade_type is not None: + self.trade_history.append( + {'price': self.current_price(), 'index': self._current_tick, + 'type': trade_type}) + + if self._total_profit < 0.2: + self._done = True + + self._position_history.append(self._position) + observation = self._get_observation() + info = dict( + tick=self._current_tick, + total_reward=self.total_reward, + total_profit=self._total_profit, + position=self._position.value + ) + self._update_history(info) + + return observation, step_reward, self._done, info + + def _get_observation(self): + return self.signal_features[(self._current_tick - self.window_size):self._current_tick] + + def get_unrealized_profit(self): + + if self._last_trade_tick is None: + return 0. + + if self._position == Positions.Neutral: + return 0. + elif self._position == Positions.Short: + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + return (last_trade_price - current_price) / last_trade_price + elif self._position == Positions.Long: + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + return (current_price - last_trade_price) / last_trade_price + else: + return 0. 
+ + def is_tradesignal(self, action: int): + # trade signal + """ + not trade signal is : + Action: Neutral, position: Neutral -> Nothing + Action: Long, position: Long -> Hold Long + Action: Short, position: Short -> Hold Short + """ + return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) + or (action == Actions.Short.value and self._position == Positions.Short) + or (action == Actions.Long.value and self._position == Positions.Long)) + + def _is_trade(self, action: Actions): + return ((action == Actions.Long.value and self._position == Positions.Short) or + (action == Actions.Short.value and self._position == Positions.Long) or + (action == Actions.Neutral.value and self._position == Positions.Long) or + (action == Actions.Neutral.value and self._position == Positions.Short) + ) + + def is_hold(self, action): + return ((action == Actions.Short.value and self._position == Positions.Short) + or (action == Actions.Long.value and self._position == Positions.Long)) + + def add_buy_fee(self, price): + return price * (1 + self.fee) + + def add_sell_fee(self, price): + return price / (1 + self.fee) + + def _update_history(self, info): + if not self.history: + self.history = {key: [] for key in info.keys()} + + for key, value in info.items(): + self.history[key].append(value) + + def get_sharpe_ratio(self): + return mean_over_std(self.get_portfolio_log_returns()) + + def calculate_reward(self, action): + + if self._last_trade_tick is None: + return 0. + + # close long + if (action == Actions.Short.value or + action == Actions.Neutral.value) and self._position == Positions.Long: + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + return float(np.log(current_price) - np.log(last_trade_price)) + + # close short + if (action == Actions.Long.value or + action == Actions.Neutral.value) and self._position == Positions.Short: + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + return float(np.log(last_trade_price) - np.log(current_price)) + + return 0. + + def _update_profit(self, action): + if self._is_trade(action) or self._done: + pnl = self.get_unrealized_profit() + + if self._position == Positions.Long: + self._total_profit = self._total_profit + self._total_profit * pnl + self._profits.append((self._current_tick, self._total_profit)) + self.close_trade_profit.append(pnl) + + if self._position == Positions.Short: + self._total_profit = self._total_profit + self._total_profit * pnl + self._profits.append((self._current_tick, self._total_profit)) + self.close_trade_profit.append(pnl) + + def most_recent_return(self, action: int): + """ + We support Long, Neutral and Short positions. + Return is generated from rising prices in Long + and falling prices in Short positions. + The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. 
+ """ + # Long positions + if self._position == Positions.Long: + current_price = self.prices.iloc[self._current_tick].open + if action == Actions.Short.value or action == Actions.Neutral.value: + current_price = self.add_sell_fee(current_price) + + previous_price = self.prices.iloc[self._current_tick - 1].open + + if (self._position_history[self._current_tick - 1] == Positions.Short + or self._position_history[self._current_tick - 1] == Positions.Neutral): + previous_price = self.add_buy_fee(previous_price) + + return np.log(current_price) - np.log(previous_price) + + # Short positions + if self._position == Positions.Short: + current_price = self.prices.iloc[self._current_tick].open + if action == Actions.Long.value or action == Actions.Neutral.value: + current_price = self.add_buy_fee(current_price) + + previous_price = self.prices.iloc[self._current_tick - 1].open + if (self._position_history[self._current_tick - 1] == Positions.Long + or self._position_history[self._current_tick - 1] == Positions.Neutral): + previous_price = self.add_sell_fee(previous_price) + + return np.log(previous_price) - np.log(current_price) + + return 0 + + def get_portfolio_log_returns(self): + return self.portfolio_log_returns[1:self._current_tick + 1] + + def update_portfolio_log_returns(self, action): + self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) + + def current_price(self) -> float: + return self.prices.iloc[self._current_tick].open + + def prev_price(self) -> float: + return self.prices.iloc[self._current_tick - 1].open + + def sharpe_ratio(self): + if len(self.close_trade_profit) == 0: + return 0. + returns = np.array(self.close_trade_profit) + reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) + return reward diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py new file mode 100644 index 000000000..accddc94d --- /dev/null +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -0,0 +1,230 @@ +import logging +from typing import Any, Dict, Tuple + +import numpy as np +import numpy.typing as npt +import pandas as pd +from pandas import DataFrame +from abc import abstractmethod +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.freqai_interface import IFreqaiModel +from freqtrade.freqai.RL.BaseRLEnv import BaseRLEnv, Actions, Positions +from freqtrade.persistence import Trade + +logger = logging.getLogger(__name__) + + +class BaseReinforcementLearningModel(IFreqaiModel): + """ + User created Reinforcement Learning Model prediction model. + """ + + def train( + self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen + ) -> Any: + """ + Filter the training data and train a model to it. Train makes heavy use of the datakitchen + for storing, saving, loading, and analyzing the data. + :param unfiltered_dataframe: Full dataframe for the current training period + :param metadata: pair metadata from strategy. 
+        :returns:
+        :model: Trained model which can be used for inference (self.predict)
+        """
+
+        logger.info("--------------------Starting training " f"{pair} --------------------")
+
+        # filter the features requested by user in the configuration file and elegantly handle NaNs
+        features_filtered, labels_filtered = dk.filter_features(
+            unfiltered_dataframe,
+            dk.training_features_list,
+            dk.label_list,
+            training_filter=True,
+        )
+
+        data_dictionary: Dict[str, Any] = dk.make_train_test_datasets(
+            features_filtered, labels_filtered)
+        dk.fit_labels()  # useless for now, but just satiating append methods
+
+        # normalize all data based on train_dataset only
+        data_dictionary = dk.normalize_data(data_dictionary)
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_train(dk)
+
+        logger.info(
+            f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
+        )
+        logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
+
+        model = self.fit(data_dictionary, pair)
+
+        if pair not in self.dd.historic_predictions:
+            self.set_initial_historic_predictions(
+                data_dictionary['train_features'], model, dk, pair)
+
+        self.dd.save_historic_predictions_to_disk()
+
+        logger.info(f"--------------------done training {pair}--------------------")
+
+        return model
+
+    @abstractmethod
+    def fit(self, data_dictionary: Dict[str, Any], pair: str = ''):
+        """
+        Agent customizations and abstract Reinforcement Learning customizations
+        go in here. Abstract method, so this function must be overridden by
+        user class.
+        """
+
+        return
+
+    def get_state_info(self, pair):
+        open_trades = Trade.get_trades(trade_filter=Trade.is_open.is_(True))
+        market_side = 0.5
+        current_profit = 0
+        for trade in open_trades:
+            if trade.pair == pair:
+                current_value = trade.open_trade_value
+                openrate = trade.open_rate
+                if 'long' in trade.enter_tag:
+                    market_side = 1
+                else:
+                    market_side = 0
+                current_profit = current_value / openrate - 1
+
+        total_profit = 0
+        closed_trades = Trade.get_trades(
+            trade_filter=[Trade.is_open.is_(False), Trade.pair == pair])
+        for trade in closed_trades:
+            total_profit += trade.close_profit
+
+        return market_side, current_profit, total_profit
+
+    def predict(
+        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
+    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
+        """
+        Filter the prediction features data and predict with it.
+        :param unfiltered_dataframe: Full dataframe for the current backtest period.
+        :return:
+        :pred_df: dataframe containing the predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        dk.find_features(unfiltered_dataframe)
+        filtered_dataframe, _ = dk.filter_features(
+            unfiltered_dataframe, dk.training_features_list, training_filter=False
+        )
+        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
+        dk.data_dictionary["prediction_features"] = filtered_dataframe
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_predict(dk, filtered_dataframe)
+
+        pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model)
+        pred_df.fillna(0, inplace=True)
+
+        return (pred_df, dk.do_predict)
+
+    def rl_model_predict(self, dataframe: DataFrame,
+                         dk: FreqaiDataKitchen, model: Any) -> DataFrame:
+
+        output = pd.DataFrame(np.full((len(dataframe), 1), 2), columns=dk.label_list)
+
+        def _predict(window):
+            observations = dataframe.iloc[window.index]
+            res, _ = model.predict(observations, deterministic=True)
+            return res
+
+        output = output.rolling(window=self.CONV_WIDTH).apply(_predict)
+
+        return output
+
+    def set_initial_historic_predictions(
+        self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str
+    ) -> None:
+
+        pred_df = self.rl_model_predict(df, dk, model)
+        pred_df.fillna(0, inplace=True)
+        self.dd.historic_predictions[pair] = pred_df
+        hist_preds_df = self.dd.historic_predictions[pair]
+
+        for label in hist_preds_df.columns:
+            if hist_preds_df[label].dtype == object:
+                continue
+            hist_preds_df[f'{label}_mean'] = 0
+            hist_preds_df[f'{label}_std'] = 0
+
+        hist_preds_df['do_predict'] = 0
+
+        if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0:
+            hist_preds_df['DI_values'] = 0
+
+        for return_str in dk.data['extra_returns_per_train']:
+            hist_preds_df[return_str] = 0
+
+
+class MyRLEnv(BaseRLEnv):
+
+    def step(self, action):
+        self._done = False
+        self._current_tick += 1
+
+        if self._current_tick == self._end_tick:
+            self._done = True
+
+        self.update_portfolio_log_returns(action)
+
+        self._update_profit(action)
+        step_reward = self.calculate_reward(action)
+        self.total_reward += step_reward
+
+        trade_type = None
+        if self.is_tradesignal(action):  # exclude 3 case not trade
+            # Update position
+            """
+            Action: Neutral, position: Long ->  Close Long
+            Action: Neutral, position: Short -> Close Short
+
+            Action: Long, position: Neutral -> Open Long
+            Action: Long, position: Short -> Close Short and Open Long
+
+            Action: Short, position: Neutral -> Open Short
+            Action: Short, position: Long -> Close Long and Open Short
+            """
+
+            if action == Actions.Neutral.value:
+                self._position = Positions.Neutral
+                trade_type = "neutral"
+            elif action == Actions.Long.value:
+                self._position = Positions.Long
+                trade_type = "long"
+            elif action == Actions.Short.value:
+                self._position = Positions.Short
+                trade_type = "short"
+            else:
+                print("case not defined")
+
+            # Update last trade tick
+            self._last_trade_tick = self._current_tick
+
+            if trade_type is not None:
+                self.trade_history.append(
+                    {'price': self.current_price(), 'index': self._current_tick,
+                     'type': trade_type})
+
+        if self._total_profit < 0.2:
+            self._done = True
+
+        self._position_history.append(self._position)
+        observation = self._get_observation()
+        info = dict(
+            tick=self._current_tick,
+            total_reward=self.total_reward,
+            total_profit=self._total_profit,
+            position=self._position.value
+        )
+        self._update_history(info)
+
+        return
observation, step_reward, self._done, info diff --git a/freqtrade/freqai/prediction_models/RL/RLPrediction_agent_TDQN.py b/freqtrade/freqai/RL/TDQNagent.py similarity index 93% rename from freqtrade/freqai/prediction_models/RL/RLPrediction_agent_TDQN.py rename to freqtrade/freqai/RL/TDQNagent.py index 0aa3512a1..584f6a8ef 100644 --- a/freqtrade/freqai/prediction_models/RL/RLPrediction_agent_TDQN.py +++ b/freqtrade/freqai/RL/TDQNagent.py @@ -6,11 +6,10 @@ import torch as th from stable_baselines3 import DQN from stable_baselines3.common.buffers import ReplayBuffer from stable_baselines3.common.policies import BasePolicy -from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor, CombinedExtractor, +from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor, FlattenExtractor) from stable_baselines3.common.type_aliases import GymEnv, Schedule -#from stable_baselines3.common.policies import register_policy -from stable_baselines3.dqn.policies import (CnnPolicy, DQNPolicy, MlpPolicy, MultiInputPolicy, +from stable_baselines3.dqn.policies import (CnnPolicy, DQNPolicy, MlpPolicy, QNetwork) from torch import nn @@ -47,16 +46,17 @@ def create_mlp_( ] return modules + class TDQNetwork(QNetwork): def __init__(self, - observation_space: gym.spaces.Space, - action_space: gym.spaces.Space, - features_extractor: nn.Module, - features_dim: int, - net_arch: Optional[List[int]] = None, - activation_fn: Type[nn.Module] = nn.ReLU, - normalize_images: bool = True - ): + observation_space: gym.spaces.Space, + action_space: gym.spaces.Space, + features_extractor: nn.Module, + features_dim: int, + net_arch: Optional[List[int]] = None, + activation_fn: Type[nn.Module] = nn.ReLU, + normalize_images: bool = True + ): super().__init__( observation_space=observation_space, action_space=action_space, @@ -211,10 +211,3 @@ class TDQN(DQN): device=device, _init_setup_model=_init_setup_model ) - - - -# try: -# register_policy("TMultiInputPolicy", TMultiInputPolicy) -# except: -# print("already registered") diff --git a/freqtrade/freqai/rl/__init__.py b/freqtrade/freqai/RL/__init__.py similarity index 100% rename from freqtrade/freqai/rl/__init__.py rename to freqtrade/freqai/RL/__init__.py diff --git a/freqtrade/freqai/prediction_models/RL/RLPrediction_agent.py b/freqtrade/freqai/prediction_models/RL/RLPrediction_agent.py deleted file mode 100644 index 26b31f6e9..000000000 --- a/freqtrade/freqai/prediction_models/RL/RLPrediction_agent.py +++ /dev/null @@ -1,139 +0,0 @@ -# common library - -import gym -import numpy as np -from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3 -from stable_baselines3.common.callbacks import (BaseCallback, CallbackList, CheckpointCallback, - EvalCallback, StopTrainingOnRewardThreshold) -from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise - -from freqtrade.freqai.prediction_models.RL import config -#from freqtrade.freqai.prediction_models.RL.RLPrediction_agent_v2 import TDQN -from freqtrade.freqai.prediction_models.RL.RLPrediction_env import DEnv - - -# from stable_baselines3.common.vec_env import DummyVecEnv - -# from meta.env_stock_trading.env_stock_trading import StockTradingEnv - -# RL models from stable-baselines - - -MODELS = {"a2c": A2C, "ddpg": DDPG, "td3": TD3, "sac": SAC, "ppo": PPO} - - -MODEL_KWARGS = {x: config.__dict__[f"{x.upper()}_PARAMS"] for x in MODELS.keys()} - - -NOISE = { - "normal": NormalActionNoise, - "ornstein_uhlenbeck": OrnsteinUhlenbeckActionNoise, -} - - -class TensorboardCallback(BaseCallback): - 
""" - Custom callback for plotting additional values in tensorboard. - """ - - def __init__(self, verbose=0): - super(TensorboardCallback, self).__init__(verbose) - - def _on_step(self) -> bool: - try: - self.logger.record(key="train/reward", value=self.locals["rewards"][0]) - except BaseException: - self.logger.record(key="train/reward", value=self.locals["reward"][0]) - return True - - -class RLPrediction_agent: - """Provides implementations for DRL algorithms - Based on: - https://github.com/AI4Finance-Foundation/FinRL-Meta/blob/master/agents/stablebaselines3_models.py - Attributes - ---------- - env: gym environment class - user-defined class - - Methods - ------- - get_model() - setup DRL algorithms - train_model() - train DRL algorithms in a train dataset - and output the trained model - DRL_prediction() - make a prediction in a test dataset and get results - """ - - def __init__(self, env): - self.env = env - - def get_model( - self, - model_name, - policy="MlpPolicy", - policy_kwargs=None, - model_kwargs=None, - reward_kwargs=None, - #total_timesteps=None, - verbose=1, - seed=None - ): - if model_name not in MODELS: - raise NotImplementedError("NotImplementedError") - - if model_kwargs is None: - model_kwargs = MODEL_KWARGS[model_name] - - if "action_noise" in model_kwargs: - n_actions = self.env.action_space.shape[-1] - model_kwargs["action_noise"] = NOISE[model_kwargs["action_noise"]]( - mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions) - ) - print(model_kwargs) - model = MODELS[model_name]( - policy=policy, - env=self.env, - tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{model_name}", - verbose=verbose, - policy_kwargs=policy_kwargs, - #model_kwargs=model_kwargs, - #total_timesteps=model_kwargs["total_timesteps"], - seed=seed - #**model_kwargs, - ) - - - - - return model - - def train_model(self, model, tb_log_name, model_kwargs, train_df, test_df, price, price_test, window_size): - - - agent_params = self.freqai_info['model_training_parameters'] - reward_params = self.freqai_info['model_reward_parameters'] - train_env = DEnv(df=train_df, prices=price, window_size=window_size, reward_kwargs=reward_params) - eval_env = DEnv(df=test_df, prices=price_test, window_size=window_size, reward_kwargs=reward_params) - - # checkpoint_callback = CheckpointCallback(save_freq=1000, save_path='./logs/', - # name_prefix='rl_model') - - checkpoint_callback = CheckpointCallback(save_freq=1000, save_path='./logs/') - - eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/best_model', log_path='./logs/results', eval_freq=500) - #callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=-200, verbose=1) - - # Create the callback list - callback = CallbackList([checkpoint_callback, eval_callback]) - - - model = model.learn( - total_timesteps=model_kwargs["total_timesteps"], - tb_log_name=tb_log_name, - callback=callback, - #callback=TensorboardCallback(), - ) - return model diff --git a/freqtrade/freqai/prediction_models/RL/RLPrediction_env_TDQN_3ac.py b/freqtrade/freqai/prediction_models/RL/RLPrediction_env_TDQN_3ac.py deleted file mode 100644 index 184ec57ec..000000000 --- a/freqtrade/freqai/prediction_models/RL/RLPrediction_env_TDQN_3ac.py +++ /dev/null @@ -1,513 +0,0 @@ -import logging -import random -from collections import deque -from enum import Enum -from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union - -import gym -import matplotlib.pylab as plt -import numpy as np -import pandas as pd -from gym import spaces -from gym.utils import 
seeding - -logger = logging.getLogger(__name__) - -class Actions(Enum): - Short = 0 - Long = 1 - Neutral = 2 - - -class Positions(Enum): - Short = 0 - Long = 1 - Neutral = 0.5 - - def opposite(self): - return Positions.Short if self == Positions.Long else Positions.Long - -def mean_over_std(x): - std = np.std(x, ddof=1) - mean = np.mean(x) - return mean / std if std > 0 else 0 - -class DEnv(gym.Env): - - metadata = {'render.modes': ['human']} - - def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, ): - assert df.ndim == 2 - - self.seed() - self.df = df - self.signal_features = self.df - self.prices = prices - self.window_size = window_size - self.starting_point = starting_point - self.rr = reward_kwargs["rr"] - self.profit_aim = reward_kwargs["profit_aim"] - - self.fee=0.0015 - - # # spaces - self.shape = (window_size, self.signal_features.shape[1]) - self.action_space = spaces.Discrete(len(Actions)) - self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) - - # episode - self._start_tick = self.window_size - self._end_tick = len(self.prices) - 1 - self._done = None - self._current_tick = None - self._last_trade_tick = None - self._position = Positions.Neutral - self._position_history = None - self.total_reward = None - self._total_profit = None - self._first_rendering = None - self.history = None - self.trade_history = [] - - # self.A_t, self.B_t = 0.000639, 0.00001954 - self.r_t_change = 0. - - self.returns_report = [] - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - def reset(self): - - self._done = False - - if self.starting_point == True: - self._position_history = (self._start_tick* [None]) + [self._position] - else: - self._position_history = (self.window_size * [None]) + [self._position] - - self._current_tick = self._start_tick - self._last_trade_tick = None - #self._last_trade_tick = self._current_tick - 1 - self._position = Positions.Neutral - - self.total_reward = 0. - self._total_profit = 1. # unit - self._first_rendering = True - self.history = {} - self.trade_history = [] - self.portfolio_log_returns = np.zeros(len(self.prices)) - - self._profits = [(self._start_tick, 1)] - self.close_trade_profit = [] - self.r_t_change = 0. 
- - self.returns_report = [] - - return self._get_observation() - - def step(self, action): - self._done = False - self._current_tick += 1 - - if self._current_tick == self._end_tick: - self._done = True - - self.update_portfolio_log_returns(action) - - self._update_profit(action) - step_reward = self._calculate_reward(action) - self.total_reward += step_reward - - trade_type = None - if self.is_tradesignal(action): # exclude 3 case not trade - # Update position - """ - Action: Neutral, position: Long -> Close Long - Action: Neutral, position: Short -> Close Short - - Action: Long, position: Neutral -> Open Long - Action: Long, position: Short -> Close Short and Open Long - - Action: Short, position: Neutral -> Open Short - Action: Short, position: Long -> Close Long and Open Short - """ - - temp_position = self._position - if action == Actions.Neutral.value: - self._position = Positions.Neutral - trade_type = "neutral" - elif action == Actions.Long.value: - self._position = Positions.Long - trade_type = "long" - elif action == Actions.Short.value: - self._position = Positions.Short - trade_type = "short" - else: - print("case not defined") - - # Update last trade tick - self._last_trade_tick = self._current_tick - - if trade_type != None: - self.trade_history.append( - {'price': self.current_price(), 'index': self._current_tick, 'type': trade_type}) - - if self._total_profit < 0.2: - self._done = True - - self._position_history.append(self._position) - observation = self._get_observation() - info = dict( - tick = self._current_tick, - total_reward = self.total_reward, - total_profit = self._total_profit, - position = self._position.value - ) - self._update_history(info) - - return observation, step_reward, self._done, info - - # def processState(self, state): - # return state.to_numpy() - - # def convert_mlp_Policy(self, obs_): - # pass - - def _get_observation(self): - return self.signal_features[(self._current_tick - self.window_size):self._current_tick] - - def get_unrealized_profit(self): - - if self._last_trade_tick == None: - return 0. - - if self._position == Positions.Neutral: - return 0. - elif self._position == Positions.Short: - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - return (last_trade_price - current_price)/last_trade_price - elif self._position == Positions.Long: - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - return (current_price - last_trade_price)/last_trade_price - else: - return 0. 
- - def is_tradesignal(self, action): - # trade signal - """ - not trade signal is : - Action: Neutral, position: Neutral -> Nothing - Action: Long, position: Long -> Hold Long - Action: Short, position: Short -> Hold Short - """ - return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) - or (action == Actions.Short.value and self._position == Positions.Short) - or (action == Actions.Long.value and self._position == Positions.Long)) - - def _is_trade(self, action: Actions): - return ((action == Actions.Long.value and self._position == Positions.Short) or - (action == Actions.Short.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Short) - ) - - def is_hold(self, action): - return ((action == Actions.Short.value and self._position == Positions.Short) - or (action == Actions.Long.value and self._position == Positions.Long)) - - def add_buy_fee(self, price): - return price * (1 + self.fee) - - def add_sell_fee(self, price): - return price / (1 + self.fee) - - def _update_history(self, info): - if not self.history: - self.history = {key: [] for key in info.keys()} - - for key, value in info.items(): - self.history[key].append(value) - - - # def render(self, mode='human'): - # def _plot_position(position, tick): - # color = None - # if position == Positions.Short: - # color = 'red' - # elif position == Positions.Long: - # color = 'green' - # if color: - # plt.scatter(tick, self.prices.loc[tick].open, color=color) - # if self._first_rendering: - # self._first_rendering = False - # plt.cla() - # plt.plot(self.prices) - # start_position = self._position_history[self._start_tick] - # _plot_position(start_position, self._start_tick) - # plt.cla() - # plt.plot(self.prices) - # _plot_position(self._position, self._current_tick) - # plt.suptitle("Total Reward: %.6f" % self.total_reward + ' ~ ' + "Total Profit: %.6f" % self._total_profit) - # plt.pause(0.01) - - # def render_all(self): - # plt.figure() - # window_ticks = np.arange(len(self._position_history)) - # plt.plot(self.prices['open'], alpha=0.5) - # short_ticks = [] - # long_ticks = [] - # neutral_ticks = [] - # for i, tick in enumerate(window_ticks): - # if self._position_history[i] == Positions.Short: - # short_ticks.append(tick - 1) - # elif self._position_history[i] == Positions.Long: - # long_ticks.append(tick - 1) - # elif self._position_history[i] == Positions.Neutral: - # neutral_ticks.append(tick - 1) - # plt.plot(neutral_ticks, self.prices.loc[neutral_ticks].open, - # 'o', color='grey', ms=3, alpha=0.1) - # plt.plot(short_ticks, self.prices.loc[short_ticks].open, - # 'o', color='r', ms=3, alpha=0.8) - # plt.plot(long_ticks, self.prices.loc[long_ticks].open, - # 'o', color='g', ms=3, alpha=0.8) - # plt.suptitle("Generalising") - # fig = plt.gcf() - # fig.set_size_inches(15, 10) - - # def close_trade_report(self): - # small_trade = 0 - # positive_big_trade = 0 - # negative_big_trade = 0 - # small_profit = 0.003 - # for i in self.close_trade_profit: - # if i < small_profit and i > -small_profit: - # small_trade+=1 - # elif i > small_profit: - # positive_big_trade += 1 - # elif i < -small_profit: - # negative_big_trade += 1 - # print(f"small trade={small_trade/len(self.close_trade_profit)}; positive_big_trade={positive_big_trade/len(self.close_trade_profit)}; negative_big_trade={negative_big_trade/len(self.close_trade_profit)}") - - # def report(self): - # # get total 
trade - # long_trade = 0 - # short_trade = 0 - # neutral_trade = 0 - # for trade in self.trade_history: - # if trade['type'] == 'long': - # long_trade += 1 - # elif trade['type'] == 'short': - # short_trade += 1 - # else: - # neutral_trade += 1 - # negative_trade = 0 - # positive_trade = 0 - # for tr in self.close_trade_profit: - # if tr < 0.: - # negative_trade += 1 - # if tr > 0.: - # positive_trade += 1 - # total_trade_lr = negative_trade+positive_trade - # total_trade = long_trade + short_trade - # sharp_ratio = self.sharpe_ratio() - # sharp_log = self.get_sharpe_ratio() - # from tabulate import tabulate - # headers = ["Performance", ""] - # performanceTable = [["Total Trade", "{0:.2f}".format(total_trade)], - # ["Total reward", "{0:.3f}".format(self.total_reward)], - # ["Start profit(unit)", "{0:.2f}".format(1.)], - # ["End profit(unit)", "{0:.3f}".format(self._total_profit)], - # ["Sharp ratio", "{0:.3f}".format(sharp_ratio)], - # ["Sharp log", "{0:.3f}".format(sharp_log)], - # # ["Sortino ratio", "{0:.2f}".format(0) + '%'], - # ["winrate", "{0:.2f}".format(positive_trade*100/total_trade_lr) + '%'] - # ] - # tabulation = tabulate(performanceTable, headers, tablefmt="fancy_grid", stralign="center") - # print(tabulation) - # result = { - # "Start": "{0:.2f}".format(1.), - # "End": "{0:.2f}".format(self._total_profit), - # "Sharp": "{0:.3f}".format(sharp_ratio), - # "Winrate": "{0:.2f}".format(positive_trade*100/total_trade_lr) - # } - # return result - - # def close(self): - # plt.close() - - def get_sharpe_ratio(self): - return mean_over_std(self.get_portfolio_log_returns()) - - # def save_rendering(self, filepath): - # plt.savefig(filepath) - - # def pause_rendering(self): - # plt.show() - - def _calculate_reward(self, action): - # rw = self.transaction_profit_reward(action) - #rw = self.reward_rr_profit_config(action) - rw = self.profit_only_when_close_reward(action) - #rw = self.profit_only_when_close_reward_aim(action) - return rw - - def _update_profit(self, action): - if self._is_trade(action) or self._done: - pnl = self.get_unrealized_profit() - - if self._position == Positions.Long: - self._total_profit = self._total_profit + self._total_profit*pnl - self._profits.append((self._current_tick, self._total_profit)) - self.close_trade_profit.append(pnl) - - if self._position == Positions.Short: - self._total_profit = self._total_profit + self._total_profit*pnl - self._profits.append((self._current_tick, self._total_profit)) - self.close_trade_profit.append(pnl) - - def most_recent_return(self, action): - """ - We support Long, Neutral and Short positions. - Return is generated from rising prices in Long - and falling prices in Short positions. - The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. 
- """ - # Long positions - if self._position == Positions.Long: - current_price = self.prices.iloc[self._current_tick].open - if action == Actions.Short.value or action == Actions.Neutral.value: - current_price = self.add_sell_fee(current_price) - - previous_price = self.prices.iloc[self._current_tick - 1].open - - if (self._position_history[self._current_tick - 1] == Positions.Short - or self._position_history[self._current_tick - 1] == Positions.Neutral): - previous_price = self.add_buy_fee(previous_price) - - return np.log(current_price) - np.log(previous_price) - - # Short positions - if self._position == Positions.Short: - current_price = self.prices.iloc[self._current_tick].open - if action == Actions.Long.value or action == Actions.Neutral.value: - current_price = self.add_buy_fee(current_price) - - previous_price = self.prices.iloc[self._current_tick - 1].open - if (self._position_history[self._current_tick - 1] == Positions.Long - or self._position_history[self._current_tick - 1] == Positions.Neutral): - previous_price = self.add_sell_fee(previous_price) - - return np.log(previous_price) - np.log(current_price) - - return 0 - - def get_portfolio_log_returns(self): - return self.portfolio_log_returns[1:self._current_tick + 1] - - # def get_trading_log_return(self): - # return self.portfolio_log_returns[self._start_tick:] - - def update_portfolio_log_returns(self, action): - self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) - - def current_price(self) -> float: - return self.prices.iloc[self._current_tick].open - - def prev_price(self) -> float: - return self.prices.iloc[self._current_tick-1].open - - def sharpe_ratio(self): - if len(self.close_trade_profit) == 0: - return 0. - returns = np.array(self.close_trade_profit) - reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) - return reward - - # def get_bnh_log_return(self): - # return np.diff(np.log(self.prices['open'][self._start_tick:])) - - def transaction_profit_reward(self, action): - rw = 0. - - pt = self.prev_price() - pt_1 = self.current_price() - - - if self._position == Positions.Long: - a_t = 1 - elif self._position == Positions.Short: - a_t = -1 - else: - a_t = 0 - - # close long - if (action == Actions.Short.value or action == Actions.Neutral.value) and self._position == Positions.Long: - pt_1 = self.add_sell_fee(self.current_price()) - po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - - rw = a_t*(pt_1 - po)/po - #rw = rw*2 - # close short - elif (action == Actions.Long.value or action == Actions.Neutral.value) and self._position == Positions.Short: - pt_1 = self.add_buy_fee(self.current_price()) - po = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - rw = a_t*(pt_1 - po)/po - #rw = rw*2 - else: - rw = a_t*(pt_1 - pt)/pt - - return np.clip(rw, 0, 1) - - def profit_only_when_close_reward_aim(self, action): - - if self._last_trade_tick == None: - return 0. 
- - # close long - if (action == Actions.Short.value or action == Actions.Neutral.value) and self._position == Positions.Long: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(current_price) - np.log(last_trade_price)) - - if (action == Actions.Short.value or action == Actions.Neutral.value) and self._position == Positions.Long: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float((np.log(current_price) - np.log(last_trade_price)) * 2) - - # close short - if (action == Actions.Long.value or action == Actions.Neutral.value) and self._position == Positions.Short: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(last_trade_price) - np.log(current_price)) - - if (action == Actions.Long.value or action == Actions.Neutral.value) and self._position == Positions.Short: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float((np.log(last_trade_price) - np.log(current_price)) * 2) - - return 0. - - def profit_only_when_close_reward(self, action): - - if self._last_trade_tick == None: - return 0. - - # close long - if (action == Actions.Short.value or action == Actions.Neutral.value) and self._position == Positions.Long: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(current_price) - np.log(last_trade_price)) - - # close short - if (action == Actions.Long.value or action == Actions.Neutral.value) and self._position == Positions.Short: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(last_trade_price) - np.log(current_price)) - - return 0. 
\ No newline at end of file diff --git a/freqtrade/freqai/prediction_models/RL/RLPrediction_env_TDQN_5ac.py b/freqtrade/freqai/prediction_models/RL/RLPrediction_env_TDQN_5ac.py deleted file mode 100644 index 9b01579e8..000000000 --- a/freqtrade/freqai/prediction_models/RL/RLPrediction_env_TDQN_5ac.py +++ /dev/null @@ -1,671 +0,0 @@ -import logging -import random -from collections import deque -from enum import Enum -#from sklearn.decomposition import PCA, KernelPCA -from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union - -import gym -import matplotlib.pylab as plt -import numpy as np -import pandas as pd -from gym import spaces -from gym.utils import seeding - - -logger = logging.getLogger(__name__) - -# from bokeh.io import output_notebook -# from bokeh.plotting import figure, show -# from bokeh.models import ( -# CustomJS, -# ColumnDataSource, -# NumeralTickFormatter, -# Span, -# HoverTool, -# Range1d, -# DatetimeTickFormatter, -# Scatter, -# Label, LabelSet -# ) - - -class Actions(Enum): - Neutral = 0 - Long_buy = 1 - Long_sell = 2 - Short_buy = 3 - Short_sell = 4 - - -class Positions(Enum): - Short = 0 - Long = 1 - Neutral = 0.5 - - def opposite(self): - return Positions.Short if self == Positions.Long else Positions.Long - -def mean_over_std(x): - std = np.std(x, ddof=1) - mean = np.mean(x) - return mean / std if std > 0 else 0 - -class DEnv(gym.Env): - - metadata = {'render.modes': ['human']} - - def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, ): - assert df.ndim == 2 - - self.seed() - self.df = df - self.signal_features = self.df - self.prices = prices - self.window_size = window_size - self.starting_point = starting_point - self.rr = reward_kwargs["rr"] - self.profit_aim = reward_kwargs["profit_aim"] - - self.fee=0.0015 - - # # spaces - self.shape = (window_size, self.signal_features.shape[1]) - self.action_space = spaces.Discrete(len(Actions)) - self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) - - # episode - self._start_tick = self.window_size - self._end_tick = len(self.prices) - 1 - self._done = None - self._current_tick = None - self._last_trade_tick = None - self._position = Positions.Neutral - self._position_history = None - self.total_reward = None - self._total_profit = None - self._first_rendering = None - self.history = None - self.trade_history = [] - - # self.A_t, self.B_t = 0.000639, 0.00001954 - self.r_t_change = 0. - - self.returns_report = [] - - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - - def reset(self): - - self._done = False - - if self.starting_point == True: - self._position_history = (self._start_tick* [None]) + [self._position] - else: - self._position_history = (self.window_size * [None]) + [self._position] - - self._current_tick = self._start_tick - self._last_trade_tick = None - #self._last_trade_tick = self._current_tick - 1 - self._position = Positions.Neutral - - self.total_reward = 0. - self._total_profit = 1. # unit - self._first_rendering = True - self.history = {} - self.trade_history = [] - self.portfolio_log_returns = np.zeros(len(self.prices)) - - - self._profits = [(self._start_tick, 1)] - self.close_trade_profit = [] - self.r_t_change = 0. 
- - self.returns_report = [] - - return self._get_observation() - - - def step(self, action): - self._done = False - self._current_tick += 1 - - if self._current_tick == self._end_tick: - self._done = True - - self.update_portfolio_log_returns(action) - - self._update_profit(action) - step_reward = self._calculate_reward(action) - self.total_reward += step_reward - - - - - - trade_type = None - if self.is_tradesignal(action): # exclude 3 case not trade - # Update position - """ - Action: Neutral, position: Long -> Close Long - Action: Neutral, position: Short -> Close Short - - Action: Long, position: Neutral -> Open Long - Action: Long, position: Short -> Close Short and Open Long - - Action: Short, position: Neutral -> Open Short - Action: Short, position: Long -> Close Long and Open Short - """ - - - temp_position = self._position - if action == Actions.Neutral.value: - self._position = Positions.Neutral - trade_type = "neutral" - elif action == Actions.Long_buy.value: - self._position = Positions.Long - trade_type = "long" - elif action == Actions.Short_buy.value: - self._position = Positions.Short - trade_type = "short" - elif action == Actions.Long_sell.value: - self._position = Positions.Neutral - trade_type = "neutral" - elif action == Actions.Short_sell.value: - self._position = Positions.Neutral - trade_type = "neutral" - else: - print("case not defined") - - # Update last trade tick - self._last_trade_tick = self._current_tick - - if trade_type != None: - self.trade_history.append( - {'price': self.current_price(), 'index': self._current_tick, 'type': trade_type}) - - if self._total_profit < 0.2: - self._done = True - - self._position_history.append(self._position) - observation = self._get_observation() - info = dict( - tick = self._current_tick, - total_reward = self.total_reward, - total_profit = self._total_profit, - position = self._position.value - ) - self._update_history(info) - - return observation, step_reward, self._done, info - - - # def processState(self, state): - # return state.to_numpy() - - # def convert_mlp_Policy(self, obs_): - # pass - - def _get_observation(self): - return self.signal_features[(self._current_tick - self.window_size):self._current_tick] - - - def get_unrealized_profit(self): - - if self._last_trade_tick == None: - return 0. - - if self._position == Positions.Neutral: - return 0. - elif self._position == Positions.Short: - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - return (last_trade_price - current_price)/last_trade_price - elif self._position == Positions.Long: - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - return (current_price - last_trade_price)/last_trade_price - else: - return 0. 
- - - def is_tradesignal(self, action): - # trade signal - """ - not trade signal is : - Action: Neutral, position: Neutral -> Nothing - Action: Long, position: Long -> Hold Long - Action: Short, position: Short -> Hold Short - """ - return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or - (action == Actions.Short_buy.value and self._position == Positions.Short) or - (action == Actions.Short_sell.value and self._position == Positions.Short) or - (action == Actions.Short_buy.value and self._position == Positions.Long) or - (action == Actions.Short_sell.value and self._position == Positions.Long) or - - (action == Actions.Long_buy.value and self._position == Positions.Long) or - (action == Actions.Long_sell.value and self._position == Positions.Long) or - (action == Actions.Long_buy.value and self._position == Positions.Short) or - (action == Actions.Long_sell.value and self._position == Positions.Short)) - - - def _is_trade(self, action: Actions): - return ((action == Actions.Long_buy.value and self._position == Positions.Short) or - (action == Actions.Short_buy.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Short) or - - (action == Actions.Neutral.Short_sell and self._position == Positions.Long) or - (action == Actions.Neutral.Long_sell and self._position == Positions.Short) - ) - - - def is_hold(self, action): - return ((action == Actions.Short.value and self._position == Positions.Short) - or (action == Actions.Long.value and self._position == Positions.Long)) - - - - def add_buy_fee(self, price): - return price * (1 + self.fee) - - def add_sell_fee(self, price): - return price / (1 + self.fee) - - def _update_history(self, info): - if not self.history: - self.history = {key: [] for key in info.keys()} - - for key, value in info.items(): - self.history[key].append(value) - - - # def render(self, mode='human'): - - # def _plot_position(position, tick): - # color = None - # if position == Positions.Short: - # color = 'red' - # elif position == Positions.Long: - # color = 'green' - # if color: - # plt.scatter(tick, self.prices.loc[tick].open, color=color) - - # if self._first_rendering: - # self._first_rendering = False - # plt.cla() - # plt.plot(self.prices) - # start_position = self._position_history[self._start_tick] - # _plot_position(start_position, self._start_tick) - - # plt.cla() - # plt.plot(self.prices) - # _plot_position(self._position, self._current_tick) - - # plt.suptitle("Total Reward: %.6f" % self.total_reward + ' ~ ' + "Total Profit: %.6f" % self._total_profit) - # plt.pause(0.01) - - - # def render_all(self): - # plt.figure() - # window_ticks = np.arange(len(self._position_history)) - # plt.plot(self.prices['open'], alpha=0.5) - - # short_ticks = [] - # long_ticks = [] - # neutral_ticks = [] - # for i, tick in enumerate(window_ticks): - # if self._position_history[i] == Positions.Short: - # short_ticks.append(tick - 1) - # elif self._position_history[i] == Positions.Long: - # long_ticks.append(tick - 1) - # elif self._position_history[i] == Positions.Neutral: - # neutral_ticks.append(tick - 1) - - # plt.plot(neutral_ticks, self.prices.loc[neutral_ticks].open, - # 'o', color='grey', ms=3, alpha=0.1) - # plt.plot(short_ticks, self.prices.loc[short_ticks].open, - # 'o', color='r', ms=3, alpha=0.8) - # plt.plot(long_ticks, self.prices.loc[long_ticks].open, - # 'o', color='g', ms=3, alpha=0.8) - - # 
plt.suptitle("Generalising") - # fig = plt.gcf() - # fig.set_size_inches(15, 10) - - - - - # def close_trade_report(self): - # small_trade = 0 - # positive_big_trade = 0 - # negative_big_trade = 0 - # small_profit = 0.003 - # for i in self.close_trade_profit: - # if i < small_profit and i > -small_profit: - # small_trade+=1 - # elif i > small_profit: - # positive_big_trade += 1 - # elif i < -small_profit: - # negative_big_trade += 1 - # print(f"small trade={small_trade/len(self.close_trade_profit)}; positive_big_trade={positive_big_trade/len(self.close_trade_profit)}; negative_big_trade={negative_big_trade/len(self.close_trade_profit)}") - - - # def report(self): - - # # get total trade - # long_trade = 0 - # short_trade = 0 - # neutral_trade = 0 - # for trade in self.trade_history: - # if trade['type'] == 'long': - # long_trade += 1 - - # elif trade['type'] == 'short': - # short_trade += 1 - # else: - # neutral_trade += 1 - - # negative_trade = 0 - # positive_trade = 0 - # for tr in self.close_trade_profit: - # if tr < 0.: - # negative_trade += 1 - - # if tr > 0.: - # positive_trade += 1 - - # total_trade_lr = negative_trade+positive_trade - - - # total_trade = long_trade + short_trade - # sharp_ratio = self.sharpe_ratio() - # sharp_log = self.get_sharpe_ratio() - - # from tabulate import tabulate - - # headers = ["Performance", ""] - # performanceTable = [["Total Trade", "{0:.2f}".format(total_trade)], - # ["Total reward", "{0:.3f}".format(self.total_reward)], - # ["Start profit(unit)", "{0:.2f}".format(1.)], - # ["End profit(unit)", "{0:.3f}".format(self._total_profit)], - # ["Sharp ratio", "{0:.3f}".format(sharp_ratio)], - # ["Sharp log", "{0:.3f}".format(sharp_log)], - # # ["Sortino ratio", "{0:.2f}".format(0) + '%'], - # ["winrate", "{0:.2f}".format(positive_trade*100/total_trade_lr) + '%'] - # ] - # tabulation = tabulate(performanceTable, headers, tablefmt="fancy_grid", stralign="center") - # print(tabulation) - - # result = { - # "Start": "{0:.2f}".format(1.), - # "End": "{0:.2f}".format(self._total_profit), - # "Sharp": "{0:.3f}".format(sharp_ratio), - # "Winrate": "{0:.2f}".format(positive_trade*100/total_trade_lr) - # } - # return result - - # def close(self): - # plt.close() - - def get_sharpe_ratio(self): - return mean_over_std(self.get_portfolio_log_returns()) - - - # def save_rendering(self, filepath): - # plt.savefig(filepath) - - - # def pause_rendering(self): - # plt.show() - - - def _calculate_reward(self, action): - # rw = self.transaction_profit_reward(action) - #rw = self.reward_rr_profit_config(action) - #rw = self.reward_rr_profit_config(action) # main - #rw = self.profit_only_when_close_reward(action) - rw = self.profit_only_when_close_reward_aim(action) - return rw - - - def _update_profit(self, action): - #if self._is_trade(action) or self._done: - if self._is_trade(action) or self._done: - pnl = self.get_unrealized_profit() - - if self._position == Positions.Long: - self._total_profit = self._total_profit + self._total_profit*pnl - self._profits.append((self._current_tick, self._total_profit)) - self.close_trade_profit.append(pnl) - - if self._position == Positions.Short: - self._total_profit = self._total_profit + self._total_profit*pnl - self._profits.append((self._current_tick, self._total_profit)) - self.close_trade_profit.append(pnl) - - - def most_recent_return(self, action): - """ - We support Long, Neutral and Short positions. - Return is generated from rising prices in Long - and falling prices in Short positions. 
- The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. - """ - # Long positions - if self._position == Positions.Long: - current_price = self.prices.iloc[self._current_tick].open - #if action == Actions.Short.value or action == Actions.Neutral.value: - if action == Actions.Short_buy.value or action == Actions.Neutral.value: - current_price = self.add_sell_fee(current_price) - - previous_price = self.prices.iloc[self._current_tick - 1].open - - if (self._position_history[self._current_tick - 1] == Positions.Short - or self._position_history[self._current_tick - 1] == Positions.Neutral): - previous_price = self.add_buy_fee(previous_price) - - return np.log(current_price) - np.log(previous_price) - - # Short positions - if self._position == Positions.Short: - current_price = self.prices.iloc[self._current_tick].open - #if action == Actions.Long.value or action == Actions.Neutral.value: - if action == Actions.Long_buy.value or action == Actions.Neutral.value: - current_price = self.add_buy_fee(current_price) - - previous_price = self.prices.iloc[self._current_tick - 1].open - if (self._position_history[self._current_tick - 1] == Positions.Long - or self._position_history[self._current_tick - 1] == Positions.Neutral): - previous_price = self.add_sell_fee(previous_price) - - return np.log(previous_price) - np.log(current_price) - - return 0 - - def get_portfolio_log_returns(self): - return self.portfolio_log_returns[1:self._current_tick + 1] - - - def get_trading_log_return(self): - return self.portfolio_log_returns[self._start_tick:] - - def update_portfolio_log_returns(self, action): - self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) - - def current_price(self) -> float: - return self.prices.iloc[self._current_tick].open - - def prev_price(self) -> float: - return self.prices.iloc[self._current_tick-1].open - - - - def sharpe_ratio(self): - if len(self.close_trade_profit) == 0: - return 0. - returns = np.array(self.close_trade_profit) - reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) - return reward - - def get_bnh_log_return(self): - return np.diff(np.log(self.prices['open'][self._start_tick:])) - - - def transaction_profit_reward(self, action): - rw = 0. - - pt = self.prev_price() - pt_1 = self.current_price() - - - if self._position == Positions.Long: - a_t = 1 - elif self._position == Positions.Short: - a_t = -1 - else: - a_t = 0 - - # close long - if (action == Actions.Short.value or action == Actions.Neutral.value) and self._position == Positions.Long: - pt_1 = self.add_sell_fee(self.current_price()) - po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - - rw = a_t*(pt_1 - po)/po - #rw = rw*2 - # close short - elif (action == Actions.Long.value or action == Actions.Neutral.value) and self._position == Positions.Short: - pt_1 = self.add_buy_fee(self.current_price()) - po = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - rw = a_t*(pt_1 - po)/po - #rw = rw*2 - else: - rw = a_t*(pt_1 - pt)/pt - - return np.clip(rw, 0, 1) - - - def profit_only_when_close_reward(self, action): - - if self._last_trade_tick == None: - return 0. 
- - # close long - if action == Actions.Long_sell.value and self._position == Positions.Long: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(current_price) - np.log(last_trade_price)) - - # close short - if action == Actions.Short_buy.value and self._position == Positions.Short: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(last_trade_price) - np.log(current_price)) - - return 0. - - def profit_only_when_close_reward_aim(self, action): - - if self._last_trade_tick == None: - return 0. - - # close long - if action == Actions.Long_sell.value and self._position == Positions.Long: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(current_price) - np.log(last_trade_price)) - - if action == Actions.Long_sell.value and self._position == Positions.Long: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float((np.log(current_price) - np.log(last_trade_price)) * 2) - - # close short - if action == Actions.Short_buy.value and self._position == Positions.Short: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(last_trade_price) - np.log(current_price)) - - if action == Actions.Short_buy.value and self._position == Positions.Short: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float((np.log(last_trade_price) - np.log(current_price)) * 2) - - return 0. - - def reward_rr_profit_config(self, action): - rw = 0. 
- - pt_1 = self.current_price() - - - if len(self.close_trade_profit) > 0: - # long - if self._position == Positions.Long: - pt_1 = self.add_sell_fee(self.current_price()) - po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - - if action == Actions.Short_buy.value: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - rw = 15 - elif self.close_trade_profit[-1] > 0.01 and self.close_trade_profit[-1] < self.profit_aim * self.rr: - rw = -1 - elif self.close_trade_profit[-1] < 0: - rw = -10 - elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: - rw = -15 - - if action == Actions.Long_sell.value: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - rw = 20 - elif self.close_trade_profit[-1] > 0.01 and self.close_trade_profit[-1] < self.profit_aim * self.rr: - rw = -1 - elif self.close_trade_profit[-1] < 0: - rw = -15 - elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: - rw = -25 - - if action == Actions.Neutral.value: - if self.close_trade_profit[-1] > 0.005: - rw = 0 - elif self.close_trade_profit[-1] < 0: - rw = 0 - - # short - if self._position == Positions.Short: - pt_1 = self.add_sell_fee(self.current_price()) - po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - - if action == Actions.Long_buy.value: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - rw = 15 - elif self.close_trade_profit[-1] > 0.01 and self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: - rw = -1 - elif self.close_trade_profit[-1] < 0: - rw = -10 - elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: - rw =- -25 - - if action == Actions.Short_sell.value: - if self.close_trade_profit[-1] > self.profit_aim * self.rr: - rw = 20 - elif self.close_trade_profit[-1] > 0.01 and self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: - rw = -1 - elif self.close_trade_profit[-1] < 0: - rw = -15 - elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: - rw = -25 - - if action == Actions.Neutral.value: - if self.close_trade_profit[-1] > 0.005: - rw = 0 - elif self.close_trade_profit[-1] < 0: - rw = 0 - - return np.clip(rw, 0, 1) diff --git a/freqtrade/freqai/prediction_models/RL/config.py b/freqtrade/freqai/prediction_models/RL/config.py deleted file mode 100644 index c45eb2387..000000000 --- a/freqtrade/freqai/prediction_models/RL/config.py +++ /dev/null @@ -1,37 +0,0 @@ -# dir -DATA_SAVE_DIR = "datasets" -TRAINED_MODEL_DIR = "trained_models" -TENSORBOARD_LOG_DIR = "tensorboard_log" -RESULTS_DIR = "results" - -# Model Parameters -A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.01, "learning_rate": 0.0007} -PPO_PARAMS = { - "n_steps": 2048, - "ent_coef": 0.01, - "learning_rate": 0.00025, - "batch_size": 64, -} -DDPG_PARAMS = {"batch_size": 128, "buffer_size": 50000, "learning_rate": 0.001} -TD3_PARAMS = { - "batch_size": 100, - "buffer_size": 1000000, - "learning_rate": 0.001, -} -SAC_PARAMS = { - "batch_size": 64, - "buffer_size": 100000, - "learning_rate": 0.0001, - "learning_starts": 100, - "ent_coef": "auto_0.1", -} -ERL_PARAMS = { - "learning_rate": 3e-5, - "batch_size": 2048, - "gamma": 0.985, - "seed": 312, - "net_dimension": 512, - "target_step": 5000, - "eval_gap": 30, -} -RLlib_PARAMS = {"lr": 5e-5, "train_batch_size": 500, "gamma": 0.99} diff --git a/freqtrade/freqai/prediction_models/RLPredictionModel.py b/freqtrade/freqai/prediction_models/RLPredictionModel.py deleted file mode 100644 index b6903dd43..000000000 --- a/freqtrade/freqai/prediction_models/RLPredictionModel.py 
+++ /dev/null @@ -1,253 +0,0 @@ -import logging -from typing import Any, Dict, Tuple -#from matplotlib.colors import DivergingNorm - -from pandas import DataFrame -import pandas as pd -from freqtrade.exceptions import OperationalException -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -import tensorflow as tf -from freqtrade.freqai.prediction_models.BaseTensorFlowModel import BaseTensorFlowModel -from freqtrade.freqai.freqai_interface import IFreqaiModel -from tensorflow.keras.layers import Input, Conv1D, Dense, MaxPooling1D, Flatten, Dropout -from tensorflow.keras.models import Model -import numpy as np -import copy - -from keras.layers import * -import random - - -logger = logging.getLogger(__name__) - -# tf.config.run_functions_eagerly(True) -# tf.data.experimental.enable_debug_mode() - -import os -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' -os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - -MAX_EPOCHS = 10 -LOOKBACK = 8 - - -class RLPredictionModel_v2(IFreqaiModel): - """ - User created prediction model. The class needs to override three necessary - functions, predict(), fit(). - """ - - def fit(self, data_dictionary: Dict, pair) -> Any: - """ - User sets up the training and test data to fit their desired model here - :params: - :data_dictionary: the dictionary constructed by DataHandler to hold - all the training and test data/labels. - """ - - train_df = data_dictionary["train_features"] - train_labels = data_dictionary["train_labels"] - test_df = data_dictionary["test_features"] - test_labels = data_dictionary["test_labels"] - n_labels = len(train_labels.columns) - if n_labels > 1: - raise OperationalException( - "Neural Net not yet configured for multi-targets. Please " - " reduce number of targets to 1 in strategy." - ) - - n_features = len(data_dictionary["train_features"].columns) - BATCH_SIZE = self.freqai_info.get("batch_size", 64) - input_dims = [BATCH_SIZE, self.CONV_WIDTH, n_features] - - - w1 = WindowGenerator( - input_width=self.CONV_WIDTH, - label_width=1, - shift=1, - train_df=train_df, - val_df=test_df, - train_labels=train_labels, - val_labels=test_labels, - batch_size=BATCH_SIZE, - ) - - - # train_agent() - #pair = self.dd.historical_data[pair] - #gym_env = FreqtradeEnv(data=train_df, prices=0.01, windows_size=100, pair=pair, stake_amount=100) - - # sep = '/' - # coin = pair.split(sep, 1)[0] - - # # df1 = train_df.filter(regex='price') - # # df2 = df1.filter(regex='raw') - - # # df3 = df2.filter(regex=f"{coin}") - # # print(df3) - - # price = train_df[f"%-{coin}raw_price_5m"] - # gym_env = RLPrediction_GymAnytrading(signal_features=train_df, prices=price, window_size=100) - # sac = RLPrediction_Agent(gym_env) - - # print(sac) - - # return 0 - - - - return model - - def predict( - self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first=True - ) -> Tuple[DataFrame, DataFrame]: - """ - Filter the prediction features data and predict with it. - :param: unfiltered_dataframe: Full dataframe for the current backtest period. 
- :return: - :predictions: np.array of predictions - :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove - data (NaNs) or felt uncertain about data (PCA and DI index) - """ - - dk.find_features(unfiltered_dataframe) - filtered_dataframe, _ = dk.filter_features( - unfiltered_dataframe, dk.training_features_list, training_filter=False - ) - filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) - dk.data_dictionary["prediction_features"] = filtered_dataframe - - # optional additional data cleaning/analysis - self.data_cleaning_predict(dk, filtered_dataframe) - - if first: - full_df = dk.data_dictionary["prediction_features"] - - w1 = WindowGenerator( - input_width=self.CONV_WIDTH, - label_width=1, - shift=1, - test_df=full_df, - batch_size=len(full_df), - ) - - predictions = self.model.predict(w1.inference) - len_diff = len(dk.do_predict) - len(predictions) - if len_diff > 0: - dk.do_predict = dk.do_predict[len_diff:] - - else: - data = dk.data_dictionary["prediction_features"] - data = tf.expand_dims(data, axis=0) - predictions = self.model(data, training=False) - - predictions = predictions[:, 0] - pred_df = DataFrame(predictions, columns=dk.label_list) - - pred_df = dk.denormalize_labels_from_metadata(pred_df) - - return (pred_df, np.ones(len(pred_df))) - - - def set_initial_historic_predictions( - self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str - ) -> None: - - pass - # w1 = WindowGenerator( - # input_width=self.CONV_WIDTH, label_width=1, shift=1, test_df=df, batch_size=len(df) - # ) - - # trained_predictions = model.predict(w1.inference) - # #trained_predictions = trained_predictions[:, 0, 0] - # trained_predictions = trained_predictions[:, 0] - - # n_lost_points = len(df) - len(trained_predictions) - # pred_df = DataFrame(trained_predictions, columns=dk.label_list) - # zeros_df = DataFrame(np.zeros((n_lost_points, len(dk.label_list))), columns=dk.label_list) - # pred_df = pd.concat([zeros_df, pred_df], axis=0) - - # pred_df = dk.denormalize_labels_from_metadata(pred_df) - - - - # self.dd.historic_predictions[pair] = DataFrame() - # self.dd.historic_predictions[pair] = copy.deepcopy(pred_df) - - -class WindowGenerator: - def __init__( - self, - input_width, - label_width, - shift, - train_df=None, - val_df=None, - test_df=None, - train_labels=None, - val_labels=None, - test_labels=None, - batch_size=None, - ): - # Store the raw data. 
- self.train_df = train_df - self.val_df = val_df - self.test_df = test_df - self.train_labels = train_labels - self.val_labels = val_labels - self.test_labels = test_labels - self.batch_size = batch_size - self.input_width = input_width - self.label_width = label_width - self.shift = shift - - self.total_window_size = input_width + shift - - self.input_slice = slice(0, input_width) - self.input_indices = np.arange(self.total_window_size)[self.input_slice] - - def make_dataset(self, data, labels=None): - data = np.array(data, dtype=np.float32) - if labels is not None: - labels = np.array(labels, dtype=np.float32) - ds = tf.keras.preprocessing.timeseries_dataset_from_array( - data=data, - targets=labels, - sequence_length=self.total_window_size, - sequence_stride=1, - sampling_rate=1, - shuffle=False, - batch_size=self.batch_size, - ) - - return ds - - @property - def train(self): - - - - return self.make_dataset(self.train_df, self.train_labels) - - @property - def val(self): - return self.make_dataset(self.val_df, self.val_labels) - - @property - def test(self): - return self.make_dataset(self.test_df, self.test_labels) - - @property - def inference(self): - return self.make_dataset(self.test_df) - - @property - def example(self): - """Get and cache an example batch of `inputs, labels` for plotting.""" - result = getattr(self, "_example", None) - if result is None: - # No example batch was found, so get one from the `.train` dataset - result = next(iter(self.train)) - # And cache it for next time - self._example = result - return result \ No newline at end of file diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearning.py b/freqtrade/freqai/prediction_models/ReinforcementLearning.py deleted file mode 100644 index 6ced4749e..000000000 --- a/freqtrade/freqai/prediction_models/ReinforcementLearning.py +++ /dev/null @@ -1,273 +0,0 @@ -import logging -from typing import Any, Dict, Tuple - -import numpy as np -import numpy.typing as npt -import pandas as pd -import torch as th -from pandas import DataFrame -from stable_baselines3 import PPO -from stable_baselines3.common.buffers import ReplayBuffer -from stable_baselines3.common.callbacks import EvalCallback -from stable_baselines3.common.monitor import Monitor -from stable_baselines3.common.vec_env import SubprocVecEnv - -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.freqai_interface import IFreqaiModel -from freqtrade.freqai.prediction_models.RL.RLPrediction_agent_TDQN import TDQN -from freqtrade.freqai.prediction_models.RL.RLPrediction_env_TDQN_5ac import DEnv -#from freqtrade.freqai.prediction_models.RL.RLPrediction_env_TDQN_3ac import DEnv -from freqtrade.persistence import Trade - -logger = logging.getLogger(__name__) - -class ReinforcementLearning(IFreqaiModel): - """ - User created Reinforcement Learning Model prediction model. - """ - - def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen - ) -> Any: - """ - Filter the training data and train a model to it. Train makes heavy use of the datakitchen - for storing, saving, loading, and analyzing the data. - :param unfiltered_dataframe: Full dataframe for the current training period - :param metadata: pair metadata from strategy. 
- :returns: - :model: Trained model which can be used to inference (self.predict) - """ - - logger.info("--------------------Starting training " f"{pair} --------------------") - - # filter the features requested by user in the configuration file and elegantly handle NaNs - features_filtered, labels_filtered = dk.filter_features( - unfiltered_dataframe, - dk.training_features_list, - dk.label_list, - training_filter=True, - ) - - data_dictionary: Dict[str, Any] = dk.make_train_test_datasets( - features_filtered, labels_filtered) - dk.fit_labels() # useless for now, but just satiating append methods - - # normalize all data based on train_dataset only - data_dictionary = dk.normalize_data(data_dictionary) - - # optional additional data cleaning/analysis - self.data_cleaning_train(dk) - - logger.info( - f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features" - ) - logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') - - model = self.fit(data_dictionary, pair) - - if pair not in self.dd.historic_predictions: - self.set_initial_historic_predictions( - data_dictionary['train_features'], model, dk, pair) - - self.dd.save_historic_predictions_to_disk() - - logger.info(f"--------------------done training {pair}--------------------") - - return model - - def fit(self, data_dictionary: Dict[str, Any], pair: str = ''): - - # train_df = data_dictionary["train_features"] - # # train_labels = data_dictionary["train_labels"] - # test_df = data_dictionary["test_features"] - # # test_labels = data_dictionary["test_labels"] - # # sep = '/' - # # coin = pair.split(sep, 1)[0] - # # price = train_df[f"%-{coin}raw_price_{self.config['timeframe']}"] - # # price.reset_index(inplace=True, drop=True) - # # price = price.to_frame() - # price = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(train_df.index)) - # price_test = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(test_df.index)) - # #train_env = GymAnytrading(train_df, price, self.CONV_WIDTH) - # agent_params = self.freqai_info['model_training_parameters'] - # reward_params = self.freqai_info['model_reward_parameters'] - # train_env = DEnv(df=train_df, prices=price, window_size=self.CONV_WIDTH, reward_kwargs=reward_params) - # #eval_env = DEnv(df=test_df, prices=price_test, window_size=self.CONV_WIDTH, reward_kwargs=reward_params) - # #env_instance = SubprocVecEnv([DEnv(df=train_df, prices=price, window_size=self.CONV_WIDTH, reward_kwargs=reward_params)]) - # #train_env.reset() - # #eval_env.reset() - # # model - # #policy_kwargs = dict(net_arch=[512, 512, 512]) - # policy_kwargs = dict(activation_fn=th.nn.Tanh, - # net_arch=[256, 256, 256]) - # agent = RLPrediction_agent(train_env) - # #eval_agent = RLPrediction_agent(eval_env) - - # # PPO - # model_name = 'ppo' - # model = agent.get_model(model_name, model_kwargs=agent_params, policy_kwargs=policy_kwargs) - # trained_model = agent.train_model(model=model, - # tb_log_name=model_name, - # model_kwargs=agent_params, - # train_df=train_df, - # test_df=test_df, - # price=price, - # price_test=price_test, - # window_size=self.CONV_WIDTH) - # # best_model = eval_agent.train_model(model=model, - # # tb_log_name=model_name, - # # model_kwargs=agent_params, - # # eval=eval_env) - # # TDQN - # # model_name = 'TDQN' - # # model = TDQN('TMultiInputPolicy', train_env, policy_kwargs=policy_kwargs, tensorboard_log='./tensorboard_log/', - # # learning_rate=agent_params["learning_rate"], gamma=0.9, - # # 
target_update_interval=5000, buffer_size=50000, - # # exploration_initial_eps=1, exploration_final_eps=0.1, - # # replay_buffer_class=ReplayBuffer - # # ) - # # trained_model = agent.train_model(model=model, - # # tb_log_name=model_name, - # # model_kwargs=agent_params) - # #model.learn( - # # total_timesteps=5000, - # # callback=callback - # # ) - - agent_params = self.freqai_info['model_training_parameters'] - reward_params = self.freqai_info['model_reward_parameters'] - train_df = data_dictionary["train_features"] - test_df = data_dictionary["test_features"] - eval_freq = agent_params["eval_cycles"] * len(test_df) - total_timesteps = agent_params["train_cycles"] * len(train_df) - - # price data for model training and evaluation - price = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(train_df.index)) - price_test = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(test_df.index)) - - # environments - train_env = DEnv(df=train_df, prices=price, window_size=self.CONV_WIDTH, reward_kwargs=reward_params) - eval = DEnv(df=test_df, prices=price_test, window_size=self.CONV_WIDTH, reward_kwargs=reward_params) - eval_env = Monitor(eval, ".") - eval_env.reset() - - # this should be in config - TODO - agent_type = 'tdqn' - - path = self.dk.data_path - eval_callback = EvalCallback(eval_env, best_model_save_path=f"{path}/", - log_path=f"{path}/{agent_type}/logs/", eval_freq=int(eval_freq), - deterministic=True, render=False) - - # model arch - policy_kwargs = dict(activation_fn=th.nn.ReLU, - net_arch=[256, 256, 128]) - - if agent_type == 'tdqn': - model = TDQN('TMultiInputPolicy', train_env, policy_kwargs=policy_kwargs, tensorboard_log=f"{path}/{agent_type}/tensorboard/", - learning_rate=0.00025, gamma=0.9, - target_update_interval=5000, buffer_size=50000, - exploration_initial_eps=1, exploration_final_eps=0.1, - replay_buffer_class=ReplayBuffer - ) - elif agent_type == 'ppo': - model = PPO('MultiInputPolicy', train_env, policy_kwargs=policy_kwargs, tensorboard_log=f"{path}/{agent_type}/tensorboard/", - learning_rate=0.00025, gamma=0.9 - ) - - model.learn( - total_timesteps=int(total_timesteps), - callback=eval_callback - ) - - print('Training finished!') - - return model - - - - def get_state_info(self, pair): - open_trades = Trade.get_trades(trade_filter=Trade.is_open.is_(True)) - market_side = 0.5 - current_profit = 0 - for trade in open_trades: - if trade.pair == pair: - current_value = trade.open_trade_value - openrate = trade.open_rate - if 'long' in trade.enter_tag: - market_side = 1 - else: - market_side = 0 - current_profit = current_value / openrate -1 - - total_profit = 0 - closed_trades = Trade.get_trades(trade_filter=[Trade.is_open.is_(False), Trade.pair == pair]) - for trade in closed_trades: - total_profit += trade.close_profit - - return market_side, current_profit, total_profit - - - def predict( - self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False - ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: - """ - Filter the prediction features data and predict with it. - :param: unfiltered_dataframe: Full dataframe for the current backtest period. 
- :return: - :pred_df: dataframe containing the predictions - :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove - data (NaNs) or felt uncertain about data (PCA and DI index) - """ - - dk.find_features(unfiltered_dataframe) - filtered_dataframe, _ = dk.filter_features( - unfiltered_dataframe, dk.training_features_list, training_filter=False - ) - filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) - dk.data_dictionary["prediction_features"] = filtered_dataframe - - # optional additional data cleaning/analysis - self.data_cleaning_predict(dk, filtered_dataframe) - - pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model) - pred_df.fillna(0, inplace=True) - - return (pred_df, dk.do_predict) - - def rl_model_predict(self, dataframe: DataFrame, - dk: FreqaiDataKitchen, model: Any) -> DataFrame: - - output = pd.DataFrame(np.full((len(dataframe), 1), 2), columns=dk.label_list) - - def _predict(window): - observations = dataframe.iloc[window.index] - res, _ = model.predict(observations, deterministic=True) - return res - - output = output.rolling(window=self.CONV_WIDTH).apply(_predict) - - return output - - def set_initial_historic_predictions( - self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str - ) -> None: - - pred_df = self.rl_model_predict(df, dk, model) - pred_df.fillna(0, inplace=True) - self.dd.historic_predictions[pair] = pred_df - hist_preds_df = self.dd.historic_predictions[pair] - - for label in hist_preds_df.columns: - if hist_preds_df[label].dtype == object: - continue - hist_preds_df[f'{label}_mean'] = 0 - hist_preds_df[f'{label}_std'] = 0 - - hist_preds_df['do_predict'] = 0 - - if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0: - hist_preds_df['DI_values'] = 0 - - for return_str in dk.data['extra_returns_per_train']: - hist_preds_df[return_str] = 0 diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py new file mode 100644 index 000000000..2fa87c432 --- /dev/null +++ b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py @@ -0,0 +1,155 @@ +import logging +from typing import Any, Dict # , Tuple + +import numpy as np +# import numpy.typing as npt +# import pandas as pd +import torch as th +# from pandas import DataFrame +from stable_baselines3 import PPO +from stable_baselines3.common.callbacks import EvalCallback +from stable_baselines3.common.monitor import Monitor +# from stable_baselines3.common.vec_env import SubprocVecEnv +from freqtrade.freqai.RL.BaseRLEnv import BaseRLEnv, Actions, Positions +from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel + + +logger = logging.getLogger(__name__) + + +class ReinforcementLearningPPO(BaseReinforcementLearningModel): + """ + User created Reinforcement Learning Model prediction model. 
+ """ + + def fit(self, data_dictionary: Dict[str, Any], pair: str = ''): + + agent_params = self.freqai_info['model_training_parameters'] + reward_params = self.freqai_info['model_reward_parameters'] + train_df = data_dictionary["train_features"] + test_df = data_dictionary["test_features"] + eval_freq = agent_params["eval_cycles"] * len(test_df) + total_timesteps = agent_params["train_cycles"] * len(train_df) + + # price data for model training and evaluation + price = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(train_df.index)) + price_test = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail( + len(test_df.index)) + + # environments + train_env = MyRLEnv(df=train_df, prices=price, window_size=self.CONV_WIDTH, + reward_kwargs=reward_params) + eval = MyRLEnv(df=test_df, prices=price_test, + window_size=self.CONV_WIDTH, reward_kwargs=reward_params) + eval_env = Monitor(eval, ".") + eval_env.reset() + + path = self.dk.data_path + eval_callback = EvalCallback(eval_env, best_model_save_path=f"{path}/", + log_path=f"{path}/ppo/logs/", eval_freq=int(eval_freq), + deterministic=True, render=False) + + # model arch + policy_kwargs = dict(activation_fn=th.nn.ReLU, + net_arch=[256, 256, 128]) + + model = PPO('MultiInputPolicy', train_env, policy_kwargs=policy_kwargs, + tensorboard_log=f"{path}/ppo/tensorboard/", learning_rate=0.00025, gamma=0.9 + ) + + model.learn( + total_timesteps=int(total_timesteps), + callback=eval_callback + ) + + print('Training finished!') + + return model + + +class MyRLEnv(BaseRLEnv): + """ + User can override any function in BaseRLEnv and gym.Env + """ + + def step(self, action): + self._done = False + self._current_tick += 1 + + if self._current_tick == self._end_tick: + self._done = True + + self.update_portfolio_log_returns(action) + + self._update_profit(action) + step_reward = self._calculate_reward(action) + self.total_reward += step_reward + + trade_type = None + if self.is_tradesignal(action): + """ + Action: Neutral, position: Long -> Close Long + Action: Neutral, position: Short -> Close Short + + Action: Long, position: Neutral -> Open Long + Action: Long, position: Short -> Close Short and Open Long + + Action: Short, position: Neutral -> Open Short + Action: Short, position: Long -> Close Long and Open Short + """ + + if action == Actions.Neutral.value: + self._position = Positions.Neutral + trade_type = "neutral" + elif action == Actions.Long.value: + self._position = Positions.Long + trade_type = "long" + elif action == Actions.Short.value: + self._position = Positions.Short + trade_type = "short" + else: + print("case not defined") + + # Update last trade tick + self._last_trade_tick = self._current_tick + + if trade_type is not None: + self.trade_history.append( + {'price': self.current_price(), 'index': self._current_tick, + 'type': trade_type}) + + if self._total_profit < 0.2: + self._done = True + + self._position_history.append(self._position) + observation = self._get_observation() + info = dict( + tick=self._current_tick, + total_reward=self.total_reward, + total_profit=self._total_profit, + position=self._position.value + ) + self._update_history(info) + + return observation, step_reward, self._done, info + + def calculate_reward(self, action): + + if self._last_trade_tick is None: + return 0. 
+
+        # close long
+        if (action == Actions.Short.value or
+                action == Actions.Neutral.value) and self._position == Positions.Long:
+            last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
+            current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
+            return float(np.log(current_price) - np.log(last_trade_price))
+
+        # close short
+        if (action == Actions.Long.value or
+                action == Actions.Neutral.value) and self._position == Positions.Short:
+            last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
+            current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
+            return float(np.log(last_trade_price) - np.log(current_price))
+
+        return 0.
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py
new file mode 100644
index 000000000..a022a10ba
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py
@@ -0,0 +1,168 @@
+import logging
+from typing import Any, Dict
+
+import numpy as np
+import torch as th
+from stable_baselines3.common.callbacks import EvalCallback
+from stable_baselines3.common.monitor import Monitor
+# from stable_baselines3.common.vec_env import SubprocVecEnv
+from freqtrade.freqai.RL.BaseRLEnv import BaseRLEnv, Actions, Positions
+from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
+from freqtrade.freqai.RL.TDQNagent import TDQN
+from stable_baselines3.common.buffers import ReplayBuffer
+
+
+logger = logging.getLogger(__name__)
+
+
+class ReinforcementLearningTDQN(BaseReinforcementLearningModel):
+    """
+    User created Reinforcement Learning Model prediction model.
+    """
+
+    def fit(self, data_dictionary: Dict[str, Any], pair: str = ''):
+
+        agent_params = self.freqai_info['model_training_parameters']
+        reward_params = self.freqai_info['model_reward_parameters']
+        train_df = data_dictionary["train_features"]
+        test_df = data_dictionary["test_features"]
+        eval_freq = agent_params["eval_cycles"] * len(test_df)
+        total_timesteps = agent_params["train_cycles"] * len(train_df)
+
+        # price data for model training and evaluation
+        price = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(train_df.index))
+        price_test = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(
+            len(test_df.index))
+
+        # environments
+        train_env = MyRLEnv(df=train_df, prices=price, window_size=self.CONV_WIDTH,
+                            reward_kwargs=reward_params)
+        eval = MyRLEnv(df=test_df, prices=price_test,
+                       window_size=self.CONV_WIDTH, reward_kwargs=reward_params)
+        eval_env = Monitor(eval, ".")
+        eval_env.reset()
+
+        path = self.dk.data_path
+        eval_callback = EvalCallback(eval_env, best_model_save_path=f"{path}/",
+                                     log_path=f"{path}/tdqn/logs/", eval_freq=int(eval_freq),
+                                     deterministic=True, render=False)
+
+        # model arch
+        policy_kwargs = dict(activation_fn=th.nn.ReLU,
+                             net_arch=[256, 256, 128])
+
+        model = TDQN('TMultiInputPolicy', train_env,
+                     policy_kwargs=policy_kwargs,
+                     tensorboard_log=f"{path}/tdqn/tensorboard/",
+                     learning_rate=0.00025, gamma=0.9,
+                     target_update_interval=5000, buffer_size=50000,
+                     exploration_initial_eps=1, exploration_final_eps=0.1,
+                     replay_buffer_class=ReplayBuffer
+                     )
+
+        model.learn(
+            total_timesteps=int(total_timesteps),
+            callback=eval_callback
+        )
+
+        print('Training finished!')
+
+        return model
+
+
+class MyRLEnv(BaseRLEnv):
+    """
+    User can override any function in BaseRLEnv and gym.Env
+    """
+
+    def step(self, action):
+        self._done = False
+        self._current_tick += 1
+
+        if self._current_tick == self._end_tick:
+            self._done = True
+
+        self.update_portfolio_log_returns(action)
+
+        self._update_profit(action)
+        step_reward = self._calculate_reward(action)
+        self.total_reward += step_reward
+
+        trade_type = None
+        if self.is_tradesignal(action):
+            """
+            Action: Neutral, position: Long -> Close Long
+            Action: Neutral, position: Short -> Close Short
+
+            Action: Long, position: Neutral -> Open Long
+            Action: Long, position: Short -> Close Short and Open Long
+
+            Action: Short, position: Neutral -> Open Short
+            Action: Short, position: Long -> Close Long and Open Short
+            """
+
+            if action == Actions.Neutral.value:
+                self._position = Positions.Neutral
+                trade_type = "neutral"
+            elif action == Actions.Long.value:
+                self._position = Positions.Long
+                trade_type = "long"
+            elif action == Actions.Short.value:
+                self._position = Positions.Short
+                trade_type = "short"
+            else:
+                print("case not defined")
+
+            # Update last trade tick
+            self._last_trade_tick = self._current_tick
+
+            if trade_type is not None:
+                self.trade_history.append(
+                    {'price': self.current_price(), 'index': self._current_tick,
+                     'type': trade_type})
+
+        if self._total_profit < 0.2:
+            self._done = True
+
+        self._position_history.append(self._position)
+        observation = self._get_observation()
+        info = dict(
+            tick=self._current_tick,
+            total_reward=self.total_reward,
+            total_profit=self._total_profit,
+            position=self._position.value
+        )
+        self._update_history(info)
+
+        return observation, step_reward, self._done, info
+
+    def calculate_reward(self, action):
+
+        if self._last_trade_tick is None:
+            return 0.
+
+        # close long
+        if (action == Actions.Short.value or
+                action == Actions.Neutral.value) and self._position == Positions.Long:
+            last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
+            current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
+            factor = 1.
+            # double the reward when the closed trade beat the user defined profit aim
+            if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                factor = 2.
+            return float((np.log(current_price) - np.log(last_trade_price)) * factor)
+
+        # close short
+        if (action == Actions.Long.value or
+                action == Actions.Neutral.value) and self._position == Positions.Short:
+            last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
+            current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
+            factor = 1.
+            # double the reward when the closed trade beat the user defined profit aim
+            if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                factor = 2.
+            return float((np.log(last_trade_price) - np.log(current_price)) * factor)
+
+        return 0.
diff --git a/freqtrade/freqai/rl/BaseRLAgent.py b/freqtrade/freqai/rl/BaseRLAgent.py
deleted file mode 100644
index e69de29bb..000000000
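
For reference, a minimal sketch (not part of the patch) of the customization pattern this restructuring enables: a user-side environment that overrides only calculate_reward. It assumes that BaseRLEnv, Actions, Positions and the fee/price helpers behave as shown in the hunks above, and that the environment's step() ends up dispatching to calculate_reward(); the holding-time penalty and the names MyCustomRLEnv and hold_penalty are purely illustrative and do not appear in this patch.

import numpy as np

from freqtrade.freqai.RL.BaseRLEnv import Actions, BaseRLEnv, Positions


class MyCustomRLEnv(BaseRLEnv):
    """
    Hypothetical user environment: keeps the log-return reward used above,
    but subtracts a small penalty that grows with the number of candles a
    trade has been held (illustrative reward shaping only).
    """

    def calculate_reward(self, action):
        if self._last_trade_tick is None:
            return 0.

        # illustrative: 0.0001 per candle held since the trade was opened
        hold_penalty = 0.0001 * (self._current_tick - self._last_trade_tick)

        # close long
        if (action == Actions.Short.value or
                action == Actions.Neutral.value) and self._position == Positions.Long:
            last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
            current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
            return float(np.log(current_price) - np.log(last_trade_price)) - hold_penalty

        # close short
        if (action == Actions.Long.value or
                action == Actions.Neutral.value) and self._position == Positions.Short:
            last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
            current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
            return float(np.log(last_trade_price) - np.log(current_price)) - hold_penalty

        return 0.

Such an environment would then be constructed inside fit() exactly as MyRLEnv is above, e.g. MyCustomRLEnv(df=train_df, prices=price, window_size=self.CONV_WIDTH, reward_kwargs=reward_params).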