diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py
index 5282b4f59..f9d56c4b4 100644
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -473,7 +473,9 @@ class FreqaiDataDrawer:
             model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5")
         elif model_type == 'stable_baselines':
             from stable_baselines3.ppo.ppo import PPO
-            model = PPO.load(dk.data_path / f"{dk.model_filename}_model.zip")
+            from stable_baselines3 import DQN
+            #model = PPO.load(dk.data_path / f"{dk.model_filename}_model.zip")
+            model = DQN.load(dk.data_path / f"best_model.zip")
 
         if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file():
             dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib")
diff --git a/freqtrade/freqai/prediction_models/RL/RLPrediction_agent_v2.py b/freqtrade/freqai/prediction_models/RL/RLPrediction_agent_v2.py
new file mode 100644
index 000000000..e6a931e43
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/RL/RLPrediction_agent_v2.py
@@ -0,0 +1,225 @@
+import torch as th
+from torch import nn
+from typing import Dict, List, Tuple, Type, Optional, Any, Union
+import gym
+from stable_baselines3.common.type_aliases import GymEnv, Schedule
+from stable_baselines3.common.torch_layers import (
+    BaseFeaturesExtractor,
+    FlattenExtractor,
+    CombinedExtractor
+)
+from stable_baselines3.common.buffers import ReplayBuffer
+from stable_baselines3 import DQN
+
+
+from stable_baselines3.common.policies import BasePolicy
+#from stable_baselines3.common.policies import register_policy
+from stable_baselines3.dqn.policies import (
+    QNetwork, DQNPolicy, MultiInputPolicy,
+    CnnPolicy, DQNPolicy, MlpPolicy)
+import torch
+
+
+def create_mlp_(
+    input_dim: int,
+    output_dim: int,
+    net_arch: List[int],
+    activation_fn: Type[nn.Module] = nn.ReLU,
+    squash_output: bool = False,
+) -> List[nn.Module]:
+    dropout = 0.2
+    if len(net_arch) > 0:
+        number_of_neural = net_arch[0]
+
+    modules = [
+        nn.Linear(input_dim, number_of_neural),
+        nn.BatchNorm1d(number_of_neural),
+        nn.LeakyReLU(),
+        nn.Dropout(dropout),
+        nn.Linear(number_of_neural, number_of_neural),
+        nn.BatchNorm1d(number_of_neural),
+        nn.LeakyReLU(),
+        nn.Dropout(dropout),
+        nn.Linear(number_of_neural, number_of_neural),
+        nn.BatchNorm1d(number_of_neural),
+        nn.LeakyReLU(),
+        nn.Dropout(dropout),
+        nn.Linear(number_of_neural, number_of_neural),
+        nn.BatchNorm1d(number_of_neural),
+        nn.LeakyReLU(),
+        nn.Dropout(dropout),
+        nn.Linear(number_of_neural, output_dim)
+    ]
+    return modules
+
+class TDQNetwork(QNetwork):
+    def __init__(self,
+                 observation_space: gym.spaces.Space,
+                 action_space: gym.spaces.Space,
+                 features_extractor: nn.Module,
+                 features_dim: int,
+                 net_arch: Optional[List[int]] = None,
+                 activation_fn: Type[nn.Module] = nn.ReLU,
+                 normalize_images: bool = True
+                 ):
+        super().__init__(
+            observation_space=observation_space,
+            action_space=action_space,
+            features_extractor=features_extractor,
+            features_dim=features_dim,
+            net_arch=net_arch,
+            activation_fn=activation_fn,
+            normalize_images=normalize_images
+        )
+        action_dim = self.action_space.n
+        q_net = create_mlp_(self.features_dim, action_dim, self.net_arch, self.activation_fn)
+        self.q_net = nn.Sequential(*q_net).apply(self.init_weights)
+
+    def init_weights(self, m):
+        if type(m) == nn.Linear:
+            torch.nn.init.kaiming_uniform_(m.weight)
+
+
+class TDQNPolicy(DQNPolicy):
+
+    def __init__(
+        self,
+        observation_space: gym.spaces.Space,
+        action_space: gym.spaces.Space,
+        lr_schedule: Schedule,
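+        # NOTE: net_arch is forwarded down to TDQNetwork, but create_mlp_() above only
+        # reads net_arch[0] and always builds the same four hidden
+        # Linear/BatchNorm1d/LeakyReLU/Dropout(0.2) blocks, rather than one layer per
+        # net_arch entry as stock stable_baselines3 would.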
net_arch: Optional[List[int]] = None, + activation_fn: Type[nn.Module] = nn.ReLU, + features_extractor_class: Type[BaseFeaturesExtractor] = FlattenExtractor, + features_extractor_kwargs: Optional[Dict[str, Any]] = None, + normalize_images: bool = True, + optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam, + optimizer_kwargs: Optional[Dict[str, Any]] = None, + ): + super().__init__( + observation_space=observation_space, + action_space=action_space, + lr_schedule=lr_schedule, + net_arch=net_arch, + activation_fn=activation_fn, + features_extractor_class=features_extractor_class, + features_extractor_kwargs=features_extractor_kwargs, + normalize_images=normalize_images, + optimizer_class=optimizer_class, + optimizer_kwargs=optimizer_kwargs + ) + + @staticmethod + def init_weights(module: nn.Module, gain: float = 1) -> None: + """ + Orthogonal initialization (used in PPO and A2C) + """ + if isinstance(module, (nn.Linear, nn.Conv2d)): + nn.init.kaiming_uniform_(module.weight) + if module.bias is not None: + module.bias.data.fill_(0.0) + + def make_q_net(self) -> TDQNetwork: + # Make sure we always have separate networks for features extractors etc + net_args = self._update_features_extractor(self.net_args, features_extractor=None) + return TDQNetwork(**net_args).to(self.device) + + +class TMultiInputPolicy(TDQNPolicy): + def __init__( + self, + observation_space: gym.spaces.Space, + action_space: gym.spaces.Space, + lr_schedule: Schedule, + net_arch: Optional[List[int]] = None, + activation_fn: Type[nn.Module] = nn.ReLU, + features_extractor_class: Type[BaseFeaturesExtractor] = FlattenExtractor, + features_extractor_kwargs: Optional[Dict[str, Any]] = None, + normalize_images: bool = True, + optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam, + optimizer_kwargs: Optional[Dict[str, Any]] = None, + ): + super().__init__( + observation_space, + action_space, + lr_schedule, + net_arch, + activation_fn, + features_extractor_class, + features_extractor_kwargs, + normalize_images, + optimizer_class, + optimizer_kwargs, + ) + + +class TDQN(DQN): + + policy_aliases: Dict[str, Type[BasePolicy]] = { + "MlpPolicy": MlpPolicy, + "CnnPolicy": CnnPolicy, + "TMultiInputPolicy": TMultiInputPolicy, + } + + def __init__( + self, + policy: Union[str, Type[TDQNPolicy]], + env: Union[GymEnv, str], + learning_rate: Union[float, Schedule] = 1e-4, + buffer_size: int = 1000000, # 1e6 + learning_starts: int = 50000, + batch_size: int = 32, + tau: float = 1.0, + gamma: float = 0.99, + train_freq: Union[int, Tuple[int, str]] = 4, + gradient_steps: int = 1, + replay_buffer_class: Optional[ReplayBuffer] = None, + replay_buffer_kwargs: Optional[Dict[str, Any]] = None, + optimize_memory_usage: bool = False, + target_update_interval: int = 10000, + exploration_fraction: float = 0.1, + exploration_initial_eps: float = 1.0, + exploration_final_eps: float = 0.05, + max_grad_norm: float = 10, + tensorboard_log: Optional[str] = None, + create_eval_env: bool = False, + policy_kwargs: Optional[Dict[str, Any]] = None, + verbose: int = 1, + seed: Optional[int] = None, + device: Union[th.device, str] = "auto", + _init_setup_model: bool = True, + ): + + super().__init__( + policy=policy, + env=env, + learning_rate=learning_rate, + buffer_size=buffer_size, + learning_starts=learning_starts, + batch_size=batch_size, + tau=tau, + gamma=gamma, + train_freq=train_freq, + gradient_steps=gradient_steps, + replay_buffer_class=replay_buffer_class, # No action noise + replay_buffer_kwargs=replay_buffer_kwargs, + 
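+            # the remaining keyword arguments are passed straight through to the
+            # stable_baselines3 DQN constructor; TDQN itself only swaps in the
+            # policy aliases defined above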
optimize_memory_usage=optimize_memory_usage, + target_update_interval=target_update_interval, + exploration_fraction=exploration_fraction, + exploration_initial_eps=exploration_initial_eps, + exploration_final_eps=exploration_final_eps, + max_grad_norm=max_grad_norm, + tensorboard_log=tensorboard_log, + create_eval_env=create_eval_env, + policy_kwargs=policy_kwargs, + verbose=verbose, + seed=seed, + device=device, + _init_setup_model=_init_setup_model + ) + + + +# try: +# register_policy("TMultiInputPolicy", TMultiInputPolicy) +# except: +# print("already registered") \ No newline at end of file diff --git a/freqtrade/freqai/prediction_models/RL/RLPrediction_env_v2.py b/freqtrade/freqai/prediction_models/RL/RLPrediction_env_v2.py new file mode 100644 index 000000000..ac91cd200 --- /dev/null +++ b/freqtrade/freqai/prediction_models/RL/RLPrediction_env_v2.py @@ -0,0 +1,645 @@ +import gym +from gym import spaces +from gym.utils import seeding +from enum import Enum +from sklearn.decomposition import PCA, KernelPCA +import random +import numpy as np +import pandas as pd +from collections import deque +import matplotlib.pylab as plt +from typing import Dict, List, Tuple, Type, Optional, Any, Union, Callable +import logging + +logger = logging.getLogger(__name__) + +# from bokeh.io import output_notebook +# from bokeh.plotting import figure, show +# from bokeh.models import ( +# CustomJS, +# ColumnDataSource, +# NumeralTickFormatter, +# Span, +# HoverTool, +# Range1d, +# DatetimeTickFormatter, +# Scatter, +# Label, LabelSet +# ) + +class Actions(Enum): + Short = 0 + Long = 1 + Neutral = 2 + +class Actions_v2(Enum): + Neutral = 0 + Long_buy = 1 + Long_sell = 2 + Short_buy = 3 + Short_sell = 4 + + +class Positions(Enum): + Short = 0 + Long = 1 + Neutral = 0.5 + + def opposite(self): + return Positions.Short if self == Positions.Long else Positions.Long + +def mean_over_std(x): + std = np.std(x, ddof=1) + mean = np.mean(x) + return mean / std if std > 0 else 0 + +class DEnv(gym.Env): + + metadata = {'render.modes': ['human']} + + def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, ): + assert df.ndim == 2 + + self.seed() + self.df = df + self.signal_features = self.df + self.prices = prices + self.window_size = window_size + self.starting_point = starting_point + self.rr = reward_kwargs["rr"] + self.profit_aim = reward_kwargs["profit_aim"] + + self.fee=0.0015 + + # # spaces + self.shape = (window_size, self.signal_features.shape[1]) + self.action_space = spaces.Discrete(len(Actions_v2)) + self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) + + # episode + self._start_tick = self.window_size + self._end_tick = len(self.prices) - 1 + self._done = None + self._current_tick = None + self._last_trade_tick = None + self._position = Positions.Neutral + self._position_history = None + self.total_reward = None + self._total_profit = None + self._first_rendering = None + self.history = None + self.trade_history = [] + + # self.A_t, self.B_t = 0.000639, 0.00001954 + self.r_t_change = 0. 
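+        # rr (risk:reward) and profit_aim come from reward_kwargs and define the profit
+        # thresholds (profit_aim * rr) that reward_rr_profit_config_v2() scores exits against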
+ + self.returns_report = [] + + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + + def reset(self): + + self._done = False + + if self.starting_point == True: + self._position_history = (self._start_tick* [None]) + [self._position] + else: + self._position_history = (self.window_size * [None]) + [self._position] + + self._current_tick = self._start_tick + self._last_trade_tick = None + #self._last_trade_tick = self._current_tick - 1 + self._position = Positions.Neutral + + self.total_reward = 0. + self._total_profit = 1. # unit + self._first_rendering = True + self.history = {} + self.trade_history = [] + self.portfolio_log_returns = np.zeros(len(self.prices)) + + + self._profits = [(self._start_tick, 1)] + self.close_trade_profit = [] + self.r_t_change = 0. + + self.returns_report = [] + + return self._get_observation() + + + def step(self, action): + self._done = False + self._current_tick += 1 + + if self._current_tick == self._end_tick: + self._done = True + + self.update_portfolio_log_returns(action) + + self._update_profit(action) + step_reward = self._calculate_reward(action) + self.total_reward += step_reward + + + + + + trade_type = None + if self.is_tradesignal_v2(action): # exclude 3 case not trade + # Update position + """ + Action: Neutral, position: Long -> Close Long + Action: Neutral, position: Short -> Close Short + + Action: Long, position: Neutral -> Open Long + Action: Long, position: Short -> Close Short and Open Long + + Action: Short, position: Neutral -> Open Short + Action: Short, position: Long -> Close Long and Open Short + """ + + + temp_position = self._position + if action == Actions_v2.Neutral.value: + self._position = Positions.Neutral + trade_type = "neutral" + elif action == Actions_v2.Long_buy.value: + self._position = Positions.Long + trade_type = "long" + elif action == Actions_v2.Short_buy.value: + self._position = Positions.Short + trade_type = "short" + elif action == Actions_v2.Long_sell.value: + self._position = Positions.Neutral + trade_type = "neutral" + elif action == Actions_v2.Short_sell.value: + self._position = Positions.Neutral + trade_type = "neutral" + else: + print("case not defined") + + # Update last trade tick + self._last_trade_tick = self._current_tick + + if trade_type != None: + self.trade_history.append( + {'price': self.current_price(), 'index': self._current_tick, 'type': trade_type}) + + if self._total_profit < 0.2: + self._done = True + + self._position_history.append(self._position) + observation = self._get_observation() + info = dict( + tick = self._current_tick, + total_reward = self.total_reward, + total_profit = self._total_profit, + position = self._position.value + ) + self._update_history(info) + + return observation, step_reward, self._done, info + + + def processState(self, state): + return state.to_numpy() + + def convert_mlp_Policy(self, obs_): + pass + + def _get_observation(self): + return self.signal_features[(self._current_tick - self.window_size):self._current_tick] + + + def get_unrealized_profit(self): + + if self._last_trade_tick == None: + return 0. + + if self._position == Positions.Neutral: + return 0. 
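+        # fees are applied to both legs: entry and exit each get the fee for their side of
+        # the trade (add_buy_fee / add_sell_fee), so the PnL below is net of round-trip fees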
+        elif self._position == Positions.Short:
+            current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
+            last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
+            return (last_trade_price - current_price)/last_trade_price
+        elif self._position == Positions.Long:
+            current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
+            last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
+            return (current_price - last_trade_price)/last_trade_price
+        else:
+            return 0.
+
+
+    def is_tradesignal(self, action):
+        # trade signal
+        """
+        Determine if the action is a trade signal. These combinations are ignored
+        and the current position is kept:
+        Action: Neutral, position: Neutral -> Nothing
+        Action: Long, position: Long -> Hold Long
+        Action: Short, position: Short -> Hold Short
+        """
+        return not ((action == Actions.Neutral.value and self._position == Positions.Neutral)
+                    or (action == Actions.Short.value and self._position == Positions.Short)
+                    or (action == Actions.Long.value and self._position == Positions.Long))
+
+    def is_tradesignal_v2(self, action):
+        # trade signal
+        """
+        Determine if the action is a trade signal. These combinations are ignored
+        and the current position is kept:
+        Action: Neutral, position: Neutral -> Nothing
+        Action: Long_buy/Long_sell, position: Long or Short -> Hold
+        Action: Short_buy/Short_sell, position: Long or Short -> Hold
+        """
+        return not ((action == Actions_v2.Neutral.value and self._position == Positions.Neutral) or
+                    (action == Actions_v2.Short_buy.value and self._position == Positions.Short) or
+                    (action == Actions_v2.Short_sell.value and self._position == Positions.Short) or
+                    (action == Actions_v2.Short_buy.value and self._position == Positions.Long) or
+                    (action == Actions_v2.Short_sell.value and self._position == Positions.Long) or
+
+                    (action == Actions_v2.Long_buy.value and self._position == Positions.Long) or
+                    (action == Actions_v2.Long_sell.value and self._position == Positions.Long) or
+                    (action == Actions_v2.Long_buy.value and self._position == Positions.Short) or
+                    (action == Actions_v2.Long_sell.value and self._position == Positions.Short))
+
+
+
+    def _is_trade(self, action: Actions):
+        return ((action == Actions.Long.value and self._position == Positions.Short) or
+                (action == Actions.Short.value and self._position == Positions.Long) or
+                (action == Actions.Neutral.value and self._position == Positions.Long) or
+                (action == Actions.Neutral.value and self._position == Positions.Short)
+                )
+
+    def _is_trade_v2(self, action: Actions_v2):
+        return ((action == Actions_v2.Long_buy.value and self._position == Positions.Short) or
+                (action == Actions_v2.Short_buy.value and self._position == Positions.Long) or
+                (action == Actions_v2.Neutral.value and self._position == Positions.Long) or
+                (action == Actions_v2.Neutral.value and self._position == Positions.Short) or
+
+                # compare against the enum .value like the clauses above (was
+                # Actions_v2.Neutral.Short_sell / .Long_sell, which never matches an int action)
+                (action == Actions_v2.Short_sell.value and self._position == Positions.Long) or
+                (action == Actions_v2.Long_sell.value and self._position == Positions.Short)
+                )
+
+
+    def is_hold(self, action):
+        return ((action == Actions.Short.value and self._position == Positions.Short)
+                or (action == Actions.Long.value and self._position == Positions.Long))
+
+    def is_hold_v2(self, action):
+        return ((action == Actions_v2.Short_buy.value and self._position == Positions.Short)
+                or (action == Actions_v2.Long_buy.value and self._position == Positions.Long))
+
+
+    def add_buy_fee(self, price):
+        return price * (1 + self.fee)
+
+    def add_sell_fee(self, price):
+        return price / (1 + self.fee)
+
+    def _update_history(self, info):
+        if not self.history:
+            self.history = {key: [] for key in info.keys()}
+
+        for key,
value in info.items(): + self.history[key].append(value) + + + def render(self, mode='human'): + + def _plot_position(position, tick): + color = None + if position == Positions.Short: + color = 'red' + elif position == Positions.Long: + color = 'green' + if color: + plt.scatter(tick, self.prices.loc[tick].open, color=color) + + if self._first_rendering: + self._first_rendering = False + plt.cla() + plt.plot(self.prices) + start_position = self._position_history[self._start_tick] + _plot_position(start_position, self._start_tick) + + plt.cla() + plt.plot(self.prices) + _plot_position(self._position, self._current_tick) + + plt.suptitle("Total Reward: %.6f" % self.total_reward + ' ~ ' + "Total Profit: %.6f" % self._total_profit) + plt.pause(0.01) + + + def render_all(self): + plt.figure() + window_ticks = np.arange(len(self._position_history)) + plt.plot(self.prices['open'], alpha=0.5) + + short_ticks = [] + long_ticks = [] + neutral_ticks = [] + for i, tick in enumerate(window_ticks): + if self._position_history[i] == Positions.Short: + short_ticks.append(tick - 1) + elif self._position_history[i] == Positions.Long: + long_ticks.append(tick - 1) + elif self._position_history[i] == Positions.Neutral: + neutral_ticks.append(tick - 1) + + plt.plot(neutral_ticks, self.prices.loc[neutral_ticks].open, + 'o', color='grey', ms=3, alpha=0.1) + plt.plot(short_ticks, self.prices.loc[short_ticks].open, + 'o', color='r', ms=3, alpha=0.8) + plt.plot(long_ticks, self.prices.loc[long_ticks].open, + 'o', color='g', ms=3, alpha=0.8) + + plt.suptitle("Generalising") + fig = plt.gcf() + fig.set_size_inches(15, 10) + + + + + def close_trade_report(self): + small_trade = 0 + positive_big_trade = 0 + negative_big_trade = 0 + small_profit = 0.003 + for i in self.close_trade_profit: + if i < small_profit and i > -small_profit: + small_trade+=1 + elif i > small_profit: + positive_big_trade += 1 + elif i < -small_profit: + negative_big_trade += 1 + print(f"small trade={small_trade/len(self.close_trade_profit)}; positive_big_trade={positive_big_trade/len(self.close_trade_profit)}; negative_big_trade={negative_big_trade/len(self.close_trade_profit)}") + + + def report(self): + + # get total trade + long_trade = 0 + short_trade = 0 + neutral_trade = 0 + for trade in self.trade_history: + if trade['type'] == 'long': + long_trade += 1 + + elif trade['type'] == 'short': + short_trade += 1 + else: + neutral_trade += 1 + + negative_trade = 0 + positive_trade = 0 + for tr in self.close_trade_profit: + if tr < 0.: + negative_trade += 1 + + if tr > 0.: + positive_trade += 1 + + total_trade_lr = negative_trade+positive_trade + + + total_trade = long_trade + short_trade + sharp_ratio = self.sharpe_ratio() + sharp_log = self.get_sharpe_ratio() + + from tabulate import tabulate + + headers = ["Performance", ""] + performanceTable = [["Total Trade", "{0:.2f}".format(total_trade)], + ["Total reward", "{0:.3f}".format(self.total_reward)], + ["Start profit(unit)", "{0:.2f}".format(1.)], + ["End profit(unit)", "{0:.3f}".format(self._total_profit)], + ["Sharp ratio", "{0:.3f}".format(sharp_ratio)], + ["Sharp log", "{0:.3f}".format(sharp_log)], + # ["Sortino ratio", "{0:.2f}".format(0) + '%'], + ["winrate", "{0:.2f}".format(positive_trade*100/total_trade_lr) + '%'] + ] + tabulation = tabulate(performanceTable, headers, tablefmt="fancy_grid", stralign="center") + print(tabulation) + + result = { + "Start": "{0:.2f}".format(1.), + "End": "{0:.2f}".format(self._total_profit), + "Sharp": "{0:.3f}".format(sharp_ratio), + "Winrate": 
"{0:.2f}".format(positive_trade*100/total_trade_lr) + } + return result + + def close(self): + plt.close() + + def get_sharpe_ratio(self): + return mean_over_std(self.get_portfolio_log_returns()) + + + def save_rendering(self, filepath): + plt.savefig(filepath) + + + def pause_rendering(self): + plt.show() + + + def _calculate_reward(self, action): + # rw = self.transaction_profit_reward(action) + #rw = self.reward_rr_profit_config(action) + rw = self.reward_rr_profit_config_v2(action) + return rw + + + def _update_profit(self, action): + #if self._is_trade(action) or self._done: + if self._is_trade_v2(action) or self._done: + pnl = self.get_unrealized_profit() + + if self._position == Positions.Long: + self._total_profit = self._total_profit + self._total_profit*pnl + self._profits.append((self._current_tick, self._total_profit)) + self.close_trade_profit.append(pnl) + + if self._position == Positions.Short: + self._total_profit = self._total_profit + self._total_profit*pnl + self._profits.append((self._current_tick, self._total_profit)) + self.close_trade_profit.append(pnl) + + + def most_recent_return(self, action): + """ + We support Long, Neutral and Short positions. + Return is generated from rising prices in Long + and falling prices in Short positions. + The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. + """ + # Long positions + if self._position == Positions.Long: + current_price = self.prices.iloc[self._current_tick].open + #if action == Actions.Short.value or action == Actions.Neutral.value: + if action == Actions_v2.Short_buy.value or action == Actions_v2.Neutral.value: + current_price = self.add_sell_fee(current_price) + + previous_price = self.prices.iloc[self._current_tick - 1].open + + if (self._position_history[self._current_tick - 1] == Positions.Short + or self._position_history[self._current_tick - 1] == Positions.Neutral): + previous_price = self.add_buy_fee(previous_price) + + return np.log(current_price) - np.log(previous_price) + + # Short positions + if self._position == Positions.Short: + current_price = self.prices.iloc[self._current_tick].open + #if action == Actions.Long.value or action == Actions.Neutral.value: + if action == Actions_v2.Long_buy.value or action == Actions_v2.Neutral.value: + current_price = self.add_buy_fee(current_price) + + previous_price = self.prices.iloc[self._current_tick - 1].open + if (self._position_history[self._current_tick - 1] == Positions.Long + or self._position_history[self._current_tick - 1] == Positions.Neutral): + previous_price = self.add_sell_fee(previous_price) + + return np.log(previous_price) - np.log(current_price) + + return 0 + + def get_portfolio_log_returns(self): + return self.portfolio_log_returns[1:self._current_tick + 1] + + + def get_trading_log_return(self): + return self.portfolio_log_returns[self._start_tick:] + + def update_portfolio_log_returns(self, action): + self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) + + def current_price(self) -> float: + return self.prices.iloc[self._current_tick].open + + def prev_price(self) -> float: + return self.prices.iloc[self._current_tick-1].open + + + + def sharpe_ratio(self): + if len(self.close_trade_profit) == 0: + return 0. + returns = np.array(self.close_trade_profit) + reward = (np.mean(returns) - 0. 
+ 1e-9) / (np.std(returns) + 1e-9) + return reward + + def get_bnh_log_return(self): + return np.diff(np.log(self.prices['open'][self._start_tick:])) + + + def transaction_profit_reward(self, action): + rw = 0. + + pt = self.prev_price() + pt_1 = self.current_price() + + + if self._position == Positions.Long: + a_t = 1 + elif self._position == Positions.Short: + a_t = -1 + else: + a_t = 0 + + # close long + if (action == Actions.Short.value or action == Actions.Neutral.value) and self._position == Positions.Long: + pt_1 = self.add_sell_fee(self.current_price()) + po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + + rw = a_t*(pt_1 - po)/po + #rw = rw*2 + # close short + elif (action == Actions.Long.value or action == Actions.Neutral.value) and self._position == Positions.Short: + pt_1 = self.add_buy_fee(self.current_price()) + po = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + rw = a_t*(pt_1 - po)/po + #rw = rw*2 + else: + rw = a_t*(pt_1 - pt)/pt + + return np.clip(rw, 0, 1) + + + + def reward_rr_profit_config_v2(self, action): + rw = 0. + + pt_1 = self.current_price() + + + if len(self.close_trade_profit) > 0: + # long + if self._position == Positions.Long: + pt_1 = self.add_sell_fee(self.current_price()) + po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + + if action == Actions_v2.Short_buy.value: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + rw = 10 * 2 + elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < self.profit_aim * self.rr: + rw = 10 * 1 * 1 + elif self.close_trade_profit[-1] < 0: + rw = 10 * -1 + elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 3 * -1 + + if action == Actions_v2.Long_sell.value: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + rw = 10 * 5 + elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < self.profit_aim * self.rr: + rw = 10 * 1 * 3 + elif self.close_trade_profit[-1] < 0: + rw = 10 * -1 + elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 3 * -1 + + if action == Actions_v2.Neutral.value: + if self.close_trade_profit[-1] > 0: + rw = 2 + elif self.close_trade_profit[-1] < 0: + rw = 2 * -1 + + # short + if self._position == Positions.Short: + pt_1 = self.add_sell_fee(self.current_price()) + po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + + if action == Actions_v2.Long_buy.value: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + rw = 10 * 2 + elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 1 * 1 + elif self.close_trade_profit[-1] < 0: + rw = 10 * -1 + elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 3 * -1 + + if action == Actions_v2.Short_sell.value: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + rw = 10 * 5 + elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 1 * 3 + elif self.close_trade_profit[-1] < 0: + rw = 10 * -1 + elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 3 * -1 + + if action == Actions_v2.Neutral.value: + if self.close_trade_profit[-1] > 0: + rw = 2 + elif self.close_trade_profit[-1] < 0: + rw = 2 * -1 + + return np.clip(rw, 0, 1) \ No newline at end of file diff --git a/freqtrade/freqai/prediction_models/RLPredictionModel.py b/freqtrade/freqai/prediction_models/RLPredictionModel.py new file mode 100644 index 
000000000..b6903dd43 --- /dev/null +++ b/freqtrade/freqai/prediction_models/RLPredictionModel.py @@ -0,0 +1,253 @@ +import logging +from typing import Any, Dict, Tuple +#from matplotlib.colors import DivergingNorm + +from pandas import DataFrame +import pandas as pd +from freqtrade.exceptions import OperationalException +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +import tensorflow as tf +from freqtrade.freqai.prediction_models.BaseTensorFlowModel import BaseTensorFlowModel +from freqtrade.freqai.freqai_interface import IFreqaiModel +from tensorflow.keras.layers import Input, Conv1D, Dense, MaxPooling1D, Flatten, Dropout +from tensorflow.keras.models import Model +import numpy as np +import copy + +from keras.layers import * +import random + + +logger = logging.getLogger(__name__) + +# tf.config.run_functions_eagerly(True) +# tf.data.experimental.enable_debug_mode() + +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' +os.environ["CUDA_VISIBLE_DEVICES"] = "-1" + +MAX_EPOCHS = 10 +LOOKBACK = 8 + + +class RLPredictionModel_v2(IFreqaiModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), fit(). + """ + + def fit(self, data_dictionary: Dict, pair) -> Any: + """ + User sets up the training and test data to fit their desired model here + :params: + :data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + train_df = data_dictionary["train_features"] + train_labels = data_dictionary["train_labels"] + test_df = data_dictionary["test_features"] + test_labels = data_dictionary["test_labels"] + n_labels = len(train_labels.columns) + if n_labels > 1: + raise OperationalException( + "Neural Net not yet configured for multi-targets. Please " + " reduce number of targets to 1 in strategy." + ) + + n_features = len(data_dictionary["train_features"].columns) + BATCH_SIZE = self.freqai_info.get("batch_size", 64) + input_dims = [BATCH_SIZE, self.CONV_WIDTH, n_features] + + + w1 = WindowGenerator( + input_width=self.CONV_WIDTH, + label_width=1, + shift=1, + train_df=train_df, + val_df=test_df, + train_labels=train_labels, + val_labels=test_labels, + batch_size=BATCH_SIZE, + ) + + + # train_agent() + #pair = self.dd.historical_data[pair] + #gym_env = FreqtradeEnv(data=train_df, prices=0.01, windows_size=100, pair=pair, stake_amount=100) + + # sep = '/' + # coin = pair.split(sep, 1)[0] + + # # df1 = train_df.filter(regex='price') + # # df2 = df1.filter(regex='raw') + + # # df3 = df2.filter(regex=f"{coin}") + # # print(df3) + + # price = train_df[f"%-{coin}raw_price_5m"] + # gym_env = RLPrediction_GymAnytrading(signal_features=train_df, prices=price, window_size=100) + # sac = RLPrediction_Agent(gym_env) + + # print(sac) + + # return 0 + + + + return model + + def predict( + self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first=True + ) -> Tuple[DataFrame, DataFrame]: + """ + Filter the prediction features data and predict with it. + :param: unfiltered_dataframe: Full dataframe for the current backtest period. 
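+        :param: dk: the FreqaiDataKitchen object holding the data and metadata for the
+        current pair/model
+        :param: first: True when predicting over the full dataframe (windows are rebuilt
+        with WindowGenerator), False for the subsequent single-candle calls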
+ :return: + :predictions: np.array of predictions + :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove + data (NaNs) or felt uncertain about data (PCA and DI index) + """ + + dk.find_features(unfiltered_dataframe) + filtered_dataframe, _ = dk.filter_features( + unfiltered_dataframe, dk.training_features_list, training_filter=False + ) + filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) + dk.data_dictionary["prediction_features"] = filtered_dataframe + + # optional additional data cleaning/analysis + self.data_cleaning_predict(dk, filtered_dataframe) + + if first: + full_df = dk.data_dictionary["prediction_features"] + + w1 = WindowGenerator( + input_width=self.CONV_WIDTH, + label_width=1, + shift=1, + test_df=full_df, + batch_size=len(full_df), + ) + + predictions = self.model.predict(w1.inference) + len_diff = len(dk.do_predict) - len(predictions) + if len_diff > 0: + dk.do_predict = dk.do_predict[len_diff:] + + else: + data = dk.data_dictionary["prediction_features"] + data = tf.expand_dims(data, axis=0) + predictions = self.model(data, training=False) + + predictions = predictions[:, 0] + pred_df = DataFrame(predictions, columns=dk.label_list) + + pred_df = dk.denormalize_labels_from_metadata(pred_df) + + return (pred_df, np.ones(len(pred_df))) + + + def set_initial_historic_predictions( + self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str + ) -> None: + + pass + # w1 = WindowGenerator( + # input_width=self.CONV_WIDTH, label_width=1, shift=1, test_df=df, batch_size=len(df) + # ) + + # trained_predictions = model.predict(w1.inference) + # #trained_predictions = trained_predictions[:, 0, 0] + # trained_predictions = trained_predictions[:, 0] + + # n_lost_points = len(df) - len(trained_predictions) + # pred_df = DataFrame(trained_predictions, columns=dk.label_list) + # zeros_df = DataFrame(np.zeros((n_lost_points, len(dk.label_list))), columns=dk.label_list) + # pred_df = pd.concat([zeros_df, pred_df], axis=0) + + # pred_df = dk.denormalize_labels_from_metadata(pred_df) + + + + # self.dd.historic_predictions[pair] = DataFrame() + # self.dd.historic_predictions[pair] = copy.deepcopy(pred_df) + + +class WindowGenerator: + def __init__( + self, + input_width, + label_width, + shift, + train_df=None, + val_df=None, + test_df=None, + train_labels=None, + val_labels=None, + test_labels=None, + batch_size=None, + ): + # Store the raw data. 
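+        # input_width = number of past rows fed to the model per sample, shift = how far
+        # ahead the label sits; total_window_size = input_width + shift (set below)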
+ self.train_df = train_df + self.val_df = val_df + self.test_df = test_df + self.train_labels = train_labels + self.val_labels = val_labels + self.test_labels = test_labels + self.batch_size = batch_size + self.input_width = input_width + self.label_width = label_width + self.shift = shift + + self.total_window_size = input_width + shift + + self.input_slice = slice(0, input_width) + self.input_indices = np.arange(self.total_window_size)[self.input_slice] + + def make_dataset(self, data, labels=None): + data = np.array(data, dtype=np.float32) + if labels is not None: + labels = np.array(labels, dtype=np.float32) + ds = tf.keras.preprocessing.timeseries_dataset_from_array( + data=data, + targets=labels, + sequence_length=self.total_window_size, + sequence_stride=1, + sampling_rate=1, + shuffle=False, + batch_size=self.batch_size, + ) + + return ds + + @property + def train(self): + + + + return self.make_dataset(self.train_df, self.train_labels) + + @property + def val(self): + return self.make_dataset(self.val_df, self.val_labels) + + @property + def test(self): + return self.make_dataset(self.test_df, self.test_labels) + + @property + def inference(self): + return self.make_dataset(self.test_df) + + @property + def example(self): + """Get and cache an example batch of `inputs, labels` for plotting.""" + result = getattr(self, "_example", None) + if result is None: + # No example batch was found, so get one from the `.train` dataset + result = next(iter(self.train)) + # And cache it for next time + self._example = result + return result \ No newline at end of file
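Note: the data_drawer.py hunk above loads a hard-coded `best_model.zip`, which is the file name stable_baselines3's `EvalCallback` writes to its `best_model_save_path`. Below is a minimal sketch of how the new DEnv environment and TDQN agent could be wired together for training under that convention; the feature/price dataframes, reward settings, save path and hyperparameters are illustrative assumptions, not values taken from this diff:

import numpy as np
import pandas as pd
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback

from freqtrade.freqai.prediction_models.RL.RLPrediction_env_v2 import DEnv
from freqtrade.freqai.prediction_models.RL.RLPrediction_agent_v2 import TDQN

# Illustrative inputs: a feature dataframe and a price dataframe with an 'open' column,
# aligned row for row (DEnv indexes both with the same tick).
features = pd.DataFrame(np.random.randn(5000, 20).astype(np.float32))
prices = pd.DataFrame({"open": 100 + np.cumsum(np.random.randn(5000))})

reward_kwargs = {"rr": 1, "profit_aim": 0.02}   # consumed by reward_rr_profit_config_v2()
train_env = Monitor(DEnv(df=features, prices=prices, reward_kwargs=reward_kwargs, window_size=10))
eval_env = Monitor(DEnv(df=features, prices=prices, reward_kwargs=reward_kwargs, window_size=10))

# EvalCallback periodically evaluates the agent and saves the best checkpoint as
# best_model.zip - the file FreqaiDataDrawer now loads with DQN.load().
eval_callback = EvalCallback(eval_env, best_model_save_path="./models/",
                             eval_freq=5000, deterministic=True)

# Despite its name, TMultiInputPolicy defaults to a FlattenExtractor, so it also accepts
# the Box observations DEnv produces; it is the alias that routes through TDQNetwork,
# where net_arch[0] sets the hidden width used by create_mlp_().
model = TDQN("TMultiInputPolicy", train_env, policy_kwargs=dict(net_arch=[256]),
             learning_starts=1000, verbose=1)
model.learn(total_timesteps=50_000, callback=eval_callback)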