From 8eeaab27467fa2e0bdc7314bdb888998bbb20af8 Mon Sep 17 00:00:00 2001 From: MukavaValkku Date: Fri, 12 Aug 2022 20:25:13 +0300 Subject: [PATCH] add reward function --- .../RL/RLPrediction_agent.py | 89 +-- .../prediction_models/RL/RLPrediction_env.py | 615 +++++++++++++++--- .../ReinforcementLearning.py | 72 +- .../ReinforcementLearningModel.py | 157 ----- 4 files changed, 597 insertions(+), 336 deletions(-) delete mode 100644 freqtrade/freqai/prediction_models/ReinforcementLearningModel.py diff --git a/freqtrade/freqai/prediction_models/RL/RLPrediction_agent.py b/freqtrade/freqai/prediction_models/RL/RLPrediction_agent.py index acea025c0..2e271bd02 100644 --- a/freqtrade/freqai/prediction_models/RL/RLPrediction_agent.py +++ b/freqtrade/freqai/prediction_models/RL/RLPrediction_agent.py @@ -1,17 +1,15 @@ # common library import numpy as np -from stable_baselines3 import A2C -from stable_baselines3 import DDPG -from stable_baselines3 import PPO -from stable_baselines3 import SAC -from stable_baselines3 import TD3 -from stable_baselines3.common.callbacks import BaseCallback -from stable_baselines3.common.noise import NormalActionNoise -from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise -# from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3 +from stable_baselines3.common.callbacks import BaseCallback, EvalCallback +from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise from freqtrade.freqai.prediction_models.RL import config + + +# from stable_baselines3.common.vec_env import DummyVecEnv + # from meta.env_stock_trading.env_stock_trading import StockTradingEnv # RL models from stable-baselines @@ -74,8 +72,10 @@ class RLPrediction_agent: policy="MlpPolicy", policy_kwargs=None, model_kwargs=None, + reward_kwargs=None, + #total_timesteps=None, verbose=1, - seed=None, + seed=None ): if model_name not in MODELS: raise NotImplementedError("NotImplementedError") @@ -95,68 +95,23 @@ class RLPrediction_agent: tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{model_name}", verbose=verbose, policy_kwargs=policy_kwargs, - seed=seed, - **model_kwargs, + #model_kwargs=model_kwargs, + #total_timesteps=model_kwargs["total_timesteps"], + seed=seed + #**model_kwargs, ) + + + + return model - def train_model(self, model, tb_log_name, total_timesteps=5000): + def train_model(self, model, tb_log_name, model_kwargs): + model = model.learn( - total_timesteps=total_timesteps, + total_timesteps=model_kwargs["total_timesteps"], tb_log_name=tb_log_name, + #callback=eval_callback, callback=TensorboardCallback(), ) return model - - @staticmethod - def DRL_prediction(model, environment): - test_env, test_obs = environment.get_sb_env() - """make a prediction""" - account_memory = [] - actions_memory = [] - test_env.reset() - for i in range(len(environment.df.index.unique())): - action, _states = model.predict(test_obs) - # account_memory = test_env.env_method(method_name="save_asset_memory") - # actions_memory = test_env.env_method(method_name="save_action_memory") - test_obs, rewards, dones, info = test_env.step(action) - if i == (len(environment.df.index.unique()) - 2): - account_memory = test_env.env_method(method_name="save_asset_memory") - actions_memory = test_env.env_method(method_name="save_action_memory") - if dones[0]: - print("hit end!") - break - return account_memory[0], actions_memory[0] - - @staticmethod - def DRL_prediction_load_from_file(model_name, environment, cwd): - if model_name not in 
MODELS: - raise NotImplementedError("NotImplementedError") - try: - # load agent - model = MODELS[model_name].load(cwd) - print("Successfully load model", cwd) - except BaseException: - raise ValueError("Fail to load agent!") - - # test on the testing env - state = environment.reset() - episode_returns = list() # the cumulative_return / initial_account - episode_total_assets = list() - episode_total_assets.append(environment.initial_total_asset) - done = False - while not done: - action = model.predict(state)[0] - state, reward, done, _ = environment.step(action) - - total_asset = ( - environment.cash - + (environment.price_array[environment.time] * environment.stocks).sum() - ) - episode_total_assets.append(total_asset) - episode_return = total_asset / environment.initial_total_asset - episode_returns.append(episode_return) - - print("episode_return", episode_return) - print("Test Finished!") - return episode_total_assets diff --git a/freqtrade/freqai/prediction_models/RL/RLPrediction_env.py b/freqtrade/freqai/prediction_models/RL/RLPrediction_env.py index 5fef7fbed..2bc7e868f 100644 --- a/freqtrade/freqai/prediction_models/RL/RLPrediction_env.py +++ b/freqtrade/freqai/prediction_models/RL/RLPrediction_env.py @@ -1,47 +1,82 @@ +import logging +import random +from collections import deque from enum import Enum +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union import gym -import matplotlib.pyplot as plt +import matplotlib.pylab as plt import numpy as np +import pandas as pd from gym import spaces from gym.utils import seeding +from sklearn.decomposition import PCA, KernelPCA +logger = logging.getLogger(__name__) + +# from bokeh.io import output_notebook +# from bokeh.plotting import figure, show +# from bokeh.models import ( +# CustomJS, +# ColumnDataSource, +# NumeralTickFormatter, +# Span, +# HoverTool, +# Range1d, +# DatetimeTickFormatter, +# Scatter, +# Label, LabelSet +# ) + class Actions(Enum): - Hold = 0 - Buy = 1 - Sell = 2 + Short = 0 + Long = 1 + Neutral = 2 + +class Actions_v2(Enum): + Neutral = 0 + Long_buy = 1 + Long_sell = 2 + Short_buy = 3 + Short_sell = 4 class Positions(Enum): Short = 0 Long = 1 + Neutral = 0.5 def opposite(self): return Positions.Short if self == Positions.Long else Positions.Long +def mean_over_std(x): + std = np.std(x, ddof=1) + mean = np.mean(x) + return mean / std if std > 0 else 0 -class GymAnytrading(gym.Env): - """ - Based on https://github.com/AminHP/gym-anytrading - """ +class DEnv(gym.Env): metadata = {'render.modes': ['human']} - def __init__(self, signal_features, prices, window_size, fee=0.0): - assert signal_features.ndim == 2 + def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, ): + assert df.ndim == 2 self.seed() - self.signal_features = signal_features + self.df = df + self.signal_features = self.df self.prices = prices self.window_size = window_size - self.fee = fee - self.shape = (window_size, self.signal_features.shape[1]) + self.starting_point = starting_point + self.rr = reward_kwargs["rr"] + self.profit_aim = reward_kwargs["profit_aim"] - # spaces - self.action_space = spaces.Discrete(len(Actions)) - self.observation_space = spaces.Box( - low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) + self.fee=0.0015 + + # # spaces + self.shape = (window_size, self.signal_features.shape[1]) + self.action_space = spaces.Discrete(len(Actions_v2)) + self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) # episode self._start_tick = 
self.window_size @@ -49,29 +84,56 @@ class GymAnytrading(gym.Env): self._done = None self._current_tick = None self._last_trade_tick = None - self._position = None + self._position = Positions.Neutral self._position_history = None - self._total_reward = None + self.total_reward = None self._total_profit = None self._first_rendering = None self.history = None + self.trade_history = [] + + # self.A_t, self.B_t = 0.000639, 0.00001954 + self.r_t_change = 0. + + self.returns_report = [] + def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] + def reset(self): + self._done = False + + if self.starting_point == True: + self._position_history = (self._start_tick* [None]) + [self._position] + else: + self._position_history = (self.window_size * [None]) + [self._position] + self._current_tick = self._start_tick - self._last_trade_tick = self._current_tick - 1 - self._position = Positions.Short - self._position_history = (self.window_size * [None]) + [self._position] - self._total_reward = 0. + self._last_trade_tick = None + #self._last_trade_tick = self._current_tick - 1 + self._position = Positions.Neutral + + self.total_reward = 0. self._total_profit = 1. # unit self._first_rendering = True self.history = {} + self.trade_history = [] + self.portfolio_log_returns = np.zeros(len(self.prices)) + + + self._profits = [(self._start_tick, 1)] + self.close_trade_profit = [] + self.r_t_change = 0. + + self.returns_report = [] + return self._get_observation() + def step(self, action): self._done = False self._current_tick += 1 @@ -79,34 +141,168 @@ class GymAnytrading(gym.Env): if self._current_tick == self._end_tick: self._done = True - step_reward = self._calculate_reward(action) - self._total_reward += step_reward + self.update_portfolio_log_returns(action) self._update_profit(action) + step_reward = self._calculate_reward(action) + self.total_reward += step_reward - trade = False - if ((action == Actions.Buy.value and self._position == Positions.Short) or - (action == Actions.Sell.value and self._position == Positions.Long)): - trade = True - if trade: - self._position = self._position.opposite() + + + + trade_type = None + if self.is_tradesignal_v2(action): # exclude 3 case not trade + # Update position + """ + Action: Neutral, position: Long -> Close Long + Action: Neutral, position: Short -> Close Short + + Action: Long, position: Neutral -> Open Long + Action: Long, position: Short -> Close Short and Open Long + + Action: Short, position: Neutral -> Open Short + Action: Short, position: Long -> Close Long and Open Short + """ + + + temp_position = self._position + if action == Actions_v2.Neutral.value: + self._position = Positions.Neutral + trade_type = "neutral" + elif action == Actions_v2.Long_buy.value: + self._position = Positions.Long + trade_type = "long" + elif action == Actions_v2.Short_buy.value: + self._position = Positions.Short + trade_type = "short" + elif action == Actions_v2.Long_sell.value: + self._position = Positions.Neutral + trade_type = "neutral" + elif action == Actions_v2.Short_sell.value: + self._position = Positions.Neutral + trade_type = "neutral" + else: + print("case not defined") + + # Update last trade tick self._last_trade_tick = self._current_tick + if trade_type != None: + self.trade_history.append( + {'price': self.current_price(), 'index': self._current_tick, 'type': trade_type}) + + if self._total_profit < 0.2: + self._done = True + self._position_history.append(self._position) observation = self._get_observation() info = dict( - 
total_reward=self._total_reward, - total_profit=self._total_profit, - position=self._position.value + tick = self._current_tick, + total_reward = self.total_reward, + total_profit = self._total_profit, + position = self._position.value ) self._update_history(info) return observation, step_reward, self._done, info + + def processState(self, state): + return state.to_numpy() + + def convert_mlp_Policy(self, obs_): + pass + def _get_observation(self): return self.signal_features[(self._current_tick - self.window_size):self._current_tick] + + def get_unrealized_profit(self): + + if self._last_trade_tick == None: + return 0. + + if self._position == Positions.Neutral: + return 0. + elif self._position == Positions.Short: + current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) + last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + return (last_trade_price - current_price)/last_trade_price + elif self._position == Positions.Long: + current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) + last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + return (current_price - last_trade_price)/last_trade_price + else: + return 0. + + + def is_tradesignal(self, action): + # trade signal + """ + not trade signal is : + Action: Neutral, position: Neutral -> Nothing + Action: Long, position: Long -> Hold Long + Action: Short, position: Short -> Hold Short + """ + return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) + or (action == Actions.Short.value and self._position == Positions.Short) + or (action == Actions.Long.value and self._position == Positions.Long)) + + def is_tradesignal_v2(self, action): + # trade signal + """ + not trade signal is : + Action: Neutral, position: Neutral -> Nothing + Action: Long, position: Long -> Hold Long + Action: Short, position: Short -> Hold Short + """ + return not ((action == Actions_v2.Neutral.value and self._position == Positions.Neutral) or + (action == Actions_v2.Short_buy.value and self._position == Positions.Short) or + (action == Actions_v2.Short_sell.value and self._position == Positions.Short) or + (action == Actions_v2.Short_buy.value and self._position == Positions.Long) or + (action == Actions_v2.Short_sell.value and self._position == Positions.Long) or + + (action == Actions_v2.Long_buy.value and self._position == Positions.Long) or + (action == Actions_v2.Long_sell.value and self._position == Positions.Long) or + (action == Actions_v2.Long_buy.value and self._position == Positions.Short) or + (action == Actions_v2.Long_sell.value and self._position == Positions.Short)) + + + + def _is_trade(self, action: Actions): + return ((action == Actions.Long.value and self._position == Positions.Short) or + (action == Actions.Short.value and self._position == Positions.Long) or + (action == Actions.Neutral.value and self._position == Positions.Long) or + (action == Actions.Neutral.value and self._position == Positions.Short) + ) + + def _is_trade_v2(self, action: Actions_v2): + return ((action == Actions_v2.Long_buy.value and self._position == Positions.Short) or + (action == Actions_v2.Short_buy.value and self._position == Positions.Long) or + (action == Actions_v2.Neutral.value and self._position == Positions.Long) or + (action == Actions_v2.Neutral.value and self._position == Positions.Short) or + + (action == Actions_v2.Neutral.Short_sell and self._position == Positions.Long) or + (action == Actions_v2.Neutral.Long_sell and 
self._position == Positions.Short) + ) + + + def is_hold(self, action): + return ((action == Actions.Short.value and self._position == Positions.Short) + or (action == Actions.Long.value and self._position == Positions.Long)) + + def is_hold_v2(self, action): + return ((action == Actions_v2.Short_buy.value and self._position == Positions.Short) + or (action == Actions_v2.Long_buy.value and self._position == Positions.Long)) + + + def add_buy_fee(self, price): + return price * (1 + self.fee) + + def add_sell_fee(self, price): + return price / (1 + self.fee) + def _update_history(self, info): if not self.history: self.history = {key: [] for key in info.keys()} @@ -114,7 +310,9 @@ class GymAnytrading(gym.Env): for key, value in info.items(): self.history[key].append(value) + def render(self, mode='human'): + def _plot_position(position, tick): color = None if position == Positions.Short: @@ -122,7 +320,7 @@ class GymAnytrading(gym.Env): elif position == Positions.Long: color = 'green' if color: - plt.scatter(tick, self.prices[tick], color=color) + plt.scatter(tick, self.prices.loc[tick].open, color=color) if self._first_rendering: self._first_rendering = False @@ -131,100 +329,319 @@ class GymAnytrading(gym.Env): start_position = self._position_history[self._start_tick] _plot_position(start_position, self._start_tick) + plt.cla() + plt.plot(self.prices) _plot_position(self._position, self._current_tick) - plt.suptitle( - "Total Reward: %.6f" % self._total_reward + ' ~ ' + - "Total Profit: %.6f" % self._total_profit - ) - + plt.suptitle("Total Reward: %.6f" % self.total_reward + ' ~ ' + "Total Profit: %.6f" % self._total_profit) plt.pause(0.01) - def render_all(self, mode='human'): + + def render_all(self): + plt.figure() window_ticks = np.arange(len(self._position_history)) - plt.plot(self.prices) + plt.plot(self.prices['open'], alpha=0.5) short_ticks = [] long_ticks = [] + neutral_ticks = [] for i, tick in enumerate(window_ticks): if self._position_history[i] == Positions.Short: - short_ticks.append(tick) + short_ticks.append(tick - 1) elif self._position_history[i] == Positions.Long: - long_ticks.append(tick) + long_ticks.append(tick - 1) + elif self._position_history[i] == Positions.Neutral: + neutral_ticks.append(tick - 1) - plt.plot(short_ticks, self.prices[short_ticks], 'ro') - plt.plot(long_ticks, self.prices[long_ticks], 'go') + plt.plot(neutral_ticks, self.prices.loc[neutral_ticks].open, + 'o', color='grey', ms=3, alpha=0.1) + plt.plot(short_ticks, self.prices.loc[short_ticks].open, + 'o', color='r', ms=3, alpha=0.8) + plt.plot(long_ticks, self.prices.loc[long_ticks].open, + 'o', color='g', ms=3, alpha=0.8) - plt.suptitle( - "Total Reward: %.6f" % self._total_reward + ' ~ ' + - "Total Profit: %.6f" % self._total_profit - ) + plt.suptitle("Generalising") + fig = plt.gcf() + fig.set_size_inches(15, 10) + + + + + def close_trade_report(self): + small_trade = 0 + positive_big_trade = 0 + negative_big_trade = 0 + small_profit = 0.003 + for i in self.close_trade_profit: + if i < small_profit and i > -small_profit: + small_trade+=1 + elif i > small_profit: + positive_big_trade += 1 + elif i < -small_profit: + negative_big_trade += 1 + print(f"small trade={small_trade/len(self.close_trade_profit)}; positive_big_trade={positive_big_trade/len(self.close_trade_profit)}; negative_big_trade={negative_big_trade/len(self.close_trade_profit)}") + + + def report(self): + + # get total trade + long_trade = 0 + short_trade = 0 + neutral_trade = 0 + for trade in self.trade_history: + if trade['type'] == 
'long': + long_trade += 1 + + elif trade['type'] == 'short': + short_trade += 1 + else: + neutral_trade += 1 + + negative_trade = 0 + positive_trade = 0 + for tr in self.close_trade_profit: + if tr < 0.: + negative_trade += 1 + + if tr > 0.: + positive_trade += 1 + + total_trade_lr = negative_trade+positive_trade + + + total_trade = long_trade + short_trade + sharp_ratio = self.sharpe_ratio() + sharp_log = self.get_sharpe_ratio() + + from tabulate import tabulate + + headers = ["Performance", ""] + performanceTable = [["Total Trade", "{0:.2f}".format(total_trade)], + ["Total reward", "{0:.3f}".format(self.total_reward)], + ["Start profit(unit)", "{0:.2f}".format(1.)], + ["End profit(unit)", "{0:.3f}".format(self._total_profit)], + ["Sharp ratio", "{0:.3f}".format(sharp_ratio)], + ["Sharp log", "{0:.3f}".format(sharp_log)], + # ["Sortino ratio", "{0:.2f}".format(0) + '%'], + ["winrate", "{0:.2f}".format(positive_trade*100/total_trade_lr) + '%'] + ] + tabulation = tabulate(performanceTable, headers, tablefmt="fancy_grid", stralign="center") + print(tabulation) + + result = { + "Start": "{0:.2f}".format(1.), + "End": "{0:.2f}".format(self._total_profit), + "Sharp": "{0:.3f}".format(sharp_ratio), + "Winrate": "{0:.2f}".format(positive_trade*100/total_trade_lr) + } + return result def close(self): plt.close() + def get_sharpe_ratio(self): + return mean_over_std(self.get_portfolio_log_returns()) + + def save_rendering(self, filepath): plt.savefig(filepath) + def pause_rendering(self): plt.show() + def _calculate_reward(self, action): - step_reward = 0 + # rw = self.transaction_profit_reward(action) + #rw = self.reward_rr_profit_config(action) + rw = self.reward_rr_profit_config_v2(action) + return rw - trade = False - if ((action == Actions.Buy.value and self._position == Positions.Short) or - (action == Actions.Sell.value and self._position == Positions.Long)): - trade = True - - if trade: - current_price = self.prices[self._current_tick] - last_trade_price = self.prices[self._last_trade_tick] - price_diff = current_price - last_trade_price - - if self._position == Positions.Long: - step_reward += price_diff - - return step_reward def _update_profit(self, action): - trade = False - if ((action == Actions.Buy.value and self._position == Positions.Short) or - (action == Actions.Sell.value and self._position == Positions.Long)): - trade = True - - if trade or self._done: - current_price = self.prices[self._current_tick] - last_trade_price = self.prices[self._last_trade_tick] + #if self._is_trade(action) or self._done: + if self._is_trade_v2(action) or self._done: + pnl = self.get_unrealized_profit() if self._position == Positions.Long: - shares = (self._total_profit * (1 - self.fee)) / last_trade_price - self._total_profit = (shares * (1 - self.fee)) * current_price + self._total_profit = self._total_profit + self._total_profit*pnl + self._profits.append((self._current_tick, self._total_profit)) + self.close_trade_profit.append(pnl) - def max_possible_profit(self): - current_tick = self._start_tick - last_trade_tick = current_tick - 1 - profit = 1. 
+ if self._position == Positions.Short: + self._total_profit = self._total_profit + self._total_profit*pnl + self._profits.append((self._current_tick, self._total_profit)) + self.close_trade_profit.append(pnl) - while current_tick <= self._end_tick: - position = None - if self.prices[current_tick] < self.prices[current_tick - 1]: - while (current_tick <= self._end_tick and - self.prices[current_tick] < self.prices[current_tick - 1]): - current_tick += 1 - position = Positions.Short - else: - while (current_tick <= self._end_tick and - self.prices[current_tick] >= self.prices[current_tick - 1]): - current_tick += 1 - position = Positions.Long - if position == Positions.Long: - current_price = self.prices[current_tick - 1] - last_trade_price = self.prices[last_trade_tick] - shares = profit / last_trade_price - profit = shares * current_price - last_trade_tick = current_tick - 1 - print(profit) + def most_recent_return(self, action): + """ + We support Long, Neutral and Short positions. + Return is generated from rising prices in Long + and falling prices in Short positions. + The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. + """ + # Long positions + if self._position == Positions.Long: + current_price = self.prices.iloc[self._current_tick].open + #if action == Actions.Short.value or action == Actions.Neutral.value: + if action == Actions_v2.Short_buy.value or action == Actions_v2.Neutral.value: + current_price = self.add_sell_fee(current_price) - return profit + previous_price = self.prices.iloc[self._current_tick - 1].open + + if (self._position_history[self._current_tick - 1] == Positions.Short + or self._position_history[self._current_tick - 1] == Positions.Neutral): + previous_price = self.add_buy_fee(previous_price) + + return np.log(current_price) - np.log(previous_price) + + # Short positions + if self._position == Positions.Short: + current_price = self.prices.iloc[self._current_tick].open + #if action == Actions.Long.value or action == Actions.Neutral.value: + if action == Actions_v2.Long_buy.value or action == Actions_v2.Neutral.value: + current_price = self.add_buy_fee(current_price) + + previous_price = self.prices.iloc[self._current_tick - 1].open + if (self._position_history[self._current_tick - 1] == Positions.Long + or self._position_history[self._current_tick - 1] == Positions.Neutral): + previous_price = self.add_sell_fee(previous_price) + + return np.log(previous_price) - np.log(current_price) + + return 0 + + def get_portfolio_log_returns(self): + return self.portfolio_log_returns[1:self._current_tick + 1] + + + def get_trading_log_return(self): + return self.portfolio_log_returns[self._start_tick:] + + def update_portfolio_log_returns(self, action): + self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) + + def current_price(self) -> float: + return self.prices.iloc[self._current_tick].open + + def prev_price(self) -> float: + return self.prices.iloc[self._current_tick-1].open + + + + def sharpe_ratio(self): + if len(self.close_trade_profit) == 0: + return 0. + returns = np.array(self.close_trade_profit) + reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) + return reward + + def get_bnh_log_return(self): + return np.diff(np.log(self.prices['open'][self._start_tick:])) + + + def transaction_profit_reward(self, action): + rw = 0. 
+ + pt = self.prev_price() + pt_1 = self.current_price() + + + if self._position == Positions.Long: + a_t = 1 + elif self._position == Positions.Short: + a_t = -1 + else: + a_t = 0 + + # close long + if (action == Actions.Short.value or action == Actions.Neutral.value) and self._position == Positions.Long: + pt_1 = self.add_sell_fee(self.current_price()) + po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + + rw = a_t*(pt_1 - po)/po + #rw = rw*2 + # close short + elif (action == Actions.Long.value or action == Actions.Neutral.value) and self._position == Positions.Short: + pt_1 = self.add_buy_fee(self.current_price()) + po = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) + rw = a_t*(pt_1 - po)/po + #rw = rw*2 + else: + rw = a_t*(pt_1 - pt)/pt + + return np.clip(rw, 0, 1) + + + + def reward_rr_profit_config_v2(self, action): + rw = 0. + + pt_1 = self.current_price() + + + if len(self.close_trade_profit) > 0: + # long + if self._position == Positions.Long: + pt_1 = self.add_sell_fee(self.current_price()) + po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + + if action == Actions_v2.Short_buy.value: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + rw = 10 * 2 + elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < self.profit_aim * self.rr: + rw = 10 * 1 * 1 + elif self.close_trade_profit[-1] < 0: + rw = 10 * -1 + elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 3 * -1 + + if action == Actions_v2.Long_sell.value: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + rw = 10 * 5 + elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < self.profit_aim * self.rr: + rw = 10 * 1 * 3 + elif self.close_trade_profit[-1] < 0: + rw = 10 * -1 + elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 3 * -1 + + if action == Actions_v2.Neutral.value: + if self.close_trade_profit[-1] > 0: + rw = 2 + elif self.close_trade_profit[-1] < 0: + rw = 2 * -1 + + # short + if self._position == Positions.Short: + pt_1 = self.add_sell_fee(self.current_price()) + po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) + + if action == Actions_v2.Long_buy.value: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + rw = 10 * 2 + elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 1 * 1 + elif self.close_trade_profit[-1] < 0: + rw = 10 * -1 + elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 3 * -1 + + if action == Actions_v2.Short_sell.value: + if self.close_trade_profit[-1] > self.profit_aim * self.rr: + rw = 10 * 5 + elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 1 * 3 + elif self.close_trade_profit[-1] < 0: + rw = 10 * -1 + elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr: + rw = 10 * 3 * -1 + + if action == Actions_v2.Neutral.value: + if self.close_trade_profit[-1] > 0: + rw = 2 + elif self.close_trade_profit[-1] < 0: + rw = 2 * -1 + + return np.clip(rw, 0, 1) diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearning.py b/freqtrade/freqai/prediction_models/ReinforcementLearning.py index dded1ac3b..e208707eb 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearning.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearning.py @@ -1,13 +1,19 @@ import logging -from typing import Any, Tuple, Dict -from 
freqtrade.freqai.prediction_models.RL.RLPrediction_env import GymAnytrading -from freqtrade.freqai.prediction_models.RL.RLPrediction_agent import RLPrediction_agent -from pandas import DataFrame -import pandas as pd -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from typing import Any, Dict, Tuple + import numpy as np import numpy.typing as npt +import pandas as pd +from pandas import DataFrame +from stable_baselines.common.callbacks import CallbackList, CheckpointCallback, EvalCallback + +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.freqai_interface import IFreqaiModel +from freqtrade.freqai.prediction_models.RL.RLPrediction_agent import RLPrediction_agent +#from freqtrade.freqai.prediction_models.RL.RLPrediction_env import GymAnytrading +from freqtrade.freqai.prediction_models.RL.RLPrediction_env import DEnv +from freqtrade.persistence import Trade + logger = logging.getLogger(__name__) @@ -69,29 +75,69 @@ class ReinforcementLearningModel(IFreqaiModel): def fit(self, data_dictionary: Dict[str, Any], pair: str = ''): train_df = data_dictionary["train_features"] + # train_labels = data_dictionary["train_labels"] + test_df = data_dictionary["test_features"] + # test_labels = data_dictionary["test_labels"] + + # sep = '/' + # coin = pair.split(sep, 1)[0] + # price = train_df[f"%-{coin}raw_price_{self.config['timeframe']}"] + # price.reset_index(inplace=True, drop=True) + # price = price.to_frame() + price = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(train_df.index)) - sep = '/' - coin = pair.split(sep, 1)[0] - price = train_df[f"%-{coin}raw_price_{self.config['timeframe']}"] - price.reset_index(inplace=True, drop=True) model_name = 'ppo' - env_instance = GymAnytrading(train_df, price, self.CONV_WIDTH) + #env_instance = GymAnytrading(train_df, price, self.CONV_WIDTH) agent_params = self.freqai_info['model_training_parameters'] - total_timesteps = agent_params.get('total_timesteps', 1000) + reward_params = self.freqai_info['model_reward_parameters'] + env_instance = DEnv(df=train_df, prices=price, window_size=self.CONV_WIDTH, reward_kwargs=reward_params) agent = RLPrediction_agent(env_instance) + # checkpoint_callback = CheckpointCallback(save_freq=1000, save_path='./logs/') + # eval_callback = EvalCallback(test_df, best_model_save_path='./models/', + # log_path='./logs/', eval_freq=10000, + # deterministic=True, render=False) + + # #Create the callback list + # callback = CallbackList([checkpoint_callback, eval_callback]) + model = agent.get_model(model_name, model_kwargs=agent_params) trained_model = agent.train_model(model=model, tb_log_name=model_name, - total_timesteps=total_timesteps) + model_kwargs=agent_params) + #eval_callback=callback) + + print('Training finished!') return trained_model + def get_state_info(self, pair): + open_trades = Trade.get_trades(trade_filter=Trade.is_open.is_(True)) + market_side = 0.5 + current_profit = 0 + for trade in open_trades: + if trade.pair == pair: + current_value = trade.open_trade_value + openrate = trade.open_rate + if 'long' in trade.enter_tag: + market_side = 1 + else: + market_side = 0 + current_profit = current_value / openrate -1 + + total_profit = 0 + closed_trades = Trade.get_trades(trade_filter=[Trade.is_open.is_(False), Trade.pair == pair]) + for trade in closed_trades: + total_profit += trade.close_profit + + return market_side, current_profit, total_profit + + def predict( self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False ) -> 
Tuple[DataFrame, npt.NDArray[np.int_]]: diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningModel.py b/freqtrade/freqai/prediction_models/ReinforcementLearningModel.py deleted file mode 100644 index dded1ac3b..000000000 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningModel.py +++ /dev/null @@ -1,157 +0,0 @@ -import logging -from typing import Any, Tuple, Dict -from freqtrade.freqai.prediction_models.RL.RLPrediction_env import GymAnytrading -from freqtrade.freqai.prediction_models.RL.RLPrediction_agent import RLPrediction_agent -from pandas import DataFrame -import pandas as pd -from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -import numpy as np -import numpy.typing as npt -from freqtrade.freqai.freqai_interface import IFreqaiModel - -logger = logging.getLogger(__name__) - - -class ReinforcementLearningModel(IFreqaiModel): - """ - User created Reinforcement Learning Model prediction model. - """ - - def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen - ) -> Any: - """ - Filter the training data and train a model to it. Train makes heavy use of the datakitchen - for storing, saving, loading, and analyzing the data. - :param unfiltered_dataframe: Full dataframe for the current training period - :param metadata: pair metadata from strategy. - :returns: - :model: Trained model which can be used to inference (self.predict) - """ - - logger.info("--------------------Starting training " f"{pair} --------------------") - - # filter the features requested by user in the configuration file and elegantly handle NaNs - features_filtered, labels_filtered = dk.filter_features( - unfiltered_dataframe, - dk.training_features_list, - dk.label_list, - training_filter=True, - ) - - data_dictionary: Dict[str, Any] = dk.make_train_test_datasets( - features_filtered, labels_filtered) - dk.fit_labels() # useless for now, but just satiating append methods - - # normalize all data based on train_dataset only - data_dictionary = dk.normalize_data(data_dictionary) - - # optional additional data cleaning/analysis - self.data_cleaning_train(dk) - - logger.info( - f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features" - ) - logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') - - model = self.fit(data_dictionary, pair) - - if pair not in self.dd.historic_predictions: - self.set_initial_historic_predictions( - data_dictionary['train_features'], model, dk, pair) - - self.dd.save_historic_predictions_to_disk() - - logger.info(f"--------------------done training {pair}--------------------") - - return model - - def fit(self, data_dictionary: Dict[str, Any], pair: str = ''): - - train_df = data_dictionary["train_features"] - - sep = '/' - coin = pair.split(sep, 1)[0] - price = train_df[f"%-{coin}raw_price_{self.config['timeframe']}"] - price.reset_index(inplace=True, drop=True) - - model_name = 'ppo' - - env_instance = GymAnytrading(train_df, price, self.CONV_WIDTH) - - agent_params = self.freqai_info['model_training_parameters'] - total_timesteps = agent_params.get('total_timesteps', 1000) - - agent = RLPrediction_agent(env_instance) - - model = agent.get_model(model_name, model_kwargs=agent_params) - trained_model = agent.train_model(model=model, - tb_log_name=model_name, - total_timesteps=total_timesteps) - print('Training finished!') - - return trained_model - - def predict( - self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False - ) -> Tuple[DataFrame, 
npt.NDArray[np.int_]]: - """ - Filter the prediction features data and predict with it. - :param: unfiltered_dataframe: Full dataframe for the current backtest period. - :return: - :pred_df: dataframe containing the predictions - :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove - data (NaNs) or felt uncertain about data (PCA and DI index) - """ - - dk.find_features(unfiltered_dataframe) - filtered_dataframe, _ = dk.filter_features( - unfiltered_dataframe, dk.training_features_list, training_filter=False - ) - filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) - dk.data_dictionary["prediction_features"] = filtered_dataframe - - # optional additional data cleaning/analysis - self.data_cleaning_predict(dk, filtered_dataframe) - - pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model) - pred_df.fillna(0, inplace=True) - - return (pred_df, dk.do_predict) - - def rl_model_predict(self, dataframe: DataFrame, - dk: FreqaiDataKitchen, model: Any) -> DataFrame: - - output = pd.DataFrame(np.full((len(dataframe), 1), 2), columns=dk.label_list) - - def _predict(window): - observations = dataframe.iloc[window.index] - res, _ = model.predict(observations, deterministic=True) - return res - - output = output.rolling(window=self.CONV_WIDTH).apply(_predict) - - return output - - def set_initial_historic_predictions( - self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str - ) -> None: - - pred_df = self.rl_model_predict(df, dk, model) - pred_df.fillna(0, inplace=True) - self.dd.historic_predictions[pair] = pred_df - hist_preds_df = self.dd.historic_predictions[pair] - - for label in hist_preds_df.columns: - if hist_preds_df[label].dtype == object: - continue - hist_preds_df[f'{label}_mean'] = 0 - hist_preds_df[f'{label}_std'] = 0 - - hist_preds_df['do_predict'] = 0 - - if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0: - hist_preds_df['DI_values'] = 0 - - for return_str in dk.data['extra_returns_per_train']: - hist_preds_df[return_str] = 0
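
Note (not part of the patch): the core of this commit is the tiered exit reward in DEnv.reward_rr_profit_config_v2, which grades a closing action by how the last closed trade's profit compares to a target derived from profit_aim * rr. The standalone sketch below only illustrates that idea; the function name tiered_exit_reward, the tier weights, the scaling, and the clip range are illustrative assumptions rather than the committed values (the patch itself uses hard-coded multiples of 10 and clips to [0, 1], which zeroes out the negative tiers).

    # Standalone sketch of a risk/reward-tiered exit reward (illustrative only).
    # Assumptions: tier weights, scaling by 50, and the symmetric clip range are
    # choices made for this example, not values taken from the patch.
    import numpy as np

    def tiered_exit_reward(last_trade_profit: float, profit_aim: float, rr: float) -> float:
        """Reward an exit action based on the most recently closed trade's profit."""
        target = profit_aim * rr
        if last_trade_profit > target:        # hit the profit target
            reward = 50.0
        elif last_trade_profit > 0:           # closed green, but below target
            reward = 10.0
        elif last_trade_profit < -target:     # loss beyond the target distance (check before the generic loss case)
            reward = -30.0
        else:                                 # small loss
            reward = -10.0
        # A symmetric clip keeps the penalty signal; clipping to [0, 1] would discard it.
        return float(np.clip(reward / 50.0, -1.0, 1.0))

    # Example: profit_aim=0.02, rr=2 -> target 4%.
    print(tiered_exit_reward(0.05, 0.02, 2))   # 1.0
    print(tiered_exit_reward(0.01, 0.02, 2))   # 0.2
    print(tiered_exit_reward(-0.05, 0.02, 2))  # -0.6

In this sketch the loss-beyond-target tier is tested before the generic loss tier so that it is actually reachable; the design intent, as in the patch, is that exits past the profit_aim * rr target are rewarded most strongly and large losses are penalized most strongly.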