add reward function

This commit is contained in:
MukavaValkku 2022-08-12 20:25:13 +03:00 committed by robcaulk
parent ec813434f5
commit 8eeaab2746
4 changed files with 597 additions and 336 deletions

View File

@ -1,17 +1,15 @@
# common library
import numpy as np
from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

from freqtrade.freqai.prediction_models.RL import config

# from stable_baselines3.common.vec_env import DummyVecEnv
# from meta.env_stock_trading.env_stock_trading import StockTradingEnv

# RL models from stable-baselines
@ -74,8 +72,10 @@ class RLPrediction_agent:
        policy="MlpPolicy",
        policy_kwargs=None,
        model_kwargs=None,
        reward_kwargs=None,
        # total_timesteps=None,
        verbose=1,
        seed=None
    ):
        if model_name not in MODELS:
            raise NotImplementedError("NotImplementedError")
@ -95,68 +95,23 @@ class RLPrediction_agent:
            tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{model_name}",
            verbose=verbose,
            policy_kwargs=policy_kwargs,
            # model_kwargs=model_kwargs,
            # total_timesteps=model_kwargs["total_timesteps"],
            seed=seed
            # **model_kwargs,
        )
        return model

    def train_model(self, model, tb_log_name, model_kwargs):
        model = model.learn(
            total_timesteps=model_kwargs["total_timesteps"],
            tb_log_name=tb_log_name,
            # callback=eval_callback,
            callback=TensorboardCallback(),
        )
        return model
@staticmethod
def DRL_prediction(model, environment):
test_env, test_obs = environment.get_sb_env()
"""make a prediction"""
account_memory = []
actions_memory = []
test_env.reset()
for i in range(len(environment.df.index.unique())):
action, _states = model.predict(test_obs)
# account_memory = test_env.env_method(method_name="save_asset_memory")
# actions_memory = test_env.env_method(method_name="save_action_memory")
test_obs, rewards, dones, info = test_env.step(action)
if i == (len(environment.df.index.unique()) - 2):
account_memory = test_env.env_method(method_name="save_asset_memory")
actions_memory = test_env.env_method(method_name="save_action_memory")
if dones[0]:
print("hit end!")
break
return account_memory[0], actions_memory[0]
@staticmethod
def DRL_prediction_load_from_file(model_name, environment, cwd):
if model_name not in MODELS:
raise NotImplementedError("NotImplementedError")
try:
# load agent
model = MODELS[model_name].load(cwd)
print("Successfully load model", cwd)
except BaseException:
raise ValueError("Fail to load agent!")
# test on the testing env
state = environment.reset()
episode_returns = list() # the cumulative_return / initial_account
episode_total_assets = list()
episode_total_assets.append(environment.initial_total_asset)
done = False
while not done:
action = model.predict(state)[0]
state, reward, done, _ = environment.step(action)
total_asset = (
environment.cash
+ (environment.price_array[environment.time] * environment.stocks).sum()
)
episode_total_assets.append(total_asset)
episode_return = total_asset / environment.initial_total_asset
episode_returns.append(episode_return)
print("episode_return", episode_return)
print("Test Finished!")
return episode_total_assets
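For orientation, here is a minimal usage sketch of the reworked agent wrapper after this commit, mirroring the calls made in fit() further down. The data frames and the parameter values are assumed placeholders, not part of the commit.

# Minimal usage sketch; the data and parameter values below are assumed placeholders.
import numpy as np
import pandas as pd

from freqtrade.freqai.prediction_models.RL.RLPrediction_agent import RLPrediction_agent
from freqtrade.freqai.prediction_models.RL.RLPrediction_env import DEnv

train_df = pd.DataFrame(np.random.rand(100, 5))             # placeholder feature frame
price = pd.DataFrame({"open": 100 + np.random.rand(100)})   # placeholder price frame

agent_params = {"total_timesteps": 10000}                    # assumed model_training_parameters
reward_params = {"rr": 1, "profit_aim": 0.02}                # assumed model_reward_parameters

env = DEnv(df=train_df, prices=price, window_size=10, reward_kwargs=reward_params)
agent = RLPrediction_agent(env)
model = agent.get_model("ppo", model_kwargs=agent_params)
trained_model = agent.train_model(model=model, tb_log_name="ppo", model_kwargs=agent_params)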

View File

@ -1,47 +1,82 @@
import logging
import random
from collections import deque
from enum import Enum
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union

import gym
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from gym import spaces
from gym.utils import seeding
from sklearn.decomposition import PCA, KernelPCA

logger = logging.getLogger(__name__)

# from bokeh.io import output_notebook
# from bokeh.plotting import figure, show
# from bokeh.models import (
#     CustomJS,
#     ColumnDataSource,
#     NumeralTickFormatter,
#     Span,
#     HoverTool,
#     Range1d,
#     DatetimeTickFormatter,
#     Scatter,
#     Label, LabelSet
# )


class Actions(Enum):
    Short = 0
    Long = 1
    Neutral = 2


class Actions_v2(Enum):
    Neutral = 0
    Long_buy = 1
    Long_sell = 2
    Short_buy = 3
    Short_sell = 4


class Positions(Enum):
    Short = 0
    Long = 1
    Neutral = 0.5

    def opposite(self):
        return Positions.Short if self == Positions.Long else Positions.Long


def mean_over_std(x):
    std = np.std(x, ddof=1)
    mean = np.mean(x)
    return mean / std if std > 0 else 0


class DEnv(gym.Env):
    """
    Based on https://github.com/AminHP/gym-anytrading
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True):
        assert df.ndim == 2

        self.seed()
        self.df = df
        self.signal_features = self.df
        self.prices = prices
        self.window_size = window_size
        self.starting_point = starting_point
        self.rr = reward_kwargs["rr"]
        self.profit_aim = reward_kwargs["profit_aim"]
        self.fee = 0.0015

        # spaces
        self.shape = (window_size, self.signal_features.shape[1])
        self.action_space = spaces.Discrete(len(Actions_v2))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)

        # episode
        self._start_tick = self.window_size
@ -49,29 +84,56 @@ class GymAnytrading(gym.Env):
        self._done = None
        self._current_tick = None
        self._last_trade_tick = None
        self._position = Positions.Neutral
        self._position_history = None
        self.total_reward = None
        self._total_profit = None
        self._first_rendering = None
        self.history = None
        self.trade_history = []

        # self.A_t, self.B_t = 0.000639, 0.00001954
        self.r_t_change = 0.
        self.returns_report = []

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        self._done = False

        if self.starting_point is True:
            self._position_history = (self._start_tick * [None]) + [self._position]
        else:
            self._position_history = (self.window_size * [None]) + [self._position]

        self._current_tick = self._start_tick
        self._last_trade_tick = None
        # self._last_trade_tick = self._current_tick - 1
        self._position = Positions.Neutral

        self.total_reward = 0.
        self._total_profit = 1.  # unit
        self._first_rendering = True
        self.history = {}
        self.trade_history = []
        self.portfolio_log_returns = np.zeros(len(self.prices))

        self._profits = [(self._start_tick, 1)]
        self.close_trade_profit = []
        self.r_t_change = 0.
        self.returns_report = []

        return self._get_observation()

    def step(self, action):
        self._done = False
        self._current_tick += 1
@ -79,34 +141,168 @@ class GymAnytrading(gym.Env):
        if self._current_tick == self._end_tick:
            self._done = True

        self.update_portfolio_log_returns(action)

        self._update_profit(action)
        step_reward = self._calculate_reward(action)
        self.total_reward += step_reward

        trade_type = None
        if self.is_tradesignal_v2(action):  # exclude 3 case not trade
            # Update position
            """
            Action: Neutral, position: Long ->  Close Long
            Action: Neutral, position: Short -> Close Short

            Action: Long, position: Neutral -> Open Long
            Action: Long, position: Short -> Close Short and Open Long

            Action: Short, position: Neutral -> Open Short
            Action: Short, position: Long -> Close Long and Open Short
            """
            temp_position = self._position
            if action == Actions_v2.Neutral.value:
                self._position = Positions.Neutral
                trade_type = "neutral"
            elif action == Actions_v2.Long_buy.value:
                self._position = Positions.Long
                trade_type = "long"
            elif action == Actions_v2.Short_buy.value:
                self._position = Positions.Short
                trade_type = "short"
            elif action == Actions_v2.Long_sell.value:
                self._position = Positions.Neutral
                trade_type = "neutral"
            elif action == Actions_v2.Short_sell.value:
                self._position = Positions.Neutral
                trade_type = "neutral"
            else:
                print("case not defined")

            # Update last trade tick
            self._last_trade_tick = self._current_tick

            if trade_type is not None:
                self.trade_history.append(
                    {'price': self.current_price(), 'index': self._current_tick, 'type': trade_type})

        if self._total_profit < 0.2:
            self._done = True

        self._position_history.append(self._position)
        observation = self._get_observation()
        info = dict(
            tick=self._current_tick,
            total_reward=self.total_reward,
            total_profit=self._total_profit,
            position=self._position.value
        )
        self._update_history(info)

        return observation, step_reward, self._done, info

    def processState(self, state):
        return state.to_numpy()

    def convert_mlp_Policy(self, obs_):
        pass

    def _get_observation(self):
        return self.signal_features[(self._current_tick - self.window_size):self._current_tick]
def get_unrealized_profit(self):
if self._last_trade_tick == None:
return 0.
if self._position == Positions.Neutral:
return 0.
elif self._position == Positions.Short:
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
return (last_trade_price - current_price)/last_trade_price
elif self._position == Positions.Long:
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
return (current_price - last_trade_price)/last_trade_price
else:
return 0.
def is_tradesignal(self, action):
# trade signal
"""
not trade signal is :
Action: Neutral, position: Neutral -> Nothing
Action: Long, position: Long -> Hold Long
Action: Short, position: Short -> Hold Short
"""
return not ((action == Actions.Neutral.value and self._position == Positions.Neutral)
or (action == Actions.Short.value and self._position == Positions.Short)
or (action == Actions.Long.value and self._position == Positions.Long))
def is_tradesignal_v2(self, action):
# trade signal
"""
not trade signal is :
Action: Neutral, position: Neutral -> Nothing
Action: Long, position: Long -> Hold Long
Action: Short, position: Short -> Hold Short
"""
return not ((action == Actions_v2.Neutral.value and self._position == Positions.Neutral) or
(action == Actions_v2.Short_buy.value and self._position == Positions.Short) or
(action == Actions_v2.Short_sell.value and self._position == Positions.Short) or
(action == Actions_v2.Short_buy.value and self._position == Positions.Long) or
(action == Actions_v2.Short_sell.value and self._position == Positions.Long) or
(action == Actions_v2.Long_buy.value and self._position == Positions.Long) or
(action == Actions_v2.Long_sell.value and self._position == Positions.Long) or
(action == Actions_v2.Long_buy.value and self._position == Positions.Short) or
(action == Actions_v2.Long_sell.value and self._position == Positions.Short))
def _is_trade(self, action: Actions):
return ((action == Actions.Long.value and self._position == Positions.Short) or
(action == Actions.Short.value and self._position == Positions.Long) or
(action == Actions.Neutral.value and self._position == Positions.Long) or
(action == Actions.Neutral.value and self._position == Positions.Short)
)
def _is_trade_v2(self, action: Actions_v2):
return ((action == Actions_v2.Long_buy.value and self._position == Positions.Short) or
(action == Actions_v2.Short_buy.value and self._position == Positions.Long) or
(action == Actions_v2.Neutral.value and self._position == Positions.Long) or
(action == Actions_v2.Neutral.value and self._position == Positions.Short) or
(action == Actions_v2.Short_sell.value and self._position == Positions.Long) or
(action == Actions_v2.Long_sell.value and self._position == Positions.Short)
)
def is_hold(self, action):
return ((action == Actions.Short.value and self._position == Positions.Short)
or (action == Actions.Long.value and self._position == Positions.Long))
def is_hold_v2(self, action):
return ((action == Actions_v2.Short_buy.value and self._position == Positions.Short)
or (action == Actions_v2.Long_buy.value and self._position == Positions.Long))
def add_buy_fee(self, price):
return price * (1 + self.fee)
def add_sell_fee(self, price):
return price / (1 + self.fee)
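A quick worked example of the two fee helpers above together with the long branch of get_unrealized_profit, using the fee of 0.0015 set in __init__ and assumed prices:

# Worked example (assumed prices) of the fee handling above.
fee = 0.0015
entry = 100.0 * (1 + fee)         # add_buy_fee on the open at the last trade tick -> 100.15
exit_ = 102.0 / (1 + fee)         # add_sell_fee on the current open -> ~101.85
pnl = (exit_ - entry) / entry     # long-position formula in get_unrealized_profit
print(round(pnl, 5))              # ~0.01695, i.e. about 1.7% instead of the raw 2%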
    def _update_history(self, info):
        if not self.history:
            self.history = {key: [] for key in info.keys()}

@ -114,7 +310,9 @@ class GymAnytrading(gym.Env):
        for key, value in info.items():
            self.history[key].append(value)

    def render(self, mode='human'):

        def _plot_position(position, tick):
            color = None
            if position == Positions.Short:
@ -122,7 +320,7 @@ class GymAnytrading(gym.Env):
            elif position == Positions.Long:
                color = 'green'
            if color:
                plt.scatter(tick, self.prices.loc[tick].open, color=color)

        if self._first_rendering:
            self._first_rendering = False
@ -131,100 +329,319 @@ class GymAnytrading(gym.Env):
            start_position = self._position_history[self._start_tick]
            _plot_position(start_position, self._start_tick)

        plt.cla()
        plt.plot(self.prices)
        _plot_position(self._position, self._current_tick)

        plt.suptitle("Total Reward: %.6f" % self.total_reward + ' ~ ' +
                     "Total Profit: %.6f" % self._total_profit)
        plt.pause(0.01)

    def render_all(self):
        plt.figure()
        window_ticks = np.arange(len(self._position_history))
        plt.plot(self.prices['open'], alpha=0.5)

        short_ticks = []
        long_ticks = []
        neutral_ticks = []
        for i, tick in enumerate(window_ticks):
            if self._position_history[i] == Positions.Short:
                short_ticks.append(tick - 1)
            elif self._position_history[i] == Positions.Long:
                long_ticks.append(tick - 1)
            elif self._position_history[i] == Positions.Neutral:
                neutral_ticks.append(tick - 1)

        plt.plot(neutral_ticks, self.prices.loc[neutral_ticks].open,
                 'o', color='grey', ms=3, alpha=0.1)
        plt.plot(short_ticks, self.prices.loc[short_ticks].open,
                 'o', color='r', ms=3, alpha=0.8)
        plt.plot(long_ticks, self.prices.loc[long_ticks].open,
                 'o', color='g', ms=3, alpha=0.8)

        plt.suptitle("Generalising")
        fig = plt.gcf()
        fig.set_size_inches(15, 10)
def close_trade_report(self):
small_trade = 0
positive_big_trade = 0
negative_big_trade = 0
small_profit = 0.003
for i in self.close_trade_profit:
if i < small_profit and i > -small_profit:
small_trade+=1
elif i > small_profit:
positive_big_trade += 1
elif i < -small_profit:
negative_big_trade += 1
print(f"small trade={small_trade/len(self.close_trade_profit)}; positive_big_trade={positive_big_trade/len(self.close_trade_profit)}; negative_big_trade={negative_big_trade/len(self.close_trade_profit)}")
def report(self):
# get total trade
long_trade = 0
short_trade = 0
neutral_trade = 0
for trade in self.trade_history:
if trade['type'] == 'long':
long_trade += 1
elif trade['type'] == 'short':
short_trade += 1
else:
neutral_trade += 1
negative_trade = 0
positive_trade = 0
for tr in self.close_trade_profit:
if tr < 0.:
negative_trade += 1
if tr > 0.:
positive_trade += 1
total_trade_lr = negative_trade+positive_trade
total_trade = long_trade + short_trade
sharp_ratio = self.sharpe_ratio()
sharp_log = self.get_sharpe_ratio()
from tabulate import tabulate
headers = ["Performance", ""]
performanceTable = [["Total Trade", "{0:.2f}".format(total_trade)],
["Total reward", "{0:.3f}".format(self.total_reward)],
["Start profit(unit)", "{0:.2f}".format(1.)],
["End profit(unit)", "{0:.3f}".format(self._total_profit)],
["Sharp ratio", "{0:.3f}".format(sharp_ratio)],
["Sharp log", "{0:.3f}".format(sharp_log)],
# ["Sortino ratio", "{0:.2f}".format(0) + '%'],
["winrate", "{0:.2f}".format(positive_trade*100/total_trade_lr) + '%']
]
tabulation = tabulate(performanceTable, headers, tablefmt="fancy_grid", stralign="center")
print(tabulation)
result = {
"Start": "{0:.2f}".format(1.),
"End": "{0:.2f}".format(self._total_profit),
"Sharp": "{0:.3f}".format(sharp_ratio),
"Winrate": "{0:.2f}".format(positive_trade*100/total_trade_lr)
}
return result
    def close(self):
        plt.close()

    def get_sharpe_ratio(self):
        return mean_over_std(self.get_portfolio_log_returns())

    def save_rendering(self, filepath):
        plt.savefig(filepath)

    def pause_rendering(self):
        plt.show()
    def _calculate_reward(self, action):
        # rw = self.transaction_profit_reward(action)
        # rw = self.reward_rr_profit_config(action)
        rw = self.reward_rr_profit_config_v2(action)
        return rw
    def _update_profit(self, action):
        # if self._is_trade(action) or self._done:
        if self._is_trade_v2(action) or self._done:
            pnl = self.get_unrealized_profit()

            if self._position == Positions.Long:
                self._total_profit = self._total_profit + self._total_profit * pnl
                self._profits.append((self._current_tick, self._total_profit))
                self.close_trade_profit.append(pnl)

            if self._position == Positions.Short:
                self._total_profit = self._total_profit + self._total_profit * pnl
                self._profits.append((self._current_tick, self._total_profit))
                self.close_trade_profit.append(pnl)

    def most_recent_return(self, action):
        """
        We support Long, Neutral and Short positions.
        Return is generated from rising prices in Long
        and falling prices in Short positions.
        The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
        """
        # Long positions
        if self._position == Positions.Long:
            current_price = self.prices.iloc[self._current_tick].open
            # if action == Actions.Short.value or action == Actions.Neutral.value:
            if action == Actions_v2.Short_buy.value or action == Actions_v2.Neutral.value:
                current_price = self.add_sell_fee(current_price)

            previous_price = self.prices.iloc[self._current_tick - 1].open
            if (self._position_history[self._current_tick - 1] == Positions.Short
                    or self._position_history[self._current_tick - 1] == Positions.Neutral):
                previous_price = self.add_buy_fee(previous_price)

            return np.log(current_price) - np.log(previous_price)

        # Short positions
        if self._position == Positions.Short:
            current_price = self.prices.iloc[self._current_tick].open
            # if action == Actions.Long.value or action == Actions.Neutral.value:
            if action == Actions_v2.Long_buy.value or action == Actions_v2.Neutral.value:
                current_price = self.add_buy_fee(current_price)

            previous_price = self.prices.iloc[self._current_tick - 1].open
            if (self._position_history[self._current_tick - 1] == Positions.Long
                    or self._position_history[self._current_tick - 1] == Positions.Neutral):
                previous_price = self.add_sell_fee(previous_price)

            return np.log(previous_price) - np.log(current_price)

        return 0
def get_portfolio_log_returns(self):
return self.portfolio_log_returns[1:self._current_tick + 1]
def get_trading_log_return(self):
return self.portfolio_log_returns[self._start_tick:]
def update_portfolio_log_returns(self, action):
self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
def current_price(self) -> float:
return self.prices.iloc[self._current_tick].open
def prev_price(self) -> float:
return self.prices.iloc[self._current_tick-1].open
def sharpe_ratio(self):
if len(self.close_trade_profit) == 0:
return 0.
returns = np.array(self.close_trade_profit)
reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9)
return reward
def get_bnh_log_return(self):
return np.diff(np.log(self.prices['open'][self._start_tick:]))
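The helpers above keep two parallel return series: per-step portfolio log returns (filled by update_portfolio_log_returns via most_recent_return) and per-trade close_trade_profit. A small numeric sketch of the log-return side, with assumed prices and the fee adjustments ignored; get_sharpe_ratio is mean_over_std applied to the accumulated series:

# Numeric sketch (assumed prices) of the log-return bookkeeping used above.
import numpy as np

opens = np.array([100.0, 101.0, 100.5, 102.0])       # assumed open prices
log_returns = np.diff(np.log(opens))                  # per-step log returns, as stored by the env
sharpe_like = np.mean(log_returns) / np.std(log_returns, ddof=1)  # what mean_over_std computes
print(log_returns.round(5), round(sharpe_like, 3))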
def transaction_profit_reward(self, action):
rw = 0.
pt = self.prev_price()
pt_1 = self.current_price()
if self._position == Positions.Long:
a_t = 1
elif self._position == Positions.Short:
a_t = -1
else:
a_t = 0
# close long
if (action == Actions.Short.value or action == Actions.Neutral.value) and self._position == Positions.Long:
pt_1 = self.add_sell_fee(self.current_price())
po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
rw = a_t*(pt_1 - po)/po
#rw = rw*2
# close short
elif (action == Actions.Long.value or action == Actions.Neutral.value) and self._position == Positions.Short:
pt_1 = self.add_buy_fee(self.current_price())
po = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
rw = a_t*(pt_1 - po)/po
#rw = rw*2
else:
rw = a_t*(pt_1 - pt)/pt
return np.clip(rw, 0, 1)
def reward_rr_profit_config_v2(self, action):
rw = 0.
pt_1 = self.current_price()
if len(self.close_trade_profit) > 0:
# long
if self._position == Positions.Long:
pt_1 = self.add_sell_fee(self.current_price())
po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
if action == Actions_v2.Short_buy.value:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
rw = 10 * 2
elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < self.profit_aim * self.rr:
rw = 10 * 1 * 1
elif self.close_trade_profit[-1] < 0:
rw = 10 * -1
elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr:
rw = 10 * 3 * -1
if action == Actions_v2.Long_sell.value:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
rw = 10 * 5
elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < self.profit_aim * self.rr:
rw = 10 * 1 * 3
elif self.close_trade_profit[-1] < 0:
rw = 10 * -1
elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr:
rw = 10 * 3 * -1
if action == Actions_v2.Neutral.value:
if self.close_trade_profit[-1] > 0:
rw = 2
elif self.close_trade_profit[-1] < 0:
rw = 2 * -1
# short
if self._position == Positions.Short:
pt_1 = self.add_sell_fee(self.current_price())
po = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
if action == Actions_v2.Long_buy.value:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
rw = 10 * 2
elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < self.profit_aim * self.rr:
rw = 10 * 1 * 1
elif self.close_trade_profit[-1] < 0:
rw = 10 * -1
elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr:
rw = 10 * 3 * -1
if action == Actions_v2.Short_sell.value:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
rw = 10 * 5
elif self.close_trade_profit[-1] > 0 and self.close_trade_profit[-1] < self.profit_aim * self.rr:
rw = 10 * 1 * 3
elif self.close_trade_profit[-1] < 0:
rw = 10 * -1
elif self.close_trade_profit[-1] < (self.profit_aim * -1) * self.rr:
rw = 10 * 3 * -1
if action == Actions_v2.Neutral.value:
if self.close_trade_profit[-1] > 0:
rw = 2
elif self.close_trade_profit[-1] < 0:
rw = 2 * -1
return np.clip(rw, 0, 1)
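The new reward shaping depends only on reward_kwargs and the most recently closed trade. Below is a sketch of the tier logic for a long position closed with Long_sell, using assumed values for profit_aim and rr; note the committed function additionally passes the result through np.clip(rw, 0, 1) before returning it.

# Sketch of the Long_sell tiers in reward_rr_profit_config_v2 above.
# profit_aim and rr come from reward_kwargs; the values here are assumptions.
profit_aim, rr = 0.02, 1.0
last_pnl = 0.025                       # stands in for self.close_trade_profit[-1]

if last_pnl > profit_aim * rr:
    rw = 10 * 5                        # closed above the profit target
elif 0 < last_pnl < profit_aim * rr:
    rw = 10 * 1 * 3                    # closed in profit, below the target
elif last_pnl < 0:
    rw = 10 * -1                       # closed at a loss
elif last_pnl < (profit_aim * -1) * rr:
    rw = 10 * 3 * -1                   # deep-loss tier (shadowed by the branch above)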

View File

@ -1,13 +1,19 @@
import logging
from typing import Any, Dict, Tuple

import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame
from stable_baselines3.common.callbacks import CallbackList, CheckpointCallback, EvalCallback

from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.freqai.prediction_models.RL.RLPrediction_agent import RLPrediction_agent
# from freqtrade.freqai.prediction_models.RL.RLPrediction_env import GymAnytrading
from freqtrade.freqai.prediction_models.RL.RLPrediction_env import DEnv
from freqtrade.persistence import Trade


logger = logging.getLogger(__name__)
@ -69,29 +75,69 @@ class ReinforcementLearningModel(IFreqaiModel):
    def fit(self, data_dictionary: Dict[str, Any], pair: str = ''):

        train_df = data_dictionary["train_features"]
        # train_labels = data_dictionary["train_labels"]
        test_df = data_dictionary["test_features"]
        # test_labels = data_dictionary["test_labels"]

        # sep = '/'
        # coin = pair.split(sep, 1)[0]
        # price = train_df[f"%-{coin}raw_price_{self.config['timeframe']}"]
        # price.reset_index(inplace=True, drop=True)
        # price = price.to_frame()
        price = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(train_df.index))

        model_name = 'ppo'
        # env_instance = GymAnytrading(train_df, price, self.CONV_WIDTH)

        agent_params = self.freqai_info['model_training_parameters']
        reward_params = self.freqai_info['model_reward_parameters']
        env_instance = DEnv(df=train_df, prices=price, window_size=self.CONV_WIDTH,
                            reward_kwargs=reward_params)

        agent = RLPrediction_agent(env_instance)

        # checkpoint_callback = CheckpointCallback(save_freq=1000, save_path='./logs/')
        # eval_callback = EvalCallback(test_df, best_model_save_path='./models/',
        #                              log_path='./logs/', eval_freq=10000,
        #                              deterministic=True, render=False)
        # # Create the callback list
        # callback = CallbackList([checkpoint_callback, eval_callback])

        model = agent.get_model(model_name, model_kwargs=agent_params)
        trained_model = agent.train_model(model=model,
                                          tb_log_name=model_name,
                                          model_kwargs=agent_params)
        # eval_callback=callback)
        print('Training finished!')

        return trained_model

    def get_state_info(self, pair):
        open_trades = Trade.get_trades(trade_filter=Trade.is_open.is_(True))
        market_side = 0.5
        current_profit = 0
        for trade in open_trades:
            if trade.pair == pair:
                current_value = trade.open_trade_value
                openrate = trade.open_rate
                if 'long' in trade.enter_tag:
                    market_side = 1
                else:
                    market_side = 0
                current_profit = current_value / openrate - 1

        total_profit = 0
        closed_trades = Trade.get_trades(
            trade_filter=[Trade.is_open.is_(False), Trade.pair == pair])
        for trade in closed_trades:
            total_profit += trade.close_profit

        return market_side, current_profit, total_profit

    def predict(
        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
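fit() now reads two blocks out of self.freqai_info: model_training_parameters (forwarded to the agent as model_kwargs) and the new model_reward_parameters (forwarded to DEnv as reward_kwargs). A hedged example of the expected shape; the key names come from the code above, the values are assumptions only.

# Assumed contents of the relevant freqai_info sections consumed by fit() above.
freqai_info = {
    "model_training_parameters": {
        "total_timesteps": 10000,      # read by RLPrediction_agent.train_model
    },
    "model_reward_parameters": {
        "rr": 1,                       # becomes DEnv.rr
        "profit_aim": 0.02,            # becomes DEnv.profit_aim
    },
}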

View File

@ -1,157 +0,0 @@
import logging
from typing import Any, Tuple, Dict
from freqtrade.freqai.prediction_models.RL.RLPrediction_env import GymAnytrading
from freqtrade.freqai.prediction_models.RL.RLPrediction_agent import RLPrediction_agent
from pandas import DataFrame
import pandas as pd
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
import numpy as np
import numpy.typing as npt
from freqtrade.freqai.freqai_interface import IFreqaiModel
logger = logging.getLogger(__name__)
class ReinforcementLearningModel(IFreqaiModel):
"""
User created Reinforcement Learning Model prediction model.
"""
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
:param unfiltered_dataframe: Full dataframe for the current training period
:param metadata: pair metadata from strategy.
:returns:
:model: Trained model which can be used to inference (self.predict)
"""
logger.info("--------------------Starting training " f"{pair} --------------------")
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
data_dictionary: Dict[str, Any] = dk.make_train_test_datasets(
features_filtered, labels_filtered)
dk.fit_labels() # useless for now, but just satiating append methods
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)
# optional additional data cleaning/analysis
self.data_cleaning_train(dk)
logger.info(
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary, pair)
if pair not in self.dd.historic_predictions:
self.set_initial_historic_predictions(
data_dictionary['train_features'], model, dk, pair)
self.dd.save_historic_predictions_to_disk()
logger.info(f"--------------------done training {pair}--------------------")
return model
def fit(self, data_dictionary: Dict[str, Any], pair: str = ''):
train_df = data_dictionary["train_features"]
sep = '/'
coin = pair.split(sep, 1)[0]
price = train_df[f"%-{coin}raw_price_{self.config['timeframe']}"]
price.reset_index(inplace=True, drop=True)
model_name = 'ppo'
env_instance = GymAnytrading(train_df, price, self.CONV_WIDTH)
agent_params = self.freqai_info['model_training_parameters']
total_timesteps = agent_params.get('total_timesteps', 1000)
agent = RLPrediction_agent(env_instance)
model = agent.get_model(model_name, model_kwargs=agent_params)
trained_model = agent.train_model(model=model,
tb_log_name=model_name,
total_timesteps=total_timesteps)
print('Training finished!')
return trained_model
def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
dk.find_features(unfiltered_dataframe)
filtered_dataframe, _ = dk.filter_features(
unfiltered_dataframe, dk.training_features_list, training_filter=False
)
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
dk.data_dictionary["prediction_features"] = filtered_dataframe
# optional additional data cleaning/analysis
self.data_cleaning_predict(dk, filtered_dataframe)
pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model)
pred_df.fillna(0, inplace=True)
return (pred_df, dk.do_predict)
def rl_model_predict(self, dataframe: DataFrame,
dk: FreqaiDataKitchen, model: Any) -> DataFrame:
output = pd.DataFrame(np.full((len(dataframe), 1), 2), columns=dk.label_list)
def _predict(window):
observations = dataframe.iloc[window.index]
res, _ = model.predict(observations, deterministic=True)
return res
output = output.rolling(window=self.CONV_WIDTH).apply(_predict)
return output
def set_initial_historic_predictions(
self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str
) -> None:
pred_df = self.rl_model_predict(df, dk, model)
pred_df.fillna(0, inplace=True)
self.dd.historic_predictions[pair] = pred_df
hist_preds_df = self.dd.historic_predictions[pair]
for label in hist_preds_df.columns:
if hist_preds_df[label].dtype == object:
continue
hist_preds_df[f'{label}_mean'] = 0
hist_preds_df[f'{label}_std'] = 0
hist_preds_df['do_predict'] = 0
if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0:
hist_preds_df['DI_values'] = 0
for return_str in dk.data['extra_returns_per_train']:
hist_preds_df[return_str] = 0