refactor environment inheritance tree to accommodate flexible action types/counts. fix bug in train profit handling

This commit is contained in:
robcaulk 2022-08-28 19:21:57 +02:00
parent 8c313b431d
commit 7766350c15
8 changed files with 339 additions and 440 deletions
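The refactor moves the episode, fee, and profit bookkeeping that was duplicated across Base4ActionRLEnv and Base5ActionRLEnv into a single BaseEnvironment; the concrete environments now only declare their Actions enum, override set_action_space(), and implement step(). A minimal sketch of the resulting layout (simplified from the files below; enum values other than Short_enter = 3 are assumed, and the real base class marks set_action_space() with @abstractmethod):

from enum import Enum

import gym
from gym import spaces


class BaseEnvironment(gym.Env):
    """Action-count-agnostic base: episode state, fees and profit logic live here."""

    def set_action_space(self):
        # each subclass declares its own action count
        raise NotImplementedError


class Actions(Enum):
    Neutral = 0
    Exit = 1
    Long_enter = 2
    Short_enter = 3


class Base4ActionRLEnv(BaseEnvironment):
    def set_action_space(self):
        self.action_space = spaces.Discrete(len(Actions))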

View File

@@ -1,14 +1,11 @@
import logging import logging
from enum import Enum from enum import Enum
from typing import Optional
import gym
import numpy as np
from gym import spaces from gym import spaces
from gym.utils import seeding
from pandas import DataFrame from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions
import pandas as pd
from abc import abstractmethod
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -19,95 +16,13 @@ class Actions(Enum):
Short_enter = 3 Short_enter = 3
class Base4ActionRLEnv(BaseEnvironment):
class Positions(Enum):
Short = 0
Long = 1
Neutral = 0.5
def opposite(self):
return Positions.Short if self == Positions.Long else Positions.Long
def mean_over_std(x):
std = np.std(x, ddof=1)
mean = np.mean(x)
return mean / std if std > 0 else 0
class Base4ActionRLEnv(gym.Env):
""" """
Base class for a 5 action environment Base class for a 4 action environment
""" """
metadata = {'render.modes': ['human']}
def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), def set_action_space(self):
reward_kwargs: dict = {}, window_size=10, starting_point=True,
id: str = 'baseenv-1', seed: int = 1, config: dict = {}):
self.rl_config = config['freqai']['rl_config']
self.id = id
self.seed(seed)
self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,
reward_kwargs: dict, starting_point=True):
self.df = df
self.signal_features = self.df
self.prices = prices
self.window_size = window_size
self.starting_point = starting_point
self.rr = reward_kwargs["rr"]
self.profit_aim = reward_kwargs["profit_aim"]
self.fee = 0.0015
# # spaces
self.shape = (window_size, self.signal_features.shape[1] + 3)
self.action_space = spaces.Discrete(len(Actions)) self.action_space = spaces.Discrete(len(Actions))
self.observation_space = spaces.Box(
low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)
# episode
self._start_tick: int = self.window_size
self._end_tick: int = len(self.prices) - 1
self._done: bool = False
self._current_tick: int = self._start_tick
self._last_trade_tick: Optional[int] = None
self._position = Positions.Neutral
self._position_history: list = [None]
self.total_reward: float = 0
self._total_profit: float = 1
self.history: dict = {}
self.trade_history: list = []
def seed(self, seed: int = 1):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def reset(self):
self._done = False
if self.starting_point is True:
self._position_history = (self._start_tick * [None]) + [self._position]
else:
self._position_history = (self.window_size * [None]) + [self._position]
self._current_tick = self._start_tick
self._last_trade_tick = None
self._position = Positions.Neutral
self.total_reward = 0.
self._total_profit = 1. # unit
self.history = {}
self.trade_history = []
self.portfolio_log_returns = np.zeros(len(self.prices))
self._profits = [(self._start_tick, 1)]
self.close_trade_profit = []
return self._get_observation()
def step(self, action: int): def step(self, action: int):
self._done = False self._done = False
@@ -181,43 +96,6 @@ class Base4ActionRLEnv(gym.Env):
return observation, step_reward, self._done, info return observation, step_reward, self._done, info
def _get_observation(self):
features_window = self.signal_features[(
self._current_tick - self.window_size):self._current_tick]
features_and_state = DataFrame(np.zeros((len(features_window), 3)),
columns=['current_profit_pct', 'position', 'trade_duration'],
index=features_window.index)
features_and_state['current_profit_pct'] = self.get_unrealized_profit()
features_and_state['position'] = self._position.value
features_and_state['trade_duration'] = self.get_trade_duration()
features_and_state = pd.concat([features_window, features_and_state], axis=1)
return features_and_state
def get_trade_duration(self):
if self._last_trade_tick is None:
return 0
else:
return self._current_tick - self._last_trade_tick
def get_unrealized_profit(self):
if self._last_trade_tick is None:
return 0.
if self._position == Positions.Neutral:
return 0.
elif self._position == Positions.Short:
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
return (last_trade_price - current_price) / last_trade_price
elif self._position == Positions.Long:
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
return (current_price - last_trade_price) / last_trade_price
else:
return 0.
def is_tradesignal(self, action: int): def is_tradesignal(self, action: int):
# trade signal # trade signal
""" """
@@ -228,7 +106,7 @@ class Base4ActionRLEnv(gym.Env):
(action == Actions.Neutral.value and self._position == Positions.Short) or (action == Actions.Neutral.value and self._position == Positions.Short) or
(action == Actions.Neutral.value and self._position == Positions.Long) or (action == Actions.Neutral.value and self._position == Positions.Long) or
(action == Actions.Short_enter.value and self._position == Positions.Short) or (action == Actions.Short_enter.value and self._position == Positions.Short) or
(action == Actions.Short_enter.value and self._position == Positions.Long) or (action == Actions.Short_enter.value and self._position == Positions.Long) or
(action == Actions.Exit.value and self._position == Positions.Neutral) or (action == Actions.Exit.value and self._position == Positions.Neutral) or
(action == Actions.Long_enter.value and self._position == Positions.Long) or (action == Actions.Long_enter.value and self._position == Positions.Long) or
(action == Actions.Long_enter.value and self._position == Positions.Short)) (action == Actions.Long_enter.value and self._position == Positions.Short))
@@ -240,7 +118,7 @@ class Base4ActionRLEnv(gym.Env):
e.g.: agent wants a Actions.Long_exit while it is in a Positions.short e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
""" """
# Agent should only try to exit if it is in position # Agent should only try to exit if it is in position
if action in (Actions.Exit.value): if action == Actions.Exit.value:
if self._position not in (Positions.Short, Positions.Long): if self._position not in (Positions.Short, Positions.Long):
return False return False
@@ -250,97 +128,3 @@ class Base4ActionRLEnv(gym.Env):
return False return False
return True return True
def _is_trade(self, action: Actions):
return ((action == Actions.Long_enter.value and self._position == Positions.Neutral) or
(action == Actions.Short_enter.value and self._position == Positions.Neutral))
def is_hold(self, action):
return ((action == Actions.Short_enter.value and self._position == Positions.Short) or
(action == Actions.Long_enter.value and self._position == Positions.Long) or
(action == Actions.Neutral.value and self._position == Positions.Long) or
(action == Actions.Neutral.value and self._position == Positions.Short) or
(action == Actions.Neutral.value and self._position == Positions.Neutral))
def add_entry_fee(self, price):
return price * (1 + self.fee)
def add_exit_fee(self, price):
return price / (1 + self.fee)
def _update_history(self, info):
if not self.history:
self.history = {key: [] for key in info.keys()}
for key, value in info.items():
self.history[key].append(value)
def get_sharpe_ratio(self):
return mean_over_std(self.get_portfolio_log_returns())
@abstractmethod
def calculate_reward(self, action):
"""
Reward is created by BaseReinforcementLearningModel and can
be inherited/edited by the user made ReinforcementLearner file.
"""
return 0.
def _update_profit(self, action):
if self._is_trade(action) or self._done:
pnl = self.get_unrealized_profit()
if self._position in (Positions.Long, Positions.Short):
self._total_profit *= (1 + pnl)
self._profits.append((self._current_tick, self._total_profit))
self.close_trade_profit.append(pnl)
def most_recent_return(self, action: int):
"""
Calculate the tick to tick return if in a trade.
Return is generated from rising prices in Long
and falling prices in Short positions.
The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
"""
# Long positions
if self._position == Positions.Long:
current_price = self.prices.iloc[self._current_tick].open
previous_price = self.prices.iloc[self._current_tick - 1].open
if (self._position_history[self._current_tick - 1] == Positions.Short
or self._position_history[self._current_tick - 1] == Positions.Neutral):
previous_price = self.add_entry_fee(previous_price)
return np.log(current_price) - np.log(previous_price)
# Short positions
if self._position == Positions.Short:
current_price = self.prices.iloc[self._current_tick].open
previous_price = self.prices.iloc[self._current_tick - 1].open
if (self._position_history[self._current_tick - 1] == Positions.Long
or self._position_history[self._current_tick - 1] == Positions.Neutral):
previous_price = self.add_exit_fee(previous_price)
return np.log(previous_price) - np.log(current_price)
return 0
def get_portfolio_log_returns(self):
return self.portfolio_log_returns[1:self._current_tick + 1]
def update_portfolio_log_returns(self, action):
self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
def current_price(self) -> float:
return self.prices.iloc[self._current_tick].open
def prev_price(self) -> float:
return self.prices.iloc[self._current_tick - 1].open
def sharpe_ratio(self):
if len(self.close_trade_profit) == 0:
return 0.
returns = np.array(self.close_trade_profit)
reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9)
return reward
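Note on the _is_valid change above: (Actions.Exit.value) is a parenthesised int rather than a tuple, so the old membership test raises TypeError the moment it runs. A small self-contained illustration (enum values other than Short_enter = 3 are assumed from the rest of the file):

from enum import Enum


class Actions(Enum):
    Neutral = 0
    Exit = 1
    Long_enter = 2
    Short_enter = 3


action = Actions.Exit.value

# Old check: `action in (Actions.Exit.value)` fails with
# "TypeError: argument of type 'int' is not iterable" because the right-hand
# side is the int 1, not a tuple; a tuple would need a trailing comma,
# i.e. (Actions.Exit.value,). The commit switches to plain equality instead:
assert action == Actions.Exit.value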

View File

@@ -1,14 +1,14 @@
import logging import logging
from enum import Enum from enum import Enum
from typing import Optional
import gym
import numpy as np import numpy as np
from gym import spaces
from gym.utils import seeding
from pandas import DataFrame
import pandas as pd import pandas as pd
from abc import abstractmethod from gym import spaces
from pandas import DataFrame
from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -20,70 +20,19 @@ class Actions(Enum):
Short_exit = 4 Short_exit = 4
class Positions(Enum):
Short = 0
Long = 1
Neutral = 0.5
def opposite(self):
return Positions.Short if self == Positions.Long else Positions.Long
def mean_over_std(x): def mean_over_std(x):
std = np.std(x, ddof=1) std = np.std(x, ddof=1)
mean = np.mean(x) mean = np.mean(x)
return mean / std if std > 0 else 0 return mean / std if std > 0 else 0
class Base5ActionRLEnv(gym.Env): class Base5ActionRLEnv(BaseEnvironment):
""" """
Base class for a 5 action environment Base class for a 5 action environment
""" """
metadata = {'render.modes': ['human']}
def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), def set_action_space(self):
reward_kwargs: dict = {}, window_size=10, starting_point=True,
id: str = 'baseenv-1', seed: int = 1, config: dict = {}):
self.rl_config = config['freqai']['rl_config']
self.id = id
self.seed(seed)
self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,
reward_kwargs: dict, starting_point=True):
self.df = df
self.signal_features = self.df
self.prices = prices
self.window_size = window_size
self.starting_point = starting_point
self.rr = reward_kwargs["rr"]
self.profit_aim = reward_kwargs["profit_aim"]
self.fee = 0.0015
# # spaces
self.shape = (window_size, self.signal_features.shape[1] + 3)
self.action_space = spaces.Discrete(len(Actions)) self.action_space = spaces.Discrete(len(Actions))
self.observation_space = spaces.Box(
low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)
# episode
self._start_tick: int = self.window_size
self._end_tick: int = len(self.prices) - 1
self._done: bool = False
self._current_tick: int = self._start_tick
self._last_trade_tick: Optional[int] = None
self._position = Positions.Neutral
self._position_history: list = [None]
self.total_reward: float = 0
self._total_profit: float = 1
self.history: dict = {}
self.trade_history: list = []
def seed(self, seed: int = 1):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def reset(self): def reset(self):
@@ -106,6 +55,7 @@ class Base5ActionRLEnv(gym.Env):
self._profits = [(self._start_tick, 1)] self._profits = [(self._start_tick, 1)]
self.close_trade_profit = [] self.close_trade_profit = []
self._total_unrealized_profit = 1
return self._get_observation() return self._get_observation()
@@ -118,7 +68,7 @@ class Base5ActionRLEnv(gym.Env):
self.update_portfolio_log_returns(action) self.update_portfolio_log_returns(action)
self._update_profit(action) self._update_unrealized_total_profit()
step_reward = self.calculate_reward(action) step_reward = self.calculate_reward(action)
self.total_reward += step_reward self.total_reward += step_reward
@@ -148,10 +98,12 @@ class Base5ActionRLEnv(gym.Env):
trade_type = "short" trade_type = "short"
self._last_trade_tick = self._current_tick self._last_trade_tick = self._current_tick
elif action == Actions.Long_exit.value: elif action == Actions.Long_exit.value:
self._update_total_profit()
self._position = Positions.Neutral self._position = Positions.Neutral
trade_type = "neutral" trade_type = "neutral"
self._last_trade_tick = None self._last_trade_tick = None
elif action == Actions.Short_exit.value: elif action == Actions.Short_exit.value:
self._update_total_profit()
self._position = Positions.Neutral self._position = Positions.Neutral
trade_type = "neutral" trade_type = "neutral"
self._last_trade_tick = None self._last_trade_tick = None
@@ -163,7 +115,8 @@ class Base5ActionRLEnv(gym.Env):
{'price': self.current_price(), 'index': self._current_tick, {'price': self.current_price(), 'index': self._current_tick,
'type': trade_type}) 'type': trade_type})
if self._total_profit < 1 - self.rl_config.get('max_training_drawdown_pct', 0.8): if (self._total_profit < self.max_drawdown or
self._total_unrealized_profit < self.max_drawdown):
self._done = True self._done = True
self._position_history.append(self._position) self._position_history.append(self._position)
@@ -200,24 +153,6 @@ class Base5ActionRLEnv(gym.Env):
else: else:
return self._current_tick - self._last_trade_tick return self._current_tick - self._last_trade_tick
def get_unrealized_profit(self):
if self._last_trade_tick is None:
return 0.
if self._position == Positions.Neutral:
return 0.
elif self._position == Positions.Short:
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
return (last_trade_price - current_price) / last_trade_price
elif self._position == Positions.Long:
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
return (current_price - last_trade_price) / last_trade_price
else:
return 0.
def is_tradesignal(self, action: int): def is_tradesignal(self, action: int):
# trade signal # trade signal
""" """
@@ -253,97 +188,3 @@ class Base5ActionRLEnv(gym.Env):
return False return False
return True return True
def _is_trade(self, action: Actions):
return ((action == Actions.Long_enter.value and self._position == Positions.Neutral) or
(action == Actions.Short_enter.value and self._position == Positions.Neutral))
def is_hold(self, action):
return ((action == Actions.Short_enter.value and self._position == Positions.Short) or
(action == Actions.Long_enter.value and self._position == Positions.Long) or
(action == Actions.Neutral.value and self._position == Positions.Long) or
(action == Actions.Neutral.value and self._position == Positions.Short) or
(action == Actions.Neutral.value and self._position == Positions.Neutral))
def add_entry_fee(self, price):
return price * (1 + self.fee)
def add_exit_fee(self, price):
return price / (1 + self.fee)
def _update_history(self, info):
if not self.history:
self.history = {key: [] for key in info.keys()}
for key, value in info.items():
self.history[key].append(value)
def get_sharpe_ratio(self):
return mean_over_std(self.get_portfolio_log_returns())
@abstractmethod
def calculate_reward(self, action):
"""
Reward is created by BaseReinforcementLearningModel and can
be inherited/edited by the user made ReinforcementLearner file.
"""
return 0.
def _update_profit(self, action):
if self._is_trade(action) or self._done:
pnl = self.get_unrealized_profit()
if self._position in (Positions.Long, Positions.Short):
self._total_profit *= (1 + pnl)
self._profits.append((self._current_tick, self._total_profit))
self.close_trade_profit.append(pnl)
def most_recent_return(self, action: int):
"""
Calculate the tick to tick return if in a trade.
Return is generated from rising prices in Long
and falling prices in Short positions.
The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
"""
# Long positions
if self._position == Positions.Long:
current_price = self.prices.iloc[self._current_tick].open
previous_price = self.prices.iloc[self._current_tick - 1].open
if (self._position_history[self._current_tick - 1] == Positions.Short
or self._position_history[self._current_tick - 1] == Positions.Neutral):
previous_price = self.add_entry_fee(previous_price)
return np.log(current_price) - np.log(previous_price)
# Short positions
if self._position == Positions.Short:
current_price = self.prices.iloc[self._current_tick].open
previous_price = self.prices.iloc[self._current_tick - 1].open
if (self._position_history[self._current_tick - 1] == Positions.Long
or self._position_history[self._current_tick - 1] == Positions.Neutral):
previous_price = self.add_exit_fee(previous_price)
return np.log(previous_price) - np.log(current_price)
return 0
def get_portfolio_log_returns(self):
return self.portfolio_log_returns[1:self._current_tick + 1]
def update_portfolio_log_returns(self, action):
self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
def current_price(self) -> float:
return self.prices.iloc[self._current_tick].open
def prev_price(self) -> float:
return self.prices.iloc[self._current_tick - 1].open
def sharpe_ratio(self):
if len(self.close_trade_profit) == 0:
return 0.
returns = np.array(self.close_trade_profit)
reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9)
return reward
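The train-profit fix shows up in the step() hunks above: realized profit is now booked only when a position is actually closed (Long_exit/Short_exit), an unrealized running total is refreshed every step, and both totals are checked against the drawdown limit. A condensed sketch of that flow follows; step_profit_handling is a hypothetical helper that just restates the relevant diff lines, not a function added by the commit:

def step_profit_handling(env, action) -> None:
    # refreshed every step so an open losing trade can end the episode early
    env._update_unrealized_total_profit()

    # realized profit is only updated when the trade is closed
    if action in (Actions.Long_exit.value, Actions.Short_exit.value):
        env._update_total_profit()
        env._position = Positions.Neutral
        env._last_trade_tick = None

    # env.max_drawdown = 1 - rl_config.get('max_training_drawdown_pct', 0.8)
    if (env._total_profit < env.max_drawdown
            or env._total_unrealized_profit < env.max_drawdown):
        env._done = True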

View File

@@ -0,0 +1,270 @@
import logging
from abc import abstractmethod
from enum import Enum
from typing import Optional
import gym
import numpy as np
import pandas as pd
from gym import spaces
from gym.utils import seeding
from pandas import DataFrame
logger = logging.getLogger(__name__)
class Positions(Enum):
Short = 0
Long = 1
Neutral = 0.5
def opposite(self):
return Positions.Short if self == Positions.Long else Positions.Long
class BaseEnvironment(gym.Env):
"""
Base class for environments. This class is agnostic to action count.
Inherited classes customize this to include varying action counts/types;
see RL/Base5ActionRLEnv.py and RL/Base4ActionRLEnv.py.
"""
def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
reward_kwargs: dict = {}, window_size=10, starting_point=True,
id: str = 'baseenv-1', seed: int = 1, config: dict = {}):
self.rl_config = config['freqai']['rl_config']
self.id = id
self.seed(seed)
self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
self.max_drawdown = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8)
self.compound_trades = config['stake_amount'] == 'unlimited'
def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,
reward_kwargs: dict, starting_point=True):
self.df = df
self.signal_features = self.df
self.prices = prices
self.window_size = window_size
self.starting_point = starting_point
self.rr = reward_kwargs["rr"]
self.profit_aim = reward_kwargs["profit_aim"]
self.fee = 0.0015
# # spaces
self.shape = (window_size, self.signal_features.shape[1] + 3)
self.set_action_space()
self.observation_space = spaces.Box(
low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)
# episode
self._start_tick: int = self.window_size
self._end_tick: int = len(self.prices) - 1
self._done: bool = False
self._current_tick: int = self._start_tick
self._last_trade_tick: Optional[int] = None
self._position = Positions.Neutral
self._position_history: list = [None]
self.total_reward: float = 0
self._total_profit: float = 1
self._total_unrealized_profit: float = 1
self.history: dict = {}
self.trade_history: list = []
@abstractmethod
def set_action_space(self):
"""
Unique to the environment action count. Must be inherited.
"""
def seed(self, seed: int = 1):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def reset(self):
self._done = False
if self.starting_point is True:
self._position_history = (self._start_tick * [None]) + [self._position]
else:
self._position_history = (self.window_size * [None]) + [self._position]
self._current_tick = self._start_tick
self._last_trade_tick = None
self._position = Positions.Neutral
self.total_reward = 0.
self._total_profit = 1. # unit
self.history = {}
self.trade_history = []
self.portfolio_log_returns = np.zeros(len(self.prices))
self._profits = [(self._start_tick, 1)]
self.close_trade_profit = []
self._total_unrealized_profit = 1
return self._get_observation()
@abstractmethod
def step(self, action: int):
"""
Step depends on action types; this must be inherited.
"""
return
def _get_observation(self):
"""
This may or may not be independent of action types; the user can override
this in their custom "MyRLEnv"
"""
features_window = self.signal_features[(
self._current_tick - self.window_size):self._current_tick]
features_and_state = DataFrame(np.zeros((len(features_window), 3)),
columns=['current_profit_pct', 'position', 'trade_duration'],
index=features_window.index)
features_and_state['current_profit_pct'] = self.get_unrealized_profit()
features_and_state['position'] = self._position.value
features_and_state['trade_duration'] = self.get_trade_duration()
features_and_state = pd.concat([features_window, features_and_state], axis=1)
return features_and_state
def get_trade_duration(self):
if self._last_trade_tick is None:
return 0
else:
return self._current_tick - self._last_trade_tick
def get_unrealized_profit(self):
if self._last_trade_tick is None:
return 0.
if self._position == Positions.Neutral:
return 0.
elif self._position == Positions.Short:
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
return (last_trade_price - current_price) / last_trade_price
elif self._position == Positions.Long:
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
return (current_price - last_trade_price) / last_trade_price
else:
return 0.
@abstractmethod
def is_tradesignal(self, action: int):
# trade signal
"""
Determine if the signal is a trade signal. This is
unique to the actions in the environment, and therefore must be
inherited.
"""
return
def _is_valid(self, action: int):
# trade signal
"""
Determine if the signal is valid. This is
unique to the actions in the environment, and therefore must be
inherited.
"""
return
def add_entry_fee(self, price):
return price * (1 + self.fee)
def add_exit_fee(self, price):
return price / (1 + self.fee)
def _update_history(self, info):
if not self.history:
self.history = {key: [] for key in info.keys()}
for key, value in info.items():
self.history[key].append(value)
@abstractmethod
def calculate_reward(self, action):
"""
Reward is created by BaseReinforcementLearningModel and can
be inherited/edited by the user made ReinforcementLearner file.
"""
return 0.
def _update_unrealized_total_profit(self):
"""
Update the unrealized total profit in case of episode end.
"""
if self._position in (Positions.Long, Positions.Short):
pnl = self.get_unrealized_profit()
if self.compound_trades:
# assumes unit stake and compounding
unrl_profit = self._total_profit * (1 + pnl)
else:
# assumes unit stake and no compounding
unrl_profit = self._total_profit + pnl
self._total_unrealized_profit = unrl_profit
def _update_total_profit(self):
pnl = self.get_unrealized_profit()
if self.compound_trades:
# assumes unit stake and compounding
self._total_profit = self._total_profit * (1 + pnl)
else:
# assumes unit stake and no compounding
self._total_profit += pnl
def most_recent_return(self, action: int):
"""
Calculate the tick to tick return if in a trade.
Return is generated from rising prices in Long
and falling prices in Short positions.
The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
"""
# Long positions
if self._position == Positions.Long:
current_price = self.prices.iloc[self._current_tick].open
previous_price = self.prices.iloc[self._current_tick - 1].open
if (self._position_history[self._current_tick - 1] == Positions.Short
or self._position_history[self._current_tick - 1] == Positions.Neutral):
previous_price = self.add_entry_fee(previous_price)
return np.log(current_price) - np.log(previous_price)
# Short positions
if self._position == Positions.Short:
current_price = self.prices.iloc[self._current_tick].open
previous_price = self.prices.iloc[self._current_tick - 1].open
if (self._position_history[self._current_tick - 1] == Positions.Long
or self._position_history[self._current_tick - 1] == Positions.Neutral):
previous_price = self.add_exit_fee(previous_price)
return np.log(previous_price) - np.log(current_price)
return 0
def get_portfolio_log_returns(self):
return self.portfolio_log_returns[1:self._current_tick + 1]
def update_portfolio_log_returns(self, action):
self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
def current_price(self) -> float:
return self.prices.iloc[self._current_tick].open
def prev_price(self) -> float:
return self.prices.iloc[self._current_tick - 1].open
def sharpe_ratio(self):
if len(self.close_trade_profit) == 0:
return 0.
returns = np.array(self.close_trade_profit)
reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9)
return reward
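The new _update_total_profit() / _update_unrealized_total_profit() pair switches between compounded and flat unit-stake accounting based on config['stake_amount'] == 'unlimited'. A small numeric illustration of the two modes (the per-trade returns are made up for the example):

total_profit = 1.0
trade_pnls = [0.02, -0.01, 0.03]       # illustrative per-trade returns

compound_trades = True                 # config['stake_amount'] == 'unlimited'
for pnl in trade_pnls:
    if compound_trades:
        total_profit *= (1 + pnl)      # profits are reinvested
    else:
        total_profit += pnl            # fixed unit stake per trade

# compounding:      1.0 * 1.02 * 0.99 * 1.03 ~= 1.0401
# non-compounding:  1.0 + 0.02 - 0.01 + 0.03  = 1.04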

View File

@@ -1,25 +1,28 @@
import logging import logging
from typing import Any, Dict, Tuple from abc import abstractmethod
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, Tuple
import gym
import numpy as np import numpy as np
import numpy.typing as npt import numpy.typing as npt
import pandas as pd import pandas as pd
import torch as th
import torch.multiprocessing
from pandas import DataFrame from pandas import DataFrame
from abc import abstractmethod from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.utils import set_random_seed
from freqtrade.exceptions import OperationalException from freqtrade.exceptions import OperationalException
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.freqai.RL.Base5ActionRLEnv import Base5ActionRLEnv, Actions, Positions from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv
from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions
from freqtrade.persistence import Trade from freqtrade.persistence import Trade
import torch.multiprocessing
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
import torch as th
from typing import Callable
from datetime import datetime, timezone
from stable_baselines3.common.utils import set_random_seed
import gym
from pathlib import Path
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
torch.multiprocessing.set_sharing_strategy('file_system') torch.multiprocessing.set_sharing_strategy('file_system')
@@ -37,8 +40,8 @@ class BaseReinforcementLearningModel(IFreqaiModel):
super().__init__(config=kwargs['config']) super().__init__(config=kwargs['config'])
th.set_num_threads(self.freqai_info['rl_config'].get('thread_count', 4)) th.set_num_threads(self.freqai_info['rl_config'].get('thread_count', 4))
self.reward_params = self.freqai_info['rl_config']['model_reward_parameters'] self.reward_params = self.freqai_info['rl_config']['model_reward_parameters']
self.train_env: Base5ActionRLEnv = None self.train_env: BaseEnvironment = None
self.eval_env: Base5ActionRLEnv = None self.eval_env: BaseEnvironment = None
self.eval_callback: EvalCallback = None self.eval_callback: EvalCallback = None
self.model_type = self.freqai_info['rl_config']['model_type'] self.model_type = self.freqai_info['rl_config']['model_type']
self.rl_config = self.freqai_info['rl_config'] self.rl_config = self.freqai_info['rl_config']
@@ -194,7 +197,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
def _predict(window): def _predict(window):
market_side, current_profit, trade_duration = self.get_state_info(dk.pair) market_side, current_profit, trade_duration = self.get_state_info(dk.pair)
observations = dataframe.iloc[window.index] observations = dataframe.iloc[window.index]
observations['current_profit'] = current_profit observations['current_profit_pct'] = current_profit
observations['position'] = market_side observations['position'] = market_side
observations['trade_duration'] = trade_duration observations['trade_duration'] = trade_duration
res, _ = model.predict(observations, deterministic=True) res, _ = model.predict(observations, deterministic=True)
@@ -306,7 +309,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
return return
def make_env(MyRLEnv: Base5ActionRLEnv, env_id: str, rank: int, def make_env(MyRLEnv: BaseEnvironment, env_id: str, rank: int,
seed: int, train_df: DataFrame, price: DataFrame, seed: int, train_df: DataFrame, price: DataFrame,
reward_params: Dict[str, int], window_size: int, monitor: bool = False, reward_params: Dict[str, int], window_size: int, monitor: bool = False,
config: Dict[str, Any] = {}) -> Callable: config: Dict[str, Any] = {}) -> Callable:
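The rename from 'current_profit' to 'current_profit_pct' in _predict matters because these columns are appended to the live observation and are expected to line up with the state columns that BaseEnvironment._get_observation() produced during training. A hedged sketch of that alignment (add_state_columns is an illustrative helper, not code from the commit):

import pandas as pd


def add_state_columns(observations: pd.DataFrame, current_profit: float,
                      market_side: float, trade_duration: int) -> pd.DataFrame:
    # Same three columns as _get_observation(); before this commit the first
    # one was written as 'current_profit' and therefore never matched the
    # 'current_profit_pct' feature name seen in training.
    observations['current_profit_pct'] = current_profit
    observations['position'] = market_side
    observations['trade_duration'] = trade_duration
    return observations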

View File

@@ -1,19 +1,20 @@
import logging import logging
import torch as th from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type, Union from typing import Any, Dict, List, Optional, Tuple, Type, Union
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
import gym
import torch as th
from stable_baselines3 import DQN from stable_baselines3 import DQN
from stable_baselines3.common.buffers import ReplayBuffer from stable_baselines3.common.buffers import ReplayBuffer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from pathlib import Path
from stable_baselines3.dqn.policies import (CnnPolicy, DQNPolicy, MlpPolicy,
QNetwork)
from torch import nn
import gym
from stable_baselines3.common.torch_layers import (BaseFeaturesExtractor,
FlattenExtractor)
from stable_baselines3.common.type_aliases import GymEnv, Schedule
from stable_baselines3.common.policies import BasePolicy from stable_baselines3.common.policies import BasePolicy
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor, FlattenExtractor
from stable_baselines3.common.type_aliases import GymEnv, Schedule
from stable_baselines3.dqn.policies import CnnPolicy, DQNPolicy, MlpPolicy, QNetwork
from torch import nn
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -7,7 +7,7 @@ import time
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from pathlib import Path from pathlib import Path
from threading import Lock from threading import Lock
from typing import Any, Dict, Tuple, Optional from typing import Any, Dict, Optional, Tuple
import numpy as np import numpy as np
import pandas as pd import pandas as pd

View File

@@ -1,15 +1,14 @@
import logging import logging
from pathlib import Path
from typing import Any, Dict from typing import Any, Dict
import torch as th
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Positions
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
from pathlib import Path
# from pandas import DataFrame
# from stable_baselines3.common.callbacks import EvalCallback
# from stable_baselines3.common.monitor import Monitor
import numpy as np import numpy as np
import torch as th
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -53,7 +52,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
return model return model
class MyRLEnv(BaseReinforcementLearningModel.MyRLEnv): class MyRLEnv(Base5ActionRLEnv):
""" """
User can override any function in BaseRLEnv and gym.Env. Here the user User can override any function in BaseRLEnv and gym.Env. Here the user
sets a custom reward based on profit and trade duration. sets a custom reward based on profit and trade duration.
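With the new tree, a user environment subclasses the concrete action environment directly instead of BaseReinforcementLearningModel.MyRLEnv. A minimal sketch of that pattern (the reward values and conditions are arbitrary examples, not the reward shipped in ReinforcementLearner):

from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions


class MyRLEnv(Base5ActionRLEnv):
    """Example user environment: only calculate_reward is overridden."""

    def calculate_reward(self, action: int) -> float:
        # penalise actions the environment would reject
        if not self._is_valid(action):
            return -2.0
        # pay out the unrealized profit when a long is closed
        if action == Actions.Long_exit.value and self._position == Positions.Long:
            return 100.0 * self.get_unrealized_profit()
        # small penalty for idling while a position is open
        if action == Actions.Neutral.value and self._position != Positions.Neutral:
            return -1.0
        return 0.0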

View File

@@ -1,15 +1,16 @@
import logging import logging
from pathlib import Path
from typing import Any, Dict # , Tuple from typing import Any, Dict # , Tuple
# import numpy.typing as npt # import numpy.typing as npt
import torch as th import torch as th
from stable_baselines3.common.callbacks import EvalCallback from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env import SubprocVecEnv from stable_baselines3.common.vec_env import SubprocVecEnv
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.RL.BaseReinforcementLearningModel import (BaseReinforcementLearningModel, from freqtrade.freqai.RL.BaseReinforcementLearningModel import (BaseReinforcementLearningModel,
make_env) make_env)
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from pathlib import Path
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -26,7 +27,7 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel):
# model arch # model arch
policy_kwargs = dict(activation_fn=th.nn.ReLU, policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=[256, 256]) net_arch=[256, 256, 128])
if dk.pair not in self.dd.model_dictionary or not self.continual_learning: if dk.pair not in self.dd.model_dictionary or not self.continual_learning:
model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs,
@@ -64,9 +65,9 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel):
test_df = data_dictionary["test_features"] test_df = data_dictionary["test_features"]
env_id = "train_env" env_id = "train_env"
num_cpu = int(self.freqai_info["rl_config"]["thread_count"] / 2) num_cpu = int(self.freqai_info["rl_config"]["thread_count"])
self.train_env = SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1, train_df, prices_train, self.train_env = SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1, train_df, prices_train,
self.reward_params, self.CONV_WIDTH, self.reward_params, self.CONV_WIDTH, monitor=True,
config=self.config) for i config=self.config) for i
in range(num_cpu)]) in range(num_cpu)])
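make_env is called here with monitor=True and one environment per configured thread; its body is not part of this diff, but a typical SB3-style factory consistent with the signature and imports shown above would look roughly like this (a sketch under those assumptions, not the project's implementation):

from typing import Any, Callable, Dict

from pandas import DataFrame
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.utils import set_random_seed


def make_env_sketch(MyRLEnv, env_id: str, rank: int, seed: int,
                    train_df: DataFrame, price: DataFrame,
                    reward_params: Dict[str, int], window_size: int,
                    monitor: bool = False,
                    config: Dict[str, Any] = {}) -> Callable:
    def _init():
        # BaseEnvironment.__init__ accepts df/prices/reward_kwargs/window_size/id/seed/config
        env = MyRLEnv(df=train_df, prices=price, reward_kwargs=reward_params,
                      window_size=window_size, id=env_id, seed=seed + rank,
                      config=config)
        return Monitor(env) if monitor else env

    set_random_seed(seed)
    return _init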