stable/freqtrade/freqai/RL/BaseEnvironment.py

import logging
from abc import abstractmethod
from enum import Enum
from typing import Optional

import gym
import numpy as np
import pandas as pd
from gym import spaces
from gym.utils import seeding
from pandas import DataFrame


logger = logging.getLogger(__name__)


class Positions(Enum):
    """
    Market position held by the agent.

    The numeric member values are the ones written into the 'position'
    column of the observation built by the environment.
    """
    Short = 0
    Long = 1
    Neutral = 0.5

    def opposite(self):
        """
        Return Short for Long; every other member (Short and Neutral)
        maps to Long.
        """
        if self is Positions.Long:
            return Positions.Short
        return Positions.Long


class BaseEnvironment(gym.Env):
    """
    Base class for environments. This class is agnostic to action count.
    Inherited classes customize this to include varying action counts/types,
    See RL/Base5ActionRLEnv.py and RL/Base4ActionRLEnv.py
    """

    def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
                 reward_kwargs: dict = {}, window_size=10, starting_point=True,
                 id: str = 'baseenv-1', seed: int = 1, config: dict = {}):
        """
        Set up the environment from the user configuration and the training data.

        :param df: feature dataframe, stored as `self.df` / `self.signal_features`
        :param prices: price dataframe; the environment reads its `open` column
        :param reward_kwargs: must contain the keys "rr" and "profit_aim"
        :param window_size: number of rows included in each observation
        :param starting_point: stored and consulted by `reset()`
        :param id: identifier stored on the environment
        :param seed: seed forwarded to `self.seed()`
        :param config: must contain config['freqai']['rl_config'] and
            config['stake_amount'] (the empty default raises KeyError)
        """
        freqai_config = config['freqai']
        self.rl_config = freqai_config['rl_config']
        self.id = id
        self.seed(seed)
        self.reset_env(df, prices, window_size, reward_kwargs, starting_point)

        # the user supplies a percentage, stored here as 1 - pct
        drawdown_pct = self.rl_config.get('max_training_drawdown_pct', 0.8)
        self.max_drawdown = 1 - drawdown_pct

        # an 'unlimited' stake switches the profit bookkeeping to compounding
        stake_amount = config['stake_amount']
        self.compound_trades = stake_amount == 'unlimited'

    def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,
                  reward_kwargs: dict, starting_point=True):
        """
        (Re)initialise the data, the gym spaces and the episode bookkeeping.

        Called from `__init__`. Per the original note it is also meant to be
        used to reset the environment when the agent fails (in our case, if
        the drawdown exceeds the user set max_training_drawdown_pct).

        :param df: feature dataframe
        :param prices: price dataframe
        :param window_size: number of rows per observation; also the first tick
        :param reward_kwargs: must contain the keys "rr" and "profit_aim"
        :param starting_point: stored for use by `reset()`
        """
        # data and user settings
        self.df = df
        self.signal_features = self.df
        self.prices = prices
        self.window_size = window_size
        self.starting_point = starting_point
        self.rr = reward_kwargs["rr"]
        self.profit_aim = reward_kwargs["profit_aim"]

        self.fee = 0.0015

        # spaces: the 3 extra columns are the state columns appended to the
        # features by _get_observation()
        n_feature_columns = self.signal_features.shape[1]
        self.shape = (window_size, n_feature_columns + 3)
        self.set_action_space()
        self.observation_space = spaces.Box(low=-1, high=1,
                                            shape=self.shape, dtype=np.float32)

        # episode
        self._start_tick: int = self.window_size
        last_price_row = len(self.prices) - 1
        self._end_tick: int = last_price_row
        self._done: bool = False
        self._current_tick: int = self._start_tick
        self._last_trade_tick: Optional[int] = None
        self._position = Positions.Neutral
        self._position_history: list = [None]
        self.total_reward: float = 0
        self._total_profit: float = 1
        self._total_unrealized_profit: float = 1
        self.history: dict = {}
        self.trade_history: list = []

    @abstractmethod
    def set_action_space(self):
        """
        Unique to the environment action count. Must be inherited.

        Called by `reset_env()` right after `self.shape` is set and before
        `self.observation_space` is created. The base implementation does
        nothing.
        """

    def seed(self, seed: int = 1):
        """
        Create the environment's random generator from `seed`.

        :param seed: value handed to gym's `seeding.np_random`
        :returns: one-element list holding the seed reported back by
            `seeding.np_random`
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def reset(self):
        """
        Start a new episode and return its first observation.

        All per-episode bookkeeping (tick pointer, position, rewards, profit
        trackers, histories) is put back to its initial value.

        :returns: the observation produced by `_get_observation()` at
            `self._start_tick`
        """
        self._done = False

        self._current_tick = self._start_tick
        self._last_trade_tick = None
        # Go back to Neutral *before* seeding the position history, otherwise
        # the new episode's history would start with whatever position the
        # previous episode ended in.
        self._position = Positions.Neutral

        # pad the history with None up to the first tick of the episode
        if self.starting_point is True:
            padding = self._start_tick
        else:
            padding = self.window_size
        self._position_history = (padding * [None]) + [self._position]

        self.total_reward = 0.
        self._total_profit = 1.  # unit
        self.history = {}
        self.trade_history = []
        self.portfolio_log_returns = np.zeros(len(self.prices))

        self._profits = [(self._start_tick, 1)]
        self.close_trade_profit = []
        self._total_unrealized_profit = 1

        return self._get_observation()

    @abstractmethod
    def step(self, action: int):
        """
        Step depends on action types, this must be inherited.

        :param action: the action chosen by the agent
        :returns: None in this base implementation; inheriting environments
            supply the real step logic
        """
        return

    def _get_observation(self):
        """
        Build the observation for the current tick.

        The observation is the slice of `signal_features` covering the
        `window_size` rows that end just before `_current_tick`, with three
        state columns appended: current_profit_pct, position and
        trade_duration (each holds a single value repeated on every row).

        This may or may not be independent of action types, user can inherit
        this in their custom "MyRLEnv"
        """
        window_end = self._current_tick
        window_start = window_end - self.window_size
        features_window = self.signal_features[window_start:window_end]

        state_columns = ['current_profit_pct', 'position', 'trade_duration']
        state = DataFrame(np.zeros((len(features_window), 3)),
                          columns=state_columns,
                          index=features_window.index)

        # scalars are broadcast over every row of the window
        state['current_profit_pct'] = self.get_unrealized_profit()
        state['position'] = self._position.value
        state['trade_duration'] = self.get_trade_duration()

        return pd.concat([features_window, state], axis=1)

    def get_trade_duration(self):
        """
        Number of ticks elapsed since `_last_trade_tick`, or 0 when no trade
        tick has been recorded.
        """
        entry_tick = self._last_trade_tick
        return 0 if entry_tick is None else self._current_tick - entry_tick

    def get_unrealized_profit(self):
        """
        Fee-adjusted relative profit of the open trade, based on open prices.

        :returns: 0. when no trade tick is recorded or the position is neither
            Long nor Short; otherwise the price change between
            `_last_trade_tick` and `_current_tick` relative to the
            fee-adjusted price at `_last_trade_tick`
        """
        if self._last_trade_tick is None or self._position == Positions.Neutral:
            return 0.

        if self._position == Positions.Long:
            # bought at the trade tick (fee added), selling now (fee deducted)
            exit_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
            entry_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
            return (exit_price - entry_price) / entry_price

        if self._position == Positions.Short:
            # sold at the trade tick (fee deducted), buying back now (fee added)
            cover_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
            sold_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
            return (sold_price - cover_price) / sold_price

        return 0.

    @abstractmethod
    def is_tradesignal(self, action: int):
        """
        Determine if the signal is a trade signal. This is
        unique to the actions in the environment, and therefore must be
        inherited.

        :param action: the action chosen by the agent
        :returns: None in this base implementation
        """
        return

    def _is_valid(self, action: int):
        """
        Determine if the signal is valid. This is
        unique to the actions in the environment, and therefore must be
        inherited.

        :param action: the action chosen by the agent
        :returns: None in this base implementation (note: unlike the other
            hooks this one is not decorated with @abstractmethod)
        """
        return

    def add_entry_fee(self, price):
        """
        Return `price` increased by the fee, i.e. multiplied by (1 + self.fee).
        """
        fee_factor = 1 + self.fee
        return price * fee_factor

    def add_exit_fee(self, price):
        """
        Return `price` reduced by the fee, i.e. divided by (1 + self.fee).
        """
        fee_factor = 1 + self.fee
        return price / fee_factor

    def _update_history(self, info):
        """
        Append every value of `info` to the per-key lists in `self.history`.

        When the history is still empty it is first created with one empty
        list per key of `info`.

        :param info: mapping of name -> value for the current step
        """
        history = self.history
        if not history:
            history = {name: [] for name in info.keys()}
            self.history = history

        for name, entry in info.items():
            history[name].append(entry)

    @abstractmethod
    def calculate_reward(self, action):
        """
        Reward function hook. No implementation is provided here; this is the
        one function that users will likely wish to inject their own
        creativity into.
        :params:
        action: int = The action made by the agent for the current candle.
        :returns:
        float = the reward to give to the agent for current step (used for optimization
            of weights in NN)
        """

    def _update_unrealized_total_profit(self):
        """
        Refresh `_total_unrealized_profit` with the profit of the still-open
        trade, in case of episode end. Does nothing unless the position is
        Long or Short.
        """
        if self._position not in (Positions.Long, Positions.Short):
            return

        open_pnl = self.get_unrealized_profit()
        # unit stake in both cases: compounding multiplies, otherwise pnl is added
        self._total_unrealized_profit = (
            self._total_profit * (1 + open_pnl) if self.compound_trades
            else self._total_profit + open_pnl
        )

    def _update_total_profit(self):
        """
        Fold the current trade's profit (as reported by
        `get_unrealized_profit()`) into `_total_profit`.
        """
        trade_pnl = self.get_unrealized_profit()
        if not self.compound_trades:
            # assumes unit stake and no compounding
            self._total_profit += trade_pnl
            return

        # assumes unit stake and compounding
        self._total_profit = self._total_profit * (1 + trade_pnl)

    def most_recent_return(self, action: int):
        """
        Tick-to-tick log return of the open position, computed on open prices.

        Long positions earn on rising prices, Short positions on falling
        prices. When the position recorded for the previous tick differs from
        the current one (i.e. the trade was just entered), the previous price
        is adjusted by the fee first.

        :param action: not used by this implementation
        :returns: the log return, or 0 when the position is neither Long nor
            Short
        """
        if self._position not in (Positions.Long, Positions.Short):
            return 0

        tick = self._current_tick
        current_price = self.prices.iloc[tick].open
        previous_price = self.prices.iloc[tick - 1].open
        previous_position = self._position_history[tick - 1]

        if self._position == Positions.Long:
            if previous_position in (Positions.Short, Positions.Neutral):
                previous_price = self.add_entry_fee(previous_price)
            return np.log(current_price) - np.log(previous_price)

        # Short position: the sign of the return is flipped
        if previous_position in (Positions.Long, Positions.Neutral):
            previous_price = self.add_exit_fee(previous_price)
        return np.log(previous_price) - np.log(current_price)

    def update_portfolio_log_returns(self, action):
        """
        Store the return of the current tick (see `most_recent_return`) in
        `portfolio_log_returns` at index `_current_tick`.
        """
        tick_return = self.most_recent_return(action)
        self.portfolio_log_returns[self._current_tick] = tick_return

    def current_price(self) -> float:
        """
        Open price of the row at `_current_tick`.
        """
        current_row = self.prices.iloc[self._current_tick]
        return current_row.open
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`import logging`
			`from abc import abstractmethod`
			`from enum import Enum`
			`from typing import Optional`

			`import gym`
			`import numpy as np`
			`import pandas as pd`
			`from gym import spaces`
			`from gym.utils import seeding`
			`from pandas import DataFrame`


			`logger = logging.getLogger(__name__)`


			`class Positions(Enum):`
			`Short = 0`
			`Long = 1`
			`Neutral = 0.5`

			`def opposite(self):`
			`return Positions.Short if self == Positions.Long else Positions.Long`


			`class BaseEnvironment(gym.Env):`
			`"""`
			`Base class for environments. This class is agnostic to action count.`
			`Inherited classes customize this to include varying action counts/types,`
			`See RL/Base5ActionRLEnv.py and RL/Base4ActionRLEnv.py`
			`"""`

			`def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),`
			`reward_kwargs: dict = {}, window_size=10, starting_point=True,`
			`id: str = 'baseenv-1', seed: int = 1, config: dict = {}):`

			`self.rl_config = config['freqai']['rl_config']`
			`self.id = id`
			`self.seed(seed)`
			`self.reset_env(df, prices, window_size, reward_kwargs, starting_point)`
			`self.max_drawdown = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8)`
			`self.compound_trades = config['stake_amount'] == 'unlimited'`

			`def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,`
			`reward_kwargs: dict, starting_point=True):`
improve typing, improve docstrings, ensure global tests pass 2022-09-23 17:17:27 +00:00			`"""`
			`Resets the environment when the agent fails (in our case, if the drawdown`
			`exceeds the user set max_training_drawdown_pct)`
			`"""`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`self.df = df`
			`self.signal_features = self.df`
			`self.prices = prices`
			`self.window_size = window_size`
			`self.starting_point = starting_point`
			`self.rr = reward_kwargs["rr"]`
			`self.profit_aim = reward_kwargs["profit_aim"]`

			`self.fee = 0.0015`

			`# # spaces`
			`self.shape = (window_size, self.signal_features.shape[1] + 3)`
			`self.set_action_space()`
			`self.observation_space = spaces.Box(`
Unnecessary lines in Base4, and changes for box space, to fit better for our needs (#7324) 2022-08-31 14:37:02 +00:00			`low=-1, high=1, shape=self.shape, dtype=np.float32)`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00
			`# episode`
			`self._start_tick: int = self.window_size`
			`self._end_tick: int = len(self.prices) - 1`
			`self._done: bool = False`
			`self._current_tick: int = self._start_tick`
			`self._last_trade_tick: Optional[int] = None`
			`self._position = Positions.Neutral`
			`self._position_history: list = [None]`
			`self.total_reward: float = 0`
			`self._total_profit: float = 1`
			`self._total_unrealized_profit: float = 1`
			`self.history: dict = {}`
			`self.trade_history: list = []`

			`@abstractmethod`
			`def set_action_space(self):`
			`"""`
			`Unique to the environment action count. Must be inherited.`
			`"""`

			`def seed(self, seed: int = 1):`
			`self.np_random, seed = seeding.np_random(seed)`
			`return [seed]`

			`def reset(self):`

			`self._done = False`

			`if self.starting_point is True:`
			`self._position_history = (self._start_tick * [None]) + [self._position]`
			`else:`
			`self._position_history = (self.window_size * [None]) + [self._position]`

			`self._current_tick = self._start_tick`
			`self._last_trade_tick = None`
			`self._position = Positions.Neutral`

			`self.total_reward = 0.`
			`self._total_profit = 1. # unit`
			`self.history = {}`
			`self.trade_history = []`
			`self.portfolio_log_returns = np.zeros(len(self.prices))`

			`self._profits = [(self._start_tick, 1)]`
			`self.close_trade_profit = []`
			`self._total_unrealized_profit = 1`

			`return self._get_observation()`

			`@abstractmethod`
			`def step(self, action: int):`
			`"""`
			`Step depeneds on action types, this must be inherited.`
			`"""`
			`return`

			`def _get_observation(self):`
			`"""`
			`This may or may not be independent of action types, user can inherit`
			`this in their custom "MyRLEnv"`
			`"""`
			`features_window = self.signal_features[(`
			`self._current_tick - self.window_size):self._current_tick]`
			`features_and_state = DataFrame(np.zeros((len(features_window), 3)),`
			`columns=['current_profit_pct', 'position', 'trade_duration'],`
			`index=features_window.index)`

			`features_and_state['current_profit_pct'] = self.get_unrealized_profit()`
			`features_and_state['position'] = self._position.value`
			`features_and_state['trade_duration'] = self.get_trade_duration()`
			`features_and_state = pd.concat([features_window, features_and_state], axis=1)`
			`return features_and_state`

			`def get_trade_duration(self):`
improve typing, improve docstrings, ensure global tests pass 2022-09-23 17:17:27 +00:00			`"""`
			`Get the trade duration if the agent is in a trade`
			`"""`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`if self._last_trade_tick is None:`
			`return 0`
			`else:`
			`return self._current_tick - self._last_trade_tick`

			`def get_unrealized_profit(self):`
improve typing, improve docstrings, ensure global tests pass 2022-09-23 17:17:27 +00:00			`"""`
			`Get the unrealized profit if the agent is in a trade`
			`"""`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`if self._last_trade_tick is None:`
			`return 0.`

			`if self._position == Positions.Neutral:`
			`return 0.`
			`elif self._position == Positions.Short:`
			`current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)`
			`last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)`
			`return (last_trade_price - current_price) / last_trade_price`
			`elif self._position == Positions.Long:`
			`current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)`
			`last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)`
			`return (current_price - last_trade_price) / last_trade_price`
			`else:`
			`return 0.`

			`@abstractmethod`
			`def is_tradesignal(self, action: int):`
			`"""`
			`Determine if the signal is a trade signal. This is`
			`unique to the actions in the environment, and therefore must be`
			`inherited.`
			`"""`
			`return`

			`def _is_valid(self, action: int):`
			`"""`
			`Determine if the signal is valid.This is`
			`unique to the actions in the environment, and therefore must be`
			`inherited.`
			`"""`
			`return`

			`def add_entry_fee(self, price):`
			`return price * (1 + self.fee)`

			`def add_exit_fee(self, price):`
			`return price / (1 + self.fee)`

			`def _update_history(self, info):`
			`if not self.history:`
			`self.history = {key: [] for key in info.keys()}`

			`for key, value in info.items():`
			`self.history[key].append(value)`

			`@abstractmethod`
			`def calculate_reward(self, action):`
			`"""`
improve typing, improve docstrings, ensure global tests pass 2022-09-23 17:17:27 +00:00			`An example reward function. This is the one function that users will likely`
			`wish to inject their own creativity into.`
			`:params:`
			`action: int = The action made by the agent for the current candle.`
			`:returns:`
			`float = the reward to give to the agent for current step (used for optimization`
			`of weights in NN)`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`"""`

			`def _update_unrealized_total_profit(self):`
			`"""`
			`Update the unrealized total profit incase of episode end.`
			`"""`
			`if self._position in (Positions.Long, Positions.Short):`
			`pnl = self.get_unrealized_profit()`
			`if self.compound_trades:`
			`# assumes unit stake and compounding`
			`unrl_profit = self._total_profit * (1 + pnl)`
			`else:`
			`# assumes unit stake and no compounding`
			`unrl_profit = self._total_profit + pnl`
			`self._total_unrealized_profit = unrl_profit`

			`def _update_total_profit(self):`
			`pnl = self.get_unrealized_profit()`
			`if self.compound_trades:`
			`# assumes unite stake and compounding`
			`self._total_profit = self._total_profit * (1 + pnl)`
			`else:`
			`# assumes unit stake and no compounding`
			`self._total_profit += pnl`

			`def most_recent_return(self, action: int):`
			`"""`
			`Calculate the tick to tick return if in a trade.`
			`Return is generated from rising prices in Long`
			`and falling prices in Short positions.`
			`The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.`
			`"""`
			`# Long positions`
			`if self._position == Positions.Long:`
			`current_price = self.prices.iloc[self._current_tick].open`
			`previous_price = self.prices.iloc[self._current_tick - 1].open`

			`if (self._position_history[self._current_tick - 1] == Positions.Short`
			`or self._position_history[self._current_tick - 1] == Positions.Neutral):`
			`previous_price = self.add_entry_fee(previous_price)`

			`return np.log(current_price) - np.log(previous_price)`

			`# Short positions`
			`if self._position == Positions.Short:`
			`current_price = self.prices.iloc[self._current_tick].open`
			`previous_price = self.prices.iloc[self._current_tick - 1].open`
			`if (self._position_history[self._current_tick - 1] == Positions.Long`
			`or self._position_history[self._current_tick - 1] == Positions.Neutral):`
			`previous_price = self.add_exit_fee(previous_price)`

			`return np.log(previous_price) - np.log(current_price)`

			`return 0`

			`def update_portfolio_log_returns(self, action):`
			`self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)`

			`def current_price(self) -> float:`
			`return self.prices.iloc[self._current_tick].open`