stable/freqtrade/freqai/RL/Base5ActionRLEnv.py

import logging
from enum import Enum

import numpy as np
import pandas as pd
from gym import spaces
from pandas import DataFrame

from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions


logger = logging.getLogger(__name__)


class Actions(Enum):
    Neutral = 0
    Long_enter = 1
    Long_exit = 2
    Short_enter = 3
    Short_exit = 4


def mean_over_std(x):
    std = np.std(x, ddof=1)
    mean = np.mean(x)
    return mean / std if std > 0 else 0


class Base5ActionRLEnv(BaseEnvironment):
    """
    Base class for a 5 action environment
    """

    def set_action_space(self):
        self.action_space = spaces.Discrete(len(Actions))

    def reset(self):

        self._done = False

        if self.starting_point is True:
            self._position_history = (self._start_tick * [None]) + [self._position]
        else:
            self._position_history = (self.window_size * [None]) + [self._position]

        self._current_tick = self._start_tick
        self._last_trade_tick = None
        self._position = Positions.Neutral

        self.total_reward = 0.
        self._total_profit = 1.  # unit
        self.history = {}
        self.trade_history = []
        self.portfolio_log_returns = np.zeros(len(self.prices))

        self._profits = [(self._start_tick, 1)]
        self.close_trade_profit = []
        self._total_unrealized_profit = 1

        return self._get_observation()

    def step(self, action: int):
        """
        Logic for a single step (incrementing one candle in time)
        by the agent
        :param: action: int = the action type that the agent plans
            to take for the current step.
        :returns:
            observation = current state of environment
            step_reward = the reward from `calculate_reward()`
            _done = if the agent "died" or if the candles finished
            info = dict passed back to openai gym lib
        """
        self._done = False
        self._current_tick += 1

        if self._current_tick == self._end_tick:
            self._done = True

        self.update_portfolio_log_returns(action)

        self._update_unrealized_total_profit()
        step_reward = self.calculate_reward(action)
        self.total_reward += step_reward

        trade_type = None
        if self.is_tradesignal(action):
            """
            Action: Neutral, position: Long ->  Close Long
            Action: Neutral, position: Short -> Close Short

            Action: Long, position: Neutral -> Open Long
            Action: Long, position: Short -> Close Short and Open Long

            Action: Short, position: Neutral -> Open Short
            Action: Short, position: Long -> Close Long and Open Short
            """

            if action == Actions.Neutral.value:
                self._position = Positions.Neutral
                trade_type = "neutral"
                self._last_trade_tick = None
            elif action == Actions.Long_enter.value:
                self._position = Positions.Long
                trade_type = "long"
                self._last_trade_tick = self._current_tick
            elif action == Actions.Short_enter.value:
                self._position = Positions.Short
                trade_type = "short"
                self._last_trade_tick = self._current_tick
            elif action == Actions.Long_exit.value:
                self._update_total_profit()
                self._position = Positions.Neutral
                trade_type = "neutral"
                self._last_trade_tick = None
            elif action == Actions.Short_exit.value:
                self._update_total_profit()
                self._position = Positions.Neutral
                trade_type = "neutral"
                self._last_trade_tick = None
            else:
                print("case not defined")

            if trade_type is not None:
                self.trade_history.append(
                    {'price': self.current_price(), 'index': self._current_tick,
                     'type': trade_type})

        if (self._total_profit < self.max_drawdown or
                self._total_unrealized_profit < self.max_drawdown):
            self._done = True

        self._position_history.append(self._position)

        info = dict(
            tick=self._current_tick,
            total_reward=self.total_reward,
            total_profit=self._total_profit,
            position=self._position.value
        )

        observation = self._get_observation()

        self._update_history(info)

        return observation, step_reward, self._done, info

    def _get_observation(self):
        features_window = self.signal_features[(
            self._current_tick - self.window_size):self._current_tick]
        features_and_state = DataFrame(np.zeros((len(features_window), 3)),
                                       columns=['current_profit_pct', 'position', 'trade_duration'],
                                       index=features_window.index)

        features_and_state['current_profit_pct'] = self.get_unrealized_profit()
        features_and_state['position'] = self._position.value
        features_and_state['trade_duration'] = self.get_trade_duration()
        features_and_state = pd.concat([features_window, features_and_state], axis=1)
        return features_and_state

    def get_trade_duration(self):
        if self._last_trade_tick is None:
            return 0
        else:
            return self._current_tick - self._last_trade_tick

    def is_tradesignal(self, action: int):
        # trade signal
        """
        Determine if the signal is a trade signal
        e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
        """
        return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or
                    (action == Actions.Neutral.value and self._position == Positions.Short) or
                    (action == Actions.Neutral.value and self._position == Positions.Long) or
                    (action == Actions.Short_enter.value and self._position == Positions.Short) or
                    (action == Actions.Short_enter.value and self._position == Positions.Long) or
                    (action == Actions.Short_exit.value and self._position == Positions.Long) or
                    (action == Actions.Short_exit.value and self._position == Positions.Neutral) or
                    (action == Actions.Long_enter.value and self._position == Positions.Long) or
                    (action == Actions.Long_enter.value and self._position == Positions.Short) or
                    (action == Actions.Long_exit.value and self._position == Positions.Short) or
                    (action == Actions.Long_exit.value and self._position == Positions.Neutral))

    def _is_valid(self, action: int):
        # trade signal
        """
        Determine if the signal is valid.
        e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
        """
        # Agent should only try to exit if it is in position
        if action in (Actions.Short_exit.value, Actions.Long_exit.value):
            if self._position not in (Positions.Short, Positions.Long):
                return False

        # Agent should only try to enter if it is not in position
        if action in (Actions.Short_enter.value, Actions.Long_enter.value):
            if self._position != Positions.Neutral:
                return False

        return True
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`import logging`
			`from enum import Enum`

			`import numpy as np`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`import pandas as pd`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`from gym import spaces`
reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00			`from pandas import DataFrame`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00
			`from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions`


restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`logger = logging.getLogger(__name__)`


			`class Actions(Enum):`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`Neutral = 0`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`Long_enter = 1`
			`Long_exit = 2`
			`Short_enter = 3`
			`Short_exit = 4`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00

			`def mean_over_std(x):`
			`std = np.std(x, ddof=1)`
			`mean = np.mean(x)`
			`return mean / std if std > 0 else 0`

fix coding convention 2022-08-18 10:01:04 +00:00
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`class Base5ActionRLEnv(BaseEnvironment):`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`"""`
			`Base class for a 5 action environment`
			`"""`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`def set_action_space(self):`
			`self.action_space = spaces.Discrete(len(Actions))`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
			`def reset(self):`

			`self._done = False`

			`if self.starting_point is True:`
			`self._position_history = (self._start_tick * [None]) + [self._position]`
			`else:`
			`self._position_history = (self.window_size * [None]) + [self._position]`

			`self._current_tick = self._start_tick`
			`self._last_trade_tick = None`
			`self._position = Positions.Neutral`

			`self.total_reward = 0.`
			`self._total_profit = 1. # unit`
			`self.history = {}`
			`self.trade_history = []`
			`self.portfolio_log_returns = np.zeros(len(self.prices))`

			`self._profits = [(self._start_tick, 1)]`
			`self.close_trade_profit = []`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`self._total_unrealized_profit = 1`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
			`return self._get_observation()`

5ac base fixes in logic 2022-08-17 05:36:10 +00:00			`def step(self, action: int):`
improve typing, improve docstrings, ensure global tests pass 2022-09-23 17:17:27 +00:00			`"""`
			`Logic for a single step (incrementing one candle in time)`
			`by the agent`
			`:param: action: int = the action type that the agent plans`
			`to take for the current step.`
			`:returns:`
			`observation = current state of environment`
			step_reward = the reward from `calculate_reward()`
			`_done = if the agent "died" or if the candles finished`
			`info = dict passed back to openai gym lib`
			`"""`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`self._done = False`
			`self._current_tick += 1`

			`if self._current_tick == self._end_tick:`
			`self._done = True`

			`self.update_portfolio_log_returns(action)`

refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`self._update_unrealized_total_profit()`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`step_reward = self.calculate_reward(action)`
			`self.total_reward += step_reward`

			`trade_type = None`
add continual retraining feature, handly mypy typing reqs, improve docstrings 2022-08-24 10:54:02 +00:00			`if self.is_tradesignal(action):`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`"""`
			`Action: Neutral, position: Long -> Close Long`
			`Action: Neutral, position: Short -> Close Short`

			`Action: Long, position: Neutral -> Open Long`
			`Action: Long, position: Short -> Close Short and Open Long`

			`Action: Short, position: Neutral -> Open Short`
			`Action: Short, position: Long -> Close Long and Open Short`
			`"""`

			`if action == Actions.Neutral.value:`
			`self._position = Positions.Neutral`
			`trade_type = "neutral"`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`self._last_trade_tick = None`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`elif action == Actions.Long_enter.value:`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`self._position = Positions.Long`
			`trade_type = "long"`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`self._last_trade_tick = self._current_tick`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`elif action == Actions.Short_enter.value:`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`self._position = Positions.Short`
			`trade_type = "short"`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`self._last_trade_tick = self._current_tick`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`elif action == Actions.Long_exit.value:`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`self._update_total_profit()`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`self._position = Positions.Neutral`
			`trade_type = "neutral"`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`self._last_trade_tick = None`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`elif action == Actions.Short_exit.value:`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`self._update_total_profit()`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`self._position = Positions.Neutral`
			`trade_type = "neutral"`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`self._last_trade_tick = None`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`else:`
			`print("case not defined")`

			`if trade_type is not None:`
			`self.trade_history.append(`
			`{'price': self.current_price(), 'index': self._current_tick,`
			`'type': trade_type})`

refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`if (self._total_profit < self.max_drawdown or`
			`self._total_unrealized_profit < self.max_drawdown):`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`self._done = True`

			`self._position_history.append(self._position)`
reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`info = dict(`
			`tick=self._current_tick,`
			`total_reward=self.total_reward,`
			`total_profit=self._total_profit,`
			`position=self._position.value`
			`)`
reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00
			`observation = self._get_observation()`

restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`self._update_history(info)`

			`return observation, step_reward, self._done, info`

			`def _get_observation(self):`
persist a single training environment. 2022-08-18 14:07:19 +00:00			`features_window = self.signal_features[(`
reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00			`self._current_tick - self.window_size):self._current_tick]`
add live rate, add trade duration 2022-08-23 07:44:44 +00:00			`features_and_state = DataFrame(np.zeros((len(features_window), 3)),`
			`columns=['current_profit_pct', 'position', 'trade_duration'],`
persist a single training environment. 2022-08-18 14:07:19 +00:00			`index=features_window.index)`

reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00			`features_and_state['current_profit_pct'] = self.get_unrealized_profit()`
			`features_and_state['position'] = self._position.value`
add live rate, add trade duration 2022-08-23 07:44:44 +00:00			`features_and_state['trade_duration'] = self.get_trade_duration()`
persist a single training environment. 2022-08-18 14:07:19 +00:00			`features_and_state = pd.concat([features_window, features_and_state], axis=1)`
reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00			`return features_and_state`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
add live rate, add trade duration 2022-08-23 07:44:44 +00:00			`def get_trade_duration(self):`
			`if self._last_trade_tick is None:`
			`return 0`
			`else:`
			`return self._current_tick - self._last_trade_tick`

5ac base fixes in logic 2022-08-17 05:36:10 +00:00			`def is_tradesignal(self, action: int):`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`# trade signal`
			`"""`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`Determine if the signal is a trade signal`
add continual retraining feature, handly mypy typing reqs, improve docstrings 2022-08-24 10:54:02 +00:00			`e.g.: agent wants a Actions.Long_exit while it is in a Positions.short`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`"""`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or`
5ac base fixes in logic 2022-08-17 05:36:10 +00:00			`(action == Actions.Neutral.value and self._position == Positions.Short) or`
			`(action == Actions.Neutral.value and self._position == Positions.Long) or`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`(action == Actions.Short_enter.value and self._position == Positions.Short) or`
			`(action == Actions.Short_enter.value and self._position == Positions.Long) or`
			`(action == Actions.Short_exit.value and self._position == Positions.Long) or`
			`(action == Actions.Short_exit.value and self._position == Positions.Neutral) or`
			`(action == Actions.Long_enter.value and self._position == Positions.Long) or`
			`(action == Actions.Long_enter.value and self._position == Positions.Short) or`
			`(action == Actions.Long_exit.value and self._position == Positions.Short) or`
			`(action == Actions.Long_exit.value and self._position == Positions.Neutral))`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`def _is_valid(self, action: int):`
			`# trade signal`
			`"""`
			`Determine if the signal is valid.`
			`e.g.: agent wants a Actions.Long_exit while it is in a Positions.short`
			`"""`
			`# Agent should only try to exit if it is in position`
			`if action in (Actions.Short_exit.value, Actions.Long_exit.value):`
			`if self._position not in (Positions.Short, Positions.Long):`
			`return False`

			`# Agent should only try to enter if it is not in position`
			`if action in (Actions.Short_enter.value, Actions.Long_enter.value):`
			`if self._position != Positions.Neutral:`
			`return False`

			`return True`