stable/freqtrade/freqai/RL/Base5ActionRLEnv.py

import logging
from enum import Enum

from gym import spaces

from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions


logger = logging.getLogger(__name__)


class Actions(Enum):
    """
    Discrete actions available to the agent in the 5 action environment.

    The integer values are the action indices passed to
    `Base5ActionRLEnv.step()`; the number of members defines the size of the
    discrete action space.
    """
    # No trade requested (never treated as a trade signal for Long/Short/Neutral positions)
    Neutral = 0
    # Open a long position
    Long_enter = 1
    # Close the current long position
    Long_exit = 2
    # Open a short position
    Short_enter = 3
    # Close the current short position
    Short_exit = 4


class Base5ActionRLEnv(BaseEnvironment):
    """
    Base class for a 5 action environment.

    At every step the agent picks one of the five `Actions`
    (neutral, enter/exit long, enter/exit short).
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Expose the action enum on the instance; `step()` uses it to
        # resolve the action name that is sent to tensorboard.
        self.actions = Actions

    def set_action_space(self):
        """
        Define the action space: one discrete choice per member of `Actions`.
        """
        self.action_space = spaces.Discrete(len(Actions))

    def step(self, action: int):
        """
        Logic for a single step (incrementing one candle in time)
        by the agent
        :param: action: int = the action type that the agent plans
            to take for the current step.
        :returns:
            observation = current state of environment
            step_reward = the reward from `calculate_reward()`
            _done = if the agent "died" or if the candles finished
            info = dict passed back to openai gym lib
        """
        self._done = False
        self._current_tick += 1

        # The episode ends once the last candle has been reached.
        if self._current_tick == self._end_tick:
            self._done = True

        # The reward is computed BEFORE the position is updated below, i.e.
        # `calculate_reward()` sees the position held when the action was chosen.
        self._update_unrealized_total_profit()
        step_reward = self.calculate_reward(action)
        self.total_reward += step_reward
        self.tensorboard_log(self.actions._member_names_[action], category="actions")

        trade_type = None
        if self.is_tradesignal(action):

            if action == Actions.Neutral.value:
                self._position = Positions.Neutral
                trade_type = "neutral"
                self._last_trade_tick = None
            elif action == Actions.Long_enter.value:
                self._position = Positions.Long
                trade_type = "enter_long"
                self._last_trade_tick = self._current_tick
            elif action == Actions.Short_enter.value:
                self._position = Positions.Short
                trade_type = "enter_short"
                self._last_trade_tick = self._current_tick
            elif action == Actions.Long_exit.value:
                # Realize the profit while the position is still open.
                self._update_total_profit()
                self._position = Positions.Neutral
                trade_type = "exit_long"
                self._last_trade_tick = None
            elif action == Actions.Short_exit.value:
                # Realize the profit while the position is still open.
                self._update_total_profit()
                self._position = Positions.Neutral
                trade_type = "exit_short"
                self._last_trade_tick = None
            else:
                # Report through the module logger instead of stdout so the
                # message follows the application's logging configuration.
                logger.warning("case not defined (action=%s)", action)

            if trade_type is not None:
                # NOTE(review): for exit trades the position has already been
                # reset to Neutral when `get_unrealized_profit()` is read here -
                # confirm the recorded 'profit' is the intended value.
                self.trade_history.append(
                    {'price': self.current_price(), 'index': self._current_tick,
                     'type': trade_type, 'profit': self.get_unrealized_profit()})

        # The agent "dies" when realized or unrealized total profit drops
        # below the configured max drawdown.
        if (self._total_profit < self.max_drawdown or
                self._total_unrealized_profit < self.max_drawdown):
            self._done = True

        self._position_history.append(self._position)

        info = dict(
            tick=self._current_tick,
            action=action,
            total_reward=self.total_reward,
            total_profit=self._total_profit,
            position=self._position.value,
            trade_duration=self.get_trade_duration(),
            current_profit_pct=self.get_unrealized_profit()
        )

        observation = self._get_observation()

        self._update_history(info)

        return observation, step_reward, self._done, info

    def is_tradesignal(self, action: int) -> bool:
        """
        Determine if the signal is a trade signal, i.e. if the action
        changes the current position.
        e.g.: an Actions.Long_exit while the agent is in Positions.Short is
        NOT a trade signal, an Actions.Long_exit while in Positions.Long is.
        :param: action: int = the action type requested by the agent
        :returns: False if the action is a no-op for the current position,
            True otherwise (including actions that are not part of `Actions`,
            which `step()` then reports as undefined).
        """
        # For each action, list the positions in which it is NOT a trade.
        if action == Actions.Neutral.value:
            blocked = (Positions.Neutral, Positions.Short, Positions.Long)
        elif action in (Actions.Long_enter.value, Actions.Short_enter.value):
            # Entries are only trades when no position is currently held.
            blocked = (Positions.Long, Positions.Short)
        elif action == Actions.Long_exit.value:
            blocked = (Positions.Short, Positions.Neutral)
        elif action == Actions.Short_exit.value:
            blocked = (Positions.Long, Positions.Neutral)
        else:
            return True

        return self._position not in blocked

    def _is_valid(self, action: int) -> bool:
        """
        Determine if the signal is valid for the current position.
        e.g.: an exit action while the agent is in Positions.Neutral is
        invalid, as is an enter action while a position is already open.
        :param: action: int = the action type requested by the agent
        :returns: True if the action is allowed in the current position.
        """
        # NOTE(review): the exit direction is not checked against the held
        # position (Actions.Long_exit while in Positions.Short is reported
        # as valid) - confirm this is intended.

        # Agent should only try to exit if it is in position
        if action in (Actions.Short_exit.value, Actions.Long_exit.value):
            return self._position in (Positions.Short, Positions.Long)

        # Agent should only try to enter if it is not in position
        if action in (Actions.Short_enter.value, Actions.Long_enter.value):
            return self._position == Positions.Neutral

        return True
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`import logging`
			`from enum import Enum`

			`from gym import spaces`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00
			`from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions`


restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`logger = logging.getLogger(__name__)`


			`class Actions(Enum):`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`Neutral = 0`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`Long_enter = 1`
			`Long_exit = 2`
			`Short_enter = 3`
			`Short_exit = 4`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00

refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`class Base5ActionRLEnv(BaseEnvironment):`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`"""`
			`Base class for a 5 action environment`
			`"""`
reorganize/generalize tensorboard callback 2022-12-04 12:54:30 +00:00			`def __init__(self, **kwargs):`
			`super().__init__(**kwargs)`
			`self.actions = Actions`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`def set_action_space(self):`
			`self.action_space = spaces.Discrete(len(Actions))`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
5ac base fixes in logic 2022-08-17 05:36:10 +00:00			`def step(self, action: int):`
improve typing, improve docstrings, ensure global tests pass 2022-09-23 17:17:27 +00:00			`"""`
			`Logic for a single step (incrementing one candle in time)`
			`by the agent`
			`:param: action: int = the action type that the agent plans`
			`to take for the current step.`
			`:returns:`
			`observation = current state of environment`
			step_reward = the reward from `calculate_reward()`
			`_done = if the agent "died" or if the candles finished`
			`info = dict passed back to openai gym lib`
			`"""`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`self._done = False`
			`self._current_tick += 1`

			`if self._current_tick == self._end_tick:`
			`self._done = True`

refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`self._update_unrealized_total_profit()`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`step_reward = self.calculate_reward(action)`
			`self.total_reward += step_reward`
add tensorboard category 2023-03-11 22:32:55 +00:00			`self.tensorboard_log(self.actions._member_names_[action], category="actions")`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
			`trade_type = None`
add continual retraining feature, handly mypy typing reqs, improve docstrings 2022-08-24 10:54:02 +00:00			`if self.is_tradesignal(action):`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
			`if action == Actions.Neutral.value:`
			`self._position = Positions.Neutral`
			`trade_type = "neutral"`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`self._last_trade_tick = None`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`elif action == Actions.Long_enter.value:`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`self._position = Positions.Long`
make trade_type value more explicit, add profit to trade_history dict 2023-04-01 08:05:58 +00:00			`trade_type = "enter_long"`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`self._last_trade_tick = self._current_tick`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`elif action == Actions.Short_enter.value:`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`self._position = Positions.Short`
make trade_type value more explicit, add profit to trade_history dict 2023-04-01 08:05:58 +00:00			`trade_type = "enter_short"`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`self._last_trade_tick = self._current_tick`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`elif action == Actions.Long_exit.value:`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`self._update_total_profit()`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`self._position = Positions.Neutral`
make trade_type value more explicit, add profit to trade_history dict 2023-04-01 08:05:58 +00:00			`trade_type = "exit_long"`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`self._last_trade_tick = None`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`elif action == Actions.Short_exit.value:`
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`self._update_total_profit()`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`self._position = Positions.Neutral`
make trade_type value more explicit, add profit to trade_history dict 2023-04-01 08:05:58 +00:00			`trade_type = "exit_short"`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`self._last_trade_tick = None`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`else:`
			`print("case not defined")`

			`if trade_type is not None:`
			`self.trade_history.append(`
			`{'price': self.current_price(), 'index': self._current_tick,`
make trade_type value more explicit, add profit to trade_history dict 2023-04-01 08:05:58 +00:00			`'type': trade_type, 'profit': self.get_unrealized_profit()})`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
refactor environment inheritence tree to accommodate flexible action types/counts. fix bug in train profit handling 2022-08-28 17:21:57 +00:00			`if (self._total_profit < self.max_drawdown or`
			`self._total_unrealized_profit < self.max_drawdown):`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`self._done = True`

			`self._position_history.append(self._position)`
reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`info = dict(`
			`tick=self._current_tick,`
reorganize/generalize tensorboard callback 2022-12-04 12:54:30 +00:00			`action=action,`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`total_reward=self.total_reward,`
			`total_profit=self._total_profit,`
reorganize/generalize tensorboard callback 2022-12-04 12:54:30 +00:00			`position=self._position.value,`
			`trade_duration=self.get_trade_duration(),`
			`current_profit_pct=self.get_unrealized_profit()`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`)`
reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00
			`observation = self._get_observation()`

restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`self._update_history(info)`

			`return observation, step_reward, self._done, info`

Improve typehints / reduce warnings from mypy 2022-11-26 12:03:07 +00:00			`def is_tradesignal(self, action: int) -> bool:`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`"""`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`Determine if the signal is a trade signal`
add continual retraining feature, handly mypy typing reqs, improve docstrings 2022-08-24 10:54:02 +00:00			`e.g.: agent wants a Actions.Long_exit while it is in a Positions.short`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`"""`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or`
5ac base fixes in logic 2022-08-17 05:36:10 +00:00			`(action == Actions.Neutral.value and self._position == Positions.Short) or`
			`(action == Actions.Neutral.value and self._position == Positions.Long) or`
improve nomenclature and fix short exit bug 2022-08-19 09:04:15 +00:00			`(action == Actions.Short_enter.value and self._position == Positions.Short) or`
			`(action == Actions.Short_enter.value and self._position == Positions.Long) or`
			`(action == Actions.Short_exit.value and self._position == Positions.Long) or`
			`(action == Actions.Short_exit.value and self._position == Positions.Neutral) or`
			`(action == Actions.Long_enter.value and self._position == Positions.Long) or`
			`(action == Actions.Long_enter.value and self._position == Positions.Short) or`
			`(action == Actions.Long_exit.value and self._position == Positions.Short) or`
			`(action == Actions.Long_exit.value and self._position == Positions.Neutral))`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
ensure typing, remove unsued code 2022-11-26 11:11:59 +00:00			`def _is_valid(self, action: int) -> bool:`
improve default reward, fix bugs in environment 2022-08-24 16:32:40 +00:00			`# trade signal`
			`"""`
			`Determine if the signal is valid.`
			`e.g.: agent wants a Actions.Long_exit while it is in a Positions.short`
			`"""`
			`# Agent should only try to exit if it is in position`
			`if action in (Actions.Short_exit.value, Actions.Long_exit.value):`
			`if self._position not in (Positions.Short, Positions.Long):`
			`return False`

			`# Agent should only try to enter if it is not in position`
			`if action in (Actions.Short_enter.value, Actions.Long_enter.value):`
			`if self._position != Positions.Neutral:`
			`return False`

			`return True`