stable/freqtrade/freqai/prediction_models/RL/RLPrediction_env_v2.py

import gym
from gym import spaces
from gym.utils import seeding
from enum import Enum
from sklearn.decomposition import PCA, KernelPCA
import random
import numpy as np
import pandas as pd
from collections import deque
import matplotlib.pylab as plt
from typing import Dict, List, Tuple, Type, Optional, Any, Union, Callable
import logging

logger = logging.getLogger(__name__)

# from bokeh.io import output_notebook
# from bokeh.plotting import figure, show
# from bokeh.models import (
#     CustomJS,
#     ColumnDataSource,
#     NumeralTickFormatter,
#     Span,
#     HoverTool,
#     Range1d,
#     DatetimeTickFormatter,
#     Scatter,
#     Label, LabelSet
# )

class Actions(Enum):
    """Legacy three-action scheme.

    Only the non-``_v2`` helpers of ``DEnv`` (``is_tradesignal``, ``_is_trade``,
    ``is_hold``, ``transaction_profit_reward``) compare against these values.
    """
    Short = 0
    Long = 1
    Neutral = 2

class Actions_v2(Enum):
    """Five-action scheme used by ``DEnv``.

    ``DEnv.action_space`` is ``Discrete(len(Actions_v2))``, so the member values
    double as the integer actions passed to ``DEnv.step``. In ``step`` the buy
    actions open the matching position and ``Neutral``/sell actions map to
    ``Positions.Neutral``.
    """
    Neutral = 0
    Long_buy = 1
    Long_sell = 2
    Short_buy = 3
    Short_sell = 4


class Positions(Enum):
    """Market exposure currently held by the environment."""
    Short = 0
    Long = 1
    Neutral = 0.5

    def opposite(self):
        """Return ``Short`` for ``Long``; every other member maps to ``Long``.

        Note that ``Neutral.opposite()`` is therefore ``Long`` as well.
        """
        if self == Positions.Long:
            return Positions.Short
        return Positions.Long

def mean_over_std(x):
    """Return ``mean(x) / std(x)`` using the sample standard deviation (ddof=1).

    Returns 0 when the ratio is undefined: fewer than two samples, zero
    spread, or a NaN standard deviation.

    Args:
        x: array-like of numeric returns.
    """
    values = np.asarray(x)
    # With ddof=1 the deviation of fewer than two samples is undefined; NumPy
    # would emit RuntimeWarnings and yield NaN, so answer directly instead.
    if values.size < 2:
        return 0
    std = np.std(values, ddof=1)
    # `std > 0` is also False for NaN, which keeps the 0 fallback.
    return np.mean(values) / std if std > 0 else 0

class DEnv(gym.Env):

    metadata = {'render.modes': ['human']}

    def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True):
        """Configure spaces and static episode bounds; call ``reset()`` before stepping.

        Args:
            df: 2-D feature table (checked via ``df.ndim``); windows of its rows
                are returned as observations.
            prices: price table positionally aligned with ``df``; its ``open``
                column is what the profit/reward code reads.
            reward_kwargs: mapping that must provide ``"rr"`` and ``"profit_aim"``.
            window_size: number of rows per observation, also the first tick
                of every episode.
            starting_point: consulted by ``reset()`` when padding the position
                history.
        """
        assert df.ndim == 2

        self.seed()

        # input data
        self.df = df
        self.signal_features = df
        self.prices = prices
        self.window_size = window_size
        self.starting_point = starting_point

        # reward configuration
        self.rr = reward_kwargs["rr"]
        self.profit_aim = reward_kwargs["profit_aim"]
        self.fee = 0.0015

        # spaces: one observation is a (window_size, n_features) window
        self.shape = (window_size, self.signal_features.shape[1])
        self.action_space = spaces.Discrete(len(Actions_v2))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)

        # episode bounds
        self._start_tick = self.window_size
        self._end_tick = len(self.prices) - 1

        # per-episode state, populated by reset()
        self._done = None
        self._current_tick = None
        self._last_trade_tick = None
        self._position = Positions.Neutral
        self._position_history = None
        self.total_reward = None
        self._total_profit = None
        self._first_rendering = None
        self.history = None
        self.trade_history = []

        self.r_t_change = 0.
        self.returns_report = []


    def seed(self, seed=None):
        """Create ``self.np_random`` via gym's ``seeding.np_random`` and return ``[seed]``.

        The returned seed is the one reported back by ``seeding.np_random``.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]


    def reset(self):
        """Start a new episode and return the first observation.

        All per-episode bookkeeping is cleared: position, tick counters,
        cumulative reward/profit, trade logs and the per-tick log-return buffer.

        Returns:
            The observation window ending just before ``_start_tick``.
        """
        self._done = False

        # Go flat *before* seeding the position history; otherwise the position
        # left open at the end of the previous episode would be recorded as the
        # starting position of this one.
        self._position = Positions.Neutral
        self._current_tick = self._start_tick
        self._last_trade_tick = None

        # One `None` per tick that precedes the episode, then the start position.
        padding = self._start_tick if self.starting_point else self.window_size
        self._position_history = (padding * [None]) + [self._position]

        self.total_reward = 0.
        self._total_profit = 1.  # unit
        self._first_rendering = True
        self.history = {}
        self.trade_history = []
        self.portfolio_log_returns = np.zeros(len(self.prices))

        self._profits = [(self._start_tick, 1)]
        self.close_trade_profit = []
        self.r_t_change = 0.

        self.returns_report = []

        return self._get_observation()


    def step(self, action):
        """Advance one tick, apply ``action`` and return the gym 4-tuple.

        Order matters: the log return, realised profit and reward are computed
        against the position held *before* this action is applied; only then is
        the position switched.

        Position changes, applied only when ``is_tradesignal_v2`` accepts the
        action:
            Long_buy                        -> Long
            Short_buy                       -> Short
            Neutral, Long_sell, Short_sell  -> Neutral

        The episode ends at the last price tick, or as soon as total profit
        drops below 0.2 of the starting unit.

        Args:
            action: integer value of an ``Actions_v2`` member.

        Returns:
            ``(observation, step_reward, done, info)`` where ``info`` holds the
            tick, cumulative reward, cumulative profit and position value.
        """
        self._current_tick += 1
        self._done = self._current_tick == self._end_tick

        self.update_portfolio_log_returns(action)

        self._update_profit(action)
        step_reward = self._calculate_reward(action)
        self.total_reward += step_reward

        if self.is_tradesignal_v2(action):
            trade_type = None
            if action == Actions_v2.Long_buy.value:
                self._position = Positions.Long
                trade_type = "long"
            elif action == Actions_v2.Short_buy.value:
                self._position = Positions.Short
                trade_type = "short"
            elif (action == Actions_v2.Neutral.value
                  or action == Actions_v2.Long_sell.value
                  or action == Actions_v2.Short_sell.value):
                self._position = Positions.Neutral
                trade_type = "neutral"
            else:
                # Action outside Actions_v2: position is left untouched.
                logger.warning("case not defined: action=%s", action)

            # Update last trade tick
            self._last_trade_tick = self._current_tick

            if trade_type is not None:
                self.trade_history.append(
                    {'price': self.current_price(), 'index': self._current_tick, 'type': trade_type})

        # Stop early once 80% of the starting unit has been lost.
        if self._total_profit < 0.2:
            self._done = True

        self._position_history.append(self._position)
        observation = self._get_observation()
        info = dict(
            tick=self._current_tick,
            total_reward=self.total_reward,
            total_profit=self._total_profit,
            position=self._position.value
        )
        self._update_history(info)

        return observation, step_reward, self._done, info

    
    def processState(self, state):
        """Return ``state.to_numpy()``; ``state`` must expose that method (e.g. a pandas object)."""
        return state.to_numpy()
    
    def convert_mlp_Policy(self, obs_):
        """Unimplemented placeholder: ignores ``obs_`` and returns ``None``."""
        pass

    def _get_observation(self):
        return self.signal_features[(self._current_tick - self.window_size):self._current_tick]
    
    
    def get_unrealized_profit(self):
        """Return the fee-adjusted fractional PnL of the open position.

        The entry price is the ``open`` at ``_last_trade_tick`` and the exit
        price the ``open`` at the current tick, each adjusted by the fee for
        its side. Returns 0. when no trade has been recorded or the
        environment is flat.
        """
        if self._last_trade_tick is None:
            return 0.

        if self._position == Positions.Short:
            # A short sells first (entry) and buys back later (exit).
            exit_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
            entry_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
            return (entry_price - exit_price) / entry_price

        if self._position == Positions.Long:
            exit_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
            entry_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
            return (exit_price - entry_price) / entry_price

        return 0.

    
    def is_tradesignal(self, action):
        """Tell whether a v1 ``Actions`` value would change the current position.

        Returns False only when the action names the position already held
        (Neutral/Neutral, Short/Short, Long/Long); True otherwise.
        """
        held = self._position
        if action == Actions.Neutral.value and held == Positions.Neutral:
            return False
        if action == Actions.Short.value and held == Positions.Short:
            return False
        if action == Actions.Long.value and held == Positions.Long:
            return False
        return True
    
    def is_tradesignal_v2(self, action):
        """Tell whether ``step`` should act on an ``Actions_v2`` value.

        Returns False when:
          * the environment is flat and the action is Neutral, or
          * a Long or Short position is open and the action is any of
            Long_buy, Long_sell, Short_buy or Short_sell.
        Returns True otherwise. In effect, from Neutral every non-Neutral
        action is a signal, and from an open position only Neutral is.
        """
        held = self._position

        if held == Positions.Neutral:
            return not (action == Actions_v2.Neutral.value)

        if held == Positions.Long or held == Positions.Short:
            is_buy_or_sell = (action == Actions_v2.Short_buy.value
                              or action == Actions_v2.Short_sell.value
                              or action == Actions_v2.Long_buy.value
                              or action == Actions_v2.Long_sell.value)
            return not is_buy_or_sell

        return True

    
    def _is_trade(self, action: Actions):
        """Tell whether a v1 ``Actions`` value closes the open position.

        True when Long is requested while Short, Short is requested while
        Long, or Neutral is requested while either position is open.
        """
        held = self._position
        if action == Actions.Long.value and held == Positions.Short:
            return True
        if action == Actions.Short.value and held == Positions.Long:
            return True
        if action == Actions.Neutral.value:
            return held == Positions.Long or held == Positions.Short
        return False

    def _is_trade_v2(self, action: Actions_v2):
        """Tell whether ``action`` should realise the PnL of the open position.

        ``_update_profit`` uses this to decide when to book a closed trade.
        True for these (action, position) pairs:
            (Long_buy, Short), (Short_buy, Long),
            (Neutral, Long), (Neutral, Short),
            (Short_sell, Long), (Long_sell, Short)

        Args:
            action: integer value of an ``Actions_v2`` member.
        """
        held = self._position
        # Compare against `.value` taken from the class. The previous
        # `Actions_v2.Neutral.Short_sell` reached one member through another
        # and compared the integer action to the Enum member itself, which is
        # never equal.
        # NOTE(review): step() does not change position for the sell and
        # opposite-buy pairs below (is_tradesignal_v2 rejects them), so profit
        # is booked while the position stays open; confirm that is intended.
        return ((action == Actions_v2.Long_buy.value and held == Positions.Short) or
                (action == Actions_v2.Short_buy.value and held == Positions.Long) or
                (action == Actions_v2.Neutral.value and held == Positions.Long) or
                (action == Actions_v2.Neutral.value and held == Positions.Short) or
                (action == Actions_v2.Short_sell.value and held == Positions.Long) or
                (action == Actions_v2.Long_sell.value and held == Positions.Short))
    
    
    def is_hold(self, action):
        """True when a v1 ``Actions`` value re-requests the open Short or Long position."""
        held = self._position
        holding_short = action == Actions.Short.value and held == Positions.Short
        return holding_short or (action == Actions.Long.value and held == Positions.Long)
    
    def is_hold_v2(self, action):
        """True when Short_buy arrives while Short, or Long_buy while Long."""
        held = self._position
        holding_short = action == Actions_v2.Short_buy.value and held == Positions.Short
        return holding_short or (action == Actions_v2.Long_buy.value and held == Positions.Long)
    
    
    def add_buy_fee(self, price):
        return price * (1 + self.fee)

    def add_sell_fee(self, price):
        return price / (1 + self.fee)
    
    def _update_history(self, info):
        if not self.history:
            self.history = {key: [] for key in info.keys()}

        for key, value in info.items():
            self.history[key].append(value)


    def render(self, mode='human'):
        """Redraw the price plot and mark the current position on it.

        Short positions are drawn red and Long positions green; Neutral gets
        no marker. The title shows the running reward and profit. ``mode`` is
        accepted for gym compatibility but not used.
        """
        marker_colors = {Positions.Short: 'red', Positions.Long: 'green'}

        def _plot_position(position, tick):
            color = marker_colors.get(position)
            if color:
                plt.scatter(tick, self.prices.loc[tick].open, color=color)

        if self._first_rendering:
            self._first_rendering = False
            plt.cla()
            plt.plot(self.prices)
            _plot_position(self._position_history[self._start_tick], self._start_tick)

        plt.cla()
        plt.plot(self.prices)
        _plot_position(self._position, self._current_tick)

        title = ("Total Reward: %.6f" % self.total_reward) + ' ~ ' + ("Total Profit: %.6f" % self._total_profit)
        plt.suptitle(title)
        plt.pause(0.01)


    def render_all(self):
        """Plot the open price with one marker per recorded position.

        Each entry of ``_position_history`` is drawn one tick earlier than its
        index (grey = Neutral, red = Short, green = Long); ``None`` padding
        entries are skipped.
        """
        plt.figure()
        plt.plot(self.prices['open'], alpha=0.5)

        ticks_by_position = {Positions.Neutral: [], Positions.Short: [], Positions.Long: []}
        for index, held in enumerate(self._position_history):
            if held in ticks_by_position:
                ticks_by_position[held].append(index - 1)

        neutral_ticks = ticks_by_position[Positions.Neutral]
        short_ticks = ticks_by_position[Positions.Short]
        long_ticks = ticks_by_position[Positions.Long]

        plt.plot(neutral_ticks, self.prices.loc[neutral_ticks].open,
                 'o', color='grey', ms=3, alpha=0.1)
        plt.plot(short_ticks, self.prices.loc[short_ticks].open,
                 'o', color='r', ms=3, alpha=0.8)
        plt.plot(long_ticks, self.prices.loc[long_ticks].open,
                 'o', color='g', ms=3, alpha=0.8)

        plt.suptitle("Generalising")
        plt.gcf().set_size_inches(15, 10)


    def close_trade_report(self):
        small_trade = 0
        positive_big_trade = 0
        negative_big_trade = 0
        small_profit = 0.003
        for i in self.close_trade_profit:
            if i < small_profit and i > -small_profit:
                small_trade+=1
            elif i > small_profit:
                positive_big_trade += 1
            elif i < -small_profit:
                negative_big_trade += 1
        print(f"small trade={small_trade/len(self.close_trade_profit)}; positive_big_trade={positive_big_trade/len(self.close_trade_profit)}; negative_big_trade={negative_big_trade/len(self.close_trade_profit)}")


    def report(self):
        """Print a performance table for the episode and return its key figures.

        "Total Trade" counts the long and short entries in ``trade_history``.
        The winrate is the share of closed trades with positive PnL among
        those with non-zero PnL; it is reported as 0 when there are none,
        instead of raising ZeroDivisionError.

        Returns:
            dict with the formatted strings ``"Start"``, ``"End"``, ``"Sharp"``
            and ``"Winrate"``.
        """
        from tabulate import tabulate

        total_trade = sum(
            1 for trade in self.trade_history if trade['type'] in ('long', 'short'))

        positive_trade = sum(1 for pnl in self.close_trade_profit if pnl > 0.)
        negative_trade = sum(1 for pnl in self.close_trade_profit if pnl < 0.)
        total_trade_lr = negative_trade + positive_trade
        winrate = positive_trade * 100 / total_trade_lr if total_trade_lr else 0.

        sharp_ratio = self.sharpe_ratio()
        sharp_log = self.get_sharpe_ratio()

        headers = ["Performance", ""]
        performanceTable = [["Total Trade", "{0:.2f}".format(total_trade)],
                            ["Total reward", "{0:.3f}".format(self.total_reward)],
                            ["Start profit(unit)", "{0:.2f}".format(1.)],
                            ["End profit(unit)", "{0:.3f}".format(self._total_profit)],
                            ["Sharp ratio", "{0:.3f}".format(sharp_ratio)],
                            ["Sharp log", "{0:.3f}".format(sharp_log)],
                            ["winrate", "{0:.2f}".format(winrate) + '%']
                            ]
        tabulation = tabulate(performanceTable, headers, tablefmt="fancy_grid", stralign="center")
        print(tabulation)

        result = {
            "Start": "{0:.2f}".format(1.),
            "End": "{0:.2f}".format(self._total_profit),
            "Sharp": "{0:.3f}".format(sharp_ratio),
            "Winrate": "{0:.2f}".format(winrate)
        }
        return result
        
    def close(self):
        """Close the current matplotlib figure via ``plt.close()``."""
        plt.close()
        
    def get_sharpe_ratio(self):
        """Return ``mean_over_std`` of the per-tick portfolio log returns recorded so far."""
        return mean_over_std(self.get_portfolio_log_returns())


    def save_rendering(self, filepath):
        """Save the current matplotlib figure to ``filepath`` via ``plt.savefig``."""
        plt.savefig(filepath)


    def pause_rendering(self):
        """Call ``plt.show()`` to display the open figure(s)."""
        plt.show()
    

    def _calculate_reward(self, action):
        # rw = self.transaction_profit_reward(action)
        #rw = self.reward_rr_profit_config(action)
        rw = self.reward_rr_profit_config_v2(action)
        return rw


    def _update_profit(self, action):
        """Book the open position's PnL when ``action`` closes it or the episode ends.

        When a Long or Short position is held, ``_total_profit`` is compounded
        by the fee-adjusted PnL, the new total is appended to ``_profits`` and
        the PnL to ``close_trade_profit``. Nothing is booked while flat.
        """
        if not (self._is_trade_v2(action) or self._done):
            return

        pnl = self.get_unrealized_profit()

        # Long and Short are booked identically; the sign lives in `pnl`.
        if self._position == Positions.Long or self._position == Positions.Short:
            self._total_profit = self._total_profit + self._total_profit*pnl
            self._profits.append((self._current_tick, self._total_profit))
            self.close_trade_profit.append(pnl)


    def most_recent_return(self, action):
        """Return the one-tick log return of the position currently held.

        Long positions earn ``log(current) - log(previous)`` and Short
        positions the negative of it, both on ``open`` prices; a flat
        position earns 0.

        Fees are folded into the prices:
          * the current price is fee-adjusted when ``action`` is Neutral or
            the opposite side's buy action (Short_buy while Long, Long_buy
            while Short);
          * the previous price is fee-adjusted when the position recorded for
            the previous tick differs from the one held now.
        """
        tick = self._current_tick

        # Long positions
        if self._position == Positions.Long:
            current_price = self.prices.iloc[tick].open
            if action == Actions_v2.Short_buy.value or action == Actions_v2.Neutral.value:
                current_price = self.add_sell_fee(current_price)

            previous_price = self.prices.iloc[tick - 1].open
            previous_position = self._position_history[tick - 1]
            if previous_position == Positions.Short or previous_position == Positions.Neutral:
                previous_price = self.add_buy_fee(previous_price)

            return np.log(current_price) - np.log(previous_price)

        # Short positions
        if self._position == Positions.Short:
            current_price = self.prices.iloc[tick].open
            if action == Actions_v2.Long_buy.value or action == Actions_v2.Neutral.value:
                current_price = self.add_buy_fee(current_price)

            previous_price = self.prices.iloc[tick - 1].open
            previous_position = self._position_history[tick - 1]
            if previous_position == Positions.Long or previous_position == Positions.Neutral:
                previous_price = self.add_sell_fee(previous_price)

            return np.log(previous_price) - np.log(current_price)

        return 0
    
    def get_portfolio_log_returns(self):
        return self.portfolio_log_returns[1:self._current_tick + 1]
    
    
    def get_trading_log_return(self):
        return self.portfolio_log_returns[self._start_tick:]

    def update_portfolio_log_returns(self, action):
        self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
        
    def current_price(self) -> float:
        return self.prices.iloc[self._current_tick].open
    
    def prev_price(self) -> float:
        return self.prices.iloc[self._current_tick-1].open


    def sharpe_ratio(self):
        if len(self.close_trade_profit) == 0:
            return 0.
        returns = np.array(self.close_trade_profit)
        reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9)
        return reward
    
    def get_bnh_log_return(self):
        return np.diff(np.log(self.prices['open'][self._start_tick:]))

        
    def transaction_profit_reward(self, action):
        """Reward for the v1 ``Actions`` scheme: signed fractional return, clipped to [0, 1].

        When ``action`` closes the open position, the return is measured from
        the last trade tick with fees on both legs. Otherwise it is the
        one-tick ``open`` price change, signed by the position (+1 Long,
        -1 Short, 0 flat).
        """
        previous_open = self.prev_price()
        current_open = self.current_price()

        # Direction of exposure: +1 Long, -1 Short, 0 flat.
        a_t = {Positions.Long: 1, Positions.Short: -1}.get(self._position, 0)

        wants_out = action == Actions.Neutral.value
        closes_long = (action == Actions.Short.value or wants_out) and self._position == Positions.Long
        closes_short = (action == Actions.Long.value or wants_out) and self._position == Positions.Short

        if closes_long:
            exit_price = self.add_sell_fee(self.current_price())
            entry_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
            rw = a_t*(exit_price - entry_price)/entry_price
        elif closes_short:
            exit_price = self.add_buy_fee(self.current_price())
            entry_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
            rw = a_t*(exit_price - entry_price)/entry_price
        else:
            rw = a_t*(current_open - previous_open)/previous_open

        return np.clip(rw, 0, 1)
    

    def reward_rr_profit_config_v2(self, action):
        """Grade ``action`` by the profit of the most recently closed trade.

        Only applies while a Long or Short position is held and at least one
        trade has been closed; otherwise the raw reward is 0. With
        ``target = profit_aim * rr`` and ``p = close_trade_profit[-1]``:

            opposite-side buy (Short_buy while Long, Long_buy while Short):
                p > target -> 20, 0 < p <= target -> 10
            same-side sell (Long_sell while Long, Short_sell while Short):
                p > target -> 50, 0 < p <= target -> 30
            either of the above:
                p < -target -> -30, -target <= p < 0 -> -10
            Neutral:
                p > 0 -> 2, p < 0 -> -2

        The raw reward is then clipped to [0, 1].

        Args:
            action: integer value of an ``Actions_v2`` member.
        """
        rw = 0.

        if len(self.close_trade_profit) > 0:
            last_profit = self.close_trade_profit[-1]

            if self._position == Positions.Long:
                reverse_action = Actions_v2.Short_buy.value
                exit_action = Actions_v2.Long_sell.value
            elif self._position == Positions.Short:
                reverse_action = Actions_v2.Long_buy.value
                exit_action = Actions_v2.Short_sell.value
            else:
                reverse_action = exit_action = None

            if reverse_action is not None:
                if action == reverse_action:
                    rw = self._graded_close_reward(last_profit, 2, 1)
                elif action == exit_action:
                    rw = self._graded_close_reward(last_profit, 5, 3)
                elif action == Actions_v2.Neutral.value:
                    if last_profit > 0:
                        rw = 2
                    elif last_profit < 0:
                        rw = 2 * -1

        # NOTE(review): this clip collapses every grade above to 0 or 1, so
        # losses are never penalised and win sizes are indistinguishable.
        # Kept to preserve the existing reward scale; confirm it is intended.
        return np.clip(rw, 0, 1)

    def _graded_close_reward(self, last_profit, big_win, small_win):
        """Map ``last_profit`` onto the reward bands around +/- ``profit_aim * rr``."""
        target = self.profit_aim * self.rr
        if last_profit > target:
            return 10 * big_win
        if last_profit > 0:
            return 10 * small_win
        # Test the large-loss band before the generic loss so it stays reachable.
        if last_profit < -target:
            return 10 * 3 * -1
        if last_profit < 0:
            return 10 * -1
        return 0.
callback function and TDQN model added 2022-08-13 15:48:58 +00:00			`import gym`
			`from gym import spaces`
			`from gym.utils import seeding`
			`from enum import Enum`
			`from sklearn.decomposition import PCA, KernelPCA`
			`import random`
			`import numpy as np`
			`import pandas as pd`
			`from collections import deque`
			`import matplotlib.pylab as plt`
			`from typing import Dict, List, Tuple, Type, Optional, Any, Union, Callable`
			`import logging`

			`logger = logging.getLogger(__name__)`

			`# from bokeh.io import output_notebook`
			`# from bokeh.plotting import figure, show`
			`# from bokeh.models import (`
			`# CustomJS,`
			`# ColumnDataSource,`
			`# NumeralTickFormatter,`
			`# Span,`
			`# HoverTool,`
			`# Range1d,`
			`# DatetimeTickFormatter,`
			`# Scatter,`
			`# Label, LabelSet`
			`# )`

			`class Actions(Enum):`
			`Short = 0`
			`Long = 1`
			`Neutral = 2`

			`class Actions_v2(Enum):`
			`Neutral = 0`
			`Long_buy = 1`
			`Long_sell = 2`
			`Short_buy = 3`
			`Short_sell = 4`


			`class Positions(Enum):`
			`Short = 0`
			`Long = 1`
			`Neutral = 0.5`

			`def opposite(self):`
			`return Positions.Short if self == Positions.Long else Positions.Long`

			`def mean_over_std(x):`
			`std = np.std(x, ddof=1)`
			`mean = np.mean(x)`
			`return mean / std if std > 0 else 0`

			`class DEnv(gym.Env):`

			`metadata = {'render.modes': ['human']}`

			`def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, ):`
			`assert df.ndim == 2`

			`self.seed()`
			`self.df = df`
			`self.signal_features = self.df`
			`self.prices = prices`
			`self.window_size = window_size`
			`self.starting_point = starting_point`
			`self.rr = reward_kwargs["rr"]`
			`self.profit_aim = reward_kwargs["profit_aim"]`

			`self.fee=0.0015`

			`# # spaces`
			`self.shape = (window_size, self.signal_features.shape[1])`
			`self.action_space = spaces.Discrete(len(Actions_v2))`
			`self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)`

			`# episode`
			`self._start_tick = self.window_size`
			`self._end_tick = len(self.prices) - 1`
			`self._done = None`
			`self._current_tick = None`
			`self._last_trade_tick = None`
			`self._position = Positions.Neutral`
			`self._position_history = None`
			`self.total_reward = None`
			`self._total_profit = None`
			`self._first_rendering = None`
			`self.history = None`
			`self.trade_history = []`

			`# self.A_t, self.B_t = 0.000639, 0.00001954`
			`self.r_t_change = 0.`

			`self.returns_report = []`


			`def seed(self, seed=None):`
			`self.np_random, seed = seeding.np_random(seed)`
			`return [seed]`


			`def reset(self):`

			`self._done = False`

			`if self.starting_point == True:`
			`self._position_history = (self._start_tick* [None]) + [self._position]`
			`else:`
			`self._position_history = (self.window_size * [None]) + [self._position]`

			`self._current_tick = self._start_tick`
			`self._last_trade_tick = None`
			`#self._last_trade_tick = self._current_tick - 1`
			`self._position = Positions.Neutral`

			`self.total_reward = 0.`
			`self._total_profit = 1. # unit`
			`self._first_rendering = True`
			`self.history = {}`
			`self.trade_history = []`
			`self.portfolio_log_returns = np.zeros(len(self.prices))`


			`self._profits = [(self._start_tick, 1)]`
			`self.close_trade_profit = []`
			`self.r_t_change = 0.`

			`self.returns_report = []`

			`return self._get_observation()`


			`def step(self, action):`
			`self._done = False`
			`self._current_tick += 1`

			`if self._current_tick == self._end_tick:`
			`self._done = True`

			`self.update_portfolio_log_returns(action)`

			`self._update_profit(action)`
			`step_reward = self._calculate_reward(action)`
			`self.total_reward += step_reward`





			`trade_type = None`
			`if self.is_tradesignal_v2(action): # exclude 3 case not trade`
			`# Update position`
			`"""`
			`Action: Neutral, position: Long -> Close Long`
			`Action: Neutral, position: Short -> Close Short`

			`Action: Long, position: Neutral -> Open Long`
			`Action: Long, position: Short -> Close Short and Open Long`

			`Action: Short, position: Neutral -> Open Short`
			`Action: Short, position: Long -> Close Long and Open Short`
			`"""`


			`temp_position = self._position`
			`if action == Actions_v2.Neutral.value:`
			`self._position = Positions.Neutral`
			`trade_type = "neutral"`
			`elif action == Actions_v2.Long_buy.value:`
			`self._position = Positions.Long`
			`trade_type = "long"`
			`elif action == Actions_v2.Short_buy.value:`
			`self._position = Positions.Short`
			`trade_type = "short"`
			`elif action == Actions_v2.Long_sell.value:`
			`self._position = Positions.Neutral`
			`trade_type = "neutral"`
			`elif action == Actions_v2.Short_sell.value:`
			`self._position = Positions.Neutral`
			`trade_type = "neutral"`
			`else:`
			`print("case not defined")`

			`# Update last trade tick`
			`self._last_trade_tick = self._current_tick`

			`if trade_type != None:`
			`self.trade_history.append(`
			`{'price': self.current_price(), 'index': self._current_tick, 'type': trade_type})`

			`if self._total_profit < 0.2:`
			`self._done = True`

			`self._position_history.append(self._position)`
			`observation = self._get_observation()`
			`info = dict(`
			`tick = self._current_tick,`
			`total_reward = self.total_reward,`
			`total_profit = self._total_profit,`
			`position = self._position.value`
			`)`
			`self._update_history(info)`

			`return observation, step_reward, self._done, info`


			`def processState(self, state):`
			`return state.to_numpy()`

			`def convert_mlp_Policy(self, obs_):`
			`pass`

			`def _get_observation(self):`
			`return self.signal_features[(self._current_tick - self.window_size):self._current_tick]`


			`def get_unrealized_profit(self):`

			`if self._last_trade_tick == None:`
			`return 0.`

			`if self._position == Positions.Neutral:`
			`return 0.`
			`elif self._position == Positions.Short:`
			`current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)`
			`last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)`
			`return (last_trade_price - current_price)/last_trade_price`
			`elif self._position == Positions.Long:`
			`current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)`
			`last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)`
			`return (current_price - last_trade_price)/last_trade_price`
			`else:`
			`return 0.`


			`def is_tradesignal(self, action):`
			`# trade signal`
			`"""`
			`not trade signal is :`
			`Action: Neutral, position: Neutral -> Nothing`
			`Action: Long, position: Long -> Hold Long`
			`Action: Short, position: Short -> Hold Short`
			`"""`
			`return not ((action == Actions.Neutral.value and self._position == Positions.Neutral)`
			`or (action == Actions.Short.value and self._position == Positions.Short)`
			`or (action == Actions.Long.value and self._position == Positions.Long))`

			`def is_tradesignal_v2(self, action):`
			`# trade signal`
			`"""`
			`not trade signal is :`
			`Action: Neutral, position: Neutral -> Nothing`
			`Action: Long, position: Long -> Hold Long`
			`Action: Short, position: Short -> Hold Short`
			`"""`
			`return not ((action == Actions_v2.Neutral.value and self._position == Positions.Neutral) or`
			`(action == Actions_v2.Short_buy.value and self._position == Positions.Short) or`
			`(action == Actions_v2.Short_sell.value and self._position == Positions.Short) or`
			`(action == Actions_v2.Short_buy.value and self._position == Positions.Long) or`
			`(action == Actions_v2.Short_sell.value and self._position == Positions.Long) or`

			`(action == Actions_v2.Long_buy.value and self._position == Positions.Long) or`
			`(action == Actions_v2.Long_sell.value and self._position == Positions.Long) or`
			`(action == Actions_v2.Long_buy.value and self._position == Positions.Short) or`
			`(action == Actions_v2.Long_sell.value and self._position == Positions.Short))`



			def _is_trade(self, action: Actions):
				"""
				Tell whether an action leaves an open position (3-action scheme).

				A Long position is left by Short or Neutral, a Short position by
				Long or Neutral. With any other position nothing is left.

				:param action: raw action value, compared against Actions.<member>.value
				:return: True when the action exits the currently held Long/Short
				"""
				if self._position == Positions.Short:
					return action in (Actions.Long.value, Actions.Neutral.value)
				if self._position == Positions.Long:
					return action in (Actions.Short.value, Actions.Neutral.value)
				return False

			def _is_trade_v2(self, action: Actions_v2):
				"""
				Tell whether an action leaves an open position (5-action scheme).

				A Long position is left by Neutral, Short_buy or Short_sell; a Short
				position is left by Neutral, Long_buy or Long_sell. With any other
				position nothing is left.

				:param action: raw action value, compared against Actions_v2.<member>.value
				:return: True when the action exits the currently held Long/Short
				"""
				# Compare against .value of the members themselves: the raw action is
				# never equal to an enum member, and reaching a member through another
				# member (Actions_v2.Neutral.Short_sell) is not portable across
				# Python versions.
				# NOTE(review): Long_sell while Long and Short_sell while Short are not
				# treated as trades here, although reward_rr_profit_config_v2 grades
				# them - confirm against the step logic.
				exits_long = (
					Actions_v2.Neutral.value,
					Actions_v2.Short_buy.value,
					Actions_v2.Short_sell.value,
				)
				exits_short = (
					Actions_v2.Neutral.value,
					Actions_v2.Long_buy.value,
					Actions_v2.Long_sell.value,
				)
				if self._position == Positions.Long:
					return action in exits_long
				if self._position == Positions.Short:
					return action in exits_short
				return False


			def is_hold(self, action):
				"""
				Tell whether an action keeps the open position (3-action scheme).

				:param action: raw action value, compared against Actions.<member>.value
				:return: truthy for Short while Short, or Long while Long
				"""
				if self._position == Positions.Short:
					return action == Actions.Short.value
				if self._position == Positions.Long:
					return action == Actions.Long.value
				return False

			def is_hold_v2(self, action):
				"""
				Tell whether an action keeps the open position (5-action scheme).

				:param action: raw action value, compared against Actions_v2.<member>.value
				:return: truthy for Short_buy while Short, or Long_buy while Long
				"""
				if self._position == Positions.Short:
					return action == Actions_v2.Short_buy.value
				if self._position == Positions.Long:
					return action == Actions_v2.Long_buy.value
				return False


			def add_buy_fee(self, price):
				"""
				Return the price marked up by the fee.

				:param price: price before the fee
				:return: price * (1 + self.fee)
				"""
				markup = 1 + self.fee
				return price * markup

			def add_sell_fee(self, price):
				"""
				Return the price marked down by the fee.

				:param price: price before the fee
				:return: price / (1 + self.fee)
				"""
				markup = 1 + self.fee
				return price / markup

			`def _update_history(self, info):`
			`if not self.history:`
			`self.history = {key: [] for key in info.keys()}`

			`for key, value in info.items():`
			`self.history[key].append(value)`


			def render(self, mode='human'):
				"""
				Redraw the price plot with a marker for the current position.

				On the first call the start position is plotted as well. Afterwards
				the axes are cleared, the prices are plotted again, the current
				position is marked and the figure title shows the running totals.

				:param mode: accepted for the gym interface; not read here
				"""

				def _plot_position(position, tick):
					# Only Short (red) and Long (green) get a marker.
					if position == Positions.Short:
						marker_color = 'red'
					elif position == Positions.Long:
						marker_color = 'green'
					else:
						return
					plt.scatter(tick, self.prices.loc[tick].open, color=marker_color)

				if self._first_rendering:
					self._first_rendering = False
					plt.cla()
					plt.plot(self.prices)
					_plot_position(self._position_history[self._start_tick], self._start_tick)

				plt.cla()
				plt.plot(self.prices)
				_plot_position(self._position, self._current_tick)

				reward_text = "Total Reward: %.6f" % self.total_reward
				profit_text = "Total Profit: %.6f" % self._total_profit
				plt.suptitle(reward_text + ' ~ ' + profit_text)
				plt.pause(0.01)


			def render_all(self):
				"""
				Plot the open prices with one marker per recorded position.

				Neutral entries are drawn in grey, Short in red and Long in green.
				Each marker is placed one tick before the index at which the
				position is stored in self._position_history.
				"""
				plt.figure()
				history = self._position_history
				shifted_ticks = np.arange(len(history)) - 1
				plt.plot(self.prices['open'], alpha=0.5)

				def _ticks_where(wanted):
					# Shifted ticks of all history entries equal to the wanted position.
					return [tick for tick, held in zip(shifted_ticks, history) if held == wanted]

				short_ticks = _ticks_where(Positions.Short)
				long_ticks = _ticks_where(Positions.Long)
				neutral_ticks = _ticks_where(Positions.Neutral)

				plt.plot(neutral_ticks, self.prices.loc[neutral_ticks].open,
						 'o', color='grey', ms=3, alpha=0.1)
				plt.plot(short_ticks, self.prices.loc[short_ticks].open,
						 'o', color='r', ms=3, alpha=0.8)
				plt.plot(long_ticks, self.prices.loc[long_ticks].open,
						 'o', color='g', ms=3, alpha=0.8)

				plt.suptitle("Generalising")
				plt.gcf().set_size_inches(15, 10)




			def close_trade_report(self):
				"""
				Print the share of small, big winning and big losing closed trades.

				A closed trade is "small" when its profit lies within
				[-0.003, 0.003] (bounds included, so the three shares add up to 1),
				"positive big" above that band and "negative big" below it.
				With no closed trades all three shares are printed as 0.0 instead of
				raising ZeroDivisionError.
				"""
				small_profit = 0.003
				total = len(self.close_trade_profit)

				small_trade = 0
				positive_big_trade = 0
				negative_big_trade = 0
				for profit in self.close_trade_profit:
					if profit > small_profit:
						positive_big_trade += 1
					elif profit < -small_profit:
						negative_big_trade += 1
					else:
						small_trade += 1

				def _share(count):
					# Fraction of all closed trades; 0.0 when there are none.
					return count / total if total else 0.0

				print(f"small trade={_share(small_trade)}; positive_big_trade={_share(positive_big_trade)}; negative_big_trade={_share(negative_big_trade)}")


			def report(self):
				"""
				Print a performance table and return a summary of the episode.

				Counts the 'long' and 'short' entries of self.trade_history, derives
				the win rate from self.close_trade_profit (trades with a profit of
				exactly 0 count as neither win nor loss) and reports both Sharpe
				figures. When no closed trade has a non-zero profit the win rate is
				reported as 0 instead of raising ZeroDivisionError.

				:return: dict with the formatted "Start", "End", "Sharp" and
				    "Winrate" values
				"""
				# get total trade
				long_trade = sum(1 for trade in self.trade_history if trade['type'] == 'long')
				short_trade = sum(1 for trade in self.trade_history if trade['type'] == 'short')
				total_trade = long_trade + short_trade

				negative_trade = sum(1 for pnl in self.close_trade_profit if pnl < 0.)
				positive_trade = sum(1 for pnl in self.close_trade_profit if pnl > 0.)
				total_trade_lr = negative_trade + positive_trade
				# Guard the division: there may be no winning or losing trade yet.
				winrate = positive_trade * 100 / total_trade_lr if total_trade_lr else 0.

				sharp_ratio = self.sharpe_ratio()
				sharp_log = self.get_sharpe_ratio()

				from tabulate import tabulate

				headers = ["Performance", ""]
				performanceTable = [["Total Trade", "{0:.2f}".format(total_trade)],
									["Total reward", "{0:.3f}".format(self.total_reward)],
									["Start profit(unit)", "{0:.2f}".format(1.)],
									["End profit(unit)", "{0:.3f}".format(self._total_profit)],
									["Sharp ratio", "{0:.3f}".format(sharp_ratio)],
									["Sharp log", "{0:.3f}".format(sharp_log)],
									["winrate", "{0:.2f}".format(winrate) + '%']
									]
				tabulation = tabulate(performanceTable, headers, tablefmt="fancy_grid", stralign="center")
				print(tabulation)

				result = {
					"Start": "{0:.2f}".format(1.),
					"End": "{0:.2f}".format(self._total_profit),
					"Sharp": "{0:.3f}".format(sharp_ratio),
					"Winrate": "{0:.2f}".format(winrate)
				}
				return result

			def close(self):
				"""Close the current matplotlib figure."""
				plt.close()

			def get_sharpe_ratio(self):
				"""
				Return mean_over_std of the portfolio log returns recorded so far.

				:return: result of mean_over_std(self.get_portfolio_log_returns())
				"""
				log_returns = self.get_portfolio_log_returns()
				return mean_over_std(log_returns)


			def save_rendering(self, filepath):
				"""
				Save the current matplotlib figure.

				:param filepath: destination handed to plt.savefig
				"""
				plt.savefig(filepath)


			def pause_rendering(self):
				"""Show the matplotlib figure(s) via plt.show()."""
				plt.show()


			`def _calculate_reward(self, action):`
			`# rw = self.transaction_profit_reward(action)`
			`#rw = self.reward_rr_profit_config(action)`
			`rw = self.reward_rr_profit_config_v2(action)`
			`return rw`


			def _update_profit(self, action):
				"""
				Book the unrealized profit when a position is closed.

				Runs only when the action is a trade (per _is_trade_v2) or the
				episode is done. If a Long or Short position is held, the total
				profit is compounded with the unrealized profit, the new total is
				recorded with the current tick, and the trade profit is stored in
				self.close_trade_profit.

				:param action: raw action value of the current step
				"""
				if not (self._is_trade_v2(action) or self._done):
					return

				pnl = self.get_unrealized_profit()

				# Long and Short are booked the same way; other positions book nothing.
				holding = self._position == Positions.Long or self._position == Positions.Short
				if not holding:
					return

				self._total_profit += self._total_profit * pnl
				self._profits.append((self._current_tick, self._total_profit))
				self.close_trade_profit.append(pnl)


			def most_recent_return(self, action):
				"""
				Return the log return of the last tick for the held position.

				Long positions earn log(current) - log(previous), Short positions
				log(previous) - log(current), both on open prices. Fees are applied:
				- Long: sell fee on the current price when the action is Short_buy or
				  Neutral; buy fee on the previous price when the previous position
				  was Short or Neutral.
				- Short: buy fee on the current price when the action is Long_buy or
				  Neutral; sell fee on the previous price when the previous position
				  was Long or Neutral.

				:param action: raw action value, compared against Actions_v2.<member>.value
				:return: the log return, or 0 when the position is neither Long nor Short
				"""
				position = self._position
				if position != Positions.Long and position != Positions.Short:
					return 0

				tick = self._current_tick
				current_price = self.prices.iloc[tick].open
				previous_price = self.prices.iloc[tick - 1].open
				previous_position = self._position_history[tick - 1]
				going_neutral = action == Actions_v2.Neutral.value

				# Long positions
				if position == Positions.Long:
					if action == Actions_v2.Short_buy.value or going_neutral:
						current_price = self.add_sell_fee(current_price)
					if (previous_position == Positions.Short
							or previous_position == Positions.Neutral):
						previous_price = self.add_buy_fee(previous_price)
					return np.log(current_price) - np.log(previous_price)

				# Short positions
				if action == Actions_v2.Long_buy.value or going_neutral:
					current_price = self.add_buy_fee(current_price)
				if (previous_position == Positions.Long
						or previous_position == Positions.Neutral):
					previous_price = self.add_sell_fee(previous_price)
				return np.log(previous_price) - np.log(current_price)

			def get_portfolio_log_returns(self):
				"""
				Return the recorded log returns from index 1 up to the current tick.

				:return: self.portfolio_log_returns[1 : current tick + 1]
				"""
				stop = self._current_tick + 1
				return self.portfolio_log_returns[1:stop]


			def get_trading_log_return(self):
				"""
				Return the recorded log returns from the start tick onwards.

				:return: self.portfolio_log_returns[start tick :]
				"""
				first = self._start_tick
				return self.portfolio_log_returns[first:]

			def update_portfolio_log_returns(self, action):
				"""
				Store the most recent return at the current tick.

				:param action: raw action value forwarded to most_recent_return
				"""
				latest_return = self.most_recent_return(action)
				self.portfolio_log_returns[self._current_tick] = latest_return

			def current_price(self) -> float:
				"""
				Return the open price at the current tick.

				:return: the 'open' value of the row of self.prices at self._current_tick
				"""
				row = self.prices.iloc[self._current_tick]
				return row.open

			def prev_price(self) -> float:
				"""
				Return the open price one tick before the current tick.

				:return: the 'open' value of the row of self.prices at self._current_tick - 1
				"""
				row = self.prices.iloc[self._current_tick - 1]
				return row.open



			def sharpe_ratio(self):
				"""
				Return a Sharpe-style ratio of the closed-trade profits.

				Computed as (mean - 0 + 1e-9) / (std + 1e-9) over
				self.close_trade_profit, using the population standard deviation
				(numpy's default).

				:return: the ratio, or 0. when no trade has been closed
				"""
				if not self.close_trade_profit:
					return 0.

				epsilon = 1e-9
				risk_free = 0.
				trade_returns = np.array(self.close_trade_profit)
				excess_mean = np.mean(trade_returns) - risk_free + epsilon
				return excess_mean / (np.std(trade_returns) + epsilon)

			def get_bnh_log_return(self):
				"""
				Return the tick-to-tick log returns of the open price.

				Covers the open prices from the start tick onwards, i.e. the
				differences of consecutive log prices.

				:return: result of np.diff over the log open prices
				"""
				open_prices = self.prices['open'][self._start_tick:]
				log_prices = np.log(open_prices)
				return np.diff(log_prices)


			def transaction_profit_reward(self, action):
				"""
				Reward based on the profit of the transaction (3-action scheme).

				- Closing a Long (action Short or Neutral): relative change between
				  the fee-adjusted entry price at the last trade tick and the
				  fee-adjusted current price.
				- Closing a Short (action Long or Neutral): the same, with the sign
				  reversed.
				- Otherwise: signed one-tick relative price change (0 when the
				  position is neither Long nor Short).

				:param action: raw action value, compared against Actions.<member>.value
				:return: the reward clipped to the range [0, 1]
				"""
				previous_open = self.prev_price()
				current_open = self.current_price()

				in_long = self._position == Positions.Long
				in_short = self._position == Positions.Short
				if in_long:
					direction = 1
				elif in_short:
					direction = -1
				else:
					direction = 0

				going_neutral = action == Actions.Neutral.value
				entry_open = None
				if in_long and (action == Actions.Short.value or going_neutral):
					# close long
					exit_price = self.add_sell_fee(current_open)
					entry_open = self.prices.iloc[self._last_trade_tick].open
					entry_price = self.add_buy_fee(entry_open)
				elif in_short and (action == Actions.Long.value or going_neutral):
					# close short
					exit_price = self.add_buy_fee(current_open)
					entry_open = self.prices.iloc[self._last_trade_tick].open
					entry_price = self.add_sell_fee(entry_open)
				else:
					# no position is closed: compare with the previous tick
					exit_price = current_open
					entry_price = previous_open

				reward = direction * (exit_price - entry_price) / entry_price
				return np.clip(reward, 0, 1)



			def reward_rr_profit_config_v2(self, action):
				"""
				Grade an action from the profit of the most recently closed trade.

				With target = self.profit_aim * self.rr and p = the last entry of
				self.close_trade_profit, the raw grade is:

				- Long position, action Short_buy / Short position, action Long_buy:
				  20 if p > target, 10 if 0 < p <= target,
				  -10 if -target <= p < 0, -30 if p < -target.
				- Long position, action Long_sell / Short position, action Short_sell:
				  50 if p > target, 30 if 0 < p <= target,
				  -10 if -target <= p < 0, -30 if p < -target.
				- Long or Short position, action Neutral: 2 if p > 0, -2 if p < 0.
				- Anything else, or no closed trade yet: 0.

				:param action: raw action value, compared against Actions_v2.<member>.value
				:return: the raw grade clipped to the range [0, 1]
				"""

				def _graded(profit, target, big_win, small_win):
					# Test the outer tiers first so the big-loss tier stays reachable.
					if profit > target:
						return big_win
					if profit < -target:
						return 10 * 3 * -1
					if profit > 0:
						return small_win
					if profit < 0:
						return 10 * -1
					return 0.

				def _flat(profit):
					# Small symmetric grade for going Neutral out of a position.
					if profit > 0:
						return 2
					if profit < 0:
						return 2 * -1
					return 0.

				rw = 0.

				if self.close_trade_profit:
					last_profit = self.close_trade_profit[-1]
					target = self.profit_aim * self.rr

					if self._position == Positions.Long:
						reversing, closing = Actions_v2.Short_buy, Actions_v2.Long_sell
					elif self._position == Positions.Short:
						reversing, closing = Actions_v2.Long_buy, Actions_v2.Short_sell
					else:
						reversing = closing = None

					if reversing is not None:
						if action == reversing.value:
							rw = _graded(last_profit, target, 10 * 2, 10 * 1 * 1)
						elif action == closing.value:
							rw = _graded(last_profit, target, 10 * 5, 10 * 1 * 3)
						elif action == Actions_v2.Neutral.value:
							rw = _flat(last_profit)

				# NOTE(review): this clip maps every positive grade to 1 and every
				# negative grade to 0, so the tier sizes above are not visible to the
				# caller - confirm that this is intended.
				return np.clip(rw, 0, 1)