Merge pull request #7289 from freqtrade/feat/freqai-rl-dev
Add reinforcement learning module to FreqAI
This commit is contained in:
135
freqtrade/freqai/RL/Base4ActionRLEnv.py
Normal file
135
freqtrade/freqai/RL/Base4ActionRLEnv.py
Normal file
@@ -0,0 +1,135 @@
|
||||
import logging
|
||||
from enum import Enum
|
||||
|
||||
from gym import spaces
|
||||
|
||||
from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Actions(Enum):
|
||||
Neutral = 0
|
||||
Exit = 1
|
||||
Long_enter = 2
|
||||
Short_enter = 3
|
||||
|
||||
|
||||
class Base4ActionRLEnv(BaseEnvironment):
|
||||
"""
|
||||
Base class for a 4 action environment
|
||||
"""
|
||||
|
||||
def set_action_space(self):
|
||||
self.action_space = spaces.Discrete(len(Actions))
|
||||
|
||||
def step(self, action: int):
|
||||
"""
|
||||
Logic for a single step (incrementing one candle in time)
|
||||
by the agent
|
||||
:param: action: int = the action type that the agent plans
|
||||
to take for the current step.
|
||||
:returns:
|
||||
observation = current state of environment
|
||||
step_reward = the reward from `calculate_reward()`
|
||||
_done = if the agent "died" or if the candles finished
|
||||
info = dict passed back to openai gym lib
|
||||
"""
|
||||
self._done = False
|
||||
self._current_tick += 1
|
||||
|
||||
if self._current_tick == self._end_tick:
|
||||
self._done = True
|
||||
|
||||
self._update_unrealized_total_profit()
|
||||
|
||||
step_reward = self.calculate_reward(action)
|
||||
self.total_reward += step_reward
|
||||
|
||||
trade_type = None
|
||||
if self.is_tradesignal(action):
|
||||
"""
|
||||
Action: Neutral, position: Long -> Close Long
|
||||
Action: Neutral, position: Short -> Close Short
|
||||
|
||||
Action: Long, position: Neutral -> Open Long
|
||||
Action: Long, position: Short -> Close Short and Open Long
|
||||
|
||||
Action: Short, position: Neutral -> Open Short
|
||||
Action: Short, position: Long -> Close Long and Open Short
|
||||
"""
|
||||
|
||||
if action == Actions.Neutral.value:
|
||||
self._position = Positions.Neutral
|
||||
trade_type = "neutral"
|
||||
self._last_trade_tick = None
|
||||
elif action == Actions.Long_enter.value:
|
||||
self._position = Positions.Long
|
||||
trade_type = "long"
|
||||
self._last_trade_tick = self._current_tick
|
||||
elif action == Actions.Short_enter.value:
|
||||
self._position = Positions.Short
|
||||
trade_type = "short"
|
||||
self._last_trade_tick = self._current_tick
|
||||
elif action == Actions.Exit.value:
|
||||
self._update_total_profit()
|
||||
self._position = Positions.Neutral
|
||||
trade_type = "neutral"
|
||||
self._last_trade_tick = None
|
||||
else:
|
||||
print("case not defined")
|
||||
|
||||
if trade_type is not None:
|
||||
self.trade_history.append(
|
||||
{'price': self.current_price(), 'index': self._current_tick,
|
||||
'type': trade_type})
|
||||
|
||||
if self._total_profit < 1 - self.rl_config.get('max_training_drawdown_pct', 0.8):
|
||||
self._done = True
|
||||
|
||||
self._position_history.append(self._position)
|
||||
|
||||
info = dict(
|
||||
tick=self._current_tick,
|
||||
total_reward=self.total_reward,
|
||||
total_profit=self._total_profit,
|
||||
position=self._position.value
|
||||
)
|
||||
|
||||
observation = self._get_observation()
|
||||
|
||||
self._update_history(info)
|
||||
|
||||
return observation, step_reward, self._done, info
|
||||
|
||||
def is_tradesignal(self, action: int) -> bool:
|
||||
"""
|
||||
Determine if the signal is a trade signal
|
||||
e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
|
||||
"""
|
||||
return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or
|
||||
(action == Actions.Neutral.value and self._position == Positions.Short) or
|
||||
(action == Actions.Neutral.value and self._position == Positions.Long) or
|
||||
(action == Actions.Short_enter.value and self._position == Positions.Short) or
|
||||
(action == Actions.Short_enter.value and self._position == Positions.Long) or
|
||||
(action == Actions.Exit.value and self._position == Positions.Neutral) or
|
||||
(action == Actions.Long_enter.value and self._position == Positions.Long) or
|
||||
(action == Actions.Long_enter.value and self._position == Positions.Short))
|
||||
|
||||
def _is_valid(self, action: int) -> bool:
|
||||
"""
|
||||
Determine if the signal is valid.
|
||||
e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
|
||||
"""
|
||||
# Agent should only try to exit if it is in position
|
||||
if action == Actions.Exit.value:
|
||||
if self._position not in (Positions.Short, Positions.Long):
|
||||
return False
|
||||
|
||||
# Agent should only try to enter if it is not in position
|
||||
if action in (Actions.Short_enter.value, Actions.Long_enter.value):
|
||||
if self._position != Positions.Neutral:
|
||||
return False
|
||||
|
||||
return True
|
145
freqtrade/freqai/RL/Base5ActionRLEnv.py
Normal file
145
freqtrade/freqai/RL/Base5ActionRLEnv.py
Normal file
@@ -0,0 +1,145 @@
|
||||
import logging
|
||||
from enum import Enum
|
||||
|
||||
from gym import spaces
|
||||
|
||||
from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Actions(Enum):
|
||||
Neutral = 0
|
||||
Long_enter = 1
|
||||
Long_exit = 2
|
||||
Short_enter = 3
|
||||
Short_exit = 4
|
||||
|
||||
|
||||
class Base5ActionRLEnv(BaseEnvironment):
|
||||
"""
|
||||
Base class for a 5 action environment
|
||||
"""
|
||||
|
||||
def set_action_space(self):
|
||||
self.action_space = spaces.Discrete(len(Actions))
|
||||
|
||||
def step(self, action: int):
|
||||
"""
|
||||
Logic for a single step (incrementing one candle in time)
|
||||
by the agent
|
||||
:param: action: int = the action type that the agent plans
|
||||
to take for the current step.
|
||||
:returns:
|
||||
observation = current state of environment
|
||||
step_reward = the reward from `calculate_reward()`
|
||||
_done = if the agent "died" or if the candles finished
|
||||
info = dict passed back to openai gym lib
|
||||
"""
|
||||
self._done = False
|
||||
self._current_tick += 1
|
||||
|
||||
if self._current_tick == self._end_tick:
|
||||
self._done = True
|
||||
|
||||
self._update_unrealized_total_profit()
|
||||
step_reward = self.calculate_reward(action)
|
||||
self.total_reward += step_reward
|
||||
|
||||
trade_type = None
|
||||
if self.is_tradesignal(action):
|
||||
"""
|
||||
Action: Neutral, position: Long -> Close Long
|
||||
Action: Neutral, position: Short -> Close Short
|
||||
|
||||
Action: Long, position: Neutral -> Open Long
|
||||
Action: Long, position: Short -> Close Short and Open Long
|
||||
|
||||
Action: Short, position: Neutral -> Open Short
|
||||
Action: Short, position: Long -> Close Long and Open Short
|
||||
"""
|
||||
|
||||
if action == Actions.Neutral.value:
|
||||
self._position = Positions.Neutral
|
||||
trade_type = "neutral"
|
||||
self._last_trade_tick = None
|
||||
elif action == Actions.Long_enter.value:
|
||||
self._position = Positions.Long
|
||||
trade_type = "long"
|
||||
self._last_trade_tick = self._current_tick
|
||||
elif action == Actions.Short_enter.value:
|
||||
self._position = Positions.Short
|
||||
trade_type = "short"
|
||||
self._last_trade_tick = self._current_tick
|
||||
elif action == Actions.Long_exit.value:
|
||||
self._update_total_profit()
|
||||
self._position = Positions.Neutral
|
||||
trade_type = "neutral"
|
||||
self._last_trade_tick = None
|
||||
elif action == Actions.Short_exit.value:
|
||||
self._update_total_profit()
|
||||
self._position = Positions.Neutral
|
||||
trade_type = "neutral"
|
||||
self._last_trade_tick = None
|
||||
else:
|
||||
print("case not defined")
|
||||
|
||||
if trade_type is not None:
|
||||
self.trade_history.append(
|
||||
{'price': self.current_price(), 'index': self._current_tick,
|
||||
'type': trade_type})
|
||||
|
||||
if (self._total_profit < self.max_drawdown or
|
||||
self._total_unrealized_profit < self.max_drawdown):
|
||||
self._done = True
|
||||
|
||||
self._position_history.append(self._position)
|
||||
|
||||
info = dict(
|
||||
tick=self._current_tick,
|
||||
total_reward=self.total_reward,
|
||||
total_profit=self._total_profit,
|
||||
position=self._position.value
|
||||
)
|
||||
|
||||
observation = self._get_observation()
|
||||
|
||||
self._update_history(info)
|
||||
|
||||
return observation, step_reward, self._done, info
|
||||
|
||||
def is_tradesignal(self, action: int) -> bool:
|
||||
"""
|
||||
Determine if the signal is a trade signal
|
||||
e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
|
||||
"""
|
||||
return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or
|
||||
(action == Actions.Neutral.value and self._position == Positions.Short) or
|
||||
(action == Actions.Neutral.value and self._position == Positions.Long) or
|
||||
(action == Actions.Short_enter.value and self._position == Positions.Short) or
|
||||
(action == Actions.Short_enter.value and self._position == Positions.Long) or
|
||||
(action == Actions.Short_exit.value and self._position == Positions.Long) or
|
||||
(action == Actions.Short_exit.value and self._position == Positions.Neutral) or
|
||||
(action == Actions.Long_enter.value and self._position == Positions.Long) or
|
||||
(action == Actions.Long_enter.value and self._position == Positions.Short) or
|
||||
(action == Actions.Long_exit.value and self._position == Positions.Short) or
|
||||
(action == Actions.Long_exit.value and self._position == Positions.Neutral))
|
||||
|
||||
def _is_valid(self, action: int) -> bool:
|
||||
# trade signal
|
||||
"""
|
||||
Determine if the signal is valid.
|
||||
e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
|
||||
"""
|
||||
# Agent should only try to exit if it is in position
|
||||
if action in (Actions.Short_exit.value, Actions.Long_exit.value):
|
||||
if self._position not in (Positions.Short, Positions.Long):
|
||||
return False
|
||||
|
||||
# Agent should only try to enter if it is not in position
|
||||
if action in (Actions.Short_enter.value, Actions.Long_enter.value):
|
||||
if self._position != Positions.Neutral:
|
||||
return False
|
||||
|
||||
return True
|
302
freqtrade/freqai/RL/BaseEnvironment.py
Normal file
302
freqtrade/freqai/RL/BaseEnvironment.py
Normal file
@@ -0,0 +1,302 @@
|
||||
import logging
|
||||
from abc import abstractmethod
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import gym
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
from pandas import DataFrame
|
||||
|
||||
from freqtrade.data.dataprovider import DataProvider
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Positions(Enum):
|
||||
Short = 0
|
||||
Long = 1
|
||||
Neutral = 0.5
|
||||
|
||||
def opposite(self):
|
||||
return Positions.Short if self == Positions.Long else Positions.Long
|
||||
|
||||
|
||||
class BaseEnvironment(gym.Env):
|
||||
"""
|
||||
Base class for environments. This class is agnostic to action count.
|
||||
Inherited classes customize this to include varying action counts/types,
|
||||
See RL/Base5ActionRLEnv.py and RL/Base4ActionRLEnv.py
|
||||
"""
|
||||
|
||||
def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
|
||||
reward_kwargs: dict = {}, window_size=10, starting_point=True,
|
||||
id: str = 'baseenv-1', seed: int = 1, config: dict = {},
|
||||
dp: Optional[DataProvider] = None):
|
||||
"""
|
||||
Initializes the training/eval environment.
|
||||
:param df: dataframe of features
|
||||
:param prices: dataframe of prices to be used in the training environment
|
||||
:param window_size: size of window (temporal) to pass to the agent
|
||||
:param reward_kwargs: extra config settings assigned by user in `rl_config`
|
||||
:param starting_point: start at edge of window or not
|
||||
:param id: string id of the environment (used in backend for multiprocessed env)
|
||||
:param seed: Sets the seed of the environment higher in the gym.Env object
|
||||
:param config: Typical user configuration file
|
||||
:param dp: dataprovider from freqtrade
|
||||
"""
|
||||
self.config = config
|
||||
self.rl_config = config['freqai']['rl_config']
|
||||
self.add_state_info = self.rl_config.get('add_state_info', False)
|
||||
self.id = id
|
||||
self.seed(seed)
|
||||
self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
|
||||
self.max_drawdown = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8)
|
||||
self.compound_trades = config['stake_amount'] == 'unlimited'
|
||||
if self.config.get('fee', None) is not None:
|
||||
self.fee = self.config['fee']
|
||||
elif dp is not None:
|
||||
self.fee = dp._exchange.get_fee(symbol=dp.current_whitelist()[0]) # type: ignore
|
||||
else:
|
||||
self.fee = 0.0015
|
||||
|
||||
def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,
|
||||
reward_kwargs: dict, starting_point=True):
|
||||
"""
|
||||
Resets the environment when the agent fails (in our case, if the drawdown
|
||||
exceeds the user set max_training_drawdown_pct)
|
||||
:param df: dataframe of features
|
||||
:param prices: dataframe of prices to be used in the training environment
|
||||
:param window_size: size of window (temporal) to pass to the agent
|
||||
:param reward_kwargs: extra config settings assigned by user in `rl_config`
|
||||
:param starting_point: start at edge of window or not
|
||||
"""
|
||||
self.df = df
|
||||
self.signal_features = self.df
|
||||
self.prices = prices
|
||||
self.window_size = window_size
|
||||
self.starting_point = starting_point
|
||||
self.rr = reward_kwargs["rr"]
|
||||
self.profit_aim = reward_kwargs["profit_aim"]
|
||||
|
||||
# # spaces
|
||||
if self.add_state_info:
|
||||
self.total_features = self.signal_features.shape[1] + 3
|
||||
else:
|
||||
self.total_features = self.signal_features.shape[1]
|
||||
self.shape = (window_size, self.total_features)
|
||||
self.set_action_space()
|
||||
self.observation_space = spaces.Box(
|
||||
low=-1, high=1, shape=self.shape, dtype=np.float32)
|
||||
|
||||
# episode
|
||||
self._start_tick: int = self.window_size
|
||||
self._end_tick: int = len(self.prices) - 1
|
||||
self._done: bool = False
|
||||
self._current_tick: int = self._start_tick
|
||||
self._last_trade_tick: Optional[int] = None
|
||||
self._position = Positions.Neutral
|
||||
self._position_history: list = [None]
|
||||
self.total_reward: float = 0
|
||||
self._total_profit: float = 1
|
||||
self._total_unrealized_profit: float = 1
|
||||
self.history: dict = {}
|
||||
self.trade_history: list = []
|
||||
|
||||
@abstractmethod
|
||||
def set_action_space(self):
|
||||
"""
|
||||
Unique to the environment action count. Must be inherited.
|
||||
"""
|
||||
|
||||
def seed(self, seed: int = 1):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def reset(self):
|
||||
|
||||
self._done = False
|
||||
|
||||
if self.starting_point is True:
|
||||
self._position_history = (self._start_tick * [None]) + [self._position]
|
||||
else:
|
||||
self._position_history = (self.window_size * [None]) + [self._position]
|
||||
|
||||
self._current_tick = self._start_tick
|
||||
self._last_trade_tick = None
|
||||
self._position = Positions.Neutral
|
||||
|
||||
self.total_reward = 0.
|
||||
self._total_profit = 1. # unit
|
||||
self.history = {}
|
||||
self.trade_history = []
|
||||
self.portfolio_log_returns = np.zeros(len(self.prices))
|
||||
|
||||
self._profits = [(self._start_tick, 1)]
|
||||
self.close_trade_profit = []
|
||||
self._total_unrealized_profit = 1
|
||||
|
||||
return self._get_observation()
|
||||
|
||||
@abstractmethod
|
||||
def step(self, action: int):
|
||||
"""
|
||||
Step depeneds on action types, this must be inherited.
|
||||
"""
|
||||
return
|
||||
|
||||
def _get_observation(self):
|
||||
"""
|
||||
This may or may not be independent of action types, user can inherit
|
||||
this in their custom "MyRLEnv"
|
||||
"""
|
||||
features_window = self.signal_features[(
|
||||
self._current_tick - self.window_size):self._current_tick]
|
||||
if self.add_state_info:
|
||||
features_and_state = DataFrame(np.zeros((len(features_window), 3)),
|
||||
columns=['current_profit_pct',
|
||||
'position',
|
||||
'trade_duration'],
|
||||
index=features_window.index)
|
||||
|
||||
features_and_state['current_profit_pct'] = self.get_unrealized_profit()
|
||||
features_and_state['position'] = self._position.value
|
||||
features_and_state['trade_duration'] = self.get_trade_duration()
|
||||
features_and_state = pd.concat([features_window, features_and_state], axis=1)
|
||||
return features_and_state
|
||||
else:
|
||||
return features_window
|
||||
|
||||
def get_trade_duration(self):
|
||||
"""
|
||||
Get the trade duration if the agent is in a trade
|
||||
"""
|
||||
if self._last_trade_tick is None:
|
||||
return 0
|
||||
else:
|
||||
return self._current_tick - self._last_trade_tick
|
||||
|
||||
def get_unrealized_profit(self):
|
||||
"""
|
||||
Get the unrealized profit if the agent is in a trade
|
||||
"""
|
||||
if self._last_trade_tick is None:
|
||||
return 0.
|
||||
|
||||
if self._position == Positions.Neutral:
|
||||
return 0.
|
||||
elif self._position == Positions.Short:
|
||||
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
|
||||
last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
|
||||
return (last_trade_price - current_price) / last_trade_price
|
||||
elif self._position == Positions.Long:
|
||||
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
|
||||
last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
|
||||
return (current_price - last_trade_price) / last_trade_price
|
||||
else:
|
||||
return 0.
|
||||
|
||||
@abstractmethod
|
||||
def is_tradesignal(self, action: int) -> bool:
|
||||
"""
|
||||
Determine if the signal is a trade signal. This is
|
||||
unique to the actions in the environment, and therefore must be
|
||||
inherited.
|
||||
"""
|
||||
return True
|
||||
|
||||
def _is_valid(self, action: int) -> bool:
|
||||
"""
|
||||
Determine if the signal is valid.This is
|
||||
unique to the actions in the environment, and therefore must be
|
||||
inherited.
|
||||
"""
|
||||
return True
|
||||
|
||||
def add_entry_fee(self, price):
|
||||
return price * (1 + self.fee)
|
||||
|
||||
def add_exit_fee(self, price):
|
||||
return price / (1 + self.fee)
|
||||
|
||||
def _update_history(self, info):
|
||||
if not self.history:
|
||||
self.history = {key: [] for key in info.keys()}
|
||||
|
||||
for key, value in info.items():
|
||||
self.history[key].append(value)
|
||||
|
||||
@abstractmethod
|
||||
def calculate_reward(self, action: int) -> float:
|
||||
"""
|
||||
An example reward function. This is the one function that users will likely
|
||||
wish to inject their own creativity into.
|
||||
:param action: int = The action made by the agent for the current candle.
|
||||
:return:
|
||||
float = the reward to give to the agent for current step (used for optimization
|
||||
of weights in NN)
|
||||
"""
|
||||
|
||||
def _update_unrealized_total_profit(self):
|
||||
"""
|
||||
Update the unrealized total profit incase of episode end.
|
||||
"""
|
||||
if self._position in (Positions.Long, Positions.Short):
|
||||
pnl = self.get_unrealized_profit()
|
||||
if self.compound_trades:
|
||||
# assumes unit stake and compounding
|
||||
unrl_profit = self._total_profit * (1 + pnl)
|
||||
else:
|
||||
# assumes unit stake and no compounding
|
||||
unrl_profit = self._total_profit + pnl
|
||||
self._total_unrealized_profit = unrl_profit
|
||||
|
||||
def _update_total_profit(self):
|
||||
pnl = self.get_unrealized_profit()
|
||||
if self.compound_trades:
|
||||
# assumes unit stake and compounding
|
||||
self._total_profit = self._total_profit * (1 + pnl)
|
||||
else:
|
||||
# assumes unit stake and no compounding
|
||||
self._total_profit += pnl
|
||||
|
||||
def current_price(self) -> float:
|
||||
return self.prices.iloc[self._current_tick].open
|
||||
|
||||
# Keeping around incase we want to start building more complex environment
|
||||
# templates in the future.
|
||||
# def most_recent_return(self):
|
||||
# """
|
||||
# Calculate the tick to tick return if in a trade.
|
||||
# Return is generated from rising prices in Long
|
||||
# and falling prices in Short positions.
|
||||
# The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee.
|
||||
# """
|
||||
# # Long positions
|
||||
# if self._position == Positions.Long:
|
||||
# current_price = self.prices.iloc[self._current_tick].open
|
||||
# previous_price = self.prices.iloc[self._current_tick - 1].open
|
||||
|
||||
# if (self._position_history[self._current_tick - 1] == Positions.Short
|
||||
# or self._position_history[self._current_tick - 1] == Positions.Neutral):
|
||||
# previous_price = self.add_entry_fee(previous_price)
|
||||
|
||||
# return np.log(current_price) - np.log(previous_price)
|
||||
|
||||
# # Short positions
|
||||
# if self._position == Positions.Short:
|
||||
# current_price = self.prices.iloc[self._current_tick].open
|
||||
# previous_price = self.prices.iloc[self._current_tick - 1].open
|
||||
# if (self._position_history[self._current_tick - 1] == Positions.Long
|
||||
# or self._position_history[self._current_tick - 1] == Positions.Neutral):
|
||||
# previous_price = self.add_exit_fee(previous_price)
|
||||
|
||||
# return np.log(previous_price) - np.log(current_price)
|
||||
|
||||
# return 0
|
||||
|
||||
# def update_portfolio_log_returns(self, action):
|
||||
# self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action)
|
395
freqtrade/freqai/RL/BaseReinforcementLearningModel.py
Normal file
395
freqtrade/freqai/RL/BaseReinforcementLearningModel.py
Normal file
@@ -0,0 +1,395 @@
|
||||
import importlib
|
||||
import logging
|
||||
from abc import abstractmethod
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
|
||||
|
||||
import gym
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
import pandas as pd
|
||||
import torch as th
|
||||
import torch.multiprocessing
|
||||
from pandas import DataFrame
|
||||
from stable_baselines3.common.callbacks import EvalCallback
|
||||
from stable_baselines3.common.monitor import Monitor
|
||||
from stable_baselines3.common.utils import set_random_seed
|
||||
from stable_baselines3.common.vec_env import SubprocVecEnv
|
||||
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.freqai.freqai_interface import IFreqaiModel
|
||||
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv
|
||||
from freqtrade.freqai.RL.BaseEnvironment import Positions
|
||||
from freqtrade.persistence import Trade
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
torch.multiprocessing.set_sharing_strategy('file_system')
|
||||
|
||||
SB3_MODELS = ['PPO', 'A2C', 'DQN']
|
||||
SB3_CONTRIB_MODELS = ['TRPO', 'ARS', 'RecurrentPPO', 'MaskablePPO']
|
||||
|
||||
|
||||
class BaseReinforcementLearningModel(IFreqaiModel):
|
||||
"""
|
||||
User created Reinforcement Learning Model prediction class
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs) -> None:
|
||||
super().__init__(config=kwargs['config'])
|
||||
self.max_threads = min(self.freqai_info['rl_config'].get(
|
||||
'cpu_count', 1), max(int(self.max_system_threads / 2), 1))
|
||||
th.set_num_threads(self.max_threads)
|
||||
self.reward_params = self.freqai_info['rl_config']['model_reward_parameters']
|
||||
self.train_env: Union[SubprocVecEnv, gym.Env] = None
|
||||
self.eval_env: Union[SubprocVecEnv, gym.Env] = None
|
||||
self.eval_callback: Optional[EvalCallback] = None
|
||||
self.model_type = self.freqai_info['rl_config']['model_type']
|
||||
self.rl_config = self.freqai_info['rl_config']
|
||||
self.continual_learning = self.freqai_info.get('continual_learning', False)
|
||||
if self.model_type in SB3_MODELS:
|
||||
import_str = 'stable_baselines3'
|
||||
elif self.model_type in SB3_CONTRIB_MODELS:
|
||||
import_str = 'sb3_contrib'
|
||||
else:
|
||||
raise OperationalException(f'{self.model_type} not available in stable_baselines3 or '
|
||||
f'sb3_contrib. please choose one of {SB3_MODELS} or '
|
||||
f'{SB3_CONTRIB_MODELS}')
|
||||
|
||||
mod = importlib.import_module(import_str, self.model_type)
|
||||
self.MODELCLASS = getattr(mod, self.model_type)
|
||||
self.policy_type = self.freqai_info['rl_config']['policy_type']
|
||||
self.unset_outlier_removal()
|
||||
self.net_arch = self.rl_config.get('net_arch', [128, 128])
|
||||
|
||||
def unset_outlier_removal(self):
|
||||
"""
|
||||
If user has activated any function that may remove training points, this
|
||||
function will set them to false and warn them
|
||||
"""
|
||||
if self.ft_params.get('use_SVM_to_remove_outliers', False):
|
||||
self.ft_params.update({'use_SVM_to_remove_outliers': False})
|
||||
logger.warning('User tried to use SVM with RL. Deactivating SVM.')
|
||||
if self.ft_params.get('use_DBSCAN_to_remove_outliers', False):
|
||||
self.ft_params.update({'use_DBSCAN_to_remove_outliers': False})
|
||||
logger.warning('User tried to use DBSCAN with RL. Deactivating DBSCAN.')
|
||||
if self.freqai_info['data_split_parameters'].get('shuffle', False):
|
||||
self.freqai_info['data_split_parameters'].update({'shuffle': False})
|
||||
logger.warning('User tried to shuffle training data. Setting shuffle to False')
|
||||
|
||||
def train(
|
||||
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Any:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
:param unfiltered_df: Full dataframe for the current training period
|
||||
:param metadata: pair metadata from strategy.
|
||||
:returns:
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
logger.info("--------------------Starting training " f"{pair} --------------------")
|
||||
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
unfiltered_df,
|
||||
dk.training_features_list,
|
||||
dk.label_list,
|
||||
training_filter=True,
|
||||
)
|
||||
|
||||
data_dictionary: Dict[str, Any] = dk.make_train_test_datasets(
|
||||
features_filtered, labels_filtered)
|
||||
dk.fit_labels() # FIXME useless for now, but just satiating append methods
|
||||
|
||||
# normalize all data based on train_dataset only
|
||||
prices_train, prices_test = self.build_ohlc_price_dataframes(dk.data_dictionary, pair, dk)
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
|
||||
# data cleaning/analysis
|
||||
self.data_cleaning_train(dk)
|
||||
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
|
||||
f' features and {len(data_dictionary["train_features"])} data points'
|
||||
)
|
||||
|
||||
self.set_train_and_eval_environments(data_dictionary, prices_train, prices_test, dk)
|
||||
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
|
||||
return model
|
||||
|
||||
def set_train_and_eval_environments(self, data_dictionary: Dict[str, DataFrame],
|
||||
prices_train: DataFrame, prices_test: DataFrame,
|
||||
dk: FreqaiDataKitchen):
|
||||
"""
|
||||
User can override this if they are using a custom MyRLEnv
|
||||
:param data_dictionary: dict = common data dictionary containing train and test
|
||||
features/labels/weights.
|
||||
:param prices_train/test: DataFrame = dataframe comprised of the prices to be used in the
|
||||
environment during training or testing
|
||||
:param dk: FreqaiDataKitchen = the datakitchen for the current pair
|
||||
"""
|
||||
train_df = data_dictionary["train_features"]
|
||||
test_df = data_dictionary["test_features"]
|
||||
|
||||
self.train_env = self.MyRLEnv(df=train_df,
|
||||
prices=prices_train,
|
||||
window_size=self.CONV_WIDTH,
|
||||
reward_kwargs=self.reward_params,
|
||||
config=self.config,
|
||||
dp=self.data_provider)
|
||||
self.eval_env = Monitor(self.MyRLEnv(df=test_df,
|
||||
prices=prices_test,
|
||||
window_size=self.CONV_WIDTH,
|
||||
reward_kwargs=self.reward_params,
|
||||
config=self.config,
|
||||
dp=self.data_provider))
|
||||
self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
|
||||
render=False, eval_freq=len(train_df),
|
||||
best_model_save_path=str(dk.data_path))
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs):
|
||||
"""
|
||||
Agent customizations and abstract Reinforcement Learning customizations
|
||||
go in here. Abstract method, so this function must be overridden by
|
||||
user class.
|
||||
"""
|
||||
return
|
||||
|
||||
def get_state_info(self, pair: str) -> Tuple[float, float, int]:
|
||||
"""
|
||||
State info during dry/live (not backtesting) which is fed back
|
||||
into the model.
|
||||
:param pair: str = COIN/STAKE to get the environment information for
|
||||
:return:
|
||||
:market_side: float = representing short, long, or neutral for
|
||||
pair
|
||||
:current_profit: float = unrealized profit of the current trade
|
||||
:trade_duration: int = the number of candles that the trade has
|
||||
been open for
|
||||
"""
|
||||
open_trades = Trade.get_trades_proxy(is_open=True)
|
||||
market_side = 0.5
|
||||
current_profit: float = 0
|
||||
trade_duration = 0
|
||||
for trade in open_trades:
|
||||
if trade.pair == pair:
|
||||
if self.data_provider._exchange is None: # type: ignore
|
||||
logger.error('No exchange available.')
|
||||
return 0, 0, 0
|
||||
else:
|
||||
current_rate = self.data_provider._exchange.get_rate( # type: ignore
|
||||
pair, refresh=False, side="exit", is_short=trade.is_short)
|
||||
|
||||
now = datetime.now(timezone.utc).timestamp()
|
||||
trade_duration = int((now - trade.open_date_utc.timestamp()) / self.base_tf_seconds)
|
||||
current_profit = trade.calc_profit_ratio(current_rate)
|
||||
|
||||
return market_side, current_profit, int(trade_duration)
|
||||
|
||||
def predict(
|
||||
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
data (NaNs) or felt uncertain about data (PCA and DI index)
|
||||
"""
|
||||
|
||||
dk.find_features(unfiltered_df)
|
||||
filtered_dataframe, _ = dk.filter_features(
|
||||
unfiltered_df, dk.training_features_list, training_filter=False
|
||||
)
|
||||
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
|
||||
dk.data_dictionary["prediction_features"] = filtered_dataframe
|
||||
|
||||
# optional additional data cleaning/analysis
|
||||
self.data_cleaning_predict(dk)
|
||||
|
||||
pred_df = self.rl_model_predict(
|
||||
dk.data_dictionary["prediction_features"], dk, self.model)
|
||||
pred_df.fillna(0, inplace=True)
|
||||
|
||||
return (pred_df, dk.do_predict)
|
||||
|
||||
def rl_model_predict(self, dataframe: DataFrame,
|
||||
dk: FreqaiDataKitchen, model: Any) -> DataFrame:
|
||||
"""
|
||||
A helper function to make predictions in the Reinforcement learning module.
|
||||
:param dataframe: DataFrame = the dataframe of features to make the predictions on
|
||||
:param dk: FreqaiDatakitchen = data kitchen for the current pair
|
||||
:param model: Any = the trained model used to inference the features.
|
||||
"""
|
||||
output = pd.DataFrame(np.zeros(len(dataframe)), columns=dk.label_list)
|
||||
|
||||
def _predict(window):
|
||||
observations = dataframe.iloc[window.index]
|
||||
if self.live and self.rl_config.get('add_state_info', False):
|
||||
market_side, current_profit, trade_duration = self.get_state_info(dk.pair)
|
||||
observations['current_profit_pct'] = current_profit
|
||||
observations['position'] = market_side
|
||||
observations['trade_duration'] = trade_duration
|
||||
res, _ = model.predict(observations, deterministic=True)
|
||||
return res
|
||||
|
||||
output = output.rolling(window=self.CONV_WIDTH).apply(_predict)
|
||||
|
||||
return output
|
||||
|
||||
def build_ohlc_price_dataframes(self, data_dictionary: dict,
|
||||
pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame,
|
||||
DataFrame]:
|
||||
"""
|
||||
Builds the train prices and test prices for the environment.
|
||||
"""
|
||||
|
||||
pair = pair.replace(':', '')
|
||||
train_df = data_dictionary["train_features"]
|
||||
test_df = data_dictionary["test_features"]
|
||||
|
||||
# price data for model training and evaluation
|
||||
tf = self.config['timeframe']
|
||||
ohlc_list = [f'%-{pair}raw_open_{tf}', f'%-{pair}raw_low_{tf}',
|
||||
f'%-{pair}raw_high_{tf}', f'%-{pair}raw_close_{tf}']
|
||||
rename_dict = {f'%-{pair}raw_open_{tf}': 'open', f'%-{pair}raw_low_{tf}': 'low',
|
||||
f'%-{pair}raw_high_{tf}': ' high', f'%-{pair}raw_close_{tf}': 'close'}
|
||||
|
||||
prices_train = train_df.filter(ohlc_list, axis=1)
|
||||
if prices_train.empty:
|
||||
raise OperationalException('Reinforcement learning module didnt find the raw prices '
|
||||
'assigned in populate_any_indicators. Please assign them '
|
||||
'with:\n'
|
||||
'informative[f"%-{pair}raw_close"] = informative["close"]\n'
|
||||
'informative[f"%-{pair}raw_open"] = informative["open"]\n'
|
||||
'informative[f"%-{pair}raw_high"] = informative["high"]\n'
|
||||
'informative[f"%-{pair}raw_low"] = informative["low"]\n')
|
||||
prices_train.rename(columns=rename_dict, inplace=True)
|
||||
prices_train.reset_index(drop=True)
|
||||
|
||||
prices_test = test_df.filter(ohlc_list, axis=1)
|
||||
prices_test.rename(columns=rename_dict, inplace=True)
|
||||
prices_test.reset_index(drop=True)
|
||||
|
||||
return prices_train, prices_test
|
||||
|
||||
def load_model_from_disk(self, dk: FreqaiDataKitchen) -> Any:
|
||||
"""
|
||||
Can be used by user if they are trying to limit_ram_usage *and*
|
||||
perform continual learning.
|
||||
For now, this is unused.
|
||||
"""
|
||||
exists = Path(dk.data_path / f"{dk.model_filename}_model").is_file()
|
||||
if exists:
|
||||
model = self.MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
|
||||
else:
|
||||
logger.info('No model file on disk to continue learning from.')
|
||||
|
||||
return model
|
||||
|
||||
def _on_stop(self):
|
||||
"""
|
||||
Hook called on bot shutdown. Close SubprocVecEnv subprocesses for clean shutdown.
|
||||
"""
|
||||
|
||||
if self.train_env:
|
||||
self.train_env.close()
|
||||
|
||||
if self.eval_env:
|
||||
self.eval_env.close()
|
||||
|
||||
# Nested class which can be overridden by user to customize further
|
||||
class MyRLEnv(Base5ActionRLEnv):
|
||||
"""
|
||||
User can override any function in BaseRLEnv and gym.Env. Here the user
|
||||
sets a custom reward based on profit and trade duration.
|
||||
"""
|
||||
|
||||
def calculate_reward(self, action: int) -> float:
|
||||
"""
|
||||
An example reward function. This is the one function that users will likely
|
||||
wish to inject their own creativity into.
|
||||
:param action: int = The action made by the agent for the current candle.
|
||||
:return:
|
||||
float = the reward to give to the agent for current step (used for optimization
|
||||
of weights in NN)
|
||||
"""
|
||||
# first, penalize if the action is not valid
|
||||
if not self._is_valid(action):
|
||||
return -2
|
||||
|
||||
pnl = self.get_unrealized_profit()
|
||||
factor = 100.
|
||||
|
||||
# reward agent for entering trades
|
||||
if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
|
||||
and self._position == Positions.Neutral):
|
||||
return 25
|
||||
# discourage agent from not entering trades
|
||||
if action == Actions.Neutral.value and self._position == Positions.Neutral:
|
||||
return -1
|
||||
|
||||
max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
|
||||
if self._last_trade_tick:
|
||||
trade_duration = self._current_tick - self._last_trade_tick
|
||||
else:
|
||||
trade_duration = 0
|
||||
|
||||
if trade_duration <= max_trade_duration:
|
||||
factor *= 1.5
|
||||
elif trade_duration > max_trade_duration:
|
||||
factor *= 0.5
|
||||
|
||||
# discourage sitting in position
|
||||
if (self._position in (Positions.Short, Positions.Long) and
|
||||
action == Actions.Neutral.value):
|
||||
return -1 * trade_duration / max_trade_duration
|
||||
|
||||
# close long
|
||||
if action == Actions.Long_exit.value and self._position == Positions.Long:
|
||||
if pnl > self.profit_aim * self.rr:
|
||||
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||
return float(pnl * factor)
|
||||
|
||||
# close short
|
||||
if action == Actions.Short_exit.value and self._position == Positions.Short:
|
||||
if pnl > self.profit_aim * self.rr:
|
||||
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||
return float(pnl * factor)
|
||||
|
||||
return 0.
|
||||
|
||||
|
||||
def make_env(MyRLEnv: Type[gym.Env], env_id: str, rank: int,
|
||||
seed: int, train_df: DataFrame, price: DataFrame,
|
||||
reward_params: Dict[str, int], window_size: int, monitor: bool = False,
|
||||
config: Dict[str, Any] = {}) -> Callable:
|
||||
"""
|
||||
Utility function for multiprocessed env.
|
||||
|
||||
:param env_id: (str) the environment ID
|
||||
:param num_env: (int) the number of environment you wish to have in subprocesses
|
||||
:param seed: (int) the inital seed for RNG
|
||||
:param rank: (int) index of the subprocess
|
||||
:return: (Callable)
|
||||
"""
|
||||
|
||||
def _init() -> gym.Env:
|
||||
|
||||
env = MyRLEnv(df=train_df, prices=price, window_size=window_size,
|
||||
reward_kwargs=reward_params, id=env_id, seed=seed + rank, config=config)
|
||||
if monitor:
|
||||
env = Monitor(env)
|
||||
return env
|
||||
set_random_seed(seed)
|
||||
return _init
|
0
freqtrade/freqai/RL/__init__.py
Normal file
0
freqtrade/freqai/RL/__init__.py
Normal file
@@ -1,4 +1,5 @@
|
||||
import collections
|
||||
import importlib
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
@@ -98,6 +99,12 @@ class FreqaiDataDrawer:
|
||||
self.empty_pair_dict: pair_info = {
|
||||
"model_filename": "", "trained_timestamp": 0,
|
||||
"data_path": "", "extras": {}}
|
||||
if 'Reinforcement' in self.config['freqaimodel']:
|
||||
self.model_type = 'stable_baselines'
|
||||
logger.warning('User passed a ReinforcementLearner model, FreqAI will '
|
||||
'now use stable_baselines3 to save models.')
|
||||
else:
|
||||
self.model_type = self.freqai_info.get('model_save_type', 'joblib')
|
||||
|
||||
def update_metric_tracker(self, metric: str, value: float, pair: str) -> None:
|
||||
"""
|
||||
@@ -476,10 +483,12 @@ class FreqaiDataDrawer:
|
||||
save_path = Path(dk.data_path)
|
||||
|
||||
# Save the trained model
|
||||
if not dk.keras:
|
||||
if self.model_type == 'joblib':
|
||||
dump(model, save_path / f"{dk.model_filename}_model.joblib")
|
||||
else:
|
||||
elif self.model_type == 'keras':
|
||||
model.save(save_path / f"{dk.model_filename}_model.h5")
|
||||
elif 'stable_baselines' in self.model_type:
|
||||
model.save(save_path / f"{dk.model_filename}_model.zip")
|
||||
|
||||
if dk.svm_model is not None:
|
||||
dump(dk.svm_model, save_path / f"{dk.model_filename}_svm_model.joblib")
|
||||
@@ -506,11 +515,10 @@ class FreqaiDataDrawer:
|
||||
dk.pca, open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "wb")
|
||||
)
|
||||
|
||||
# if self.live:
|
||||
# store as much in ram as possible to increase performance
|
||||
self.model_dictionary[coin] = model
|
||||
self.pair_dict[coin]["model_filename"] = dk.model_filename
|
||||
self.pair_dict[coin]["data_path"] = str(dk.data_path)
|
||||
|
||||
if coin not in self.meta_data_dictionary:
|
||||
self.meta_data_dictionary[coin] = {}
|
||||
self.meta_data_dictionary[coin]["train_df"] = dk.data_dictionary["train_features"]
|
||||
@@ -542,14 +550,6 @@ class FreqaiDataDrawer:
|
||||
if dk.live:
|
||||
dk.model_filename = self.pair_dict[coin]["model_filename"]
|
||||
dk.data_path = Path(self.pair_dict[coin]["data_path"])
|
||||
if self.freqai_info.get("follow_mode", False):
|
||||
# follower can be on a different system which is rsynced from the leader:
|
||||
dk.data_path = Path(
|
||||
self.config["user_data_dir"]
|
||||
/ "models"
|
||||
/ dk.data_path.parts[-2]
|
||||
/ dk.data_path.parts[-1]
|
||||
)
|
||||
|
||||
if coin in self.meta_data_dictionary:
|
||||
dk.data = self.meta_data_dictionary[coin]["meta_data"]
|
||||
@@ -568,12 +568,16 @@ class FreqaiDataDrawer:
|
||||
# try to access model in memory instead of loading object from disk to save time
|
||||
if dk.live and coin in self.model_dictionary:
|
||||
model = self.model_dictionary[coin]
|
||||
elif not dk.keras:
|
||||
elif self.model_type == 'joblib':
|
||||
model = load(dk.data_path / f"{dk.model_filename}_model.joblib")
|
||||
else:
|
||||
elif self.model_type == 'keras':
|
||||
from tensorflow import keras
|
||||
|
||||
model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5")
|
||||
elif self.model_type == 'stable_baselines':
|
||||
mod = importlib.import_module(
|
||||
'stable_baselines3', self.freqai_info['rl_config']['model_type'])
|
||||
MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type'])
|
||||
model = MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
|
||||
|
||||
if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file():
|
||||
dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib")
|
||||
@@ -583,6 +587,10 @@ class FreqaiDataDrawer:
|
||||
f"Unable to load model, ensure model exists at " f"{dk.data_path} "
|
||||
)
|
||||
|
||||
# load it into ram if it was loaded from disk
|
||||
if coin not in self.model_dictionary:
|
||||
self.model_dictionary[coin] = model
|
||||
|
||||
if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
|
||||
dk.pca = cloudpickle.load(
|
||||
open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "rb")
|
||||
|
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Tuple
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
import pandas as pd
|
||||
import psutil
|
||||
from pandas import DataFrame
|
||||
from scipy import stats
|
||||
from sklearn import linear_model
|
||||
@@ -102,7 +103,10 @@ class FreqaiDataKitchen:
|
||||
)
|
||||
|
||||
self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
|
||||
self.thread_count = self.freqai_config.get("data_kitchen_thread_count", -1)
|
||||
if not self.freqai_config.get("data_kitchen_thread_count", 0):
|
||||
self.thread_count = max(int(psutil.cpu_count() * 2 - 2), 1)
|
||||
else:
|
||||
self.thread_count = self.freqai_config["data_kitchen_thread_count"]
|
||||
self.train_dates: DataFrame = pd.DataFrame()
|
||||
self.unique_classes: Dict[str, list] = {}
|
||||
self.unique_class_list: list = []
|
||||
|
@@ -5,15 +5,17 @@ from abc import ABC, abstractmethod
|
||||
from collections import deque
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
from typing import Any, Dict, List, Literal, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import psutil
|
||||
from numpy.typing import NDArray
|
||||
from pandas import DataFrame
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
from freqtrade.data.dataprovider import DataProvider
|
||||
from freqtrade.enums import RunMode
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
@@ -98,6 +100,8 @@ class IFreqaiModel(ABC):
|
||||
self.get_corr_dataframes: bool = True
|
||||
self._threads: List[threading.Thread] = []
|
||||
self._stop_event = threading.Event()
|
||||
self.data_provider: Optional[DataProvider] = None
|
||||
self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1)
|
||||
|
||||
record_params(config, self.full_path)
|
||||
|
||||
@@ -126,6 +130,7 @@ class IFreqaiModel(ABC):
|
||||
|
||||
self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE)
|
||||
self.dd.set_pair_dict_info(metadata)
|
||||
self.data_provider = strategy.dp
|
||||
|
||||
if self.live:
|
||||
self.inference_timer('start')
|
||||
@@ -164,6 +169,13 @@ class IFreqaiModel(ABC):
|
||||
self.model = None
|
||||
self.dk = None
|
||||
|
||||
def _on_stop(self):
|
||||
"""
|
||||
Callback for Subclasses to override to include logic for shutting down resources
|
||||
when SIGINT is sent.
|
||||
"""
|
||||
return
|
||||
|
||||
def shutdown(self):
|
||||
"""
|
||||
Cleans up threads on Shutdown, set stop event. Join threads to wait
|
||||
@@ -172,6 +184,9 @@ class IFreqaiModel(ABC):
|
||||
logger.info("Stopping FreqAI")
|
||||
self._stop_event.set()
|
||||
|
||||
self.data_provider = None
|
||||
self._on_stop()
|
||||
|
||||
logger.info("Waiting on Training iteration")
|
||||
for _thread in self._threads:
|
||||
_thread.join()
|
||||
|
141
freqtrade/freqai/prediction_models/ReinforcementLearner.py
Normal file
141
freqtrade/freqai/prediction_models/ReinforcementLearner.py
Normal file
@@ -0,0 +1,141 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import torch as th
|
||||
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions
|
||||
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ReinforcementLearner(BaseReinforcementLearningModel):
|
||||
"""
|
||||
Reinforcement Learning Model prediction model.
|
||||
|
||||
Users can inherit from this class to make their own RL model with custom
|
||||
environment/training controls. Define the file as follows:
|
||||
|
||||
```
|
||||
from freqtrade.freqai.prediction_models.ReinforcementLearner import ReinforcementLearner
|
||||
|
||||
class MyCoolRLModel(ReinforcementLearner):
|
||||
```
|
||||
|
||||
Save the file to `user_data/freqaimodels`, then run it with:
|
||||
|
||||
freqtrade trade --freqaimodel MyCoolRLModel --config config.json --strategy SomeCoolStrat
|
||||
|
||||
Here the users can override any of the functions
|
||||
available in the `IFreqaiModel` inheritance tree. Most importantly for RL, this
|
||||
is where the user overrides `MyRLEnv` (see below), to define custom
|
||||
`calculate_reward()` function, or to override any other parts of the environment.
|
||||
|
||||
This class also allows users to override any other part of the IFreqaiModel tree.
|
||||
For example, the user can override `def fit()` or `def train()` or `def predict()`
|
||||
to take fine-tuned control over these processes.
|
||||
|
||||
Another common override may be `def data_cleaning_predict()` where the user can
|
||||
take fine-tuned control over the data handling pipeline.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs):
|
||||
"""
|
||||
User customizable fit method
|
||||
:param data_dictionary: dict = common data dictionary containing all train/test
|
||||
features/labels/weights.
|
||||
:param dk: FreqaiDatakitchen = data kitchen for current pair.
|
||||
:return:
|
||||
model Any = trained model to be used for inference in dry/live/backtesting
|
||||
"""
|
||||
train_df = data_dictionary["train_features"]
|
||||
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
|
||||
|
||||
policy_kwargs = dict(activation_fn=th.nn.ReLU,
|
||||
net_arch=self.net_arch)
|
||||
|
||||
if dk.pair not in self.dd.model_dictionary or not self.continual_learning:
|
||||
model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs,
|
||||
tensorboard_log=Path(
|
||||
dk.full_path / "tensorboard" / dk.pair.split('/')[0]),
|
||||
**self.freqai_info['model_training_parameters']
|
||||
)
|
||||
else:
|
||||
logger.info('Continual training activated - starting training from previously '
|
||||
'trained agent.')
|
||||
model = self.dd.model_dictionary[dk.pair]
|
||||
model.set_env(self.train_env)
|
||||
|
||||
model.learn(
|
||||
total_timesteps=int(total_timesteps),
|
||||
callback=self.eval_callback
|
||||
)
|
||||
|
||||
if Path(dk.data_path / "best_model.zip").is_file():
|
||||
logger.info('Callback found a best model.')
|
||||
best_model = self.MODELCLASS.load(dk.data_path / "best_model")
|
||||
return best_model
|
||||
|
||||
logger.info('Couldnt find best model, using final model instead.')
|
||||
|
||||
return model
|
||||
|
||||
class MyRLEnv(Base5ActionRLEnv):
|
||||
"""
|
||||
User can override any function in BaseRLEnv and gym.Env. Here the user
|
||||
sets a custom reward based on profit and trade duration.
|
||||
"""
|
||||
|
||||
def calculate_reward(self, action: int) -> float:
|
||||
"""
|
||||
An example reward function. This is the one function that users will likely
|
||||
wish to inject their own creativity into.
|
||||
:param action: int = The action made by the agent for the current candle.
|
||||
:return:
|
||||
float = the reward to give to the agent for current step (used for optimization
|
||||
of weights in NN)
|
||||
"""
|
||||
# first, penalize if the action is not valid
|
||||
if not self._is_valid(action):
|
||||
return -2
|
||||
|
||||
pnl = self.get_unrealized_profit()
|
||||
factor = 100.
|
||||
|
||||
# reward agent for entering trades
|
||||
if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
|
||||
and self._position == Positions.Neutral):
|
||||
return 25
|
||||
# discourage agent from not entering trades
|
||||
if action == Actions.Neutral.value and self._position == Positions.Neutral:
|
||||
return -1
|
||||
|
||||
max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
|
||||
trade_duration = self._current_tick - self._last_trade_tick # type: ignore
|
||||
|
||||
if trade_duration <= max_trade_duration:
|
||||
factor *= 1.5
|
||||
elif trade_duration > max_trade_duration:
|
||||
factor *= 0.5
|
||||
|
||||
# discourage sitting in position
|
||||
if (self._position in (Positions.Short, Positions.Long) and
|
||||
action == Actions.Neutral.value):
|
||||
return -1 * trade_duration / max_trade_duration
|
||||
|
||||
# close long
|
||||
if action == Actions.Long_exit.value and self._position == Positions.Long:
|
||||
if pnl > self.profit_aim * self.rr:
|
||||
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||
return float(pnl * factor)
|
||||
|
||||
# close short
|
||||
if action == Actions.Short_exit.value and self._position == Positions.Short:
|
||||
if pnl > self.profit_aim * self.rr:
|
||||
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
|
||||
return float(pnl * factor)
|
||||
|
||||
return 0.
|
@@ -0,0 +1,51 @@
|
||||
import logging
|
||||
from typing import Any, Dict # , Tuple
|
||||
|
||||
# import numpy.typing as npt
|
||||
from pandas import DataFrame
|
||||
from stable_baselines3.common.callbacks import EvalCallback
|
||||
from stable_baselines3.common.vec_env import SubprocVecEnv
|
||||
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.freqai.prediction_models.ReinforcementLearner import ReinforcementLearner
|
||||
from freqtrade.freqai.RL.BaseReinforcementLearningModel import make_env
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ReinforcementLearner_multiproc(ReinforcementLearner):
|
||||
"""
|
||||
Demonstration of how to build vectorized environments
|
||||
"""
|
||||
|
||||
def set_train_and_eval_environments(self, data_dictionary: Dict[str, Any],
|
||||
prices_train: DataFrame, prices_test: DataFrame,
|
||||
dk: FreqaiDataKitchen):
|
||||
"""
|
||||
User can override this if they are using a custom MyRLEnv
|
||||
:param data_dictionary: dict = common data dictionary containing train and test
|
||||
features/labels/weights.
|
||||
:param prices_train/test: DataFrame = dataframe comprised of the prices to be used in
|
||||
the environment during training
|
||||
or testing
|
||||
:param dk: FreqaiDataKitchen = the datakitchen for the current pair
|
||||
"""
|
||||
train_df = data_dictionary["train_features"]
|
||||
test_df = data_dictionary["test_features"]
|
||||
|
||||
env_id = "train_env"
|
||||
self.train_env = SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1, train_df, prices_train,
|
||||
self.reward_params, self.CONV_WIDTH, monitor=True,
|
||||
config=self.config) for i
|
||||
in range(self.max_threads)])
|
||||
|
||||
eval_env_id = 'eval_env'
|
||||
self.eval_env = SubprocVecEnv([make_env(self.MyRLEnv, eval_env_id, i, 1,
|
||||
test_df, prices_test,
|
||||
self.reward_params, self.CONV_WIDTH, monitor=True,
|
||||
config=self.config) for i
|
||||
in range(self.max_threads)])
|
||||
self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
|
||||
render=False, eval_freq=len(train_df),
|
||||
best_model_save_path=str(dk.data_path))
|
Reference in New Issue
Block a user