From c0cee5df07ac18d7f870385586e9007ccc74024b Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Wed, 24 Aug 2022 12:54:02 +0200
Subject: [PATCH] add continual retraining feature, handle mypy typing reqs, improve docstrings

---
 config_examples/config_freqai-rl.example.json | 3 +-
 freqtrade/freqai/RL/Base3ActionRLEnv.py | 618 +++++++++---------
 freqtrade/freqai/RL/Base5ActionRLEnv.py | 38 +-
 .../RL/BaseReinforcementLearningModel.py | 42 +-
 .../ReinforcementLearnerCustomAgent.py | 10 +-
 freqtrade/freqai/data_drawer.py | 4 +
 .../ReinforcementLearningExample5ac.py | 3 +-
 .../prediction_models/BaseClassifierModel.py | 4 +-
 .../prediction_models/BaseRegressionModel.py | 4 +-
 .../prediction_models/BaseTensorFlowModel.py | 4 +-
 .../prediction_models/ReinforcementLearner.py | 19 +-
 11 files changed, 387 insertions(+), 362 deletions(-)
 rename freqtrade/freqai/{prediction_models => RL}/ReinforcementLearnerCustomAgent.py (95%)

diff --git a/config_examples/config_freqai-rl.example.json b/config_examples/config_freqai-rl.example.json
index bb67b44b6..b3f8737be 100644
--- a/config_examples/config_freqai-rl.example.json
+++ b/config_examples/config_freqai-rl.example.json
@@ -85,12 +85,13 @@
         "verbose": 1
     },
     "rl_config": {
-        "train_cycles": 10,
+        "train_cycles": 3,
         "eval_cycles": 3,
         "thread_count": 4,
         "max_trade_duration_candles": 100,
         "model_type": "PPO",
         "policy_type": "MlpPolicy",
+        "continual_retraining": true,
         "model_reward_parameters": {
             "rr": 1,
             "profit_aim": 0.02,
diff --git a/freqtrade/freqai/RL/Base3ActionRLEnv.py b/freqtrade/freqai/RL/Base3ActionRLEnv.py
index df53c729b..cddd2f6f9 100644
--- a/freqtrade/freqai/RL/Base3ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base3ActionRLEnv.py
@@ -1,330 +1,330 @@
-import logging
-from enum import Enum
+# import logging
+# from enum import Enum

-import gym
-import numpy as np
-import pandas as pd
-from gym import spaces
-from gym.utils import seeding
-from pandas import DataFrame
+# import gym
+# import numpy as np
+# import pandas as pd
+# from gym import spaces
+# from gym.utils import seeding
+# from pandas import DataFrame

-# from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
+# # from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union

-logger = logging.getLogger(__name__)
+# logger = logging.getLogger(__name__)

-class Actions(Enum):
-    Short = 0
-    Long = 1
-    Neutral = 2
+# class Actions(Enum):
+#     Short = 0
+#     Long = 1
+#     Neutral = 2

-class Positions(Enum):
-    Short = 0
-    Long = 1
-    Neutral = 0.5
+# class Positions(Enum):
+#     Short = 0
+#     Long = 1
+#     Neutral = 0.5

-    def opposite(self):
-        return Positions.Short if self == Positions.Long else Positions.Long
+#     def opposite(self):
+#         return Positions.Short if self == Positions.Long else Positions.Long

-def mean_over_std(x):
-    std = np.std(x, ddof=1)
-    mean = np.mean(x)
-    return mean / std if std > 0 else 0
+# def mean_over_std(x):
+#     std = np.std(x, ddof=1)
+#     mean = np.mean(x)
+#     return mean / std if std > 0 else 0

-class Base3ActionRLEnv(gym.Env):
+# class Base3ActionRLEnv(gym.Env):

-    metadata = {'render.modes': ['human']}
+#     metadata = {'render.modes': ['human']}

-    def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
-                 reward_kwargs: dict = {}, window_size=10, starting_point=True,
-                 id: str = 'baseenv-1', seed: int = 1):
-        assert df.ndim == 2
+#     def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
+#                  reward_kwargs: dict = {}, window_size=10, starting_point=True,
+#                  id: str = 'baseenv-1', seed: int = 1):
+#         assert
df.ndim == 2 - self.id = id - self.seed(seed) - self.reset_env(df, prices, window_size, reward_kwargs, starting_point) +# self.id = id +# self.seed(seed) +# self.reset_env(df, prices, window_size, reward_kwargs, starting_point) - def reset_env(self, df, prices, window_size, reward_kwargs, starting_point=True): - self.df = df - self.signal_features = self.df - self.prices = prices - self.window_size = window_size - self.starting_point = starting_point - self.rr = reward_kwargs["rr"] - self.profit_aim = reward_kwargs["profit_aim"] +# def reset_env(self, df, prices, window_size, reward_kwargs, starting_point=True): +# self.df = df +# self.signal_features = self.df +# self.prices = prices +# self.window_size = window_size +# self.starting_point = starting_point +# self.rr = reward_kwargs["rr"] +# self.profit_aim = reward_kwargs["profit_aim"] - self.fee = 0.0015 +# self.fee = 0.0015 - # # spaces - self.shape = (window_size, self.signal_features.shape[1] + 2) - self.action_space = spaces.Discrete(len(Actions)) - self.observation_space = spaces.Box( - low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) - - # episode - self._start_tick = self.window_size - self._end_tick = len(self.prices) - 1 - self._done = None - self._current_tick = None - self._last_trade_tick = None - self._position = Positions.Neutral - self._position_history = None - self.total_reward = None - self._total_profit = None - self._first_rendering = None - self.history = None - self.trade_history = [] - - def seed(self, seed: int = 1): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - def reset(self): - - self._done = False - - if self.starting_point is True: - self._position_history = (self._start_tick * [None]) + [self._position] - else: - self._position_history = (self.window_size * [None]) + [self._position] - - self._current_tick = self._start_tick - self._last_trade_tick = None - self._position = Positions.Neutral - - self.total_reward = 0. - self._total_profit = 1. 
# unit - self._first_rendering = True - self.history = {} - self.trade_history = [] - self.portfolio_log_returns = np.zeros(len(self.prices)) - - self._profits = [(self._start_tick, 1)] - self.close_trade_profit = [] - - return self._get_observation() - - def step(self, action: int): - self._done = False - self._current_tick += 1 - - if self._current_tick == self._end_tick: - self._done = True - - self.update_portfolio_log_returns(action) - - self._update_profit(action) - step_reward = self.calculate_reward(action) - self.total_reward += step_reward - - trade_type = None - if self.is_tradesignal(action): # exclude 3 case not trade - # Update position - """ - Action: Neutral, position: Long -> Close Long - Action: Neutral, position: Short -> Close Short - - Action: Long, position: Neutral -> Open Long - Action: Long, position: Short -> Close Short and Open Long - - Action: Short, position: Neutral -> Open Short - Action: Short, position: Long -> Close Long and Open Short - """ - - if action == Actions.Neutral.value: - self._position = Positions.Neutral - trade_type = "neutral" - elif action == Actions.Long.value: - self._position = Positions.Long - trade_type = "long" - elif action == Actions.Short.value: - self._position = Positions.Short - trade_type = "short" - else: - print("case not defined") - - # Update last trade tick - self._last_trade_tick = self._current_tick - - if trade_type is not None: - self.trade_history.append( - {'price': self.current_price(), 'index': self._current_tick, - 'type': trade_type}) - - if self._total_profit < 0.2: - self._done = True - - self._position_history.append(self._position) - observation = self._get_observation() - info = dict( - tick=self._current_tick, - total_reward=self.total_reward, - total_profit=self._total_profit, - position=self._position.value - ) - self._update_history(info) - - return observation, step_reward, self._done, info - - def _get_observation(self): - features_window = self.signal_features[( - self._current_tick - self.window_size):self._current_tick] - features_and_state = DataFrame(np.zeros((len(features_window), 2)), - columns=['current_profit_pct', 'position'], - index=features_window.index) - - features_and_state['current_profit_pct'] = self.get_unrealized_profit() - features_and_state['position'] = self._position.value - features_and_state = pd.concat([features_window, features_and_state], axis=1) - return features_and_state - - def get_unrealized_profit(self): - - if self._last_trade_tick is None: - return 0. - - if self._position == Positions.Neutral: - return 0. - elif self._position == Positions.Short: - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - return (last_trade_price - current_price) / last_trade_price - elif self._position == Positions.Long: - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - return (current_price - last_trade_price) / last_trade_price - else: - return 0. 
- - def is_tradesignal(self, action: int): - # trade signal - """ - not trade signal is : - Action: Neutral, position: Neutral -> Nothing - Action: Long, position: Long -> Hold Long - Action: Short, position: Short -> Hold Short - """ - return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) - or (action == Actions.Short.value and self._position == Positions.Short) - or (action == Actions.Long.value and self._position == Positions.Long)) - - def _is_trade(self, action: Actions): - return ((action == Actions.Long.value and self._position == Positions.Short) or - (action == Actions.Short.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Long) or - (action == Actions.Neutral.value and self._position == Positions.Short) - ) - - def is_hold(self, action): - return ((action == Actions.Short.value and self._position == Positions.Short) - or (action == Actions.Long.value and self._position == Positions.Long)) - - def add_buy_fee(self, price): - return price * (1 + self.fee) - - def add_sell_fee(self, price): - return price / (1 + self.fee) - - def _update_history(self, info): - if not self.history: - self.history = {key: [] for key in info.keys()} - - for key, value in info.items(): - self.history[key].append(value) - - def get_sharpe_ratio(self): - return mean_over_std(self.get_portfolio_log_returns()) - - def calculate_reward(self, action): - - if self._last_trade_tick is None: - return 0. - - # close long - if (action == Actions.Short.value or - action == Actions.Neutral.value) and self._position == Positions.Long: - last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(current_price) - np.log(last_trade_price)) - - # close short - if (action == Actions.Long.value or - action == Actions.Neutral.value) and self._position == Positions.Short: - last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) - current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) - return float(np.log(last_trade_price) - np.log(current_price)) - - return 0. - - def _update_profit(self, action): - if self._is_trade(action) or self._done: - pnl = self.get_unrealized_profit() - - if self._position == Positions.Long: - self._total_profit = self._total_profit + self._total_profit * pnl - self._profits.append((self._current_tick, self._total_profit)) - self.close_trade_profit.append(pnl) - - if self._position == Positions.Short: - self._total_profit = self._total_profit + self._total_profit * pnl - self._profits.append((self._current_tick, self._total_profit)) - self.close_trade_profit.append(pnl) - - def most_recent_return(self, action: int): - """ - We support Long, Neutral and Short positions. - Return is generated from rising prices in Long - and falling prices in Short positions. - The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. 
- """ - # Long positions - if self._position == Positions.Long: - current_price = self.prices.iloc[self._current_tick].open - if action == Actions.Short.value or action == Actions.Neutral.value: - current_price = self.add_sell_fee(current_price) - - previous_price = self.prices.iloc[self._current_tick - 1].open - - if (self._position_history[self._current_tick - 1] == Positions.Short - or self._position_history[self._current_tick - 1] == Positions.Neutral): - previous_price = self.add_buy_fee(previous_price) - - return np.log(current_price) - np.log(previous_price) - - # Short positions - if self._position == Positions.Short: - current_price = self.prices.iloc[self._current_tick].open - if action == Actions.Long.value or action == Actions.Neutral.value: - current_price = self.add_buy_fee(current_price) - - previous_price = self.prices.iloc[self._current_tick - 1].open - if (self._position_history[self._current_tick - 1] == Positions.Long - or self._position_history[self._current_tick - 1] == Positions.Neutral): - previous_price = self.add_sell_fee(previous_price) - - return np.log(previous_price) - np.log(current_price) - - return 0 - - def get_portfolio_log_returns(self): - return self.portfolio_log_returns[1:self._current_tick + 1] - - def update_portfolio_log_returns(self, action): - self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) - - def current_price(self) -> float: - return self.prices.iloc[self._current_tick].open +# # # spaces +# self.shape = (window_size, self.signal_features.shape[1] + 2) +# self.action_space = spaces.Discrete(len(Actions)) +# self.observation_space = spaces.Box( +# low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) + +# # episode +# self._start_tick = self.window_size +# self._end_tick = len(self.prices) - 1 +# self._done = None +# self._current_tick = None +# self._last_trade_tick = None +# self._position = Positions.Neutral +# self._position_history = None +# self.total_reward = None +# self._total_profit = None +# self._first_rendering = None +# self.history = None +# self.trade_history = [] + +# def seed(self, seed: int = 1): +# self.np_random, seed = seeding.np_random(seed) +# return [seed] + +# def reset(self): + +# self._done = False + +# if self.starting_point is True: +# self._position_history = (self._start_tick * [None]) + [self._position] +# else: +# self._position_history = (self.window_size * [None]) + [self._position] + +# self._current_tick = self._start_tick +# self._last_trade_tick = None +# self._position = Positions.Neutral + +# self.total_reward = 0. +# self._total_profit = 1. 
# unit +# self._first_rendering = True +# self.history = {} +# self.trade_history = [] +# self.portfolio_log_returns = np.zeros(len(self.prices)) + +# self._profits = [(self._start_tick, 1)] +# self.close_trade_profit = [] + +# return self._get_observation() + +# def step(self, action: int): +# self._done = False +# self._current_tick += 1 + +# if self._current_tick == self._end_tick: +# self._done = True + +# self.update_portfolio_log_returns(action) + +# self._update_profit(action) +# step_reward = self.calculate_reward(action) +# self.total_reward += step_reward + +# trade_type = None +# if self.is_tradesignal(action): # exclude 3 case not trade +# # Update position +# """ +# Action: Neutral, position: Long -> Close Long +# Action: Neutral, position: Short -> Close Short + +# Action: Long, position: Neutral -> Open Long +# Action: Long, position: Short -> Close Short and Open Long + +# Action: Short, position: Neutral -> Open Short +# Action: Short, position: Long -> Close Long and Open Short +# """ + +# if action == Actions.Neutral.value: +# self._position = Positions.Neutral +# trade_type = "neutral" +# elif action == Actions.Long.value: +# self._position = Positions.Long +# trade_type = "long" +# elif action == Actions.Short.value: +# self._position = Positions.Short +# trade_type = "short" +# else: +# print("case not defined") + +# # Update last trade tick +# self._last_trade_tick = self._current_tick + +# if trade_type is not None: +# self.trade_history.append( +# {'price': self.current_price(), 'index': self._current_tick, +# 'type': trade_type}) + +# if self._total_profit < 0.2: +# self._done = True + +# self._position_history.append(self._position) +# observation = self._get_observation() +# info = dict( +# tick=self._current_tick, +# total_reward=self.total_reward, +# total_profit=self._total_profit, +# position=self._position.value +# ) +# self._update_history(info) + +# return observation, step_reward, self._done, info + +# def _get_observation(self): +# features_window = self.signal_features[( +# self._current_tick - self.window_size):self._current_tick] +# features_and_state = DataFrame(np.zeros((len(features_window), 2)), +# columns=['current_profit_pct', 'position'], +# index=features_window.index) + +# features_and_state['current_profit_pct'] = self.get_unrealized_profit() +# features_and_state['position'] = self._position.value +# features_and_state = pd.concat([features_window, features_and_state], axis=1) +# return features_and_state + +# def get_unrealized_profit(self): + +# if self._last_trade_tick is None: +# return 0. + +# if self._position == Positions.Neutral: +# return 0. +# elif self._position == Positions.Short: +# current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) +# last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) +# return (last_trade_price - current_price) / last_trade_price +# elif self._position == Positions.Long: +# current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) +# last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) +# return (current_price - last_trade_price) / last_trade_price +# else: +# return 0. 
+ +# def is_tradesignal(self, action: int): +# # trade signal +# """ +# not trade signal is : +# Action: Neutral, position: Neutral -> Nothing +# Action: Long, position: Long -> Hold Long +# Action: Short, position: Short -> Hold Short +# """ +# return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) +# or (action == Actions.Short.value and self._position == Positions.Short) +# or (action == Actions.Long.value and self._position == Positions.Long)) + +# def _is_trade(self, action: Actions): +# return ((action == Actions.Long.value and self._position == Positions.Short) or +# (action == Actions.Short.value and self._position == Positions.Long) or +# (action == Actions.Neutral.value and self._position == Positions.Long) or +# (action == Actions.Neutral.value and self._position == Positions.Short) +# ) + +# def is_hold(self, action): +# return ((action == Actions.Short.value and self._position == Positions.Short) +# or (action == Actions.Long.value and self._position == Positions.Long)) + +# def add_buy_fee(self, price): +# return price * (1 + self.fee) + +# def add_sell_fee(self, price): +# return price / (1 + self.fee) + +# def _update_history(self, info): +# if not self.history: +# self.history = {key: [] for key in info.keys()} + +# for key, value in info.items(): +# self.history[key].append(value) + +# def get_sharpe_ratio(self): +# return mean_over_std(self.get_portfolio_log_returns()) + +# def calculate_reward(self, action): + +# if self._last_trade_tick is None: +# return 0. + +# # close long +# if (action == Actions.Short.value or +# action == Actions.Neutral.value) and self._position == Positions.Long: +# last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open) +# current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open) +# return float(np.log(current_price) - np.log(last_trade_price)) + +# # close short +# if (action == Actions.Long.value or +# action == Actions.Neutral.value) and self._position == Positions.Short: +# last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open) +# current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open) +# return float(np.log(last_trade_price) - np.log(current_price)) + +# return 0. + +# def _update_profit(self, action): +# if self._is_trade(action) or self._done: +# pnl = self.get_unrealized_profit() + +# if self._position == Positions.Long: +# self._total_profit = self._total_profit + self._total_profit * pnl +# self._profits.append((self._current_tick, self._total_profit)) +# self.close_trade_profit.append(pnl) + +# if self._position == Positions.Short: +# self._total_profit = self._total_profit + self._total_profit * pnl +# self._profits.append((self._current_tick, self._total_profit)) +# self.close_trade_profit.append(pnl) + +# def most_recent_return(self, action: int): +# """ +# We support Long, Neutral and Short positions. +# Return is generated from rising prices in Long +# and falling prices in Short positions. +# The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. 
+# """ +# # Long positions +# if self._position == Positions.Long: +# current_price = self.prices.iloc[self._current_tick].open +# if action == Actions.Short.value or action == Actions.Neutral.value: +# current_price = self.add_sell_fee(current_price) + +# previous_price = self.prices.iloc[self._current_tick - 1].open + +# if (self._position_history[self._current_tick - 1] == Positions.Short +# or self._position_history[self._current_tick - 1] == Positions.Neutral): +# previous_price = self.add_buy_fee(previous_price) + +# return np.log(current_price) - np.log(previous_price) + +# # Short positions +# if self._position == Positions.Short: +# current_price = self.prices.iloc[self._current_tick].open +# if action == Actions.Long.value or action == Actions.Neutral.value: +# current_price = self.add_buy_fee(current_price) + +# previous_price = self.prices.iloc[self._current_tick - 1].open +# if (self._position_history[self._current_tick - 1] == Positions.Long +# or self._position_history[self._current_tick - 1] == Positions.Neutral): +# previous_price = self.add_sell_fee(previous_price) + +# return np.log(previous_price) - np.log(current_price) + +# return 0 + +# def get_portfolio_log_returns(self): +# return self.portfolio_log_returns[1:self._current_tick + 1] + +# def update_portfolio_log_returns(self, action): +# self.portfolio_log_returns[self._current_tick] = self.most_recent_return(action) + +# def current_price(self) -> float: +# return self.prices.iloc[self._current_tick].open - def prev_price(self) -> float: - return self.prices.iloc[self._current_tick - 1].open +# def prev_price(self) -> float: +# return self.prices.iloc[self._current_tick - 1].open - def sharpe_ratio(self) -> float: - if len(self.close_trade_profit) == 0: - return 0. - returns = np.array(self.close_trade_profit) - reward = (np.mean(returns) - 0. + 1e-9) / (np.std(returns) + 1e-9) - return reward +# def sharpe_ratio(self) -> float: +# if len(self.close_trade_profit) == 0: +# return 0. +# returns = np.array(self.close_trade_profit) +# reward = (np.mean(returns) - 0. 
+ 1e-9) / (np.std(returns) + 1e-9) +# return reward diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py index a14111495..64d7061fc 100644 --- a/freqtrade/freqai/RL/Base5ActionRLEnv.py +++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py @@ -1,6 +1,6 @@ import logging from enum import Enum -# from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union +from typing import Optional import gym import numpy as np @@ -44,14 +44,14 @@ class Base5ActionRLEnv(gym.Env): def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(), reward_kwargs: dict = {}, window_size=10, starting_point=True, id: str = 'baseenv-1', seed: int = 1, config: dict = {}): - assert df.ndim == 2 self.rl_config = config['freqai']['rl_config'] self.id = id self.seed(seed) self.reset_env(df, prices, window_size, reward_kwargs, starting_point) - def reset_env(self, df, prices, window_size, reward_kwargs, starting_point=True): + def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int, + reward_kwargs: dict, starting_point=True): self.df = df self.signal_features = self.df self.prices = prices @@ -69,18 +69,18 @@ class Base5ActionRLEnv(gym.Env): low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32) # episode - self._start_tick = self.window_size - self._end_tick = len(self.prices) - 1 - self._done = None - self._current_tick = None - self._last_trade_tick = None + self._start_tick: int = self.window_size + self._end_tick: int = len(self.prices) - 1 + self._done: bool = False + self._current_tick: int = self._start_tick + self._last_trade_tick: Optional[int] = None self._position = Positions.Neutral - self._position_history = None - self.total_reward = None - self._total_profit = None - self._first_rendering = None - self.history = None - self.trade_history = [] + self._position_history: list = [None] + self.total_reward: float = 0 + self._total_profit: float = 0 + self._first_rendering: bool = False + self.history: dict = {} + self.trade_history: list = [] def seed(self, seed: int = 1): self.np_random, seed = seeding.np_random(seed) @@ -125,8 +125,7 @@ class Base5ActionRLEnv(gym.Env): self.total_reward += step_reward trade_type = None - if self.is_tradesignal(action): # exclude 3 case not trade - # Update position + if self.is_tradesignal(action): """ Action: Neutral, position: Long -> Close Long Action: Neutral, position: Short -> Close Short @@ -223,9 +222,8 @@ class Base5ActionRLEnv(gym.Env): # trade signal """ not trade signal is : - Action: Neutral, position: Neutral -> Nothing - Action: Long, position: Long -> Hold Long - Action: Short, position: Short -> Hold Short + Determine if the signal is non sensical + e.g.: agent wants a Actions.Long_exit while it is in a Positions.short """ return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or (action == Actions.Neutral.value and self._position == Positions.Short) or @@ -292,7 +290,7 @@ class Base5ActionRLEnv(gym.Env): def most_recent_return(self, action: int): """ - We support Long, Neutral and Short positions. + Calculate the tick to tick return if in a trade. Return is generated from rising prices in Long and falling prices in Short positions. The actions Sell/Buy or Hold during a Long position trigger the sell/buy-fee. 
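
The revised is_tradesignal() docstring above describes the check as filtering out
nonsensical agent actions, e.g. an Actions.Long_exit request while the environment is in
Positions.Short. The standalone Python sketch below illustrates that idea for the
five-action environment. It is only an illustration: the Actions values and the exact
branch list are assumptions inferred from the names visible in this diff, and
Base5ActionRLEnv may treat some edge cases differently.

from enum import Enum


class Actions(Enum):
    # assumed ordering of the five actions used by Base5ActionRLEnv
    Neutral = 0
    Long_enter = 1
    Long_exit = 2
    Short_enter = 3
    Short_exit = 4


class Positions(Enum):
    Short = 0
    Long = 1
    Neutral = 0.5


def is_tradesignal(action: int, position: Positions) -> bool:
    """Return False for actions that make no sense in the current position."""
    nonsensical = (
        # Neutral never opens or closes anything, so it is always a hold
        action == Actions.Neutral.value
        # entering a position that is already open
        or (action == Actions.Long_enter.value and position == Positions.Long)
        or (action == Actions.Short_enter.value and position == Positions.Short)
        # exiting a position that is not open
        or (action == Actions.Long_exit.value and position != Positions.Long)
        or (action == Actions.Short_exit.value and position != Positions.Short)
    )
    return not nonsensical


if __name__ == "__main__":
    # the docstring example: a Long_exit while short is filtered out
    assert is_tradesignal(Actions.Long_exit.value, Positions.Short) is False
    assert is_tradesignal(Actions.Long_enter.value, Positions.Neutral) is True
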
diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py index a9a1377a8..6660709bd 100644 --- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py +++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py @@ -19,6 +19,7 @@ from typing import Callable from datetime import datetime, timezone from stable_baselines3.common.utils import set_random_seed import gym +from pathlib import Path logger = logging.getLogger(__name__) torch.multiprocessing.set_sharing_strategy('file_system') @@ -40,6 +41,8 @@ class BaseReinforcementLearningModel(IFreqaiModel): self.eval_env: Base5ActionRLEnv = None self.eval_callback: EvalCallback = None self.model_type = self.freqai_info['rl_config']['model_type'] + self.rl_config = self.freqai_info['rl_config'] + self.continual_retraining = self.rl_config['continual_retraining'] if self.model_type in SB3_MODELS: import_str = 'stable_baselines3' elif self.model_type in SB3_CONTRIB_MODELS: @@ -68,7 +71,6 @@ class BaseReinforcementLearningModel(IFreqaiModel): logger.info("--------------------Starting training " f"{pair} --------------------") - # filter the features requested by user in the configuration file and elegantly handle NaNs features_filtered, labels_filtered = dk.filter_features( unfiltered_dataframe, dk.training_features_list, @@ -78,19 +80,19 @@ class BaseReinforcementLearningModel(IFreqaiModel): data_dictionary: Dict[str, Any] = dk.make_train_test_datasets( features_filtered, labels_filtered) - dk.fit_labels() # useless for now, but just satiating append methods + dk.fit_labels() # FIXME useless for now, but just satiating append methods # normalize all data based on train_dataset only prices_train, prices_test = self.build_ohlc_price_dataframes(dk.data_dictionary, pair, dk) data_dictionary = dk.normalize_data(data_dictionary) - # optional additional data cleaning/analysis + # data cleaning/analysis self.data_cleaning_train(dk) logger.info( - f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features" + f'Training model on {len(dk.data_dictionary["train_features"].columns)}' + f' features and {len(data_dictionary["train_features"])} data points' ) - logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') self.set_train_and_eval_environments(data_dictionary, prices_train, prices_test, dk) @@ -100,9 +102,11 @@ class BaseReinforcementLearningModel(IFreqaiModel): return model - def set_train_and_eval_environments(self, data_dictionary, prices_train, prices_test, dk): + def set_train_and_eval_environments(self, data_dictionary: Dict[str, DataFrame], + prices_train: DataFrame, prices_test: DataFrame, + dk: FreqaiDataKitchen): """ - User overrides this as shown here if they are using a custom MyRLEnv + User can override this if they are using a custom MyRLEnv """ train_df = data_dictionary["train_features"] test_df = data_dictionary["test_features"] @@ -114,18 +118,22 @@ class BaseReinforcementLearningModel(IFreqaiModel): reward_kwargs=self.reward_params, config=self.config) self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test, window_size=self.CONV_WIDTH, - reward_kwargs=self.reward_params, config=self.config), ".") + reward_kwargs=self.reward_params, config=self.config), + str(Path(dk.data_path / 'monitor'))) self.eval_callback = EvalCallback(self.eval_env, deterministic=True, render=False, eval_freq=eval_freq, - best_model_save_path=dk.data_path) + best_model_save_path=str(dk.data_path)) else: self.train_env.reset() 
self.eval_env.reset() self.train_env.reset_env(train_df, prices_train, self.CONV_WIDTH, self.reward_params) self.eval_env.reset_env(test_df, prices_test, self.CONV_WIDTH, self.reward_params) + # self.eval_callback.eval_env = self.eval_env + # self.eval_callback.best_model_save_path = str(dk.data_path) + # self.eval_callback._init_callback() self.eval_callback.__init__(self.eval_env, deterministic=True, render=False, eval_freq=eval_freq, - best_model_save_path=dk.data_path) + best_model_save_path=str(dk.data_path)) @abstractmethod def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): @@ -137,19 +145,20 @@ class BaseReinforcementLearningModel(IFreqaiModel): return - def get_state_info(self, pair): + def get_state_info(self, pair: str): open_trades = Trade.get_trades_proxy(is_open=True) market_side = 0.5 - current_profit = 0 + current_profit: float = 0 trade_duration = 0 for trade in open_trades: if trade.pair == pair: + # FIXME: mypy typing doesnt like that strategy may be "None" (it never will be) current_value = self.strategy.dp._exchange.get_rate( pair, refresh=False, side="exit", is_short=trade.is_short) openrate = trade.open_rate now = datetime.now(timezone.utc).timestamp() - trade_duration = (now - trade.open_date.timestamp()) / self.base_tf_seconds - if 'long' in trade.enter_tag: + trade_duration = int((now - trade.open_date.timestamp()) / self.base_tf_seconds) + if 'long' in str(trade.enter_tag): market_side = 1 current_profit = (current_value - openrate) / openrate else: @@ -245,8 +254,9 @@ class BaseReinforcementLearningModel(IFreqaiModel): return -def make_env(env_id: str, rank: int, seed: int, train_df, price, - reward_params, window_size, monitor=False, config={}) -> Callable: +def make_env(env_id: str, rank: int, seed: int, train_df: DataFrame, price: DataFrame, + reward_params: Dict[str, int], window_size: int, monitor: bool = False, + config: Dict[str, Any] = {}) -> Callable: """ Utility function for multiprocessed env. diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearnerCustomAgent.py b/freqtrade/freqai/RL/ReinforcementLearnerCustomAgent.py similarity index 95% rename from freqtrade/freqai/prediction_models/ReinforcementLearnerCustomAgent.py rename to freqtrade/freqai/RL/ReinforcementLearnerCustomAgent.py index bb16b612b..fcd813ce6 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearnerCustomAgent.py +++ b/freqtrade/freqai/RL/ReinforcementLearnerCustomAgent.py @@ -22,6 +22,12 @@ class ReinforcementLearnerCustomAgent(BaseReinforcementLearningModel): """ User can customize agent by defining the class and using it directly. Here the example is "TDQN" + + Warning! + This is an advanced example of how a user may create and use a highly + customized model class (which can inherit from existing classes, + similar to how the example below inherits from DQN). + This file is for example purposes only, and should not be run. 
""" def fit_rl(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen): @@ -34,7 +40,7 @@ class ReinforcementLearnerCustomAgent(BaseReinforcementLearningModel): # TDQN is a custom agent defined below model = TDQN(self.policy_type, self.train_env, - tensorboard_log=Path(dk.data_path / "tensorboard"), + tensorboard_log=str(Path(dk.data_path / "tensorboard")), policy_kwargs=policy_kwargs, **self.freqai_info['model_training_parameters'] ) @@ -217,7 +223,7 @@ class TDQN(DQN): exploration_initial_eps: float = 1.0, exploration_final_eps: float = 0.05, max_grad_norm: float = 10, - tensorboard_log: Optional[Path] = None, + tensorboard_log: Optional[str] = None, create_eval_env: bool = False, policy_kwargs: Optional[Dict[str, Any]] = None, verbose: int = 1, diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index c37973551..ae3e92f5e 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -485,6 +485,10 @@ class FreqaiDataDrawer: f"Unable to load model, ensure model exists at " f"{dk.data_path} " ) + # load it into ram if it was loaded from disk + if coin not in self.model_dictionary: + self.model_dictionary[coin] = model + if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]: dk.pca = cloudpickle.load( open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "rb") diff --git a/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py b/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py index 437b53b05..15a263b94 100644 --- a/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py +++ b/freqtrade/freqai/example_strats/ReinforcementLearningExample5ac.py @@ -76,7 +76,8 @@ class ReinforcementLearningExample5ac(IStrategy): informative[f"%-{coin}pct-change"] = informative["close"].pct_change() informative[f"%-{coin}raw_volume"] = informative["volume"] - # The following features are necessary for RL models + # FIXME: add these outside the user strategy? + # The following columns are necessary for RL models. 
informative[f"%-{coin}raw_close"] = informative["close"] informative[f"%-{coin}raw_open"] = informative["open"] informative[f"%-{coin}raw_high"] = informative["high"] diff --git a/freqtrade/freqai/prediction_models/BaseClassifierModel.py b/freqtrade/freqai/prediction_models/BaseClassifierModel.py index 2edbf3b51..042f43199 100644 --- a/freqtrade/freqai/prediction_models/BaseClassifierModel.py +++ b/freqtrade/freqai/prediction_models/BaseClassifierModel.py @@ -57,9 +57,9 @@ class BaseClassifierModel(IFreqaiModel): self.data_cleaning_train(dk) logger.info( - f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features" + f'Training model on {len(dk.data_dictionary["train_features"].columns)}' + f' features and {len(data_dictionary["train_features"])} data points' ) - logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') model = self.fit(data_dictionary) diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/prediction_models/BaseRegressionModel.py index 2ef175a2e..6ca9ae8cb 100644 --- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py +++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py @@ -56,9 +56,9 @@ class BaseRegressionModel(IFreqaiModel): self.data_cleaning_train(dk) logger.info( - f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features" + f'Training model on {len(dk.data_dictionary["train_features"].columns)}' + f' features and {len(data_dictionary["train_features"])} data points' ) - logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') model = self.fit(data_dictionary) diff --git a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py index 04eff045f..6a842f007 100644 --- a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py +++ b/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py @@ -53,9 +53,9 @@ class BaseTensorFlowModel(IFreqaiModel): self.data_cleaning_train(dk) logger.info( - f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features" + f'Training model on {len(dk.data_dictionary["train_features"].columns)}' + f' features and {len(data_dictionary["train_features"])} data points' ) - logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') model = self.fit(data_dictionary) diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py index d3e6bde7c..254fd32b0 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py @@ -1,7 +1,6 @@ import logging -from typing import Any, Dict # , Tuple +from typing import Any, Dict -# import numpy.typing as npt import torch as th from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions @@ -22,12 +21,18 @@ class ReinforcementLearner(BaseReinforcementLearningModel): total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) policy_kwargs = dict(activation_fn=th.nn.ReLU, - net_arch=[256, 256, 128]) + net_arch=[512, 512, 256]) - model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, - tensorboard_log=Path(dk.data_path / "tensorboard"), - **self.freqai_info['model_training_parameters'] - ) + if dk.pair not in self.dd.model_dictionary or not 
self.continual_retraining:
+            model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs,
+                                    tensorboard_log=Path(dk.data_path / "tensorboard"),
+                                    **self.freqai_info['model_training_parameters']
+                                    )
+        else:
+            logger.info('Continual training activated - starting training from previously '
+                        'trained agent.')
+            model = self.dd.model_dictionary[dk.pair]
+            model.set_env(self.train_env)

         model.learn(
             total_timesteps=int(total_timesteps),
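
Taken together with the data_drawer change above (which now keeps a disk-loaded model in
self.model_dictionary) and the new rl_config["continual_retraining"] flag in the example
config, this branch lets each retraining cycle continue from the previously trained agent
instead of starting from scratch. The sketch below is a minimal, self-contained
illustration of that create-or-reuse pattern; StubModel, StubEnv and get_model_for_pair
are hypothetical stand-ins (not freqtrade or stable-baselines3 APIs) so the example runs
on its own.

from typing import Any, Dict


class StubEnv:
    """Stand-in for a training environment such as Base5ActionRLEnv."""


class StubModel:
    """Stand-in for an SB3 agent such as PPO."""

    def __init__(self) -> None:
        self.env: Any = None
        self.timesteps_trained = 0

    def set_env(self, env: StubEnv) -> None:
        self.env = env

    def learn(self, total_timesteps: int) -> None:
        self.timesteps_trained += total_timesteps


def get_model_for_pair(pair: str,
                       model_dictionary: Dict[str, StubModel],
                       train_env: StubEnv,
                       continual_retraining: bool) -> StubModel:
    """Create a fresh agent, or continue training the cached one."""
    if pair not in model_dictionary or not continual_retraining:
        model = StubModel()              # fresh agent for this pair
    else:
        model = model_dictionary[pair]   # reuse the previously trained agent
    model.set_env(train_env)             # point it at the new training window
    model_dictionary[pair] = model       # keep it in RAM for the next cycle
    return model


if __name__ == "__main__":
    cache: Dict[str, StubModel] = {}
    env = StubEnv()
    m1 = get_model_for_pair("BTC/USDT", cache, env, continual_retraining=True)
    m1.learn(1000)
    m2 = get_model_for_pair("BTC/USDT", cache, env, continual_retraining=True)
    assert m2 is m1 and m2.timesteps_trained == 1000
    m2.learn(1000)
    print(m2.timesteps_trained)  # 2000: training resumed from the cached agent

In the real ReinforcementLearner, the cached object is the stable-baselines3 agent itself
(e.g. PPO), and set_env() points it at the freshly built training environment before
learn() resumes training.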