merge develop into feat/shuffle_after_split

This commit is contained in:
robcaulk
2023-02-16 18:46:01 +01:00
203 changed files with 12376 additions and 8494 deletions

View File

@@ -0,0 +1,125 @@
import logging
from enum import Enum
from gym import spaces
from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, Positions
logger = logging.getLogger(__name__)
class Actions(Enum):
Neutral = 0
Buy = 1
Sell = 2
class Base3ActionRLEnv(BaseEnvironment):
"""
Base class for a 3 action environment
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.actions = Actions
def set_action_space(self):
self.action_space = spaces.Discrete(len(Actions))
def step(self, action: int):
"""
Logic for a single step (incrementing one candle in time)
by the agent
:param: action: int = the action type that the agent plans
to take for the current step.
:returns:
observation = current state of environment
step_reward = the reward from `calculate_reward()`
_done = if the agent "died" or if the candles finished
info = dict passed back to openai gym lib
"""
self._done = False
self._current_tick += 1
if self._current_tick == self._end_tick:
self._done = True
self._update_unrealized_total_profit()
step_reward = self.calculate_reward(action)
self.total_reward += step_reward
self.tensorboard_log(self.actions._member_names_[action])
trade_type = None
if self.is_tradesignal(action):
if action == Actions.Buy.value:
if self._position == Positions.Short:
self._update_total_profit()
self._position = Positions.Long
trade_type = "long"
self._last_trade_tick = self._current_tick
elif action == Actions.Sell.value and self.can_short:
if self._position == Positions.Long:
self._update_total_profit()
self._position = Positions.Short
trade_type = "short"
self._last_trade_tick = self._current_tick
elif action == Actions.Sell.value and not self.can_short:
self._update_total_profit()
self._position = Positions.Neutral
trade_type = "neutral"
self._last_trade_tick = None
else:
print("case not defined")
if trade_type is not None:
self.trade_history.append(
{'price': self.current_price(), 'index': self._current_tick,
'type': trade_type})
if (self._total_profit < self.max_drawdown or
self._total_unrealized_profit < self.max_drawdown):
self._done = True
self._position_history.append(self._position)
info = dict(
tick=self._current_tick,
action=action,
total_reward=self.total_reward,
total_profit=self._total_profit,
position=self._position.value,
trade_duration=self.get_trade_duration(),
current_profit_pct=self.get_unrealized_profit()
)
observation = self._get_observation()
self._update_history(info)
return observation, step_reward, self._done, info
def is_tradesignal(self, action: int) -> bool:
"""
Determine if the signal is a trade signal
e.g.: agent wants a Actions.Buy while it is in a Positions.short
"""
return (
(action == Actions.Buy.value and self._position == Positions.Neutral)
or (action == Actions.Sell.value and self._position == Positions.Long)
or (action == Actions.Sell.value and self._position == Positions.Neutral
and self.can_short)
or (action == Actions.Buy.value and self._position == Positions.Short
and self.can_short)
)
def _is_valid(self, action: int) -> bool:
"""
Determine if the signal is valid.
e.g.: agent wants a Actions.Sell while it is in a Positions.Long
"""
if self.can_short:
return action in [Actions.Buy.value, Actions.Sell.value, Actions.Neutral.value]
else:
if action == Actions.Sell.value and self._position != Positions.Long:
return False
return True

View File

@@ -88,7 +88,8 @@ class Base4ActionRLEnv(BaseEnvironment):
{'price': self.current_price(), 'index': self._current_tick,
'type': trade_type})
if self._total_profit < 1 - self.rl_config.get('max_training_drawdown_pct', 0.8):
if (self._total_profit < self.max_drawdown or
self._total_unrealized_profit < self.max_drawdown):
self._done = True
self._position_history.append(self._position)

View File

@@ -45,7 +45,8 @@ class BaseEnvironment(gym.Env):
def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
reward_kwargs: dict = {}, window_size=10, starting_point=True,
id: str = 'baseenv-1', seed: int = 1, config: dict = {}, live: bool = False,
fee: float = 0.0015):
fee: float = 0.0015, can_short: bool = False, pair: str = "",
df_raw: DataFrame = DataFrame()):
"""
Initializes the training/eval environment.
:param df: dataframe of features
@@ -58,13 +59,16 @@ class BaseEnvironment(gym.Env):
:param config: Typical user configuration file
:param live: Whether or not this environment is active in dry/live/backtesting
:param fee: The fee to use for environmental interactions.
:param can_short: Whether or not the environment can short
"""
self.config = config
self.rl_config = config['freqai']['rl_config']
self.add_state_info = self.rl_config.get('add_state_info', False)
self.id = id
self.max_drawdown = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8)
self.compound_trades = config['stake_amount'] == 'unlimited'
self.config: dict = config
self.rl_config: dict = config['freqai']['rl_config']
self.add_state_info: bool = self.rl_config.get('add_state_info', False)
self.id: str = id
self.max_drawdown: float = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8)
self.compound_trades: bool = config['stake_amount'] == 'unlimited'
self.pair: str = pair
self.raw_features: DataFrame = df_raw
if self.config.get('fee', None) is not None:
self.fee = self.config['fee']
else:
@@ -73,7 +77,8 @@ class BaseEnvironment(gym.Env):
# set here to default 5Ac, but all children envs can override this
self.actions: Type[Enum] = BaseActions
self.tensorboard_metrics: dict = {}
self.live = live
self.can_short: bool = can_short
self.live: bool = live
if not self.live and self.add_state_info:
self.add_state_info = False
logger.warning("add_state_info is not available in backtesting. Deactivating.")
@@ -91,13 +96,12 @@ class BaseEnvironment(gym.Env):
:param reward_kwargs: extra config settings assigned by user in `rl_config`
:param starting_point: start at edge of window or not
"""
self.df = df
self.signal_features = self.df
self.prices = prices
self.window_size = window_size
self.starting_point = starting_point
self.rr = reward_kwargs["rr"]
self.profit_aim = reward_kwargs["profit_aim"]
self.signal_features: DataFrame = df
self.prices: DataFrame = prices
self.window_size: int = window_size
self.starting_point: bool = starting_point
self.rr: float = reward_kwargs["rr"]
self.profit_aim: float = reward_kwargs["profit_aim"]
# # spaces
if self.add_state_info:

View File

@@ -1,3 +1,4 @@
import copy
import importlib
import logging
from abc import abstractmethod
@@ -50,6 +51,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
self.eval_callback: Optional[EvalCallback] = None
self.model_type = self.freqai_info['rl_config']['model_type']
self.rl_config = self.freqai_info['rl_config']
self.df_raw: DataFrame = DataFrame()
self.continual_learning = self.freqai_info.get('continual_learning', False)
if self.model_type in SB3_MODELS:
import_str = 'stable_baselines3'
@@ -107,6 +109,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
data_dictionary: Dict[str, Any] = dk.make_train_test_datasets(
features_filtered, labels_filtered)
self.df_raw = copy.deepcopy(data_dictionary["train_features"])
dk.fit_labels() # FIXME useless for now, but just satiating append methods
# normalize all data based on train_dataset only
@@ -143,7 +146,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
env_info = self.pack_env_dict()
env_info = self.pack_env_dict(dk.pair)
self.train_env = self.MyRLEnv(df=train_df,
prices=prices_train,
@@ -158,14 +161,17 @@ class BaseReinforcementLearningModel(IFreqaiModel):
actions = self.train_env.get_actions()
self.tensorboard_callback = TensorboardCallback(verbose=1, actions=actions)
def pack_env_dict(self) -> Dict[str, Any]:
def pack_env_dict(self, pair: str) -> Dict[str, Any]:
"""
Create dictionary of environment arguments
"""
env_info = {"window_size": self.CONV_WIDTH,
"reward_kwargs": self.reward_params,
"config": self.config,
"live": self.live}
"live": self.live,
"can_short": self.can_short,
"pair": pair,
"df_raw": self.df_raw}
if self.data_provider:
env_info["fee"] = self.data_provider._exchange \
.get_fee(symbol=self.data_provider.current_whitelist()[0]) # type: ignore
@@ -279,26 +285,36 @@ class BaseReinforcementLearningModel(IFreqaiModel):
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
# %-raw_volume_gen_shift-2_ETH/USDT_1h
# price data for model training and evaluation
tf = self.config['timeframe']
ohlc_list = [f'%-{pair}raw_open_{tf}', f'%-{pair}raw_low_{tf}',
f'%-{pair}raw_high_{tf}', f'%-{pair}raw_close_{tf}']
rename_dict = {f'%-{pair}raw_open_{tf}': 'open', f'%-{pair}raw_low_{tf}': 'low',
f'%-{pair}raw_high_{tf}': ' high', f'%-{pair}raw_close_{tf}': 'close'}
rename_dict = {'%-raw_open': 'open', '%-raw_low': 'low',
'%-raw_high': ' high', '%-raw_close': 'close'}
rename_dict_old = {f'%-{pair}raw_open_{tf}': 'open', f'%-{pair}raw_low_{tf}': 'low',
f'%-{pair}raw_high_{tf}': ' high', f'%-{pair}raw_close_{tf}': 'close'}
prices_train = train_df.filter(rename_dict.keys(), axis=1)
prices_train_old = train_df.filter(rename_dict_old.keys(), axis=1)
if prices_train.empty or not prices_train_old.empty:
if not prices_train_old.empty:
prices_train = prices_train_old
rename_dict = rename_dict_old
logger.warning('Reinforcement learning module didnt find the correct raw prices '
'assigned in feature_engineering_standard(). '
'Please assign them with:\n'
'dataframe["%-raw_close"] = dataframe["close"]\n'
'dataframe["%-raw_open"] = dataframe["open"]\n'
'dataframe["%-raw_high"] = dataframe["high"]\n'
'dataframe["%-raw_low"] = dataframe["low"]\n'
'inside `feature_engineering_standard()')
elif prices_train.empty:
raise OperationalException("No prices found, please follow log warning "
"instructions to correct the strategy.")
prices_train = train_df.filter(ohlc_list, axis=1)
if prices_train.empty:
raise OperationalException('Reinforcement learning module didnt find the raw prices '
'assigned in populate_any_indicators. Please assign them '
'with:\n'
'informative[f"%-{pair}raw_close"] = informative["close"]\n'
'informative[f"%-{pair}raw_open"] = informative["open"]\n'
'informative[f"%-{pair}raw_high"] = informative["high"]\n'
'informative[f"%-{pair}raw_low"] = informative["low"]\n')
prices_train.rename(columns=rename_dict, inplace=True)
prices_train.reset_index(drop=True)
prices_test = test_df.filter(ohlc_list, axis=1)
prices_test = test_df.filter(rename_dict.keys(), axis=1)
prices_test.rename(columns=rename_dict, inplace=True)
prices_test.reset_index(drop=True)
@@ -336,7 +352,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
sets a custom reward based on profit and trade duration.
"""
def calculate_reward(self, action: int) -> float:
def calculate_reward(self, action: int) -> float: # noqa: C901
"""
An example reward function. This is the one function that users will likely
wish to inject their own creativity into.
@@ -352,10 +368,19 @@ class BaseReinforcementLearningModel(IFreqaiModel):
pnl = self.get_unrealized_profit()
factor = 100.
# you can use feature values from dataframe
rsi_now = self.raw_features[f"%-rsi-period-10_shift-1_{self.pair}_"
f"{self.config['timeframe']}"].iloc[self._current_tick]
# reward agent for entering trades
if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
and self._position == Positions.Neutral):
return 25
if rsi_now < 40:
factor = 40 / rsi_now
else:
factor = 1
return 25 * factor
# discourage agent from not entering trades
if action == Actions.Neutral.value and self._position == Positions.Neutral:
return -1

View File

@@ -59,7 +59,7 @@ class FreqaiDataDrawer:
Juha Nykänen @suikula, Wagner Costa @wagnercosta, Johan Vlugt @Jooopieeert
"""
def __init__(self, full_path: Path, config: Config, follow_mode: bool = False):
def __init__(self, full_path: Path, config: Config):
self.config = config
self.freqai_info = config.get("freqai", {})
@@ -84,9 +84,6 @@ class FreqaiDataDrawer:
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.global_metadata_path = Path(self.full_path / "global_metadata.json")
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
self.follow_mode = follow_mode
if follow_mode:
self.create_follower_dict()
self.load_drawer_from_disk()
self.load_historic_predictions_from_disk()
self.metric_tracker: Dict[str, Dict[str, Dict[str, list]]] = {}
@@ -149,13 +146,8 @@ class FreqaiDataDrawer:
if exists:
with open(self.pair_dictionary_path, "r") as fp:
self.pair_dict = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
elif not self.follow_mode:
logger.info("Could not find existing datadrawer, starting from scratch")
else:
logger.warning(
f"Follower could not find pair_dictionary at {self.full_path} "
"sending null values back to strategy"
)
logger.info("Could not find existing datadrawer, starting from scratch")
def load_metric_tracker_from_disk(self):
"""
@@ -193,13 +185,8 @@ class FreqaiDataDrawer:
self.historic_predictions = cloudpickle.load(fp)
logger.warning('FreqAI successfully loaded the backup historical predictions file.')
elif not self.follow_mode:
logger.info("Could not find existing historic_predictions, starting from scratch")
else:
logger.warning(
f"Follower could not find historic predictions at {self.full_path} "
"sending null values back to strategy"
)
logger.info("Could not find existing historic_predictions, starting from scratch")
return exists
@@ -248,23 +235,6 @@ class FreqaiDataDrawer:
rapidjson.dump(metadata, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
def create_follower_dict(self):
"""
Create or dictionary for each follower to maintain unique persistent prediction targets
"""
whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist")
exists = self.follower_dict_path.is_file()
if exists:
logger.info("Found an existing follower dictionary")
for pair in whitelist_pairs:
self.follower_dict[pair] = {}
self.save_follower_dict_to_disk()
def np_encoder(self, object):
if isinstance(object, np.generic):
return object.item()
@@ -282,27 +252,17 @@ class FreqaiDataDrawer:
"""
pair_dict = self.pair_dict.get(pair)
data_path_set = self.pair_dict.get(pair, self.empty_pair_dict).get("data_path", "")
# data_path_set = self.pair_dict.get(pair, self.empty_pair_dict).get("data_path", "")
return_null_array = False
if pair_dict:
model_filename = pair_dict["model_filename"]
trained_timestamp = pair_dict["trained_timestamp"]
elif not self.follow_mode:
else:
self.pair_dict[pair] = self.empty_pair_dict.copy()
model_filename = ""
trained_timestamp = 0
if not data_path_set and self.follow_mode:
logger.warning(
f"Follower could not find current pair {pair} in "
f"pair_dictionary at path {self.full_path}, sending null values "
"back to strategy."
)
trained_timestamp = 0
model_filename = ''
return_null_array = True
return model_filename, trained_timestamp, return_null_array
def set_pair_dict_info(self, metadata: dict) -> None:
@@ -311,7 +271,6 @@ class FreqaiDataDrawer:
return
else:
self.pair_dict[metadata["pair"]] = self.empty_pair_dict.copy()
return
def set_initial_return_values(self, pair: str, pred_df: DataFrame) -> None:

View File

@@ -1,11 +1,12 @@
import copy
import inspect
import logging
import random
import shutil
from datetime import datetime, timezone
from math import cos, sin
from pathlib import Path
from typing import Any, Dict, List, Tuple
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import numpy.typing as npt
@@ -24,6 +25,7 @@ from freqtrade.constants import Config
from freqtrade.data.converter import reduce_dataframe_footprint
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.strategy import merge_informative_pair
from freqtrade.strategy.interface import IStrategy
@@ -111,7 +113,7 @@ class FreqaiDataKitchen:
def set_paths(
self,
pair: str,
trained_timestamp: int = None,
trained_timestamp: Optional[int] = None,
) -> None:
"""
Set the paths to the data for the present coin/botloop
@@ -1159,9 +1161,9 @@ class FreqaiDataKitchen:
for pair in pairs:
pair = pair.replace(':', '') # lightgbm doesnt like colons
valid_strs = [f"%-{pair}", f"%{pair}", f"%_{pair}"]
pair_cols = [col for col in dataframe.columns if
any(substr in col for substr in valid_strs)]
pair_cols = [col for col in dataframe.columns if col.startswith("%")
and f"{pair}_" in col]
if pair_cols:
pair_cols.insert(0, 'date')
corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1)
@@ -1190,6 +1192,105 @@ class FreqaiDataKitchen:
return dataframe
def get_pair_data_for_features(self,
pair: str,
tf: str,
strategy: IStrategy,
corr_dataframes: dict = {},
base_dataframes: dict = {},
is_corr_pairs: bool = False) -> DataFrame:
"""
Get the data for the pair. If it's not in the dictionary, get it from the data provider
:param pair: str = pair to get data for
:param tf: str = timeframe to get data for
:param strategy: IStrategy = user defined strategy object
:param corr_dataframes: dict = dict containing the df pair dataframes
(for user defined timeframes)
:param base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)
:param is_corr_pairs: bool = whether the pair is a corr pair or not
:return: dataframe = dataframe containing the pair data
"""
if is_corr_pairs:
dataframe = corr_dataframes[pair][tf]
if not dataframe.empty:
return dataframe
else:
dataframe = strategy.dp.get_pair_dataframe(pair=pair, timeframe=tf)
return dataframe
else:
dataframe = base_dataframes[tf]
if not dataframe.empty:
return dataframe
else:
dataframe = strategy.dp.get_pair_dataframe(pair=pair, timeframe=tf)
return dataframe
def merge_features(self, df_main: DataFrame, df_to_merge: DataFrame,
tf: str, timeframe_inf: str, suffix: str) -> DataFrame:
"""
Merge the features of the dataframe and remove HLCV and date added columns
:param df_main: DataFrame = main dataframe
:param df_to_merge: DataFrame = dataframe to merge
:param tf: str = timeframe of the main dataframe
:param timeframe_inf: str = timeframe of the dataframe to merge
:param suffix: str = suffix to add to the columns of the dataframe to merge
:return: dataframe = merged dataframe
"""
dataframe = merge_informative_pair(df_main, df_to_merge, tf, timeframe_inf=timeframe_inf,
append_timeframe=False, suffix=suffix, ffill=True)
skip_columns = [
(f"{s}_{suffix}") for s in ["date", "open", "high", "low", "close", "volume"]
]
dataframe = dataframe.drop(columns=skip_columns)
return dataframe
def populate_features(self, dataframe: DataFrame, pair: str, strategy: IStrategy,
corr_dataframes: dict, base_dataframes: dict,
is_corr_pairs: bool = False) -> DataFrame:
"""
Use the user defined strategy functions for populating features
:param dataframe: DataFrame = dataframe to populate
:param pair: str = pair to populate
:param strategy: IStrategy = user defined strategy object
:param corr_dataframes: dict = dict containing the df pair dataframes
:param base_dataframes: dict = dict containing the current pair dataframes
:param is_corr_pairs: bool = whether the pair is a corr pair or not
:return: dataframe = populated dataframe
"""
tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes")
for tf in tfs:
metadata = {"pair": pair, "tf": tf}
informative_df = self.get_pair_data_for_features(
pair, tf, strategy, corr_dataframes, base_dataframes, is_corr_pairs)
informative_copy = informative_df.copy()
for t in self.freqai_config["feature_parameters"]["indicator_periods_candles"]:
df_features = strategy.feature_engineering_expand_all(
informative_copy.copy(), t, metadata=metadata)
suffix = f"{t}"
informative_df = self.merge_features(informative_df, df_features, tf, tf, suffix)
generic_df = strategy.feature_engineering_expand_basic(
informative_copy.copy(), metadata=metadata)
suffix = "gen"
informative_df = self.merge_features(informative_df, generic_df, tf, tf, suffix)
indicators = [col for col in informative_df if col.startswith("%")]
for n in range(self.freqai_config["feature_parameters"]["include_shifted_candles"] + 1):
if n == 0:
continue
df_shift = informative_df[indicators].shift(n)
df_shift = df_shift.add_suffix("_shift-" + str(n))
informative_df = pd.concat((informative_df, df_shift), axis=1)
dataframe = self.merge_features(dataframe.copy(), informative_df,
self.config["timeframe"], tf, f'{pair}_{tf}')
return dataframe
def use_strategy_to_populate_indicators(
self,
strategy: IStrategy,
@@ -1202,7 +1303,87 @@ class FreqaiDataKitchen:
"""
Use the user defined strategy for populating indicators during retrain
:param strategy: IStrategy = user defined strategy object
:param corr_dataframes: dict = dict containing the informative pair dataframes
:param corr_dataframes: dict = dict containing the df pair dataframes
(for user defined timeframes)
:param base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)
:param pair: str = pair to populate
:param prediction_dataframe: DataFrame = dataframe containing the pair data
used for prediction
:param do_corr_pairs: bool = whether to populate corr pairs or not
:return:
dataframe: DataFrame = dataframe containing populated indicators
"""
# this is a hack to check if the user is using the populate_any_indicators function
new_version = inspect.getsource(strategy.populate_any_indicators) == (
inspect.getsource(IStrategy.populate_any_indicators))
if new_version:
tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes")
pairs: List[str] = self.freqai_config["feature_parameters"].get(
"include_corr_pairlist", [])
for tf in tfs:
if tf not in base_dataframes:
base_dataframes[tf] = pd.DataFrame()
for p in pairs:
if p not in corr_dataframes:
corr_dataframes[p] = {}
if tf not in corr_dataframes[p]:
corr_dataframes[p][tf] = pd.DataFrame()
if not prediction_dataframe.empty:
dataframe = prediction_dataframe.copy()
else:
dataframe = base_dataframes[self.config["timeframe"]].copy()
corr_pairs: List[str] = self.freqai_config["feature_parameters"].get(
"include_corr_pairlist", [])
dataframe = self.populate_features(dataframe.copy(), pair, strategy,
corr_dataframes, base_dataframes)
metadata = {"pair": pair}
dataframe = strategy.feature_engineering_standard(dataframe.copy(), metadata=metadata)
# ensure corr pairs are always last
for corr_pair in corr_pairs:
if pair == corr_pair:
continue # dont repeat anything from whitelist
if corr_pairs and do_corr_pairs:
dataframe = self.populate_features(dataframe.copy(), corr_pair, strategy,
corr_dataframes, base_dataframes, True)
dataframe = strategy.set_freqai_targets(dataframe.copy(), metadata=metadata)
self.get_unique_classes_from_labels(dataframe)
dataframe = self.remove_special_chars_from_feature_names(dataframe)
if self.config.get('reduce_df_footprint', False):
dataframe = reduce_dataframe_footprint(dataframe)
return dataframe
else:
# the user is using the populate_any_indicators functions which is deprecated
df = self.use_strategy_to_populate_indicators_old_version(
strategy, corr_dataframes, base_dataframes, pair,
prediction_dataframe, do_corr_pairs)
return df
def use_strategy_to_populate_indicators_old_version(
self,
strategy: IStrategy,
corr_dataframes: dict = {},
base_dataframes: dict = {},
pair: str = "",
prediction_dataframe: DataFrame = pd.DataFrame(),
do_corr_pairs: bool = True,
) -> DataFrame:
"""
Use the user defined strategy for populating indicators during retrain
:param strategy: IStrategy = user defined strategy object
:param corr_dataframes: dict = dict containing the df pair dataframes
(for user defined timeframes)
:param base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)

View File

@@ -1,3 +1,4 @@
import inspect
import logging
import threading
import time
@@ -65,12 +66,11 @@ class IFreqaiModel(ABC):
self.retrain = False
self.first = True
self.set_full_path()
self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config)
# set current candle to arbitrary historical date
self.current_candle: datetime = datetime.fromtimestamp(637887600, tz=timezone.utc)
self.dd.current_candle = self.current_candle
@@ -104,6 +104,9 @@ class IFreqaiModel(ABC):
self.metadata: Dict[str, Any] = self.dd.load_global_metadata_from_disk()
self.data_provider: Optional[DataProvider] = None
self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1)
self.can_short = True # overridden in start() with strategy.can_short
self.warned_deprecated_populate_any_indicators = False
record_params(config, self.full_path)
@@ -133,6 +136,10 @@ class IFreqaiModel(ABC):
self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE)
self.dd.set_pair_dict_info(metadata)
self.data_provider = strategy.dp
self.can_short = strategy.can_short
# check if the strategy has deprecated populate_any_indicators function
self.check_deprecated_populate_any_indicators(strategy)
if self.live:
self.inference_timer('start')
@@ -145,14 +152,11 @@ class IFreqaiModel(ABC):
# (backtest window, i.e. window immediately following the training window).
# FreqAI slides the window and sequentially builds the backtesting results before returning
# the concatenated results for the full backtesting period back to the strategy.
elif not self.follow_mode:
else:
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
if not self.config.get("freqai_backtest_live_models", False):
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dk = self.start_backtesting(dataframe, metadata, self.dk)
dk = self.start_backtesting(dataframe, metadata, self.dk, strategy)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
else:
logger.info(
@@ -253,7 +257,7 @@ class IFreqaiModel(ABC):
self.dd.save_metric_tracker_to_disk()
def start_backtesting(
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen, strategy: IStrategy
) -> FreqaiDataKitchen:
"""
The main broad execution for backtesting. For backtesting, each pair enters and then gets
@@ -265,19 +269,22 @@ class IFreqaiModel(ABC):
:param dataframe: DataFrame = strategy passed dataframe
:param metadata: Dict = pair metadata
:param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
:param strategy: Strategy to train on
:return:
FreqaiDataKitchen = Data management/analysis tool associated to present pair only
"""
self.pair_it += 1
train_it = 0
pair = metadata["pair"]
populate_indicators = True
check_features = True
# Loop enforcing the sliding window training/backtesting paradigm
# tr_train is the training time range e.g. 1 historical month
# tr_backtest is the backtesting time range e.g. the week directly
# following tr_train. Both of these windows slide through the
# entire backtest
for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
pair = metadata["pair"]
(_, _, _) = self.dd.get_pair_dict_info(pair)
train_it += 1
total_trains = len(dk.backtesting_timeranges)
@@ -299,18 +306,44 @@ class IFreqaiModel(ABC):
dk.set_new_model_names(pair, timestamp_model_id)
if dk.check_if_backtest_prediction_is_valid(len_backtest_df):
self.dd.load_metadata(dk)
dk.find_features(dataframe)
self.check_if_feature_list_matches_strategy(dk)
if check_features:
self.dd.load_metadata(dk)
dataframe_dummy_features = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe.tail(1), pair=metadata["pair"]
)
dk.find_features(dataframe_dummy_features)
self.check_if_feature_list_matches_strategy(dk)
check_features = False
append_df = dk.get_backtesting_prediction()
dk.append_predictions(append_df)
else:
dataframe_train = dk.slice_dataframe(tr_train, dataframe)
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
if populate_indicators:
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
populate_indicators = False
dataframe_base_train = dataframe.loc[dataframe["date"] < tr_train.stopdt, :]
dataframe_base_train = strategy.set_freqai_targets(
dataframe_base_train, metadata=metadata)
dataframe_base_backtest = dataframe.loc[dataframe["date"] < tr_backtest.stopdt, :]
dataframe_base_backtest = strategy.set_freqai_targets(
dataframe_base_backtest, metadata=metadata)
dataframe_train = dk.slice_dataframe(tr_train, dataframe_base_train)
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe_base_backtest)
if not self.model_exists(dk):
dk.find_features(dataframe_train)
dk.find_labels(dataframe_train)
self.model = self.train(dataframe_train, pair, dk)
try:
self.model = self.train(dataframe_train, pair, dk)
except Exception as msg:
logger.warning(
f"Training {pair} raised exception {msg.__class__.__name__}. "
f"Message: {msg}, skipping.")
self.dd.pair_dict[pair]["trained_timestamp"] = int(
tr_train.stopts)
if self.plot_features:
@@ -348,46 +381,27 @@ class IFreqaiModel(ABC):
dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
"""
# update follower
if self.follow_mode:
self.dd.update_follower_metadata()
# get the model metadata associated with the current pair
(_, trained_timestamp, return_null_array) = self.dd.get_pair_dict_info(metadata["pair"])
# if the metadata doesn't exist, the follower returns null arrays to strategy
if self.follow_mode and return_null_array:
logger.info("Returning null array from follower to strategy")
self.dd.return_null_values_to_strategy(dataframe, dk)
return dk
# append the historic data once per round
if self.dd.historic_data:
self.dd.update_historic_data(strategy, dk)
logger.debug(f'Updating historic data on pair {metadata["pair"]}')
self.track_current_candle()
if not self.follow_mode:
(_, new_trained_timerange, data_load_timerange) = dk.check_if_new_training_required(
trained_timestamp
)
dk.set_paths(metadata["pair"], new_trained_timerange.stopts)
(_, new_trained_timerange, data_load_timerange) = dk.check_if_new_training_required(
trained_timestamp
)
dk.set_paths(metadata["pair"], new_trained_timerange.stopts)
# load candle history into memory if it is not yet.
if not self.dd.historic_data:
self.dd.load_all_pair_histories(data_load_timerange, dk)
# load candle history into memory if it is not yet.
if not self.dd.historic_data:
self.dd.load_all_pair_histories(data_load_timerange, dk)
if not self.scanning:
self.scanning = True
self.start_scanning(strategy)
elif self.follow_mode:
dk.set_paths(metadata["pair"], trained_timestamp)
logger.info(
"FreqAI instance set to follow_mode, finding existing pair "
f"using { self.identifier }"
)
if not self.scanning:
self.scanning = True
self.start_scanning(strategy)
# load the model and associated data into the data kitchen
self.model = self.dd.load_data(metadata["pair"], dk)
@@ -911,9 +925,28 @@ class IFreqaiModel(ABC):
dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
dk.return_dataframe = pd.merge(
dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred")
# dk.return_dataframe = dk.return_dataframe[saved_dataframe.columns].fillna(0)
return dk
def check_deprecated_populate_any_indicators(self, strategy: IStrategy):
"""
Check and warn if the deprecated populate_any_indicators function is used.
:param strategy: strategy object
"""
if not self.warned_deprecated_populate_any_indicators:
self.warned_deprecated_populate_any_indicators = True
old_version = inspect.getsource(strategy.populate_any_indicators) != (
inspect.getsource(IStrategy.populate_any_indicators))
if old_version:
logger.warning("DEPRECATION WARNING: "
"You are using the deprecated populate_any_indicators function. "
"This function will raise an error on March 1 2023. "
"Please update your strategy by using "
"the new feature_engineering functions. See \n"
"https://www.freqtrade.io/en/latest/freqai-feature-engineering/"
"for details.")
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.

View File

@@ -34,7 +34,7 @@ class ReinforcementLearner_multiproc(ReinforcementLearner):
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
env_info = self.pack_env_dict()
env_info = self.pack_env_dict(dk.pair)
env_id = "train_env"
self.train_env = SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1,