ensure full pair string is used for caching dataframes. If not, revert to old behavior. Update docs.

This commit is contained in:
robcaulk
2022-10-29 22:26:49 +02:00
parent a9db668082
commit 650bb8b7d7
5 changed files with 92 additions and 74 deletions

View File

@@ -1133,17 +1133,19 @@ class FreqaiDataKitchen:
"""
Find the columns of the dataframe corresponding to the corr_pairlist, save them
in a dictionary to be reused and attached to other pairs.
:params:
:dataframe: fully populated dataframe (current pair + corr_pairs)
:return:
:corr_dataframes: dictionary of dataframes to be attached to other pairs in same candle.
:param dataframe: fully populated dataframe (current pair + corr_pairs)
:return: corr_dataframes, dictionary of dataframes to be attached
to other pairs in same candle.
"""
corr_dataframes: Dict[str, DataFrame] = {}
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
for pair in pairs:
coin = pair.split('/')[0]
pair_cols = [col for col in dataframe.columns if coin in col]
valid_strs = [f"%-{pair}", f"%{pair}", f"%_{pair}"]
pair_cols = [col for col in dataframe.columns if
any(substr in col for substr in valid_strs)]
pair_cols.insert(0, 'date')
corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1)
return corr_dataframes
@@ -1153,10 +1155,10 @@ class FreqaiDataKitchen:
current_pair: str) -> DataFrame:
"""
Attach the existing corr_pair dataframes to the current pair dataframe before training
:params:
:dataframe: current pair strategy dataframe, indicators populated already
:corr_dataframes: dictionary of saved dataframes from earlier in the same candle
:current_pair: current pair to which we will attach corr pair dataframe
:param dataframe: current pair strategy dataframe, indicators populated already
:param corr_dataframes: dictionary of saved dataframes from earlier in the same candle
:param current_pair: current pair to which we will attach corr pair dataframe
:return:
:dataframe: current pair dataframe of populated indicators, concatenated with corr_pairs
ready for training
@@ -1164,8 +1166,8 @@ class FreqaiDataKitchen:
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
for pair in pairs:
if current_pair not in pair:
dataframe = pd.concat([dataframe, corr_dataframes[pair]], axis=1)
if current_pair != pair:
dataframe = dataframe.merge(corr_dataframes[pair], how='left', on='date')
return dataframe
@@ -1186,15 +1188,15 @@ class FreqaiDataKitchen:
:param base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)
:param metadata: dict = strategy furnished pair metadata
:returns:
:return:
dataframe: DataFrame = dataframe containing populated indicators
"""
# for prediction dataframe creation, we let dataprovider handle everything in the strategy
# so we create empty dictionaries, which allows us to pass None to
# `populate_any_indicators()`. Signaling we want the dp to give us the live dataframe.
tfs = self.freqai_config["feature_parameters"].get("include_timeframes")
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes")
pairs: List[str] = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
if not prediction_dataframe.empty:
dataframe = prediction_dataframe.copy()
for tf in tfs:
@@ -1219,16 +1221,16 @@ class FreqaiDataKitchen:
)
# ensure corr pairs are always last
for i in pairs:
if pair in i:
for corr_pair in pairs:
if pair == corr_pair:
continue # dont repeat anything from whitelist
for tf in tfs:
if pairs and do_corr_pairs:
dataframe = strategy.populate_any_indicators(
i,
corr_pair,
dataframe.copy(),
tf,
informative=corr_dataframes[i][tf]
informative=corr_dataframes[corr_pair][tf]
)
self.get_unique_classes_from_labels(dataframe)

View File

@@ -72,6 +72,7 @@ class IFreqaiModel(ABC):
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
self.scanning = False
self.ft_params = self.freqai_info["feature_parameters"]
self.corr_pairlist = self.ft_params.get("include_corr_pairlist", [])
self.keras: bool = self.freqai_info.get("keras", False)
if self.keras and self.ft_params.get("DI_threshold", 0):
self.ft_params["DI_threshold"] = 0
@@ -375,12 +376,8 @@ class IFreqaiModel(ABC):
self.dd.return_null_values_to_strategy(dataframe, dk)
return dk
if self.get_corr_dataframes:
self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
self.get_corr_dataframes = False
else:
dataframe = dk.attach_corr_pair_columns(
dataframe, self.corr_dataframes, metadata["pair"])
if self.corr_pairlist:
dataframe = self.cache_corr_pairlist_dfs(dataframe, dk)
dk.find_labels(dataframe)
@@ -687,7 +684,8 @@ class IFreqaiModel(ABC):
" avoid blinding open trades and degrading performance.")
self.pair_it = 0
self.inference_time = 0
self.get_corr_dataframes = True
if self.corr_pairlist:
self.get_corr_dataframes = True
return
def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
@@ -746,6 +744,29 @@ class IFreqaiModel(ABC):
f'Best approximation queue: {best_queue}')
return best_queue
def cache_corr_pairlist_dfs(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
"""
Cache the corr_pairlist dfs to speed up performance for subsequent pairs during the
current candle.
:param dataframe: strategy fed dataframe
:param dk: datakitchen object for current asset
:return: dataframe to attach/extract cached corr_pair dfs to/from.
"""
if self.get_corr_dataframes:
self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
if not self.corr_dataframes:
logger.warning("Couldn't cache corr_pair dataframes for improved performance. "
"Consider ensuring that the full coin/stake, e.g. XYZ/USD, "
"is included in the column names when you are creating features "
"in `populate_any_indicators()`.")
self.get_corr_dataframes = not bool(self.corr_dataframes)
else:
dataframe = dk.attach_corr_pair_columns(
dataframe, self.corr_dataframes, dk.pair)
return dataframe
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.

View File

@@ -53,7 +53,7 @@ class FreqaiExampleStrategy(IStrategy):
"""
Function designed to automatically generate, name and merge features
from user indicated timeframes in the configuration file. User controls the indicators
passed to the training/prediction by prepending indicators with `'%-' + coin `
passed to the training/prediction by prepending indicators with `f'%-{pair}`
(see convention below). I.e. user should not prepend any supporting metrics
(e.g. bb_lowerband below) with % unless they explicitly want to pass that metric to the
model.
@@ -63,8 +63,6 @@ class FreqaiExampleStrategy(IStrategy):
:param informative: the dataframe associated with the informative pair
"""
coin = pair.split('/')[0]
if informative is None:
informative = self.dp.get_pair_dataframe(pair, tf)
@@ -72,36 +70,36 @@ class FreqaiExampleStrategy(IStrategy):
for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:
t = int(t)
informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, timeperiod=t)
informative[f"%-{coin}sma-period_{t}"] = ta.SMA(informative, timeperiod=t)
informative[f"%-{coin}ema-period_{t}"] = ta.EMA(informative, timeperiod=t)
informative[f"%-{pair}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
informative[f"%-{pair}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
informative[f"%-{pair}adx-period_{t}"] = ta.ADX(informative, timeperiod=t)
informative[f"%-{pair}sma-period_{t}"] = ta.SMA(informative, timeperiod=t)
informative[f"%-{pair}ema-period_{t}"] = ta.EMA(informative, timeperiod=t)
bollinger = qtpylib.bollinger_bands(
qtpylib.typical_price(informative), window=t, stds=2.2
)
informative[f"{coin}bb_lowerband-period_{t}"] = bollinger["lower"]
informative[f"{coin}bb_middleband-period_{t}"] = bollinger["mid"]
informative[f"{coin}bb_upperband-period_{t}"] = bollinger["upper"]
informative[f"{pair}bb_lowerband-period_{t}"] = bollinger["lower"]
informative[f"{pair}bb_middleband-period_{t}"] = bollinger["mid"]
informative[f"{pair}bb_upperband-period_{t}"] = bollinger["upper"]
informative[f"%-{coin}bb_width-period_{t}"] = (
informative[f"{coin}bb_upperband-period_{t}"]
- informative[f"{coin}bb_lowerband-period_{t}"]
) / informative[f"{coin}bb_middleband-period_{t}"]
informative[f"%-{coin}close-bb_lower-period_{t}"] = (
informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"]
informative[f"%-{pair}bb_width-period_{t}"] = (
informative[f"{pair}bb_upperband-period_{t}"]
- informative[f"{pair}bb_lowerband-period_{t}"]
) / informative[f"{pair}bb_middleband-period_{t}"]
informative[f"%-{pair}close-bb_lower-period_{t}"] = (
informative["close"] / informative[f"{pair}bb_lowerband-period_{t}"]
)
informative[f"%-{coin}roc-period_{t}"] = ta.ROC(informative, timeperiod=t)
informative[f"%-{pair}roc-period_{t}"] = ta.ROC(informative, timeperiod=t)
informative[f"%-{coin}relative_volume-period_{t}"] = (
informative[f"%-{pair}relative_volume-period_{t}"] = (
informative["volume"] / informative["volume"].rolling(t).mean()
)
informative[f"%-{coin}pct-change"] = informative["close"].pct_change()
informative[f"%-{coin}raw_volume"] = informative["volume"]
informative[f"%-{coin}raw_price"] = informative["close"]
informative[f"%-{pair}pct-change"] = informative["close"].pct_change()
informative[f"%-{pair}raw_volume"] = informative["volume"]
informative[f"%-{pair}raw_price"] = informative["close"]
indicators = [col for col in informative if col.startswith("%")]
# This loop duplicates and shifts all indicators to add a sense of recency to data