Ensure the full pair string is used for caching dataframes. If not, revert to old behavior. Update docs.

This commit is contained in:
robcaulk
2022-10-29 22:26:49 +02:00
parent a9db668082
commit 650bb8b7d7
5 changed files with 92 additions and 74 deletions

View File

@@ -1133,17 +1133,19 @@ class FreqaiDataKitchen:
"""
Find the columns of the dataframe corresponding to the corr_pairlist, save them
in a dictionary to be reused and attached to other pairs.
:params:
:dataframe: fully populated dataframe (current pair + corr_pairs)
:return:
:corr_dataframes: dictionary of dataframes to be attached to other pairs in same candle.
:param dataframe: fully populated dataframe (current pair + corr_pairs)
:return: corr_dataframes, dictionary of dataframes to be attached
to other pairs in same candle.
"""
corr_dataframes: Dict[str, DataFrame] = {}
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
for pair in pairs:
coin = pair.split('/')[0]
pair_cols = [col for col in dataframe.columns if coin in col]
valid_strs = [f"%-{pair}", f"%{pair}", f"%_{pair}"]
pair_cols = [col for col in dataframe.columns if
any(substr in col for substr in valid_strs)]
pair_cols.insert(0, 'date')
corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1)
return corr_dataframes
@@ -1153,10 +1155,10 @@ class FreqaiDataKitchen:
current_pair: str) -> DataFrame:
"""
Attach the existing corr_pair dataframes to the current pair dataframe before training
:params:
:dataframe: current pair strategy dataframe, indicators populated already
:corr_dataframes: dictionary of saved dataframes from earlier in the same candle
:current_pair: current pair to which we will attach corr pair dataframe
:param dataframe: current pair strategy dataframe, indicators populated already
:param corr_dataframes: dictionary of saved dataframes from earlier in the same candle
:param current_pair: current pair to which we will attach corr pair dataframe
:return:
:dataframe: current pair dataframe of populated indicators, concatenated with corr_pairs
ready for training
@@ -1164,8 +1166,8 @@ class FreqaiDataKitchen:
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
for pair in pairs:
if current_pair not in pair:
dataframe = pd.concat([dataframe, corr_dataframes[pair]], axis=1)
if current_pair != pair:
dataframe = dataframe.merge(corr_dataframes[pair], how='left', on='date')
return dataframe
@@ -1186,15 +1188,15 @@ class FreqaiDataKitchen:
:param base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)
:param metadata: dict = strategy furnished pair metadata
:returns:
:return:
dataframe: DataFrame = dataframe containing populated indicators
"""
# for prediction dataframe creation, we let dataprovider handle everything in the strategy
# so we create empty dictionaries, which allows us to pass None to
# `populate_any_indicators()`. Signaling we want the dp to give us the live dataframe.
tfs = self.freqai_config["feature_parameters"].get("include_timeframes")
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes")
pairs: List[str] = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
if not prediction_dataframe.empty:
dataframe = prediction_dataframe.copy()
for tf in tfs:
@@ -1219,16 +1221,16 @@ class FreqaiDataKitchen:
)
# ensure corr pairs are always last
for i in pairs:
if pair in i:
for corr_pair in pairs:
if pair == corr_pair:
continue # don't repeat anything from whitelist
for tf in tfs:
if pairs and do_corr_pairs:
dataframe = strategy.populate_any_indicators(
i,
corr_pair,
dataframe.copy(),
tf,
informative=corr_dataframes[i][tf]
informative=corr_dataframes[corr_pair][tf]
)
self.get_unique_classes_from_labels(dataframe)

View File

@@ -72,6 +72,7 @@ class IFreqaiModel(ABC):
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
self.scanning = False
self.ft_params = self.freqai_info["feature_parameters"]
self.corr_pairlist = self.ft_params.get("include_corr_pairlist", [])
self.keras: bool = self.freqai_info.get("keras", False)
if self.keras and self.ft_params.get("DI_threshold", 0):
self.ft_params["DI_threshold"] = 0
@@ -375,12 +376,8 @@ class IFreqaiModel(ABC):
self.dd.return_null_values_to_strategy(dataframe, dk)
return dk
if self.get_corr_dataframes:
self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
self.get_corr_dataframes = False
else:
dataframe = dk.attach_corr_pair_columns(
dataframe, self.corr_dataframes, metadata["pair"])
if self.corr_pairlist:
dataframe = self.cache_corr_pairlist_dfs(dataframe, dk)
dk.find_labels(dataframe)
@@ -687,7 +684,8 @@ class IFreqaiModel(ABC):
" avoid blinding open trades and degrading performance.")
self.pair_it = 0
self.inference_time = 0
self.get_corr_dataframes = True
if self.corr_pairlist:
self.get_corr_dataframes = True
return
def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
@@ -746,6 +744,29 @@ class IFreqaiModel(ABC):
f'Best approximation queue: {best_queue}')
return best_queue
def cache_corr_pairlist_dfs(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
    """
    Populate, or reuse, the cached corr_pairlist dataframes.

    While `self.get_corr_dataframes` is set, the corr pair columns are extracted from
    `dataframe` and stored on `self.corr_dataframes`. The flag is cleared only when the
    extraction returned something; an empty result logs a warning and leaves the flag
    set, so the next call extracts again instead of attaching.
    Once the flag is cleared, the cached dataframes are attached to `dataframe` instead.
    :param dataframe: strategy fed dataframe
    :param dk: datakitchen object for current asset
    :return: the incoming dataframe, untouched, when the cache was (re)built on this call;
             otherwise the result of `dk.attach_corr_pair_columns()`.
    """
    if not self.get_corr_dataframes:
        # Cache is already populated: attach it to the pair handled by this datakitchen.
        return dk.attach_corr_pair_columns(dataframe, self.corr_dataframes, dk.pair)

    self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
    nothing_cached = not self.corr_dataframes
    if nothing_cached:
        logger.warning("Couldn't cache corr_pair dataframes for improved performance. "
                       "Consider ensuring that the full coin/stake, e.g. XYZ/USD, "
                       "is included in the column names when you are creating features "
                       "in `populate_any_indicators()`.")
    # Keep extracting on later calls for as long as nothing could be cached.
    self.get_corr_dataframes = nothing_cached
    return dataframe
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.