Make check constant pred labels agnostic
This commit is contained in:
parent
20fc521771
commit
033c5bd441
@ -71,6 +71,7 @@ class FreqaiDataKitchen:
|
|||||||
self.data_path = Path()
|
self.data_path = Path()
|
||||||
self.label_list: List = []
|
self.label_list: List = []
|
||||||
self.training_features_list: List = []
|
self.training_features_list: List = []
|
||||||
|
self.constant_features_list: List = []
|
||||||
self.model_filename: str = ""
|
self.model_filename: str = ""
|
||||||
self.backtesting_results_path = Path()
|
self.backtesting_results_path = Path()
|
||||||
self.backtest_predictions_folder: str = "backtesting_predictions"
|
self.backtest_predictions_folder: str = "backtesting_predictions"
|
||||||
@ -206,15 +207,14 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
drop_index = pd.isnull(filtered_df).any(axis=1) # get the rows that have NaNs,
|
drop_index = pd.isnull(filtered_df).any(axis=1) # get the rows that have NaNs,
|
||||||
drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement.
|
drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement.
|
||||||
ft_params = self.freqai_config["feature_parameters"]
|
|
||||||
if (training_filter):
|
if (training_filter):
|
||||||
if not ft_params.get(
|
|
||||||
"principal_component_analysis", False
|
|
||||||
):
|
|
||||||
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
|
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
|
||||||
if const_cols:
|
if const_cols:
|
||||||
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
|
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
|
||||||
|
self.constant_features_list = const_cols
|
||||||
logger.warning(f"Removed features {const_cols} with constant values.")
|
logger.warning(f"Removed features {const_cols} with constant values.")
|
||||||
|
else:
|
||||||
|
self.constant_features_list = []
|
||||||
# we don't care about total row number (total no. datapoints) in training, we only care
|
# we don't care about total row number (total no. datapoints) in training, we only care
|
||||||
# about removing any row with NaNs
|
# about removing any row with NaNs
|
||||||
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
|
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
|
||||||
@ -245,9 +245,7 @@ class FreqaiDataKitchen:
|
|||||||
self.data["filter_drop_index_training"] = drop_index
|
self.data["filter_drop_index_training"] = drop_index
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if not ft_params.get(
|
if len(self.constant_features_list):
|
||||||
"principal_component_analysis", False
|
|
||||||
):
|
|
||||||
filtered_df = self.check_pred_labels(filtered_df)
|
filtered_df = self.check_pred_labels(filtered_df)
|
||||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||||
# so now we use do_predict to avoid any prediction based on a NaN
|
# so now we use do_predict to avoid any prediction based on a NaN
|
||||||
@ -474,14 +472,13 @@ class FreqaiDataKitchen:
|
|||||||
:params:
|
:params:
|
||||||
:df_predictions: incoming predictions
|
:df_predictions: incoming predictions
|
||||||
"""
|
"""
|
||||||
train_labels = self.data_dictionary["train_features"].columns
|
constant_labels = self.constant_features_list
|
||||||
pred_labels = df_predictions.columns
|
df_predictions = df_predictions.filter(
|
||||||
num_diffs = len(pred_labels.difference(train_labels))
|
df_predictions.columns.difference(constant_labels)
|
||||||
if num_diffs != 0:
|
)
|
||||||
df_predictions = df_predictions[train_labels]
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Removed {num_diffs} features from prediction features, "
|
f"Removed {len(constant_labels)} features from prediction features, "
|
||||||
f"these were likely considered constant values during most recent training."
|
f"these were considered constant values during most recent training."
|
||||||
)
|
)
|
||||||
|
|
||||||
return df_predictions
|
return df_predictions
|
||||||
|
Loading…
Reference in New Issue
Block a user