Merge pull request #7508 from aemr3/fix-pca-errors

Fix feature list match for PCA
This commit is contained in:
Robert Caulk 2022-10-01 16:50:29 +02:00 committed by GitHub
commit 3e34f10e3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 13 additions and 11 deletions

View File

@ -92,7 +92,7 @@ class BaseClassifierModel(IFreqaiModel):
filtered_df = dk.normalize_data_from_metadata(filtered_df) filtered_df = dk.normalize_data_from_metadata(filtered_df)
dk.data_dictionary["prediction_features"] = filtered_df dk.data_dictionary["prediction_features"] = filtered_df
self.data_cleaning_predict(dk, filtered_df) self.data_cleaning_predict(dk)
predictions = self.model.predict(dk.data_dictionary["prediction_features"]) predictions = self.model.predict(dk.data_dictionary["prediction_features"])
pred_df = DataFrame(predictions, columns=dk.label_list) pred_df = DataFrame(predictions, columns=dk.label_list)

View File

@ -92,7 +92,7 @@ class BaseRegressionModel(IFreqaiModel):
dk.data_dictionary["prediction_features"] = filtered_df dk.data_dictionary["prediction_features"] = filtered_df
# optional additional data cleaning/analysis # optional additional data cleaning/analysis
self.data_cleaning_predict(dk, filtered_df) self.data_cleaning_predict(dk)
predictions = self.model.predict(dk.data_dictionary["prediction_features"]) predictions = self.model.predict(dk.data_dictionary["prediction_features"])
pred_df = DataFrame(predictions, columns=dk.label_list) pred_df = DataFrame(predictions, columns=dk.label_list)

View File

@ -423,7 +423,7 @@ class FreqaiDataDrawer:
dk.data["data_path"] = str(dk.data_path) dk.data["data_path"] = str(dk.data_path)
dk.data["model_filename"] = str(dk.model_filename) dk.data["model_filename"] = str(dk.model_filename)
dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns) dk.data["training_features_list"] = dk.training_features_list
dk.data["label_list"] = dk.label_list dk.data["label_list"] = dk.label_list
# store the metadata # store the metadata
with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp: with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp:

View File

@ -875,6 +875,7 @@ class FreqaiDataKitchen:
""" """
column_names = dataframe.columns column_names = dataframe.columns
features = [c for c in column_names if "%" in c] features = [c for c in column_names if "%" in c]
if not features: if not features:
raise OperationalException("Could not find any features!") raise OperationalException("Could not find any features!")

View File

@ -275,7 +275,8 @@ class IFreqaiModel(ABC):
if dk.check_if_backtest_prediction_exists(): if dk.check_if_backtest_prediction_exists():
self.dd.load_metadata(dk) self.dd.load_metadata(dk)
self.check_if_feature_list_matches_strategy(dataframe_train, dk) dk.find_features(dataframe_train)
self.check_if_feature_list_matches_strategy(dk)
append_df = dk.get_backtesting_prediction() append_df = dk.get_backtesting_prediction()
dk.append_predictions(append_df) dk.append_predictions(append_df)
else: else:
@ -296,7 +297,6 @@ class IFreqaiModel(ABC):
else: else:
self.model = self.dd.load_data(pair, dk) self.model = self.dd.load_data(pair, dk)
# self.check_if_feature_list_matches_strategy(dataframe_train, dk)
pred_df, do_preds = self.predict(dataframe_backtest, dk) pred_df, do_preds = self.predict(dataframe_backtest, dk)
append_df = dk.get_predictions_to_append(pred_df, do_preds) append_df = dk.get_predictions_to_append(pred_df, do_preds)
dk.append_predictions(append_df) dk.append_predictions(append_df)
@ -420,7 +420,7 @@ class IFreqaiModel(ABC):
return return
def check_if_feature_list_matches_strategy( def check_if_feature_list_matches_strategy(
self, dataframe: DataFrame, dk: FreqaiDataKitchen self, dk: FreqaiDataKitchen
) -> None: ) -> None:
""" """
Ensure user is passing the proper feature set if they are reusing an `identifier` pointing Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
@ -429,11 +429,12 @@ class IFreqaiModel(ABC):
:param dk: FreqaiDataKitchen = non-persistent data container/analyzer for :param dk: FreqaiDataKitchen = non-persistent data container/analyzer for
current coin/bot loop current coin/bot loop
""" """
dk.find_features(dataframe)
if "training_features_list_raw" in dk.data: if "training_features_list_raw" in dk.data:
feature_list = dk.data["training_features_list_raw"] feature_list = dk.data["training_features_list_raw"]
else: else:
feature_list = dk.data['training_features_list'] feature_list = dk.data['training_features_list']
if dk.training_features_list != feature_list: if dk.training_features_list != feature_list:
raise OperationalException( raise OperationalException(
"Trying to access pretrained model with `identifier` " "Trying to access pretrained model with `identifier` "
@ -481,13 +482,16 @@ class IFreqaiModel(ABC):
if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0): if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0):
dk.add_noise_to_training_features() dk.add_noise_to_training_features()
def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None: def data_cleaning_predict(self, dk: FreqaiDataKitchen) -> None:
""" """
Base data cleaning method for predict. Base data cleaning method for predict.
Functions here are complementary to the functions of data_cleaning_train. Functions here are complementary to the functions of data_cleaning_train.
""" """
ft_params = self.freqai_info["feature_parameters"] ft_params = self.freqai_info["feature_parameters"]
# ensure user is feeding the correct indicators to the model
self.check_if_feature_list_matches_strategy(dk)
if ft_params.get('inlier_metric_window', 0): if ft_params.get('inlier_metric_window', 0):
dk.compute_inlier_metric(set_='predict') dk.compute_inlier_metric(set_='predict')
@ -505,9 +509,6 @@ class IFreqaiModel(ABC):
if ft_params.get("use_DBSCAN_to_remove_outliers", False): if ft_params.get("use_DBSCAN_to_remove_outliers", False):
dk.use_DBSCAN_to_remove_outliers(predict=True) dk.use_DBSCAN_to_remove_outliers(predict=True)
# ensure user is feeding the correct indicators to the model
self.check_if_feature_list_matches_strategy(dk.data_dictionary['prediction_features'], dk)
def model_exists(self, dk: FreqaiDataKitchen) -> bool: def model_exists(self, dk: FreqaiDataKitchen) -> bool:
""" """
Given a pair and path, check if a model already exists Given a pair and path, check if a model already exists