throw user error if user tries to load models but feeds the wrong features (while using PCA)

This commit is contained in:
robcaulk 2022-05-28 11:11:41 +02:00
parent b8f9c3557b
commit c5a16e91fb
4 changed files with 19 additions and 6 deletions

View File

@ -313,6 +313,8 @@ $$ W_i = \exp(\frac{-i}{\alpha*n}) $$
where $W_i$ is the weight of data point $i$ in a total set of $n$ data points._
![weight-factor](assets/weights_factor.png)
Finally, `period` defines the offset used for the `labels`. In the present example,
the user is asking for `labels` that are 24 candles in the future.

View File

@ -477,6 +477,11 @@ class FreqaiDataKitchen:
index=self.data_dictionary["train_features"].index, index=self.data_dictionary["train_features"].index,
) )
# keeping a copy of the non-transformed features so we can check for errors during
# model load from disk
self.data['training_features_list_raw'] = copy.deepcopy(self.training_features_list)
self.training_features_list = self.data_dictionary["train_features"].columns
self.data_dictionary["test_features"] = pd.DataFrame( self.data_dictionary["test_features"] = pd.DataFrame(
data=test_components, data=test_components,
columns=["PC" + str(i) for i in range(0, n_keep_components)], columns=["PC" + str(i) for i in range(0, n_keep_components)],
@ -563,7 +568,8 @@ class FreqaiDataKitchen:
def find_features(self, dataframe: DataFrame) -> list: def find_features(self, dataframe: DataFrame) -> list:
column_names = dataframe.columns column_names = dataframe.columns
features = [c for c in column_names if '%' in c] features = [c for c in column_names if '%' in c]
assert features, ("Could not find any features!") if not features:
raise OperationalException("Could not find any features!")
return features return features
def check_if_pred_in_training_spaces(self) -> None: def check_if_pred_in_training_spaces(self) -> None:

View File

@ -225,7 +225,11 @@ class IFreqaiModel(ABC):
def check_if_feature_list_matches_strategy(self, dataframe: DataFrame, def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
dh: FreqaiDataKitchen) -> None: dh: FreqaiDataKitchen) -> None:
strategy_provided_features = dh.find_features(dataframe) strategy_provided_features = dh.find_features(dataframe)
if strategy_provided_features != dh.training_features_list: if dh.data['training_features_list_raw']:
feature_list = dh.data['training_features_list_raw']
else:
feature_list = dh.training_features_list
if strategy_provided_features != feature_list:
raise OperationalException("Trying to access pretrained model with `identifier` " raise OperationalException("Trying to access pretrained model with `identifier` "
"but found different features furnished by current strategy." "but found different features furnished by current strategy."
"Change `identifer` to train from scratch, or ensure the" "Change `identifer` to train from scratch, or ensure the"
@ -254,7 +258,7 @@ class IFreqaiModel(ABC):
# if self.feature_parameters["remove_outliers"]: # if self.feature_parameters["remove_outliers"]:
# dh.remove_outliers(predict=False) # dh.remove_outliers(predict=False)
def data_cleaning_predict(self, dh: FreqaiDataKitchen) -> None: def data_cleaning_predict(self, dh: FreqaiDataKitchen, dataframe: DataFrame) -> None:
""" """
Base data cleaning method for predict. Base data cleaning method for predict.
These functions each modify dh.do_predict, which is a dataframe with equal length These functions each modify dh.do_predict, which is a dataframe with equal length
@ -266,7 +270,7 @@ class IFreqaiModel(ABC):
for buy signals. for buy signals.
""" """
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'): if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
dh.pca_transform() dh.pca_transform(dataframe)
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'): if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'):
dh.use_SVM_to_remove_outliers(predict=True) dh.use_SVM_to_remove_outliers(predict=True)

View File

@ -71,7 +71,8 @@ class CatboostPredictionModel(IFreqaiModel):
# optional additional data cleaning/analysis # optional additional data cleaning/analysis
self.data_cleaning_train(dh) self.data_cleaning_train(dh)
logger.info(f'Training model on {len(dh.training_features_list)} features') logger.info(f'Training model on {len(dh.data_dictionary["train_features"].columns)}'
'features')
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary) model = self.fit(data_dictionary)
@ -129,7 +130,7 @@ class CatboostPredictionModel(IFreqaiModel):
dh.data_dictionary["prediction_features"] = filtered_dataframe dh.data_dictionary["prediction_features"] = filtered_dataframe
# optional additional data cleaning/analysis # optional additional data cleaning/analysis
self.data_cleaning_predict(dh) self.data_cleaning_predict(dh, filtered_dataframe)
predictions = self.model.predict(dh.data_dictionary["prediction_features"]) predictions = self.model.predict(dh.data_dictionary["prediction_features"])