no model save backtest, plot features backtest, ensure inlier plays nice, doc
This commit is contained in:
parent
f4fac53a13
commit
873d2a5069
@ -110,7 +110,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
|
|||||||
| `indicator_periods_candles` | Calculate indicators for `indicator_periods_candles` time periods and add them to the feature set. <br> **Datatype:** List of positive integers.
|
| `indicator_periods_candles` | Calculate indicators for `indicator_periods_candles` time periods and add them to the feature set. <br> **Datatype:** List of positive integers.
|
||||||
| `stratify_training_data` | This value is used to indicate the grouping of the data. For example, 2 would set every 2nd data point into a separate dataset to be pulled from during training/testing. See details about how it works [here](#stratifying-the-data-for-training-and-testing-the-model) <br> **Datatype:** Positive integer.
|
| `stratify_training_data` | This value is used to indicate the grouping of the data. For example, 2 would set every 2nd data point into a separate dataset to be pulled from during training/testing. See details about how it works [here](#stratifying-the-data-for-training-and-testing-the-model) <br> **Datatype:** Positive integer.
|
||||||
| `principal_component_analysis` | Automatically reduce the dimensionality of the data set using Principal Component Analysis. See details about how it works [here](#reducing-data-dimensionality-with-principal-component-analysis)
|
| `principal_component_analysis` | Automatically reduce the dimensionality of the data set using Principal Component Analysis. See details about how it works [here](#reducing-data-dimensionality-with-principal-component-analysis)
|
||||||
| `plot_feature_importance` | Create an interactive feature importance plot for each model.<br> **Datatype:** Boolean, defaults to `False`
|
| `plot_feature_importances` | Create a feature importance plot for each model, showing the top/bottom `plot_feature_importances` number of features.<br> **Datatype:** Integer, defaults to `0`
|
||||||
| `DI_threshold` | Activates the Dissimilarity Index for outlier detection when > 0. See details about how it works [here](#removing-outliers-with-the-dissimilarity-index). <br> **Datatype:** Positive float (typically < 1).
|
| `DI_threshold` | Activates the Dissimilarity Index for outlier detection when > 0. See details about how it works [here](#removing-outliers-with-the-dissimilarity-index). <br> **Datatype:** Positive float (typically < 1).
|
||||||
| `use_SVM_to_remove_outliers` | Train a support vector machine to detect and remove outliers from the training data set, as well as from incoming data points. See details about how it works [here](#removing-outliers-using-a-support-vector-machine-svm). <br> **Datatype:** Boolean.
|
| `use_SVM_to_remove_outliers` | Train a support vector machine to detect and remove outliers from the training data set, as well as from incoming data points. See details about how it works [here](#removing-outliers-using-a-support-vector-machine-svm). <br> **Datatype:** Boolean.
|
||||||
| `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. See details about some select parameters [here](#removing-outliers-using-a-support-vector-machine-svm). <br> **Datatype:** Dictionary.
|
| `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. See details about some select parameters [here](#removing-outliers-using-a-support-vector-machine-svm). <br> **Datatype:** Dictionary.
|
||||||
|
@ -313,6 +313,7 @@ class FreqaiDataDrawer:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
dk.find_features(dataframe)
|
dk.find_features(dataframe)
|
||||||
|
dk.find_labels(dataframe)
|
||||||
|
|
||||||
full_labels = dk.label_list + dk.unique_class_list
|
full_labels = dk.label_list + dk.unique_class_list
|
||||||
|
|
||||||
@ -376,7 +377,27 @@ class FreqaiDataDrawer:
|
|||||||
if self.config.get("freqai", {}).get("purge_old_models", False):
|
if self.config.get("freqai", {}).get("purge_old_models", False):
|
||||||
self.purge_old_models()
|
self.purge_old_models()
|
||||||
|
|
||||||
# Functions pulled back from FreqaiDataKitchen because they relied on DataDrawer
|
def save_metaddata(self, dk: FreqaiDataKitchen) -> None:
|
||||||
|
"""
|
||||||
|
Saves only metadata for backtesting studies if user prefers
|
||||||
|
not to save model data. This saves tremendous amounts of space
|
||||||
|
for users generating huge studies.
|
||||||
|
This is only active when `save_backtest_models`: false (not default)
|
||||||
|
"""
|
||||||
|
if not dk.data_path.is_dir():
|
||||||
|
dk.data_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
save_path = Path(dk.data_path)
|
||||||
|
|
||||||
|
dk.data["data_path"] = str(dk.data_path)
|
||||||
|
dk.data["model_filename"] = str(dk.model_filename)
|
||||||
|
dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns)
|
||||||
|
dk.data["label_list"] = dk.label_list
|
||||||
|
|
||||||
|
with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp:
|
||||||
|
rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
|
def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
|
||||||
"""
|
"""
|
||||||
|
@ -831,7 +831,7 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
inlier_metric = pd.DataFrame(
|
inlier_metric = pd.DataFrame(
|
||||||
data=inliers.sum(axis=1) / no_prev_pts,
|
data=inliers.sum(axis=1) / no_prev_pts,
|
||||||
columns=['inlier_metric'],
|
columns=['%-inlier_metric'],
|
||||||
index=compute_df.index
|
index=compute_df.index
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -881,11 +881,14 @@ class FreqaiDataKitchen:
|
|||||||
"""
|
"""
|
||||||
column_names = dataframe.columns
|
column_names = dataframe.columns
|
||||||
features = [c for c in column_names if "%" in c]
|
features = [c for c in column_names if "%" in c]
|
||||||
labels = [c for c in column_names if "&" in c]
|
|
||||||
if not features:
|
if not features:
|
||||||
raise OperationalException("Could not find any features!")
|
raise OperationalException("Could not find any features!")
|
||||||
|
|
||||||
self.training_features_list = features
|
self.training_features_list = features
|
||||||
|
|
||||||
|
def find_labels(self, dataframe: DataFrame) -> None:
|
||||||
|
column_names = dataframe.columns
|
||||||
|
labels = [c for c in column_names if "&" in c]
|
||||||
self.label_list = labels
|
self.label_list = labels
|
||||||
|
|
||||||
def check_if_pred_in_training_spaces(self) -> None:
|
def check_if_pred_in_training_spaces(self) -> None:
|
||||||
@ -1206,7 +1209,8 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
def get_unique_classes_from_labels(self, dataframe: DataFrame) -> None:
|
def get_unique_classes_from_labels(self, dataframe: DataFrame) -> None:
|
||||||
|
|
||||||
self.find_features(dataframe)
|
# self.find_features(dataframe)
|
||||||
|
self.find_labels(dataframe)
|
||||||
|
|
||||||
for key in self.label_list:
|
for key in self.label_list:
|
||||||
if dataframe[key].dtype == object:
|
if dataframe[key].dtype == object:
|
||||||
|
@ -92,6 +92,7 @@ class IFreqaiModel(ABC):
|
|||||||
self.begin_time_train: float = 0
|
self.begin_time_train: float = 0
|
||||||
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
|
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
|
||||||
self.continual_learning = self.freqai_info.get('continual_learning', False)
|
self.continual_learning = self.freqai_info.get('continual_learning', False)
|
||||||
|
self.plot_features = self.ft_params.get("plot_feature_importances", 0)
|
||||||
|
|
||||||
self._threads: List[threading.Thread] = []
|
self._threads: List[threading.Thread] = []
|
||||||
self._stop_event = threading.Event()
|
self._stop_event = threading.Event()
|
||||||
@ -278,22 +279,24 @@ class IFreqaiModel(ABC):
|
|||||||
append_df = dk.get_backtesting_prediction()
|
append_df = dk.get_backtesting_prediction()
|
||||||
dk.append_predictions(append_df)
|
dk.append_predictions(append_df)
|
||||||
else:
|
else:
|
||||||
if not self.model_exists(
|
if not self.model_exists(dk):
|
||||||
pair, dk, trained_timestamp=trained_timestamp_int
|
|
||||||
):
|
|
||||||
dk.find_features(dataframe_train)
|
dk.find_features(dataframe_train)
|
||||||
|
dk.find_labels(dataframe_train)
|
||||||
self.model = self.train(dataframe_train, pair, dk)
|
self.model = self.train(dataframe_train, pair, dk)
|
||||||
self.dd.pair_dict[pair]["trained_timestamp"] = int(
|
self.dd.pair_dict[pair]["trained_timestamp"] = int(
|
||||||
trained_timestamp.stopts)
|
trained_timestamp.stopts)
|
||||||
|
if self.plot_features:
|
||||||
|
plot_feature_importance(self.model, pair, dk, self.plot_features)
|
||||||
if self.save_backtest_models:
|
if self.save_backtest_models:
|
||||||
logger.info('Saving backtest model to disk.')
|
logger.info('Saving backtest model to disk.')
|
||||||
self.dd.save_data(self.model, pair, dk)
|
self.dd.save_data(self.model, pair, dk)
|
||||||
|
else:
|
||||||
|
logger.info('Saving metadata to disk.')
|
||||||
|
self.dd.save_metaddata(dk)
|
||||||
else:
|
else:
|
||||||
self.model = self.dd.load_data(pair, dk)
|
self.model = self.dd.load_data(pair, dk)
|
||||||
|
|
||||||
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
|
# self.check_if_feature_list_matches_strategy(dataframe_train, dk)
|
||||||
|
|
||||||
pred_df, do_preds = self.predict(dataframe_backtest, dk)
|
pred_df, do_preds = self.predict(dataframe_backtest, dk)
|
||||||
append_df = dk.get_predictions_to_append(pred_df, do_preds)
|
append_df = dk.get_predictions_to_append(pred_df, do_preds)
|
||||||
dk.append_predictions(append_df)
|
dk.append_predictions(append_df)
|
||||||
@ -372,8 +375,7 @@ class IFreqaiModel(ABC):
|
|||||||
self.dd.return_null_values_to_strategy(dataframe, dk)
|
self.dd.return_null_values_to_strategy(dataframe, dk)
|
||||||
return dk
|
return dk
|
||||||
|
|
||||||
# ensure user is feeding the correct indicators to the model
|
dk.find_labels(dataframe)
|
||||||
self.check_if_feature_list_matches_strategy(dataframe, dk)
|
|
||||||
|
|
||||||
self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp)
|
self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp)
|
||||||
|
|
||||||
@ -492,7 +494,7 @@ class IFreqaiModel(ABC):
|
|||||||
if ft_params.get(
|
if ft_params.get(
|
||||||
"principal_component_analysis", False
|
"principal_component_analysis", False
|
||||||
):
|
):
|
||||||
dk.pca_transform(self.dk.data_dictionary['prediction_features'])
|
dk.pca_transform(dk.data_dictionary['prediction_features'])
|
||||||
|
|
||||||
if ft_params.get("use_SVM_to_remove_outliers", False):
|
if ft_params.get("use_SVM_to_remove_outliers", False):
|
||||||
dk.use_SVM_to_remove_outliers(predict=True)
|
dk.use_SVM_to_remove_outliers(predict=True)
|
||||||
@ -503,14 +505,10 @@ class IFreqaiModel(ABC):
|
|||||||
if ft_params.get("use_DBSCAN_to_remove_outliers", False):
|
if ft_params.get("use_DBSCAN_to_remove_outliers", False):
|
||||||
dk.use_DBSCAN_to_remove_outliers(predict=True)
|
dk.use_DBSCAN_to_remove_outliers(predict=True)
|
||||||
|
|
||||||
def model_exists(
|
# ensure user is feeding the correct indicators to the model
|
||||||
self,
|
self.check_if_feature_list_matches_strategy(dk.data_dictionary['prediction_features'], dk)
|
||||||
pair: str,
|
|
||||||
dk: FreqaiDataKitchen,
|
def model_exists(self, dk: FreqaiDataKitchen) -> bool:
|
||||||
trained_timestamp: int = None,
|
|
||||||
model_filename: str = "",
|
|
||||||
scanning: bool = False,
|
|
||||||
) -> bool:
|
|
||||||
"""
|
"""
|
||||||
Given a pair and path, check if a model already exists
|
Given a pair and path, check if a model already exists
|
||||||
:param pair: pair e.g. BTC/USD
|
:param pair: pair e.g. BTC/USD
|
||||||
@ -518,11 +516,11 @@ class IFreqaiModel(ABC):
|
|||||||
:return:
|
:return:
|
||||||
:boolean: whether the model file exists or not.
|
:boolean: whether the model file exists or not.
|
||||||
"""
|
"""
|
||||||
path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib")
|
path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model.joblib")
|
||||||
file_exists = path_to_modelfile.is_file()
|
file_exists = path_to_modelfile.is_file()
|
||||||
if file_exists and not scanning:
|
if file_exists:
|
||||||
logger.info("Found model at %s", dk.data_path / dk.model_filename)
|
logger.info("Found model at %s", dk.data_path / dk.model_filename)
|
||||||
elif not scanning:
|
else:
|
||||||
logger.info("Could not find model at %s", dk.data_path / dk.model_filename)
|
logger.info("Could not find model at %s", dk.data_path / dk.model_filename)
|
||||||
return file_exists
|
return file_exists
|
||||||
|
|
||||||
@ -569,6 +567,7 @@ class IFreqaiModel(ABC):
|
|||||||
|
|
||||||
# find the features indicated by strategy and store in datakitchen
|
# find the features indicated by strategy and store in datakitchen
|
||||||
dk.find_features(unfiltered_dataframe)
|
dk.find_features(unfiltered_dataframe)
|
||||||
|
dk.find_labels(unfiltered_dataframe)
|
||||||
|
|
||||||
model = self.train(unfiltered_dataframe, pair, dk)
|
model = self.train(unfiltered_dataframe, pair, dk)
|
||||||
|
|
||||||
@ -576,8 +575,8 @@ class IFreqaiModel(ABC):
|
|||||||
dk.set_new_model_names(pair, new_trained_timerange)
|
dk.set_new_model_names(pair, new_trained_timerange)
|
||||||
self.dd.save_data(model, pair, dk)
|
self.dd.save_data(model, pair, dk)
|
||||||
|
|
||||||
if self.freqai_info["feature_parameters"].get("plot_feature_importance", False):
|
if self.plot_features:
|
||||||
plot_feature_importance(model, pair, dk)
|
plot_feature_importance(model, pair, dk, self.plot_features)
|
||||||
|
|
||||||
if self.freqai_info.get("purge_old_models", False):
|
if self.freqai_info.get("purge_old_models", False):
|
||||||
self.dd.purge_old_models()
|
self.dd.purge_old_models()
|
||||||
|
@ -170,7 +170,7 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
|
|||||||
|
|
||||||
# Data preparation
|
# Data preparation
|
||||||
fi_df = pd.DataFrame({
|
fi_df = pd.DataFrame({
|
||||||
"feature_names": np.array(dk.training_features_list),
|
"feature_names": np.array(dk.data_dictionary['train_features'].columns),
|
||||||
"feature_importance": np.array(feature_importance)
|
"feature_importance": np.array(feature_importance)
|
||||||
})
|
})
|
||||||
fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1]
|
fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1]
|
||||||
|
Loading…
Reference in New Issue
Block a user