diff --git a/docs/freqai.md b/docs/freqai.md index 4a61e63cd..abdc6e542 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -109,8 +109,8 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `indicator_max_period_candles` | **No longer used**. User must use the strategy set `startup_candle_count` which defines the maximum *period* used in `populate_any_indicators()` for indicator creation (timeframe independent). FreqAI uses this information in combination with the maximum timeframe to calculate how many data points it should download so that the first data point does not have a NaN
**Datatype:** positive integer. | `indicator_periods_candles` | Calculate indicators for `indicator_periods_candles` time periods and add them to the feature set.
**Datatype:** List of positive integers. | `stratify_training_data` | This value is used to indicate the grouping of the data. For example, 2 would set every 2nd data point into a separate dataset to be pulled from during training/testing. See details about how it works [here](#stratifying-the-data-for-training-and-testing-the-model)
**Datatype:** Positive integer. -| `principal_component_analysis` | Automatically reduce the dimensionality of the data set using Principal Component Analysis. See details about how it works [here](#reducing-data-dimensionality-with-principal-component-analysis) -| `plot_feature_importance` | Create an interactive feature importance plot for each model.
**Datatype:** Boolean.
**Datatype:** Boolean, defaults to `False` +| `principal_component_analysis` | Automatically reduce the dimensionality of the data set using Principal Component Analysis. See details about how it works [here](#reducing-data-dimensionality-with-principal-component-analysis)
**Datatype:** Boolean, defaults to `false`. +| `plot_feature_importances` | Create a feature importance plot for each model, showing the top/bottom `plot_feature_importances` features.
**Datatype:** Integer, defaults to `0`. | `DI_threshold` | Activates the Dissimilarity Index for outlier detection when > 0. See details about how it works [here](#removing-outliers-with-the-dissimilarity-index).
**Datatype:** Positive float (typically < 1). | `use_SVM_to_remove_outliers` | Train a support vector machine to detect and remove outliers from the training data set, as well as from incoming data points. See details about how it works [here](#removing-outliers-using-a-support-vector-machine-svm).
**Datatype:** Boolean. | `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. See details about some select parameters [here](#removing-outliers-using-a-support-vector-machine-svm).
**Datatype:** Dictionary. diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index bb4cadc52..0dc7bacd5 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -313,6 +313,7 @@ class FreqaiDataDrawer: """ dk.find_features(dataframe) + dk.find_labels(dataframe) full_labels = dk.label_list + dk.unique_class_list @@ -376,7 +377,27 @@ class FreqaiDataDrawer: if self.config.get("freqai", {}).get("purge_old_models", False): self.purge_old_models() - # Functions pulled back from FreqaiDataKitchen because they relied on DataDrawer + def save_metadata(self, dk: FreqaiDataKitchen) -> None: + """ + Saves only metadata for backtesting studies if user prefers + not to save model data. This saves tremendous amounts of space + for users generating huge studies. + This is only active when `save_backtest_models`: false (not default) + """ + if not dk.data_path.is_dir(): + dk.data_path.mkdir(parents=True, exist_ok=True) + + save_path = Path(dk.data_path) + + dk.data["data_path"] = str(dk.data_path) + dk.data["model_filename"] = str(dk.model_filename) + dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns) + dk.data["label_list"] = dk.label_list + + with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp: + rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE) + + return def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None: """ diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 2446bcc99..4e14f6e6e 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -832,7 +832,7 @@ class FreqaiDataKitchen: inlier_metric = pd.DataFrame( data=inliers.sum(axis=1) / no_prev_pts, - columns=['inlier_metric'], + columns=['%-inlier_metric'], index=compute_df.index ) @@ -882,11 +882,14 @@ class FreqaiDataKitchen: """ column_names = dataframe.columns features = [c for c in 
column_names if "%" in c] - labels = [c for c in column_names if "&" in c] if not features: raise OperationalException("Could not find any features!") self.training_features_list = features + + def find_labels(self, dataframe: DataFrame) -> None: + column_names = dataframe.columns + labels = [c for c in column_names if "&" in c] self.label_list = labels def check_if_pred_in_training_spaces(self) -> None: @@ -1207,7 +1210,8 @@ class FreqaiDataKitchen: def get_unique_classes_from_labels(self, dataframe: DataFrame) -> None: - self.find_features(dataframe) + # self.find_features(dataframe) + self.find_labels(dataframe) for key in self.label_list: if dataframe[key].dtype == object: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index b6bf26da7..61ce400df 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -93,6 +93,8 @@ class IFreqaiModel(ABC): self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe']) self.continual_learning = self.freqai_info.get('continual_learning', False) self.spice_rack_open: bool = False + self.plot_features = self.ft_params.get("plot_feature_importances", 0) + self._threads: List[threading.Thread] = [] self._stop_event = threading.Event() @@ -278,22 +280,24 @@ class IFreqaiModel(ABC): append_df = dk.get_backtesting_prediction() dk.append_predictions(append_df) else: - if not self.model_exists( - pair, dk, trained_timestamp=trained_timestamp_int - ): + if not self.model_exists(dk): dk.find_features(dataframe_train) + dk.find_labels(dataframe_train) self.model = self.train(dataframe_train, pair, dk) self.dd.pair_dict[pair]["trained_timestamp"] = int( trained_timestamp.stopts) - + if self.plot_features: + plot_feature_importance(self.model, pair, dk, self.plot_features) if self.save_backtest_models: logger.info('Saving backtest model to disk.') self.dd.save_data(self.model, pair, dk) + else: + logger.info('Saving metadata to disk.') + 
self.dd.save_metadata(dk) else: self.model = self.dd.load_data(pair, dk) - self.check_if_feature_list_matches_strategy(dataframe_train, dk) - + # self.check_if_feature_list_matches_strategy(dataframe_train, dk) pred_df, do_preds = self.predict(dataframe_backtest, dk) append_df = dk.get_predictions_to_append(pred_df, do_preds) dk.append_predictions(append_df) @@ -372,8 +376,7 @@ class IFreqaiModel(ABC): self.dd.return_null_values_to_strategy(dataframe, dk) return dk - # ensure user is feeding the correct indicators to the model - self.check_if_feature_list_matches_strategy(dataframe, dk) + dk.find_labels(dataframe) self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp) @@ -492,7 +495,7 @@ class IFreqaiModel(ABC): if ft_params.get( "principal_component_analysis", False ): - dk.pca_transform(self.dk.data_dictionary['prediction_features']) + dk.pca_transform(dk.data_dictionary['prediction_features']) if ft_params.get("use_SVM_to_remove_outliers", False): dk.use_SVM_to_remove_outliers(predict=True) @@ -503,14 +506,10 @@ class IFreqaiModel(ABC): if ft_params.get("use_DBSCAN_to_remove_outliers", False): dk.use_DBSCAN_to_remove_outliers(predict=True) - def model_exists( - self, - pair: str, - dk: FreqaiDataKitchen, - trained_timestamp: int = None, - model_filename: str = "", - scanning: bool = False, - ) -> bool: + # ensure user is feeding the correct indicators to the model + self.check_if_feature_list_matches_strategy(dk.data_dictionary['prediction_features'], dk) + + def model_exists(self, dk: FreqaiDataKitchen) -> bool: """ Given a pair and path, check if a model already exists :param pair: pair e.g. BTC/USD @@ -518,11 +517,11 @@ class IFreqaiModel(ABC): :return: :boolean: whether the model file exists or not. 
""" - path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib") + path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model.joblib") file_exists = path_to_modelfile.is_file() - if file_exists and not scanning: + if file_exists: logger.info("Found model at %s", dk.data_path / dk.model_filename) - elif not scanning: + else: logger.info("Could not find model at %s", dk.data_path / dk.model_filename) return file_exists @@ -569,6 +568,7 @@ class IFreqaiModel(ABC): # find the features indicated by strategy and store in datakitchen dk.find_features(unfiltered_dataframe) + dk.find_labels(unfiltered_dataframe) model = self.train(unfiltered_dataframe, pair, dk) @@ -576,8 +576,8 @@ class IFreqaiModel(ABC): dk.set_new_model_names(pair, new_trained_timerange) self.dd.save_data(model, pair, dk) - if self.freqai_info["feature_parameters"].get("plot_feature_importance", False): - plot_feature_importance(model, pair, dk) + if self.plot_features: + plot_feature_importance(model, pair, dk, self.plot_features) if self.freqai_info.get("purge_old_models", False): self.dd.purge_old_models() diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index bbe846098..c604c5adc 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -306,7 +306,7 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen, # Data preparation fi_df = pd.DataFrame({ - "feature_names": np.array(dk.training_features_list), + "feature_names": np.array(dk.data_dictionary['train_features'].columns), "feature_importance": np.array(feature_importance) }) fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1] diff --git a/requirements.txt b/requirements.txt index c12d3fb08..d4d15b198 100644 --- a/requirements.txt +++ b/requirements.txt @@ -55,4 +55,3 @@ schedule==1.1.0 #WS Messages websockets==10.3 janus==1.0.0 -