From 7e8e29e42d4c2b0eb058d9408c27ebd220eb9f68 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 30 Aug 2022 20:41:37 +0200 Subject: [PATCH] use continuous value for inlier_metric --- docs/freqai.md | 9 +++------ freqtrade/freqai/data_kitchen.py | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index 3fc76e9f0..2c6efa3b9 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -116,7 +116,6 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `outlier_protection_percentage` | If more than `outlier_protection_percentage` fraction of points are removed as outliers, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact.
**Datatype:** float. Default: `30` | `reverse_train_test_order` | If true, FreqAI will train on the latest data split and test on historical split of the data. This allows the model to be trained up to the most recent data point, while avoiding overfitting. However, users should be careful to understand unorthodox nature of this parameter before employing it.
**Datatype:** bool. Default: False | `inlier_metric_window` | If set, FreqAI will add the `inlier_metric` to the training feature set and set the lookback to be the `inlier_metric_window`. Details of how the `inlier_metric` is computed can be found [here](#using-the-inliermetric)
**Datatype:** int. Default: 0 -| `inlier_metric_weibull_cutoff` | If the `inlier_metric_window` is set, this value is used to determine the tail cutoff in the weibull distribution fit. Details of how the `inlier_metric` is computed can be found [here](#using-the-inliermetric)
**Datatype:** float. Default: 0.95 | | **Data split parameters** | `data_split_parameters` | Include any additional parameters available from Scikit-learn `test_train_split()`, which are shown [here](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) (external website).
**Datatype:** Dictionary. | `test_size` | Fraction of data that should be used for testing instead of training.
**Datatype:** Positive float < 1. @@ -644,13 +643,11 @@ The `inlier_metric` is a metric aimed at quantifying how different a prediction User can set `inlier_metric_window` to set the look back window. FreqAI will compute the distance between the present prediction point and each of the previous data points (total of `inlier_metric_window` points). -This function goes one step further - during training, it computes the `inlier_metric` for all training data points and builds weibull distributions for each each lookback point. If one of the distances falls in the tail of the respective weibull distribution, it is considered an "outlier." If the distance to the lookback point is not in the tail, it is considered an "inlier." Inliers receive a value of 1, and outliers receive a value of 0. +This function goes one step further - during training, it computes the `inlier_metric` for all training data points and builds weibull distributions for each lookback point. The cumulative distribution function for the weibull distribution is used to produce a quantile for each of the data points. The quantiles for each lookback point are averaged to create the `inlier_metric`. -FreqAI adds this `inlier_metric` score to the training features! Thus, your model is trained to recognize how this temporal inlier metric is evolving. +FreqAI adds this `inlier_metric` score to the training features! In other words, your model is trained to recognize how this temporal inlier metric is related to the user-set labels. -Users can control the weibull threshold using the `inlier_metric_weibull_cutoff` - -This function does not currently remove outliers from the data set. +This function does **not** remove outliers from the data set. 
### Controlling the model learning process diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 0158996c7..9d4a69287 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -735,7 +735,6 @@ class FreqaiDataKitchen: import scipy.stats as ss no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"] - weib_pct = self.freqai_config["feature_parameters"]["inlier_metric_weibull_cutoff"] if set_ == 'train': compute_df = copy.deepcopy(self.data_dictionary['train_features']) @@ -780,12 +779,10 @@ class FreqaiDataKitchen: for key in distances.keys(): current_distances = distances[key].dropna() fit_params = ss.weibull_min.fit(current_distances) - cutoff = ss.weibull_min.ppf(weib_pct, *fit_params) - is_inlier = np.where( - current_distances <= cutoff, 1, 0 - ) + quantiles = ss.weibull_min.cdf(current_distances, *fit_params) + df_inlier = pd.DataFrame( - {key + '_IsInlier': is_inlier}, index=distances.index + {key: quantiles}, index=distances.index ) inliers = pd.concat( [inliers, df_inlier], axis=1