Normalise distances before Weibull fit (#7432)

* Normalise distances before Weibull

* Track inlier-metric params
This commit is contained in:
th0rntwig 2022-09-22 18:11:50 +02:00 committed by GitHub
parent 91dc5e7aa6
commit 71e6c54ea4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -775,12 +775,22 @@ class FreqaiDataKitchen:
def compute_inlier_metric(self, set_='train') -> None:
"""
Compute inlier metric from backwards distance distributions.
This metric defines how well features from a timepoint fit
into previous timepoints.
"""
def normalise(dataframe: DataFrame, key: str) -> DataFrame:
if set_ == 'train':
min_value = dataframe.min()
max_value = dataframe.max()
self.data[f'{key}_min'] = min_value
self.data[f'{key}_max'] = max_value
else:
min_value = self.data[f'{key}_min']
max_value = self.data[f'{key}_max']
return (dataframe - min_value) / (max_value - min_value)
no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"]
if set_ == 'train':
@ -825,7 +835,12 @@ class FreqaiDataKitchen:
inliers = pd.DataFrame(index=distances.index)
for key in distances.keys():
current_distances = distances[key].dropna()
fit_params = stats.weibull_min.fit(current_distances)
current_distances = normalise(current_distances, key)
if set_ == 'train':
fit_params = stats.weibull_min.fit(current_distances)
self.data[f'{key}_fit_params'] = fit_params
else:
fit_params = self.data[f'{key}_fit_params']
quantiles = stats.weibull_min.cdf(current_distances, *fit_params)
df_inlier = pd.DataFrame(