Add additional data normalization methods to the freqai module: StandardScaler, MinMaxScaler, and QuantileTransformer. Also add support for pickle metadata, a normalization_factory, and unit tests.
This commit is contained in: parent 8a49d62068, commit 4aa9284737
@@ -73,7 +73,8 @@
             10,
             20
         ],
-        "plot_feature_importances": 0
+        "plot_feature_importances": 0,
+        "data_normalization": "legacy"
     },
     "data_split_parameters": {
         "test_size": 0.33,
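The new data_normalization key defaults to "legacy" when absent, preserving pre-existing behavior. A minimal sketch of selecting one of the new methods, shown as the Python view of the config (surrounding keys are illustrative; the valid identifiers per normalization_factory below are "legacy", "standard", "minmax", and "quantile"):

    config = {
        "freqai": {
            "feature_parameters": {
                # case-insensitive; omitting the key falls back to "legacy"
                "data_normalization": "quantile",
            },
        },
    }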
@@ -427,6 +427,9 @@ class FreqaiDataDrawer:
         with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp:
             rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)
 
+        with (save_path / f"{dk.model_filename}_metadata.pkl").open("wb") as fp:
+            cloudpickle.dump(dk.pkl_data, fp)
+
         return
 
     def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
@@ -456,10 +459,14 @@ class FreqaiDataDrawer:
         dk.data["model_filename"] = str(dk.model_filename)
         dk.data["training_features_list"] = dk.training_features_list
         dk.data["label_list"] = dk.label_list
-        # store the metadata
+        # store the json metadata
         with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp:
             rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)
 
+        # store the pickle metadata
+        with (save_path / f"{dk.model_filename}_metadata.pkl").open("wb") as fp:
+            cloudpickle.dump(dk.pkl_data, fp)
+
         # save the train data to file so we can check preds for area of applicability later
         dk.data_dictionary["train_features"].to_pickle(
             save_path / f"{dk.model_filename}_trained_df.pkl"
@@ -486,6 +493,16 @@ class FreqaiDataDrawer:
 
         return
 
+    def load_pickle_metadata(self, dk: FreqaiDataKitchen):
+        pickle_file_path = dk.data_path / f"{dk.model_filename}_metadata.pkl"
+        exists = pickle_file_path.is_file()
+        # Check if the metadata pickle file exists before attempting to read it.
+        # This is for backward compatibility with models generated before the
+        # pickle metadata feature was implemented.
+        if exists:
+            with (dk.data_path / f"{dk.model_filename}_metadata.pkl").open("rb") as fp:
+                dk.pkl_data = cloudpickle.load(fp)
+
     def load_metadata(self, dk: FreqaiDataKitchen) -> None:
         """
         Load only metadata into datakitchen to increase performance during
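A minimal, freqtrade-independent sketch of the cloudpickle round trip implemented above, including the backward-compatibility existence check (file and key names are illustrative):

    import tempfile
    from pathlib import Path

    import cloudpickle
    from sklearn.preprocessing import MinMaxScaler

    # Fitted scaler objects round-trip through cloudpickle but not through JSON.
    pkl_data = {"&-target_scaler": MinMaxScaler().fit([[0.0], [1.0]])}

    path = Path(tempfile.mkdtemp()) / "model_metadata.pkl"
    with path.open("wb") as fp:
        cloudpickle.dump(pkl_data, fp)

    if path.is_file():  # models saved before this feature never wrote the file
        with path.open("rb") as fp:
            restored = cloudpickle.load(fp)
        assert restored["&-target_scaler"].transform([[0.5]])[0][0] == 0.5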
@@ -496,6 +513,8 @@ class FreqaiDataDrawer:
         dk.training_features_list = dk.data["training_features_list"]
         dk.label_list = dk.data["label_list"]
 
+        self.load_pickle_metadata(dk)
+
     def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:
         """
         loads all data required to make a prediction on a sub-train time range
@@ -517,6 +536,8 @@ class FreqaiDataDrawer:
         with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp:
             dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
 
+        self.load_pickle_metadata(dk)
+
         dk.data_dictionary["train_features"] = pd.read_pickle(
             dk.data_path / f"{dk.model_filename}_trained_df.pkl"
         )
@@ -25,6 +25,7 @@ from freqtrade.constants import Config
 from freqtrade.data.converter import reduce_dataframe_footprint
 from freqtrade.exceptions import OperationalException
 from freqtrade.exchange import timeframe_to_seconds
+from freqtrade.freqai.normalization import Normalization, normalization_factory
 from freqtrade.strategy import merge_informative_pair
 from freqtrade.strategy.interface import IStrategy
 
@@ -68,6 +69,7 @@ class FreqaiDataKitchen:
         pair: str = "",
     ):
         self.data: Dict[str, Any] = {}
+        self.pkl_data: Dict[str, Any] = {}
         self.data_dictionary: Dict[str, DataFrame] = {}
         self.config = config
         self.freqai_config: Dict[str, Any] = config["freqai"]
@@ -109,6 +111,8 @@ class FreqaiDataKitchen:
         self.unique_classes: Dict[str, list] = {}
         self.unique_class_list: list = []
         self.backtest_live_models_data: Dict[str, Any] = {}
+        self.normalizer: Normalization = normalization_factory(config, self.data, self.pkl_data,
+                                                               self.unique_class_list)
 
     def set_paths(
         self,
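The factory receives dk.data and dk.pkl_data by reference, so whatever the chosen normalizer records lands directly in the kitchen's metadata. The two stores exist because LegacyNormalization saves plain min/max floats (JSON-serializable via rapidjson), while the sklearn-backed normalizers save fitted scaler objects, which is what the new cloudpickle path in FreqaiDataDrawer handles. A standalone sketch of that distinction (keys are illustrative):

    import json

    import cloudpickle
    from sklearn.preprocessing import StandardScaler

    data = {"%-close_max": 1.23, "%-close_min": -1.23}  # legacy stats: JSON-friendly
    pkl_data = {"%-close_scaler": StandardScaler().fit([[0.0], [1.0]])}  # objects: pickle only

    json.dumps(data)             # works
    cloudpickle.dumps(pkl_data)  # works; json.dumps(pkl_data) would raise TypeError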
@@ -308,105 +312,16 @@ class FreqaiDataKitchen:
         return self.data_dictionary
 
     def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
-        """
-        Normalize all data in the data_dictionary according to the training dataset
-        :param data_dictionary: dictionary containing the cleaned and
-        split training/test data/labels
-        :returns:
-        :data_dictionary: updated dictionary with standardized values.
-        """
-
-        # standardize the data by training stats
-        train_max = data_dictionary["train_features"].max()
-        train_min = data_dictionary["train_features"].min()
-        data_dictionary["train_features"] = (
-            2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1
-        )
-        data_dictionary["test_features"] = (
-            2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1
-        )
-
-        for item in train_max.keys():
-            self.data[item + "_max"] = train_max[item]
-            self.data[item + "_min"] = train_min[item]
-
-        for item in data_dictionary["train_labels"].keys():
-            if data_dictionary["train_labels"][item].dtype == object:
-                continue
-            train_labels_max = data_dictionary["train_labels"][item].max()
-            train_labels_min = data_dictionary["train_labels"][item].min()
-            data_dictionary["train_labels"][item] = (
-                2
-                * (data_dictionary["train_labels"][item] - train_labels_min)
-                / (train_labels_max - train_labels_min)
-                - 1
-            )
-            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-                data_dictionary["test_labels"][item] = (
-                    2
-                    * (data_dictionary["test_labels"][item] - train_labels_min)
-                    / (train_labels_max - train_labels_min)
-                    - 1
-                )
-
-            self.data[f"{item}_max"] = train_labels_max
-            self.data[f"{item}_min"] = train_labels_min
-        return data_dictionary
+        return self.normalizer.normalize_data(data_dictionary)
 
     def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
-
-        train_max = df.max()
-        train_min = df.min()
-        df = (
-            2 * (df - train_min) / (train_max - train_min) - 1
-        )
-
-        for item in train_max.keys():
-            self.data[item + "_max"] = train_max[item]
-            self.data[item + "_min"] = train_min[item]
-
-        return df
+        return self.normalizer.normalize_single_dataframe(df)
 
     def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
-        """
-        Normalize a set of data using the mean and standard deviation from
-        the associated training data.
-        :param df: Dataframe to be standardized
-        """
-
-        train_max = [None] * len(df.keys())
-        train_min = [None] * len(df.keys())
-
-        for i, item in enumerate(df.keys()):
-            train_max[i] = self.data[f"{item}_max"]
-            train_min[i] = self.data[f"{item}_min"]
-
-        train_max_series = pd.Series(train_max, index=df.keys())
-        train_min_series = pd.Series(train_min, index=df.keys())
-
-        df = (
-            2 * (df - train_min_series) / (train_max_series - train_min_series) - 1
-        )
-
-        return df
+        return self.normalizer.normalize_data_from_metadata(df)
 
     def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
-        """
-        Denormalize a set of data using the mean and standard deviation from
-        the associated training data.
-        :param df: Dataframe of predictions to be denormalized
-        """
-
-        for label in df.columns:
-            if df[label].dtype == object or label in self.unique_class_list:
-                continue
-            df[label] = (
-                (df[label] + 1)
-                * (self.data[f"{label}_max"] - self.data[f"{label}_min"])
-                / 2
-            ) + self.data[f"{label}_min"]
-
-        return df
+        return self.normalizer.denormalize_labels_from_metadata(df)
 
     def split_timerange(
         self, tr: str, train_split: int = 28, bt_split: float = 7
@@ -524,7 +439,7 @@ class FreqaiDataKitchen:
             columns=["PC" + str(i) for i in range(0, n_keep_components)],
             index=self.data_dictionary["train_features"].index,
         )
-        # normalsing transformed training features
+        # normalizing transformed training features
         self.data_dictionary["train_features"] = self.normalize_single_dataframe(
             self.data_dictionary["train_features"])
 
freqtrade/freqai/normalization.py (new file, 272 lines)
@@ -0,0 +1,272 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict, TypeVar
+
+import pandas as pd
+from pandas import DataFrame
+from sklearn.preprocessing import MinMaxScaler, QuantileTransformer, StandardScaler
+
+from freqtrade.constants import Config
+from freqtrade.exceptions import OperationalException
+
+
+TransformerType = TypeVar('TransformerType', MinMaxScaler, StandardScaler, QuantileTransformer)
+
+
+def normalization_factory(
+    config: Config,
+    meta_data: Dict[str, Any],
+    pickle_meta_data: Dict[str, Any],
+    unique_class_list: list
+):
+    freqai_config: Dict[str, Any] = config["freqai"]
+    norm_config_id = freqai_config["feature_parameters"].get("data_normalization", "legacy")
+    if norm_config_id.lower() == "legacy":
+        return LegacyNormalization(config, meta_data, pickle_meta_data, unique_class_list)
+    elif norm_config_id.lower() == "standard":
+        return StandardNormalization(config, meta_data, pickle_meta_data, unique_class_list)
+    elif norm_config_id.lower() == "minmax":
+        return MinMaxNormalization(config, meta_data, pickle_meta_data, unique_class_list)
+    elif norm_config_id.lower() == "quantile":
+        return QuantileNormalization(config, meta_data, pickle_meta_data, unique_class_list)
+    else:
+        raise OperationalException(f"Invalid data normalization identifier '{norm_config_id}'")
+
+
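A quick sanity sketch of the dispatch; the .lower() calls make the identifier case-insensitive, and the config fragment here is illustrative:

    config = {"freqai": {"feature_parameters": {"data_normalization": "MinMax"}}}
    norm = normalization_factory(config, meta_data={}, pickle_meta_data={}, unique_class_list=[])
    assert isinstance(norm, MinMaxNormalization)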
+class Normalization(ABC):
+    def __init__(
+        self,
+        config: Config,
+        meta_data: Dict[str, Any],
+        pickle_meta_data: Dict[str, Any],
+        unique_class_list: list
+    ):
+        self.freqai_config: Dict[str, Any] = config["freqai"]
+        self.data: Dict[str, Any] = meta_data
+        self.pkl_data: Dict[str, Any] = pickle_meta_data
+        self.unique_class_list: list = unique_class_list
+
+    @abstractmethod
+    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
+        """"""
+
+    @abstractmethod
+    def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
+        """"""
+
+    @abstractmethod
+    def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
+        """"""
+
+    @abstractmethod
+    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
+        """"""
+
+
+class LegacyNormalization(Normalization):
+    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
+        """
+        Normalize all data in the data_dictionary according to the training dataset
+        :param data_dictionary: dictionary containing the cleaned and
+        split training/test data/labels
+        :returns:
+        :data_dictionary: updated dictionary with standardized values.
+        """
+
+        # standardize the data by training stats
+        train_max = data_dictionary["train_features"].max()
+        train_min = data_dictionary["train_features"].min()
+        data_dictionary["train_features"] = (
+            2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1
+        )
+        data_dictionary["test_features"] = (
+            2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1
+        )
+
+        for item in train_max.keys():
+            self.data[item + "_max"] = train_max[item]
+            self.data[item + "_min"] = train_min[item]
+
+        for item in data_dictionary["train_labels"].keys():
+            if data_dictionary["train_labels"][item].dtype == object:
+                continue
+            train_labels_max = data_dictionary["train_labels"][item].max()
+            train_labels_min = data_dictionary["train_labels"][item].min()
+            data_dictionary["train_labels"][item] = (
+                2
+                * (data_dictionary["train_labels"][item] - train_labels_min)
+                / (train_labels_max - train_labels_min)
+                - 1
+            )
+            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+                data_dictionary["test_labels"][item] = (
+                    2
+                    * (data_dictionary["test_labels"][item] - train_labels_min)
+                    / (train_labels_max - train_labels_min)
+                    - 1
+                )
+
+            self.data[f"{item}_max"] = train_labels_max
+            self.data[f"{item}_min"] = train_labels_min
+        return data_dictionary
+
+    def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
+
+        train_max = df.max()
+        train_min = df.min()
+        df = (
+            2 * (df - train_min) / (train_max - train_min) - 1
+        )
+
+        for item in train_max.keys():
+            self.data[item + "_max"] = train_max[item]
+            self.data[item + "_min"] = train_min[item]
+
+        return df
+
+    def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
+        """
+        Normalize a set of data using the mean and standard deviation from
+        the associated training data.
+        :param df: Dataframe to be standardized
+        """
+
+        train_max = [None] * len(df.keys())
+        train_min = [None] * len(df.keys())
+
+        for i, item in enumerate(df.keys()):
+            train_max[i] = self.data[f"{item}_max"]
+            train_min[i] = self.data[f"{item}_min"]
+
+        train_max_series = pd.Series(train_max, index=df.keys())
+        train_min_series = pd.Series(train_min, index=df.keys())
+
+        df = (
+            2 * (df - train_min_series) / (train_max_series - train_min_series) - 1
+        )
+
+        return df
+
+    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
+        """
+        Denormalize a set of data using the mean and standard deviation from
+        the associated training data.
+        :param df: Dataframe of predictions to be denormalized
+        """
+
+        for label in df.columns:
+            if df[label].dtype == object or label in self.unique_class_list:
+                continue
+            df[label] = (
+                (df[label] + 1)
+                * (self.data[f"{label}_max"] - self.data[f"{label}_min"])
+                / 2
+            ) + self.data[f"{label}_min"]
+
+        return df
+
+
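The legacy scheme maps each column linearly onto [-1, 1] via x -> 2 * (x - min) / (max - min) - 1, so the training minimum lands at -1 and the maximum at +1; denormalize_labels_from_metadata applies the exact inverse. A small numeric check (not part of the commit):

    import pandas as pd

    s = pd.Series([2.0, 4.0, 6.0])
    lo, hi = s.min(), s.max()
    norm = 2 * (s - lo) / (hi - lo) - 1     # [-1.0, 0.0, 1.0]
    back = (norm + 1) * (hi - lo) / 2 + lo  # [2.0, 4.0, 6.0]
    assert back.equals(s)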
+class SKLearnNormalization(Normalization):
+    def __init__(self,
+                 config: Config,
+                 meta_data: Dict[str, Any],
+                 pickle_meta_data: Dict[str, Any],
+                 unique_class_list: list,
+                 transformer: TransformerType):
+        super().__init__(config, meta_data, pickle_meta_data, unique_class_list)
+        self.transformer = transformer
+
+    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
+        """
+        Normalize all data in the data_dictionary according to the training dataset
+        :param data_dictionary: dictionary containing the cleaned and
+        split training/test data/labels
+        :returns:
+        :data_dictionary: updated dictionary with standardized values.
+        """
+
+        # standardize the data by training stats
+        for column in data_dictionary["train_features"].columns:
+            scaler = self.transformer()
+            data_dictionary["train_features"][column] = \
+                scaler.fit_transform(data_dictionary["train_features"][[column]])
+            data_dictionary["test_features"][column] = \
+                scaler.transform(data_dictionary["test_features"][[column]])
+            self.pkl_data[column + "_scaler"] = scaler
+
+        for column in data_dictionary["train_labels"].columns:
+            if data_dictionary["train_labels"][column].dtype == object:
+                continue
+            scaler = self.transformer()
+            data_dictionary["train_labels"][column] = \
+                scaler.fit_transform(data_dictionary["train_labels"][[column]])
+
+            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+                data_dictionary["test_labels"][column] = \
+                    scaler.transform(data_dictionary["test_labels"][[column]])
+
+            self.pkl_data[column + "_scaler"] = scaler
+        return data_dictionary
+
+    def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
+        for column in df.columns:
+            scaler = self.transformer()
+            df[column] = scaler.fit_transform(df[[column]])
+            self.pkl_data[column + "_scaler"] = scaler
+
+        return df
+
+    def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
+        """
+        Normalize a set of data using the mean and standard deviation from
+        the associated training data.
+        :param df: Dataframe to be standardized
+        """
+
+        for column in df.columns:
+            df[column] = self.pkl_data[column + "_scaler"].transform(df[[column]])
+
+        return df
+
+    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
+        """
+        Denormalize a set of data using the mean and standard deviation from
+        the associated training data.
+        :param df: Dataframe of predictions to be denormalized
+        """
+
+        for column in df.columns:
+            if df[column].dtype == object or column in self.unique_class_list:
+                continue
+            df[column] = self.pkl_data[column + "_scaler"].inverse_transform(df[[column]])
+
+        return df
+
+
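SKLearnNormalization fits one transformer per column, stores it in pkl_data under f"{column}_scaler", and relies on inverse_transform for denormalization instead of saved min/max stats. The per-column round trip, sketched outside freqtrade with a hypothetical column name:

    import pandas as pd
    from sklearn.preprocessing import StandardScaler

    df = pd.DataFrame({"&-target": [1.0, 2.0, 3.0, 4.0]})
    pkl_data = {}
    for column in df.columns:
        scaler = StandardScaler()
        df[column] = scaler.fit_transform(df[[column]])  # double brackets: scalers want 2-D input
        pkl_data[column + "_scaler"] = scaler

    for column in df.columns:
        df[column] = pkl_data[column + "_scaler"].inverse_transform(df[[column]])
    assert df["&-target"].round(9).tolist() == [1.0, 2.0, 3.0, 4.0]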
+class StandardNormalization(SKLearnNormalization):
+    def __init__(self,
+                 config: Config,
+                 meta_data: Dict[str, Any],
+                 pickle_meta_data: Dict[str, Any],
+                 unique_class_list: list):
+        super().__init__(config, meta_data, pickle_meta_data, unique_class_list, StandardScaler)
+
+
+class MinMaxNormalization(SKLearnNormalization):
+    def __init__(self,
+                 config: Config,
+                 meta_data: Dict[str, Any],
+                 pickle_meta_data: Dict[str, Any],
+                 unique_class_list: list):
+        super().__init__(config, meta_data, pickle_meta_data, unique_class_list, MinMaxScaler)
+
+
+class QuantileNormalization(SKLearnNormalization):
+    def __init__(self,
+                 config: Config,
+                 meta_data: Dict[str, Any],
+                 pickle_meta_data: Dict[str, Any],
+                 unique_class_list: list):
+        super().__init__(config, meta_data, pickle_meta_data, unique_class_list,
+                         QuantileTransformer)
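For intuition on how the three concrete options differ, a standalone sklearn comparison on one feature column (not freqtrade code; note that QuantileTransformer defaults to n_quantiles=1000 and clips that to n_samples, with a warning, on small datasets):

    import numpy as np
    from sklearn.preprocessing import MinMaxScaler, QuantileTransformer, StandardScaler

    x = np.array([[1.0], [2.0], [3.0], [10.0]])  # one column containing an outlier
    print(StandardScaler().fit_transform(x).ravel())    # zero mean, unit variance; outlier stays extreme
    print(MinMaxScaler().fit_transform(x).ravel())      # mapped into [0, 1]; outlier compresses the rest
    print(QuantileTransformer(n_quantiles=4).fit_transform(x).ravel())  # rank-based, uniform on [0, 1]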
@@ -142,7 +142,7 @@ def make_unfiltered_dataframe(mocker, freqai_conf):
     return freqai, unfiltered_dataframe
 
 
-def make_data_dictionary(mocker, freqai_conf):
+def make_data_dictionary(mocker, freqai_conf, normalized=True):
     freqai_conf.update({"timerange": "20180110-20180130"})
 
     strategy = get_patched_freqai_strategy(mocker, freqai_conf)
@@ -181,6 +181,7 @@ def make_data_dictionary(mocker, freqai_conf):
 
     data_dictionary = freqai.dk.make_train_test_datasets(features_filtered, labels_filtered)
 
-    data_dictionary = freqai.dk.normalize_data(data_dictionary)
+    if normalized:
+        data_dictionary = freqai.dk.normalize_data(data_dictionary)
 
     return freqai
tests/freqai/test_normalization.py (new file, 107 lines)
@@ -0,0 +1,107 @@
+import pytest
+
+from freqtrade.exceptions import OperationalException
+from freqtrade.freqai.normalization import (LegacyNormalization, MinMaxNormalization,
+                                            QuantileNormalization, StandardNormalization)
+from tests.freqai.conftest import make_data_dictionary
+
+
+def test_default_normalization_is_legacy(mocker, freqai_conf):
+    freqai_1st = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict_1st = freqai_1st.dk.data_dictionary
+    freqai_1st.dk.normalize_data(data_dict_1st)
+
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "legacy"
+    freqai_2nd = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict_2nd = freqai_2nd.dk.data_dictionary
+
+    assert not freqai_1st.dk.data_dictionary['train_features'].equals(
+        freqai_2nd.dk.data_dictionary['train_features']), "raw data is equal to normalized data"
+
+    freqai_2nd.dk.normalize_data(data_dict_2nd)
+
+    assert freqai_1st.dk.data_dictionary['train_features'].equals(
+        freqai_2nd.dk.data_dictionary['train_features']), \
+        "explicit\\implicit legacy normalization mismatch"
+
+
+def test_legacy_normalization_add_max_min_columns(mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "legacy"
+    freqai = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict = freqai.dk.data_dictionary
+    freqai.dk.normalize_data(data_dict)
+
+    assert any('_max' in entry for entry in freqai.dk.data.keys())
+    assert any('_min' in entry for entry in freqai.dk.data.keys())
+
+
+def test_standard_normalization_dont_add_max_min_columns(mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "standard"
+    freqai = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict = freqai.dk.data_dictionary
+    freqai.dk.normalize_data(data_dict)
+    assert all(not entry.endswith('_max') for entry in freqai.dk.data.keys())
+    assert all(not entry.endswith('_min') for entry in freqai.dk.data.keys())
+
+
+def test_legacy_and_standard_normalization_difference(mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "legacy"
+    freqai_1st = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict_1st = freqai_1st.dk.data_dictionary
+    freqai_1st.dk.normalize_data(data_dict_1st)
+
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "standard"
+    freqai_2nd = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict_2nd = freqai_2nd.dk.data_dictionary
+    freqai_2nd.dk.normalize_data(data_dict_2nd)
+
+    assert not freqai_1st.dk.data_dictionary['train_features'].equals(
+        freqai_2nd.dk.data_dictionary['train_features']), \
+        "legacy and standard normalization produce same features"
+
+
+@pytest.mark.parametrize(
+    "config_id, norm_class",
+    [
+        ("legacy", LegacyNormalization),
+        ("standard", StandardNormalization),
+        ("minmax", MinMaxNormalization),
+        ("quantile", QuantileNormalization),
+    ],
+)
+def test_normalization_class(config_id, norm_class, mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = config_id
+    freqai = make_data_dictionary(mocker, freqai_conf)
+    assert type(freqai.dk.normalizer) == norm_class
+
+
+def test_assertion_invalid_normalization_id(mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "not_a_norm_id"
+    try:
+        make_data_dictionary(mocker, freqai_conf)
+        assert False, "missing expected normalization factory exception"
+    except OperationalException as e_info:
+        assert str(e_info).startswith("Invalid data normalization identifier"), \
+            "unexpected exception string"
+
+
+@pytest.mark.parametrize(
+    "config_id",
+    [
+        "legacy",
+        "standard",
+        "minmax",
+        "quantile",
+    ],
+)
+def test_denormalization(config_id, mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = config_id
+    freqai_1st = make_data_dictionary(mocker, freqai_conf)
+    data_dict_1st = freqai_1st.dk.data_dictionary
+
+    freqai_2nd = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict_2nd = freqai_2nd.dk.data_dictionary
+
+    denorm_labels = freqai_1st.dk.denormalize_labels_from_metadata(
+        data_dict_1st["train_labels"]).round(9)
+    assert denorm_labels.equals(data_dict_2nd['train_labels'].round(9)), \
+        "raw labels data isn't the same as denormalized labels"
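The new suite runs standalone with the project's usual test runner:

    pytest tests/freqai/test_normalization.py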