Add additional data normalization methods to the freqai module, including scikit-learn's StandardScaler, MinMaxScaler, and QuantileTransformer. Add support for pickle metadata, a normalization_factory helper, and unit tests.
parent 8a49d62068
commit 4aa9284737
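The normalization method is selected with the new "data_normalization" key under freqai.feature_parameters, one of "legacy", "standard", "minmax", or "quantile"; the default "legacy" keeps the previous min/max behaviour. A minimal sketch of how normalization_factory resolves the setting — the bare-dict config here is illustrative, not a complete freqtrade config:

    from freqtrade.freqai.normalization import normalization_factory

    # Illustrative minimal config: only the keys the factory actually reads.
    config = {
        "freqai": {
            "feature_parameters": {
                "data_normalization": "standard"  # or "legacy", "minmax", "quantile"
            }
        }
    }

    # In FreqaiDataKitchen, dk.data and dk.pkl_data fill the meta_data and
    # pickle_meta_data roles; empty dicts suffice to demonstrate dispatch.
    normalizer = normalization_factory(config, meta_data={}, pickle_meta_data={},
                                       unique_class_list=[])
    print(type(normalizer).__name__)  # StandardNormalization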
@@ -73,7 +73,8 @@
                 10,
                 20
             ],
-            "plot_feature_importances": 0
+            "plot_feature_importances": 0,
+            "data_normalization": "legacy"
         },
         "data_split_parameters": {
             "test_size": 0.33,
@@ -427,6 +427,9 @@ class FreqaiDataDrawer:
         with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp:
             rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)

+        with (save_path / f"{dk.model_filename}_metadata.pkl").open("wb") as fp:
+            cloudpickle.dump(dk.pkl_data, fp)
+
         return

     def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
@@ -456,10 +459,14 @@ class FreqaiDataDrawer:
         dk.data["model_filename"] = str(dk.model_filename)
         dk.data["training_features_list"] = dk.training_features_list
         dk.data["label_list"] = dk.label_list
-        # store the metadata
+        # store the json metadata
         with (save_path / f"{dk.model_filename}_metadata.json").open("w") as fp:
             rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)

+        # store the pickle metadata
+        with (save_path / f"{dk.model_filename}_metadata.pkl").open("wb") as fp:
+            cloudpickle.dump(dk.pkl_data, fp)
+
         # save the train data to file so we can check preds for area of applicability later
         dk.data_dictionary["train_features"].to_pickle(
             save_path / f"{dk.model_filename}_trained_df.pkl"
@@ -486,6 +493,16 @@ class FreqaiDataDrawer:

         return

+    def load_pickle_metadata(self, dk: FreqaiDataKitchen):
+        pickle_file_path = dk.data_path / f"{dk.model_filename}_metadata.pkl"
+        exists = pickle_file_path.is_file()
+        # Check if the metadata pickle file exists before attempting to read it.
+        # This is for backward compatibility with models generated before the
+        # pickle metadata feature was implemented.
+        if exists:
+            with (dk.data_path / f"{dk.model_filename}_metadata.pkl").open("rb") as fp:
+                dk.pkl_data = cloudpickle.load(fp)
+
     def load_metadata(self, dk: FreqaiDataKitchen) -> None:
         """
         Load only metadata into datakitchen to increase performance during
@@ -496,6 +513,8 @@
         dk.training_features_list = dk.data["training_features_list"]
         dk.label_list = dk.data["label_list"]

+        self.load_pickle_metadata(dk)
+
     def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:
         """
         loads all data required to make a prediction on a sub-train time range
@@ -517,6 +536,8 @@
         with (dk.data_path / f"{dk.model_filename}_metadata.json").open("r") as fp:
             dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)

+        self.load_pickle_metadata(dk)
+
         dk.data_dictionary["train_features"] = pd.read_pickle(
             dk.data_path / f"{dk.model_filename}_trained_df.pkl"
         )
@@ -25,6 +25,7 @@ from freqtrade.constants import Config
 from freqtrade.data.converter import reduce_dataframe_footprint
 from freqtrade.exceptions import OperationalException
 from freqtrade.exchange import timeframe_to_seconds
+from freqtrade.freqai.normalization import Normalization, normalization_factory
 from freqtrade.strategy import merge_informative_pair
 from freqtrade.strategy.interface import IStrategy

@@ -68,6 +69,7 @@ class FreqaiDataKitchen:
         pair: str = "",
     ):
         self.data: Dict[str, Any] = {}
+        self.pkl_data: Dict[str, Any] = {}
         self.data_dictionary: Dict[str, DataFrame] = {}
         self.config = config
         self.freqai_config: Dict[str, Any] = config["freqai"]
@@ -109,6 +111,8 @@ class FreqaiDataKitchen:
         self.unique_classes: Dict[str, list] = {}
         self.unique_class_list: list = []
         self.backtest_live_models_data: Dict[str, Any] = {}
+        self.normalizer: Normalization = normalization_factory(config, self.data, self.pkl_data,
+                                                               self.unique_class_list)

     def set_paths(
         self,
@@ -308,105 +312,16 @@ class FreqaiDataKitchen:
         return self.data_dictionary

     def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
-        """
-        Normalize all data in the data_dictionary according to the training dataset
-        :param data_dictionary: dictionary containing the cleaned and
-        split training/test data/labels
-        :returns:
-        :data_dictionary: updated dictionary with standardized values.
-        """
-
-        # standardize the data by training stats
-        train_max = data_dictionary["train_features"].max()
-        train_min = data_dictionary["train_features"].min()
-        data_dictionary["train_features"] = (
-            2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1
-        )
-        data_dictionary["test_features"] = (
-            2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1
-        )
-
-        for item in train_max.keys():
-            self.data[item + "_max"] = train_max[item]
-            self.data[item + "_min"] = train_min[item]
-
-        for item in data_dictionary["train_labels"].keys():
-            if data_dictionary["train_labels"][item].dtype == object:
-                continue
-            train_labels_max = data_dictionary["train_labels"][item].max()
-            train_labels_min = data_dictionary["train_labels"][item].min()
-            data_dictionary["train_labels"][item] = (
-                2
-                * (data_dictionary["train_labels"][item] - train_labels_min)
-                / (train_labels_max - train_labels_min)
-                - 1
-            )
-            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-                data_dictionary["test_labels"][item] = (
-                    2
-                    * (data_dictionary["test_labels"][item] - train_labels_min)
-                    / (train_labels_max - train_labels_min)
-                    - 1
-                )
-
-            self.data[f"{item}_max"] = train_labels_max
-            self.data[f"{item}_min"] = train_labels_min
-        return data_dictionary
+        return self.normalizer.normalize_data(data_dictionary)

     def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
-
-        train_max = df.max()
-        train_min = df.min()
-        df = (
-            2 * (df - train_min) / (train_max - train_min) - 1
-        )
-
-        for item in train_max.keys():
-            self.data[item + "_max"] = train_max[item]
-            self.data[item + "_min"] = train_min[item]
-
-        return df
+        return self.normalizer.normalize_single_dataframe(df)

     def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
-        """
-        Normalize a set of data using the mean and standard deviation from
-        the associated training data.
-        :param df: Dataframe to be standardized
-        """
-
-        train_max = [None] * len(df.keys())
-        train_min = [None] * len(df.keys())
-
-        for i, item in enumerate(df.keys()):
-            train_max[i] = self.data[f"{item}_max"]
-            train_min[i] = self.data[f"{item}_min"]
-
-        train_max_series = pd.Series(train_max, index=df.keys())
-        train_min_series = pd.Series(train_min, index=df.keys())
-
-        df = (
-            2 * (df - train_min_series) / (train_max_series - train_min_series) - 1
-        )
-
-        return df
+        return self.normalizer.normalize_data_from_metadata(df)

     def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
-        """
-        Denormalize a set of data using the mean and standard deviation from
-        the associated training data.
-        :param df: Dataframe of predictions to be denormalized
-        """
-
-        for label in df.columns:
-            if df[label].dtype == object or label in self.unique_class_list:
-                continue
-            df[label] = (
-                (df[label] + 1)
-                * (self.data[f"{label}_max"] - self.data[f"{label}_min"])
-                / 2
-            ) + self.data[f"{label}_min"]
-
-        return df
+        return self.normalizer.denormalize_labels_from_metadata(df)

     def split_timerange(
         self, tr: str, train_split: int = 28, bt_split: float = 7
@@ -524,7 +439,7 @@ class FreqaiDataKitchen:
             columns=["PC" + str(i) for i in range(0, n_keep_components)],
             index=self.data_dictionary["train_features"].index,
         )
-        # normalsing transformed training features
+        # normalizing transformed training features
         self.data_dictionary["train_features"] = self.normalize_single_dataframe(
             self.data_dictionary["train_features"])

freqtrade/freqai/normalization.py (new file, 272 lines)
@@ -0,0 +1,272 @@
+from abc import ABC, abstractmethod
+from typing import Any, Dict, TypeVar
+
+import pandas as pd
+from pandas import DataFrame
+from sklearn.preprocessing import MinMaxScaler, QuantileTransformer, StandardScaler
+
+from freqtrade.constants import Config
+from freqtrade.exceptions import OperationalException
+
+
+TransformerType = TypeVar('TransformerType', MinMaxScaler, StandardScaler, QuantileTransformer)
+
+
+def normalization_factory(
+        config: Config,
+        meta_data: Dict[str, Any],
+        pickle_meta_data: Dict[str, Any],
+        unique_class_list: list
+):
+    freqai_config: Dict[str, Any] = config["freqai"]
+    norm_config_id = freqai_config["feature_parameters"].get("data_normalization", "legacy")
+    if norm_config_id.lower() == "legacy":
+        return LegacyNormalization(config, meta_data, pickle_meta_data, unique_class_list)
+    elif norm_config_id.lower() == "standard":
+        return StandardNormalization(config, meta_data, pickle_meta_data, unique_class_list)
+    elif norm_config_id.lower() == "minmax":
+        return MinMaxNormalization(config, meta_data, pickle_meta_data, unique_class_list)
+    elif norm_config_id.lower() == "quantile":
+        return QuantileNormalization(config, meta_data, pickle_meta_data, unique_class_list)
+    else:
+        raise OperationalException(f"Invalid data normalization identifier '{norm_config_id}'")
+
+
+class Normalization(ABC):
+    def __init__(
+            self,
+            config: Config,
+            meta_data: Dict[str, Any],
+            pickle_meta_data: Dict[str, Any],
+            unique_class_list: list
+    ):
+        self.freqai_config: Dict[str, Any] = config["freqai"]
+        self.data: Dict[str, Any] = meta_data
+        self.pkl_data: Dict[str, Any] = pickle_meta_data
+        self.unique_class_list: list = unique_class_list
+
+    @abstractmethod
+    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
+        """"""
+
+    @abstractmethod
+    def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
+        """"""
+
+    @abstractmethod
+    def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
+        """"""
+
+    @abstractmethod
+    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
+        """"""
+
+
+class LegacyNormalization(Normalization):
+    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
+        """
+        Normalize all data in the data_dictionary according to the training dataset
+        :param data_dictionary: dictionary containing the cleaned and
+        split training/test data/labels
+        :returns:
+        :data_dictionary: updated dictionary with standardized values.
+        """
+
+        # standardize the data by training stats
+        train_max = data_dictionary["train_features"].max()
+        train_min = data_dictionary["train_features"].min()
+        data_dictionary["train_features"] = (
+            2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1
+        )
+        data_dictionary["test_features"] = (
+            2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1
+        )
+
+        for item in train_max.keys():
+            self.data[item + "_max"] = train_max[item]
+            self.data[item + "_min"] = train_min[item]
+
+        for item in data_dictionary["train_labels"].keys():
+            if data_dictionary["train_labels"][item].dtype == object:
+                continue
+            train_labels_max = data_dictionary["train_labels"][item].max()
+            train_labels_min = data_dictionary["train_labels"][item].min()
+            data_dictionary["train_labels"][item] = (
+                2
+                * (data_dictionary["train_labels"][item] - train_labels_min)
+                / (train_labels_max - train_labels_min)
+                - 1
+            )
+            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+                data_dictionary["test_labels"][item] = (
+                    2
+                    * (data_dictionary["test_labels"][item] - train_labels_min)
+                    / (train_labels_max - train_labels_min)
+                    - 1
+                )
+
+            self.data[f"{item}_max"] = train_labels_max
+            self.data[f"{item}_min"] = train_labels_min
+        return data_dictionary
+
+    def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
+
+        train_max = df.max()
+        train_min = df.min()
+        df = (
+            2 * (df - train_min) / (train_max - train_min) - 1
+        )
+
+        for item in train_max.keys():
+            self.data[item + "_max"] = train_max[item]
+            self.data[item + "_min"] = train_min[item]
+
+        return df
+
+    def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
+        """
+        Normalize a set of data using the mean and standard deviation from
+        the associated training data.
+        :param df: Dataframe to be standardized
+        """
+
+        train_max = [None] * len(df.keys())
+        train_min = [None] * len(df.keys())
+
+        for i, item in enumerate(df.keys()):
+            train_max[i] = self.data[f"{item}_max"]
+            train_min[i] = self.data[f"{item}_min"]
+
+        train_max_series = pd.Series(train_max, index=df.keys())
+        train_min_series = pd.Series(train_min, index=df.keys())
+
+        df = (
+            2 * (df - train_min_series) / (train_max_series - train_min_series) - 1
+        )
+
+        return df
+
+    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
+        """
+        Denormalize a set of data using the mean and standard deviation from
+        the associated training data.
+        :param df: Dataframe of predictions to be denormalized
+        """
+
+        for label in df.columns:
+            if df[label].dtype == object or label in self.unique_class_list:
+                continue
+            df[label] = (
+                (df[label] + 1)
+                * (self.data[f"{label}_max"] - self.data[f"{label}_min"])
+                / 2
+            ) + self.data[f"{label}_min"]
+
+        return df
+
+
+class SKLearnNormalization(Normalization):
+    def __init__(self,
+                 config: Config,
+                 meta_data: Dict[str, Any],
+                 pickle_meta_data: Dict[str, Any],
+                 unique_class_list: list,
+                 transformer: TransformerType):
+        super().__init__(config, meta_data, pickle_meta_data, unique_class_list)
+        self.transformer = transformer
+
+    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
+        """
+        Normalize all data in the data_dictionary according to the training dataset
+        :param data_dictionary: dictionary containing the cleaned and
+        split training/test data/labels
+        :returns:
+        :data_dictionary: updated dictionary with standardized values.
+        """
+
+        # standardize the data by training stats
+        for column in data_dictionary["train_features"].columns:
+            scaler = self.transformer()
+            data_dictionary["train_features"][column] = \
+                scaler.fit_transform(data_dictionary["train_features"][[column]])
+            data_dictionary["test_features"][column] = \
+                scaler.transform(data_dictionary["test_features"][[column]])
+            self.pkl_data[column + "_scaler"] = scaler
+
+        for column in data_dictionary["train_labels"].columns:
+            if data_dictionary["train_labels"][column].dtype == object:
+                continue
+            scaler = self.transformer()
+            data_dictionary["train_labels"][column] = \
+                scaler.fit_transform(data_dictionary["train_labels"][[column]])
+
+            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+                data_dictionary["test_labels"][column] = \
+                    scaler.transform(data_dictionary["test_labels"][[column]])
+
+            self.pkl_data[column + "_scaler"] = scaler
+        return data_dictionary
+
+    def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
+        for column in df.columns:
+            scaler = self.transformer()
+            df[column] = scaler.fit_transform(df[[column]])
+            self.pkl_data[column + "_scaler"] = scaler
+
+        return df
+
+    def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
+        """
+        Normalize a set of data using the mean and standard deviation from
+        the associated training data.
+        :param df: Dataframe to be standardized
+        """
+
+        for column in df.columns:
+            df[column] = self.pkl_data[column + "_scaler"].transform(df[[column]])
+
+        return df
+
+    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
+        """
+        Denormalize a set of data using the mean and standard deviation from
+        the associated training data.
+        :param df: Dataframe of predictions to be denormalized
+        """
+
+        for column in df.columns:
+            if df[column].dtype == object or column in self.unique_class_list:
+                continue
+            df[column] = self.pkl_data[column + "_scaler"].inverse_transform(df[[column]])
+
+        return df
+
+
+class StandardNormalization(SKLearnNormalization):
+    def __init__(self,
+                 config: Config,
+                 meta_data: Dict[str, Any],
+                 pickle_meta_data: Dict[str, Any],
+                 unique_class_list: list):
+        super().__init__(config, meta_data, pickle_meta_data, unique_class_list, StandardScaler)
+
+
+class MinMaxNormalization(SKLearnNormalization):
+    def __init__(self,
+                 config: Config,
+                 meta_data: Dict[str, Any],
+                 pickle_meta_data: Dict[str, Any],
+                 unique_class_list: list):
+        super().__init__(config, meta_data, pickle_meta_data, unique_class_list, MinMaxScaler)
+
+
+class QuantileNormalization(SKLearnNormalization):
+    def __init__(self,
+                 config: Config,
+                 meta_data: Dict[str, Any],
+                 pickle_meta_data: Dict[str, Any],
+                 unique_class_list: list):
+        super().__init__(config, meta_data, pickle_meta_data, unique_class_list,
+                         QuantileTransformer)
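For intuition, each scikit-learn-backed class fits one transformer per column, stores the fitted instance in pkl_data under "<column>_scaler" (persisted by the data drawer via cloudpickle), and later applies inverse_transform to map predictions back to label space. A standalone sketch of that per-column round trip, with toy data and an illustrative column name rather than the freqtrade API:

    import pandas as pd
    from sklearn.preprocessing import StandardScaler

    df = pd.DataFrame({"&-s_close": [0.1, 0.4, 0.2, 0.9]})  # toy label column
    pkl_data = {}

    scaler = StandardScaler()
    normalized = scaler.fit_transform(df[["&-s_close"]])  # fit on training data only
    pkl_data["&-s_close_scaler"] = scaler                 # keyed as SKLearnNormalization does

    # later, e.g. after prediction: map values back to label space
    restored = pkl_data["&-s_close_scaler"].inverse_transform(normalized)
    assert (abs(restored[:, 0] - df["&-s_close"]) < 1e-9).all()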
@@ -142,7 +142,7 @@ def make_unfiltered_dataframe(mocker, freqai_conf):
     return freqai, unfiltered_dataframe


-def make_data_dictionary(mocker, freqai_conf):
+def make_data_dictionary(mocker, freqai_conf, normalized=True):
    freqai_conf.update({"timerange": "20180110-20180130"})

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
@@ -181,7 +181,8 @@ def make_data_dictionary(mocker, freqai_conf):

    data_dictionary = freqai.dk.make_train_test_datasets(features_filtered, labels_filtered)

-    data_dictionary = freqai.dk.normalize_data(data_dictionary)
+    if normalized:
+        data_dictionary = freqai.dk.normalize_data(data_dictionary)

    return freqai
tests/freqai/test_normalization.py (new file, 107 lines)
@@ -0,0 +1,107 @@
+import pytest
+
+from freqtrade.exceptions import OperationalException
+from freqtrade.freqai.normalization import (LegacyNormalization, MinMaxNormalization,
+                                            QuantileNormalization, StandardNormalization)
+from tests.freqai.conftest import make_data_dictionary
+
+
+def test_default_normalization_is_legacy(mocker, freqai_conf):
+    freqai_1st = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict_1st = freqai_1st.dk.data_dictionary
+    freqai_1st.dk.normalize_data(data_dict_1st)
+
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "legacy"
+    freqai_2nd = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict_2nd = freqai_2nd.dk.data_dictionary
+
+    assert not freqai_1st.dk.data_dictionary['train_features'].equals(
+        freqai_2nd.dk.data_dictionary['train_features']), "raw data is equal to normalized data"
+
+    freqai_2nd.dk.normalize_data(data_dict_2nd)
+
+    assert freqai_1st.dk.data_dictionary['train_features'].equals(
+        freqai_2nd.dk.data_dictionary['train_features']), \
+        "explicit\\implicit legacy normalization mismatch"
+
+
+def test_legacy_normalization_add_max_min_columns(mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "legacy"
+    freqai = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict = freqai.dk.data_dictionary
+    freqai.dk.normalize_data(data_dict)
+
+    assert any('_max' in entry for entry in freqai.dk.data.keys())
+    assert any('_min' in entry for entry in freqai.dk.data.keys())
+
+
+def test_standard_normalization_dont_add_max_min_columns(mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "standard"
+    freqai = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict = freqai.dk.data_dictionary
+    freqai.dk.normalize_data(data_dict)
+    assert all(not entry.endswith('_max') for entry in freqai.dk.data.keys())
+    assert all(not entry.endswith('_min') for entry in freqai.dk.data.keys())
+
+
+def test_legacy_and_standard_normalization_difference(mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "legacy"
+    freqai_1st = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict_1st = freqai_1st.dk.data_dictionary
+    freqai_1st.dk.normalize_data(data_dict_1st)
+
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "standard"
+    freqai_2nd = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict_2nd = freqai_2nd.dk.data_dictionary
+    freqai_2nd.dk.normalize_data(data_dict_2nd)
+
+    assert not freqai_1st.dk.data_dictionary['train_features'].equals(
+        freqai_2nd.dk.data_dictionary['train_features']), \
+        "legacy and standard normalization produce same features"
+
+
+@pytest.mark.parametrize(
+    "config_id, norm_class",
+    [
+        ("legacy", LegacyNormalization),
+        ("standard", StandardNormalization),
+        ("minmax", MinMaxNormalization),
+        ("quantile", QuantileNormalization),
+    ],
+)
+def test_normalization_class(config_id, norm_class, mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = config_id
+    freqai = make_data_dictionary(mocker, freqai_conf)
+    assert type(freqai.dk.normalizer) == norm_class
+
+
+def test_assertion_invalid_normalization_id(mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = "not_a_norm_id"
+    try:
+        make_data_dictionary(mocker, freqai_conf)
+        assert False, "missing expected normalization factory exception"
+    except OperationalException as e_info:
+        assert str(e_info).startswith("Invalid data normalization identifier"), \
+            "unexpected exception string"
+
+
+@pytest.mark.parametrize(
+    "config_id",
+    [
+        "legacy",
+        "standard",
+        "minmax",
+        "quantile",
+    ],
+)
+def test_denormalization(config_id, mocker, freqai_conf):
+    freqai_conf["freqai"]["feature_parameters"]["data_normalization"] = config_id
+    freqai_1st = make_data_dictionary(mocker, freqai_conf)
+    data_dict_1st = freqai_1st.dk.data_dictionary
+
+    freqai_2nd = make_data_dictionary(mocker, freqai_conf, normalized=False)
+    data_dict_2nd = freqai_2nd.dk.data_dictionary
+
+    denorm_labels = freqai_1st.dk.denormalize_labels_from_metadata(
+        data_dict_1st["train_labels"]).round(9)
+    assert denorm_labels.equals(data_dict_2nd['train_labels'].round(9)), \
+        "raw labels data isn't the same as denormalized labels"
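Assuming the repository's standard pytest setup, the new suite can be run in isolation with:

    pytest tests/freqai/test_normalization.py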