stable/freqtrade/freqai/normalization.py
2023-03-30 08:25:51 +03:00

270 lines
10 KiB
Python

from abc import ABC, abstractmethod
from typing import Any, Dict, Type, TypeVar

import pandas as pd
from pandas import DataFrame
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer, StandardScaler

from freqtrade.constants import Config
from freqtrade.exceptions import OperationalException
# Constrained TypeVar: the set of sklearn scaler types supported by
# SKLearnNormalization and its subclasses below.
TransformerType = TypeVar('TransformerType', MinMaxScaler, StandardScaler, QuantileTransformer)
def normalization_factory(
        config: Config,
        meta_data: Dict[str, Any],
        pickle_meta_data: Dict[str, Any],
        unique_class_list: list) -> "Normalization":
    """
    Build the Normalization implementation selected by the
    ``freqai.feature_parameters.data_normalization`` config entry
    (case-insensitive, defaults to "legacy").

    :param config: full user configuration; must contain a "freqai" section
    :param meta_data: dict the normalizer stores per-column stats into
    :param pickle_meta_data: dict the normalizer stores fitted scalers into
    :param unique_class_list: label columns to skip during denormalization
    :return: a concrete Normalization instance
    :raises OperationalException: if the identifier is not recognized
    """
    freqai_config: Dict[str, Any] = config["freqai"]
    norm_config_id = freqai_config["feature_parameters"].get("data_normalization", "legacy")
    # Dispatch table instead of an if/elif chain; .lower() computed once.
    normalizations: Dict[str, type] = {
        "legacy": LegacyNormalization,
        "standard": StandardNormalization,
        "minmax": MinMaxNormalization,
        "quantile": QuantileNormalization,
    }
    norm_class = normalizations.get(norm_config_id.lower())
    if norm_class is None:
        raise OperationalException(f"Invalid data normalization identifier '{norm_config_id}'")
    return norm_class(config, meta_data, pickle_meta_data, unique_class_list)
class Normalization(ABC):
    """
    Abstract base for FreqAI data normalization strategies.

    Concrete implementations record whatever state they need to reverse or
    reproduce the transformation in ``self.data`` (plain values) and/or
    ``self.pkl_data`` (picklable fitted objects).
    """

    def __init__(
        self,
        config: Config,
        meta_data: Dict[str, Any],
        pickle_meta_data: Dict[str, Any],
        unique_class_list: list
    ):
        # Keep only the "freqai" sub-config; nothing else from config is used.
        self.freqai_config: Dict[str, Any] = config["freqai"]
        # Per-column normalization stats (e.g. "<col>_max"/"<col>_min").
        self.data: Dict[str, Any] = meta_data
        # Fitted scaler objects keyed by "<col>_scaler".
        self.pkl_data: Dict[str, Any] = pickle_meta_data
        # Label columns excluded from denormalization.
        self.unique_class_list: list = unique_class_list

    @abstractmethod
    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
        """Normalize train/test features and labels; return the updated dict."""

    @abstractmethod
    def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
        """Normalize one dataframe by its own stats, recording those stats."""

    @abstractmethod
    def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
        """Normalize a dataframe using previously recorded training stats."""

    @abstractmethod
    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
        """Invert the label normalization using previously recorded stats."""
class LegacyNormalization(Normalization):
    """
    Hand-rolled min/max normalization to the range [-1, 1].

    Per-column training extrema are stored in ``self.data`` under the keys
    ``"<column>_max"`` / ``"<column>_min"`` so the transform can be replayed
    and inverted later.
    """

    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
        """
        Normalize all data in the data_dictionary according to the training dataset
        :param data_dictionary: dictionary containing the cleaned and
        split training/test data/labels
        :returns:
        :data_dictionary: updated dictionary with standardized values.
        """
        # Features: scale both splits by the *training* extrema.
        feat_max = data_dictionary["train_features"].max()
        feat_min = data_dictionary["train_features"].min()
        data_dictionary["train_features"] = (
            2 * (data_dictionary["train_features"] - feat_min) / (feat_max - feat_min) - 1
        )
        data_dictionary["test_features"] = (
            2 * (data_dictionary["test_features"] - feat_min) / (feat_max - feat_min) - 1
        )
        for col, col_max in feat_max.items():
            self.data[col + "_max"] = col_max
            self.data[col + "_min"] = feat_min[col]

        # Only touch test labels when a test split actually exists.
        has_test_split = self.freqai_config.get(
            'data_split_parameters', {}).get('test_size', 0.1) != 0
        # Labels: same scheme, column by column, skipping non-numeric columns.
        for label in data_dictionary["train_labels"].keys():
            if data_dictionary["train_labels"][label].dtype == object:
                continue
            label_max = data_dictionary["train_labels"][label].max()
            label_min = data_dictionary["train_labels"][label].min()
            data_dictionary["train_labels"][label] = (
                2
                * (data_dictionary["train_labels"][label] - label_min)
                / (label_max - label_min)
                - 1
            )
            if has_test_split:
                data_dictionary["test_labels"][label] = (
                    2
                    * (data_dictionary["test_labels"][label] - label_min)
                    / (label_max - label_min)
                    - 1
                )
            self.data[f"{label}_max"] = label_max
            self.data[f"{label}_min"] = label_min
        return data_dictionary

    def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
        """Scale ``df`` to [-1, 1] by its own extrema, recording them."""
        col_max = df.max()
        col_min = df.min()
        df = 2 * (df - col_min) / (col_max - col_min) - 1
        for col in col_max.keys():
            self.data[col + "_max"] = col_max[col]
            self.data[col + "_min"] = col_min[col]
        return df

    def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
        """
        Normalize a set of data using the mean and standard deviation from
        the associated training data.
        :param df: Dataframe to be standardized
        """
        columns = df.keys()
        # Rebuild the training extrema as Series aligned to df's columns.
        maxima = pd.Series([self.data[f"{c}_max"] for c in columns], index=columns)
        minima = pd.Series([self.data[f"{c}_min"] for c in columns], index=columns)
        df = 2 * (df - minima) / (maxima - minima) - 1
        return df

    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
        """
        Denormalize a set of data using the mean and standard deviation from
        the associated training data.
        :param df: Dataframe of predictions to be denormalized
        """
        for label in df.columns:
            # Skip non-numeric columns and classifier class columns.
            if df[label].dtype == object or label in self.unique_class_list:
                continue
            df[label] = (
                (df[label] + 1)
                * (self.data[f"{label}_max"] - self.data[f"{label}_min"])
                / 2
            ) + self.data[f"{label}_min"]
        return df
class SKLearnNormalization(Normalization):
    """
    Normalization backed by a scikit-learn transformer, fitted independently
    per column. Fitted scaler instances are stored in ``self.pkl_data`` under
    ``"<column>_scaler"`` so they can be pickled and replayed later.
    """

    def __init__(self,
                 config: Config,
                 meta_data: Dict[str, Any],
                 pickle_meta_data: Dict[str, Any],
                 unique_class_list: list,
                 transformer: Type[TransformerType]):
        """
        :param transformer: the scaler *class* (not an instance) — a fresh
            instance is created per column via ``self.transformer()``.
            (Annotation fixed: previously annotated as an instance type.)
        """
        super().__init__(config, meta_data, pickle_meta_data, unique_class_list)
        self.transformer = transformer

    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
        """
        Normalize all data in the data_dictionary according to the training dataset
        :param data_dictionary: dictionary containing the cleaned and
        split training/test data/labels
        :returns:
        :data_dictionary: updated dictionary with standardized values.
        """
        # Features: fit on the training split, apply to both splits.
        for column in data_dictionary["train_features"].columns:
            scaler = self.transformer()
            data_dictionary["train_features"][column] = \
                scaler.fit_transform(data_dictionary["train_features"][[column]])
            data_dictionary["test_features"][column] = \
                scaler.transform(data_dictionary["test_features"][[column]])
            self.pkl_data[column + "_scaler"] = scaler
        # Labels: same approach, skipping non-numeric columns; test labels
        # are only transformed when a test split exists.
        for column in data_dictionary["train_labels"].columns:
            if data_dictionary["train_labels"][column].dtype == object:
                continue
            scaler = self.transformer()
            data_dictionary["train_labels"][column] = \
                scaler.fit_transform(data_dictionary["train_labels"][[column]])
            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
                data_dictionary["test_labels"][column] = \
                    scaler.transform(data_dictionary["test_labels"][[column]])
            self.pkl_data[column + "_scaler"] = scaler
        return data_dictionary

    def normalize_single_dataframe(self, df: DataFrame) -> DataFrame:
        """Fit a fresh scaler per column of ``df`` and transform in place."""
        for column in df.columns:
            scaler = self.transformer()
            df[column] = scaler.fit_transform(df[[column]])
            self.pkl_data[column + "_scaler"] = scaler
        return df

    def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
        """
        Normalize a set of data using the mean and standard deviation from
        the associated training data.
        :param df: Dataframe to be standardized
        """
        for column in df.columns:
            df[column] = self.pkl_data[column + "_scaler"].transform(df[[column]])
        return df

    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
        """
        Denormalize a set of data using the mean and standard deviation from
        the associated training data.
        :param df: Dataframe of predictions to be denormalized
        """
        for column in df.columns:
            # Skip non-numeric columns and classifier class columns.
            if df[column].dtype == object or column in self.unique_class_list:
                continue
            df[column] = self.pkl_data[column + "_scaler"].inverse_transform(df[[column]])
        return df
class StandardNormalization(SKLearnNormalization):
    """Per-column normalization using sklearn's ``StandardScaler``."""

    def __init__(
        self,
        config: Config,
        meta_data: Dict[str, Any],
        pickle_meta_data: Dict[str, Any],
        unique_class_list: list,
    ):
        super().__init__(
            config, meta_data, pickle_meta_data, unique_class_list, StandardScaler
        )
class MinMaxNormalization(SKLearnNormalization):
    """Per-column normalization using sklearn's ``MinMaxScaler``."""

    def __init__(
        self,
        config: Config,
        meta_data: Dict[str, Any],
        pickle_meta_data: Dict[str, Any],
        unique_class_list: list,
    ):
        super().__init__(
            config, meta_data, pickle_meta_data, unique_class_list, MinMaxScaler
        )
class QuantileNormalization(SKLearnNormalization):
    """Per-column normalization using sklearn's ``QuantileTransformer``."""

    def __init__(
        self,
        config: Config,
        meta_data: Dict[str, Any],
        pickle_meta_data: Dict[str, Any],
        unique_class_list: list,
    ):
        super().__init__(
            config, meta_data, pickle_meta_data, unique_class_list, QuantileTransformer
        )