Move prediction denormalization into FreqaiDataKitchen, remove the associated duplicate code, and skip normalization/denormalization for labels with string dtypes.
This commit is contained in:
parent
81c1aa3c13
commit
f3d46613ee
@ -356,7 +356,7 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
return self.data_dictionary
|
return self.data_dictionary
|
||||||
|
|
||||||
def normalize_data(self, data_dictionary: Dict, do_labels: bool = True) -> Dict[Any, Any]:
|
def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
|
||||||
"""
|
"""
|
||||||
Normalize all data in the data_dictionary according to the training dataset
|
Normalize all data in the data_dictionary according to the training dataset
|
||||||
:params:
|
:params:
|
||||||
@ -378,24 +378,26 @@ class FreqaiDataKitchen:
|
|||||||
self.data[item + "_max"] = train_max[item]
|
self.data[item + "_max"] = train_max[item]
|
||||||
self.data[item + "_min"] = train_min[item]
|
self.data[item + "_min"] = train_min[item]
|
||||||
|
|
||||||
if do_labels:
|
for item in data_dictionary["train_labels"].keys():
|
||||||
train_labels_max = data_dictionary["train_labels"].max()
|
if data_dictionary["train_labels"][item].dtype == str:
|
||||||
train_labels_min = data_dictionary["train_labels"].min()
|
continue
|
||||||
|
train_labels_max = data_dictionary["train_labels"][item].max()
|
||||||
|
train_labels_min = data_dictionary["train_labels"][item].min()
|
||||||
data_dictionary["train_labels"] = (
|
data_dictionary["train_labels"] = (
|
||||||
2
|
2
|
||||||
* (data_dictionary["train_labels"] - train_labels_min)
|
* (data_dictionary["train_labels"][item] - train_labels_min)
|
||||||
/ (train_labels_max - train_labels_min)
|
/ (train_labels_max - train_labels_min)
|
||||||
- 1
|
- 1
|
||||||
)
|
)
|
||||||
data_dictionary["test_labels"] = (
|
data_dictionary["test_labels"][item] = (
|
||||||
2
|
2
|
||||||
* (data_dictionary["test_labels"] - train_labels_min)
|
* (data_dictionary["test_labels"] - train_labels_min)
|
||||||
/ (train_labels_max - train_labels_min)
|
/ (train_labels_max - train_labels_min)
|
||||||
- 1
|
- 1
|
||||||
)
|
)
|
||||||
|
|
||||||
self.data["labels_max"] = train_labels_max.to_dict()
|
self.data[f"{item}_max"] = train_labels_max # .to_dict()
|
||||||
self.data["labels_min"] = train_labels_min.to_dict()
|
self.data[f"{item}_min"] = train_labels_min # .to_dict()
|
||||||
|
|
||||||
return data_dictionary
|
return data_dictionary
|
||||||
|
|
||||||
@ -417,6 +419,25 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
    """
    Denormalize a set of predictions using the per-label minimum and
    maximum stored in self.data ("<label>_min" / "<label>_max").

    Applies x = (x_norm + 1) * (max - min) / 2 + min, the algebraic
    inverse of the 2 * (x - min) / (max - min) - 1 label scaling.
    Non-numeric (e.g. string) label columns are left untouched.
    :params:
    :df: Dataframe of predictions to be denormalized
    :returns:
    :df: the same dataframe, with numeric label columns rescaled in place
    """
    # Function-scope import so the method does not depend on how the
    # enclosing module happens to import pandas.
    from pandas.api.types import is_numeric_dtype

    for label in self.label_list:
        # pandas stores strings as object (or a string extension dtype),
        # which never compares equal to the builtin `str`; test for
        # numeric-ness instead so string labels are reliably skipped.
        if not is_numeric_dtype(df[label]):
            continue

        label_min = self.data[f"{label}_min"]
        label_max = self.data[f"{label}_max"]
        df[label] = (df[label] + 1) * (label_max - label_min) / 2 + label_min

    return df
|
||||||
|
|
||||||
def split_timerange(
|
def split_timerange(
|
||||||
self, tr: str, train_split: int = 28, bt_split: int = 7
|
self, tr: str, train_split: int = 28, bt_split: int = 7
|
||||||
) -> Tuple[list, list]:
|
) -> Tuple[list, list]:
|
||||||
|
@ -501,12 +501,8 @@ class IFreqaiModel(ABC):
|
|||||||
) -> None:
|
) -> None:
|
||||||
trained_predictions = model.predict(df)
|
trained_predictions = model.predict(df)
|
||||||
pred_df = DataFrame(trained_predictions, columns=dk.label_list)
|
pred_df = DataFrame(trained_predictions, columns=dk.label_list)
|
||||||
for label in dk.label_list:
|
|
||||||
pred_df[label] = (
|
pred_df = dk.denormalize_labels_from_metadata(pred_df)
|
||||||
(pred_df[label] + 1)
|
|
||||||
* (dk.data["labels_max"][label] - dk.data["labels_min"][label])
|
|
||||||
/ 2
|
|
||||||
) + dk.data["labels_min"][label]
|
|
||||||
|
|
||||||
self.dd.historic_predictions[pair] = pd.DataFrame()
|
self.dd.historic_predictions[pair] = pd.DataFrame()
|
||||||
self.dd.historic_predictions[pair] = copy.deepcopy(pred_df)
|
self.dd.historic_predictions[pair] = copy.deepcopy(pred_df)
|
||||||
|
@ -107,11 +107,6 @@ class BaseRegressionModel(IFreqaiModel):
|
|||||||
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
|
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
|
||||||
pred_df = DataFrame(predictions, columns=dk.label_list)
|
pred_df = DataFrame(predictions, columns=dk.label_list)
|
||||||
|
|
||||||
for label in dk.label_list:
|
pred_df = dk.denormalize_labels_from_metadata(pred_df)
|
||||||
pred_df[label] = (
|
|
||||||
(pred_df[label] + 1)
|
|
||||||
* (dk.data["labels_max"][label] - dk.data["labels_min"][label])
|
|
||||||
/ 2
|
|
||||||
) + dk.data["labels_min"][label]
|
|
||||||
|
|
||||||
return (pred_df, dk.do_predict)
|
return (pred_df, dk.do_predict)
|
||||||
|
Loading…
Reference in New Issue
Block a user