Fix bug in target_mean/target_std array merging during backtesting
This commit is contained in:
@@ -141,9 +141,9 @@ class FreqaiDataKitchen:
|
||||
:model: User trained model which can be inferenced for new predictions
|
||||
"""
|
||||
|
||||
# if self.live:
|
||||
self.model_filename = self.data_drawer.pair_dict[coin]['model_filename']
|
||||
self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path'])
|
||||
if self.live:
|
||||
self.model_filename = self.data_drawer.pair_dict[coin]['model_filename']
|
||||
self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path'])
|
||||
|
||||
with open(self.data_path / str(self.model_filename + "_metadata.json"), "r") as fp:
|
||||
self.data = json.load(fp)
|
||||
@@ -329,42 +329,6 @@ class FreqaiDataKitchen:
|
||||
:data_dictionary: updated dictionary with standardized values.
|
||||
"""
|
||||
# standardize the data by training stats
|
||||
train_mean = data_dictionary["train_features"].mean()
|
||||
train_std = data_dictionary["train_features"].std()
|
||||
data_dictionary["train_features"] = (
|
||||
data_dictionary["train_features"] - train_mean
|
||||
) / train_std
|
||||
data_dictionary["test_features"] = (
|
||||
data_dictionary["test_features"] - train_mean
|
||||
) / train_std
|
||||
|
||||
train_labels_std = data_dictionary["train_labels"].std()
|
||||
train_labels_mean = data_dictionary["train_labels"].mean()
|
||||
data_dictionary["train_labels"] = (
|
||||
data_dictionary["train_labels"] - train_labels_mean
|
||||
) / train_labels_std
|
||||
data_dictionary["test_labels"] = (
|
||||
data_dictionary["test_labels"] - train_labels_mean
|
||||
) / train_labels_std
|
||||
|
||||
for item in train_std.keys():
|
||||
self.data[item + "_std"] = train_std[item]
|
||||
self.data[item + "_mean"] = train_mean[item]
|
||||
|
||||
self.data["labels_std"] = train_labels_std
|
||||
self.data["labels_mean"] = train_labels_mean
|
||||
|
||||
return data_dictionary
|
||||
|
||||
def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
|
||||
"""
|
||||
Standardize all data in the data_dictionary according to the training dataset
|
||||
:params:
|
||||
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
|
||||
:returns:
|
||||
:data_dictionary: updated dictionary with standardized values.
|
||||
"""
|
||||
# standardize the data by training stats
|
||||
train_max = data_dictionary["train_features"].max()
|
||||
train_min = data_dictionary["train_features"].min()
|
||||
data_dictionary["train_features"] = 2 * (
|
||||
@@ -392,9 +356,9 @@ class FreqaiDataKitchen:
|
||||
|
||||
return data_dictionary
|
||||
|
||||
def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
|
||||
def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Standardizes a set of data using the mean and standard deviation from
|
||||
Normalize a set of data using the mean and standard deviation from
|
||||
the associated training data.
|
||||
:params:
|
||||
:df: Dataframe to be standardized
|
||||
@@ -406,19 +370,6 @@ class FreqaiDataKitchen:
|
||||
|
||||
return df
|
||||
|
||||
def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Normalizes a set of data using the mean and standard deviation from
|
||||
the associated training data.
|
||||
:params:
|
||||
:df: Dataframe to be standardized
|
||||
"""
|
||||
|
||||
for item in df.keys():
|
||||
df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
|
||||
|
||||
return df
|
||||
|
||||
def split_timerange(
|
||||
self, tr: str, train_split: int = 28, bt_split: int = 7
|
||||
) -> Tuple[list, list]:
|
||||
@@ -657,12 +608,12 @@ class FreqaiDataKitchen:
|
||||
"""
|
||||
|
||||
ones = np.ones(len_dataframe)
|
||||
s_mean, s_std = ones * self.data["s_mean"], ones * self.data["s_std"]
|
||||
target_mean, target_std = ones * self.data["target_mean"], ones * self.data["target_std"]
|
||||
|
||||
self.full_predictions = np.append(self.full_predictions, predictions)
|
||||
self.full_do_predict = np.append(self.full_do_predict, do_predict)
|
||||
self.full_target_mean = np.append(self.full_target_mean, s_mean)
|
||||
self.full_target_std = np.append(self.full_target_std, s_std)
|
||||
self.full_target_mean = np.append(self.full_target_mean, target_mean)
|
||||
self.full_target_std = np.append(self.full_target_std, target_std)
|
||||
|
||||
return
|
||||
|
||||
@@ -827,6 +778,23 @@ class FreqaiDataKitchen:
|
||||
|
||||
return dataframe
|
||||
|
||||
def fit_labels(self) -> None:
    """
    Fit a normal distribution to the training labels and cache its parameters.

    Reads self.data_dictionary["train_labels"] and stores the fitted location
    and scale in self.data["target_mean"] and self.data["target_std"].
    """
    # Import the submodule explicitly: a bare `import scipy` does not make
    # `scipy.stats` available as an attribute on older SciPy releases.
    from scipy import stats

    target_mean, target_std = stats.norm.fit(self.data_dictionary["train_labels"])

    # KEEPME in case we want to let user start to grab quantiles.
    # upper_q = stats.norm.ppf(self.freqai_config['feature_parameters'][
    #                          'target_quantile'], target_mean, target_std)
    # lower_q = stats.norm.ppf(1 - self.freqai_config['feature_parameters'][
    #                          'target_quantile'], target_mean, target_std)

    self.data["target_mean"], self.data["target_std"] = target_mean, target_std
    # self.data["upper_quantile"] = upper_q
    # self.data["lower_quantile"] = lower_q

    return
|
||||
|
||||
def np_encoder(self, object):
|
||||
if isinstance(object, np.generic):
|
||||
return object.item()
|
||||
@@ -968,3 +936,52 @@ class FreqaiDataKitchen:
|
||||
# )
|
||||
|
||||
# return
|
||||
|
||||
# def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
|
||||
# """
|
||||
# standardize all data in the data_dictionary according to the training dataset
|
||||
# :params:
|
||||
# :data_dictionary: dictionary containing the cleaned and split training/test data/labels
|
||||
# :returns:
|
||||
# :data_dictionary: updated dictionary with standardized values.
|
||||
# """
|
||||
# # standardize the data by training stats
|
||||
# train_mean = data_dictionary["train_features"].mean()
|
||||
# train_std = data_dictionary["train_features"].std()
|
||||
# data_dictionary["train_features"] = (
|
||||
# data_dictionary["train_features"] - train_mean
|
||||
# ) / train_std
|
||||
# data_dictionary["test_features"] = (
|
||||
# data_dictionary["test_features"] - train_mean
|
||||
# ) / train_std
|
||||
|
||||
# train_labels_std = data_dictionary["train_labels"].std()
|
||||
# train_labels_mean = data_dictionary["train_labels"].mean()
|
||||
# data_dictionary["train_labels"] = (
|
||||
# data_dictionary["train_labels"] - train_labels_mean
|
||||
# ) / train_labels_std
|
||||
# data_dictionary["test_labels"] = (
|
||||
# data_dictionary["test_labels"] - train_labels_mean
|
||||
# ) / train_labels_std
|
||||
|
||||
# for item in train_std.keys():
|
||||
# self.data[item + "_std"] = train_std[item]
|
||||
# self.data[item + "_mean"] = train_mean[item]
|
||||
|
||||
# self.data["labels_std"] = train_labels_std
|
||||
# self.data["labels_mean"] = train_labels_mean
|
||||
|
||||
# return data_dictionary
|
||||
|
||||
# def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
|
||||
# """
|
||||
# Normalizes a set of data using the mean and standard deviation from
|
||||
# the associated training data.
|
||||
# :params:
|
||||
# :df: Dataframe to be standardized
|
||||
# """
|
||||
|
||||
# for item in df.keys():
|
||||
# df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
|
||||
|
||||
# return df
|
||||
|
@@ -158,12 +158,7 @@ class IFreqaiModel(ABC):
|
||||
else:
|
||||
self.model = dh.load_data(metadata['pair'])
|
||||
|
||||
# strategy_provided_features = self.dh.find_features(dataframe_train)
|
||||
# # FIXME doesnt work with PCA
|
||||
# if strategy_provided_features != self.dh.training_features_list:
|
||||
# logger.info("User changed input features, retraining model.")
|
||||
# self.model = self.train(dataframe_train, metadata)
|
||||
# self.dh.save_data(self.model)
|
||||
self.check_if_feature_list_matches_strategy(dataframe_train, dh)
|
||||
|
||||
preds, do_preds = self.predict(dataframe_backtest, dh)
|
||||
|
||||
@@ -220,16 +215,23 @@ class IFreqaiModel(ABC):
|
||||
|
||||
self.model = dh.load_data(coin=metadata['pair'])
|
||||
|
||||
# FIXME
|
||||
# strategy_provided_features = dh.find_features(dataframe)
|
||||
# if strategy_provided_features != dh.training_features_list:
|
||||
# self.train_model_in_series(new_trained_timerange, metadata, strategy)
|
||||
self.check_if_feature_list_matches_strategy(dataframe, dh)
|
||||
|
||||
preds, do_preds = self.predict(dataframe, dh)
|
||||
dh.append_predictions(preds, do_preds, len(dataframe))
|
||||
|
||||
return dh
|
||||
|
||||
def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
                                           dh: FreqaiDataKitchen) -> None:
    """
    Verify that the features furnished by the current strategy are the same
    ones recorded for the pretrained model.
    :params:
    :dataframe: strategy dataframe handed to dh.find_features()
    :dh: data kitchen providing find_features() and training_features_list
    :raises:
    OperationalException when the two feature lists differ.
    """
    strategy_provided_features = dh.find_features(dataframe)
    if strategy_provided_features != dh.training_features_list:
        # Adjacent string literals are concatenated verbatim, so every piece
        # except the last must end with an explicit space.
        raise OperationalException(
            "Trying to access pretrained model with `identifier` "
            "but found different features furnished by current strategy. "
            "Change `identifier` to train from scratch, or ensure the "
            "strategy is furnishing the same features as the pretrained "
            "model"
        )
|
||||
|
||||
def data_cleaning_train(self, dh: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
Base data cleaning method for train
|
||||
@@ -237,6 +239,7 @@ class IFreqaiModel(ABC):
|
||||
based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example
|
||||
of how outlier data points are dropped from the dataframe used for training.
|
||||
"""
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
|
||||
dh.principal_component_analysis()
|
||||
|
||||
|
@@ -33,10 +33,6 @@ class CatboostPredictionModel(IFreqaiModel):
|
||||
/ dataframe["close"]
|
||||
- 1
|
||||
)
|
||||
dh.data["s_mean"] = dataframe["s"].mean()
|
||||
dh.data["s_std"] = dataframe["s"].std()
|
||||
|
||||
# logger.info("label mean", dh.data["s_mean"], "label std", dh.data["s_std"])
|
||||
|
||||
return dataframe["s"]
|
||||
|
||||
@@ -68,8 +64,9 @@ class CatboostPredictionModel(IFreqaiModel):
|
||||
|
||||
# split data into train/test data.
|
||||
data_dictionary = dh.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
# standardize all data based on train_dataset only
|
||||
data_dictionary = dh.standardize_data(data_dictionary)
|
||||
dh.fit_labels() # fit labels to a normal distribution so we know what to expect in strategy
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dh.normalize_data(data_dictionary)
|
||||
|
||||
# optional additional data cleaning/analysis
|
||||
self.data_cleaning_train(dh)
|
||||
@@ -128,7 +125,7 @@ class CatboostPredictionModel(IFreqaiModel):
|
||||
filtered_dataframe, _ = dh.filter_features(
|
||||
unfiltered_dataframe, original_feature_list, training_filter=False
|
||||
)
|
||||
filtered_dataframe = dh.standardize_data_from_metadata(filtered_dataframe)
|
||||
filtered_dataframe = dh.normalize_data_from_metadata(filtered_dataframe)
|
||||
dh.data_dictionary["prediction_features"] = filtered_dataframe
|
||||
|
||||
# optional additional data cleaning/analysis
|
||||
@@ -136,7 +133,7 @@ class CatboostPredictionModel(IFreqaiModel):
|
||||
|
||||
predictions = self.model.predict(dh.data_dictionary["prediction_features"])
|
||||
|
||||
# compute the non-standardized predictions
|
||||
# compute the non-normalized predictions
|
||||
dh.predictions = (predictions + 1) * (dh.data["labels_max"] -
|
||||
dh.data["labels_min"]) / 2 + dh.data["labels_min"]
|
||||
|
||||
|
@@ -178,8 +178,8 @@ class FreqaiExampleStrategy(IStrategy):
|
||||
dataframe["target_std"],
|
||||
) = self.model.bridge.start(dataframe, metadata, self)
|
||||
|
||||
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.5
|
||||
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1
|
||||
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"]
|
||||
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"]
|
||||
return dataframe
|
||||
|
||||
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
|
||||
|
Reference in New Issue
Block a user