allow user to pass test_size = 0 and avoid using eval sets in prediction models
parent 55cf378ec2
commit 56b17e6f3c
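This change lets a user set test_size to 0 under freqai.data_split_parameters: the data kitchen then skips train_test_split (and the test-set post-processing), and the prediction models train without an eval set. Below is a minimal sketch of the config lookup involved, written as a standalone Python snippet for illustration; any config keys beyond freqai.data_split_parameters.test_size are assumptions, not part of this commit.

# Hypothetical excerpt of a freqtrade config, shown as a plain Python dict for illustration.
config = {
    "freqai": {
        "data_split_parameters": {
            # test_size == 0 disables the train/test split; the models then
            # receive test_data = None / eval_set = None, as in the diff below.
            "test_size": 0,
        },
    },
}

# Mirrors the lookup added throughout this commit:
test_size = config["freqai"].get("data_split_parameters", {}).get("test_size", 0.1)
print(test_size)  # 0 -> the split and eval-set code paths are bypassed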
@@ -243,20 +243,28 @@ class FreqaiDataKitchen:
         else:
             stratification = None
 
-        (
-            train_features,
-            test_features,
-            train_labels,
-            test_labels,
-            train_weights,
-            test_weights,
-        ) = train_test_split(
-            filtered_dataframe[: filtered_dataframe.shape[0]],
-            labels,
-            weights,
-            stratify=stratification,
-            **self.config["freqai"]["data_split_parameters"],
-        )
+        if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            (
+                train_features,
+                test_features,
+                train_labels,
+                test_labels,
+                train_weights,
+                test_weights,
+            ) = train_test_split(
+                filtered_dataframe[: filtered_dataframe.shape[0]],
+                labels,
+                weights,
+                stratify=stratification,
+                **self.config["freqai"]["data_split_parameters"],
+            )
+        else:
+            test_labels = np.zeros(2)
+            test_features = pd.DataFrame()
+            test_weights = np.zeros(2)
+            train_features = filtered_dataframe
+            train_labels = labels
+            train_weights = weights
 
         return self.build_data_dictionary(
             train_features, test_features, train_labels, test_labels, train_weights, test_weights
@@ -392,12 +400,13 @@ class FreqaiDataKitchen:
                 / (train_labels_max - train_labels_min)
                 - 1
             )
-            data_dictionary["test_labels"][item] = (
-                2
-                * (data_dictionary["test_labels"][item] - train_labels_min)
-                / (train_labels_max - train_labels_min)
-                - 1
-            )
+            if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+                data_dictionary["test_labels"][item] = (
+                    2
+                    * (data_dictionary["test_labels"][item] - train_labels_min)
+                    / (train_labels_max - train_labels_min)
+                    - 1
+                )
 
             self.data[f"{item}_max"] = train_labels_max  # .to_dict()
             self.data[f"{item}_min"] = train_labels_min  # .to_dict()
@@ -555,11 +564,12 @@ class FreqaiDataKitchen:
         self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list)
         self.training_features_list = self.data_dictionary["train_features"].columns
 
-        self.data_dictionary["test_features"] = pd.DataFrame(
-            data=test_components,
-            columns=["PC" + str(i) for i in range(0, n_keep_components)],
-            index=self.data_dictionary["test_features"].index,
-        )
+        if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            self.data_dictionary["test_features"] = pd.DataFrame(
+                data=test_components,
+                columns=["PC" + str(i) for i in range(0, n_keep_components)],
+                index=self.data_dictionary["test_features"].index,
+            )
 
         self.data["n_kept_components"] = n_keep_components
         self.pca = pca2
@@ -652,15 +662,17 @@ class FreqaiDataKitchen:
         )
 
         # same for test data
-        y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
-        dropped_points = np.where(y_pred == -1, 0, y_pred)
-        self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
-            (y_pred == 1)
-        ]
-        self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(y_pred == 1)]
-        self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
-            (y_pred == 1)
-        ]
+        if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
+            dropped_points = np.where(y_pred == -1, 0, y_pred)
+            self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
+                (y_pred == 1)
+            ]
+            self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(
+                y_pred == 1)]
+            self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
+                (y_pred == 1)
+            ]
 
         logger.info(
             f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}"
@@ -28,17 +28,22 @@ class CatboostPredictionModel(BaseRegressionModel):
             label=data_dictionary["train_labels"],
             weight=data_dictionary["train_weights"],
         )
-        test_data = Pool(
-            data=data_dictionary["test_features"],
-            label=data_dictionary["test_labels"],
-            weight=data_dictionary["test_weights"],
-        )
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            test_data = None
+        else:
+            test_data = Pool(
+                data=data_dictionary["test_features"],
+                label=data_dictionary["test_labels"],
+                weight=data_dictionary["test_weights"],
+            )
 
         model = CatBoostRegressor(
             allow_writing_files=False,
             **self.model_training_parameters,
         )
 
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            test_data = None
         model.fit(X=train_data, eval_set=test_data)
 
         return model
@@ -36,7 +36,9 @@ class CatboostPredictionMultiModel(BaseRegressionModel):
 
         model = MultiOutputRegressor(estimator=cbr)
         model.fit(X=X, y=y, sample_weight=sample_weight)  # , eval_set=eval_set)
-        train_score = model.score(X, y)
-        test_score = model.score(*eval_set)
-        logger.info(f"Train score {train_score}, Test score {test_score}")
+
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+            train_score = model.score(X, y)
+            test_score = model.score(*eval_set)
+            logger.info(f"Train score {train_score}, Test score {test_score}")
         return model
@@ -25,11 +25,15 @@ class LightGBMPredictionModel(BaseRegressionModel):
         all the training and test data/labels.
         """
 
-        eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            eval_set = None
+        else:
+            eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
         X = data_dictionary["train_features"]
         y = data_dictionary["train_labels"]
 
         model = LGBMRegressor(**self.model_training_parameters)
 
         model.fit(X=X, y=y, eval_set=eval_set)
 
         return model