From 10b6aebc5f5ada8b1e6aef733f7c8ccf4e29e8ba Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 10 Sep 2022 16:54:13 +0200 Subject: [PATCH] enable continual learning and evaluation sets on multioutput models. --- .../BaseClassifierModel.py | 0 .../BaseRegressionModel.py | 0 .../BaseTensorFlowModel.py | 0 .../base_models/FreqaiMultiOutputRegressor.py | 75 +++++++++++++++++++ .../prediction_models/CatboostClassifier.py | 2 +- .../prediction_models/CatboostRegressor.py | 2 +- .../CatboostRegressorMultiTarget.py | 41 +++++++--- .../prediction_models/LightGBMClassifier.py | 2 +- .../prediction_models/LightGBMRegressor.py | 2 +- .../LightGBMRegressorMultiTarget.py | 41 +++++++--- .../prediction_models/XGBoostRegressor.py | 6 +- .../XGBoostRegressorMultiTarget.py | 37 ++++++--- 12 files changed, 170 insertions(+), 38 deletions(-) rename freqtrade/freqai/{prediction_models => base_models}/BaseClassifierModel.py (100%) rename freqtrade/freqai/{prediction_models => base_models}/BaseRegressionModel.py (100%) rename freqtrade/freqai/{prediction_models => base_models}/BaseTensorFlowModel.py (100%) create mode 100644 freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py diff --git a/freqtrade/freqai/prediction_models/BaseClassifierModel.py b/freqtrade/freqai/base_models/BaseClassifierModel.py similarity index 100% rename from freqtrade/freqai/prediction_models/BaseClassifierModel.py rename to freqtrade/freqai/base_models/BaseClassifierModel.py diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/base_models/BaseRegressionModel.py similarity index 100% rename from freqtrade/freqai/prediction_models/BaseRegressionModel.py rename to freqtrade/freqai/base_models/BaseRegressionModel.py diff --git a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py b/freqtrade/freqai/base_models/BaseTensorFlowModel.py similarity index 100% rename from freqtrade/freqai/prediction_models/BaseTensorFlowModel.py rename to 
# freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py

from joblib import Parallel
from sklearn.multioutput import MultiOutputRegressor, _fit_estimator
from sklearn.utils.fixes import delayed
from sklearn.utils.validation import has_fit_parameter


class FreqaiMultiOutputRegressor(MultiOutputRegressor):
    """Multi-output regressor that accepts separate fit parameters per target.

    The stock ``MultiOutputRegressor.fit`` forwards one shared set of keyword
    arguments to every per-target estimator. This subclass takes a list with
    one dict per output column instead, so each target can receive its own
    values (e.g. its own ``eval_set`` or ``init_model``).
    """

    def fit(self, X, y, sample_weight=None, fit_params=None):
        """Fit the model to data, separately for each output variable.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.
        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
            Multi-output targets.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If `None`, then samples are equally weighted.
            Only supported if the underlying regressor supports sample
            weights.
        fit_params : list of dict, default=None
            One dict of keyword arguments per output column, passed to the
            ``fit`` method of the estimator trained on that column. The dicts
            may hold the same or different values. A missing/empty list, or a
            ``None`` entry, means "no extra keyword arguments".

        Returns
        -------
        self : object
            Returns a fitted instance.

        Raises
        ------
        ValueError
            If the base estimator has no ``fit`` method, if ``y`` is
            one-dimensional, if ``sample_weight`` is given but unsupported by
            the base estimator, or if ``fit_params`` has fewer entries than
            ``y`` has columns.
        """
        if not hasattr(self.estimator, "fit"):
            raise ValueError("The base estimator should implement a fit method")

        y = self._validate_data(X="no_validation", y=y, multi_output=True)

        if y.ndim == 1:
            raise ValueError(
                "y must have at least two dimensions for "
                "multi-output regression but has only one."
            )

        if sample_weight is not None and not has_fit_parameter(
            self.estimator, "sample_weight"
        ):
            raise ValueError("Underlying estimator does not support sample weights.")

        n_outputs = y.shape[1]

        # Default to an empty kwargs dict per output. The previous default of
        # ``None`` entries made ``**fit_params[i]`` raise a TypeError.
        if not fit_params:
            fit_params = [{}] * n_outputs
        elif len(fit_params) < n_outputs:
            # Fail early with a clear message instead of an IndexError raised
            # from inside the parallel generator below.
            raise ValueError(
                f"fit_params has {len(fit_params)} entries but y has "
                f"{n_outputs} output columns."
            )

        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_estimator)(
                self.estimator, X, y[:, i], sample_weight,
                # ``or {}`` tolerates individual ``None`` entries in the list.
                **(fit_params[i] or {})
            )
            for i in range(n_outputs)
        )

        # Mirror the metadata of the first fitted estimator on the wrapper.
        first_estimator = self.estimators_[0]
        if hasattr(first_estimator, "n_features_in_"):
            self.n_features_in_ = first_estimator.n_features_in_
        if hasattr(first_estimator, "feature_names_in_"):
            self.feature_names_in_ = first_estimator.feature_names_in_

        # Return the fitted instance, as the docstring promises; the original
        # ended with a bare ``return`` and so returned None.
        return self
freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel logger = logging.getLogger(__name__) diff --git a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py index bc52bfdd9..a376b2c33 100644 --- a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py +++ b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py @@ -1,11 +1,11 @@ import logging from typing import Any, Dict -from catboost import CatBoostRegressor # , Pool -from sklearn.multioutput import MultiOutputRegressor +from catboost import CatBoostRegressor, Pool +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel logger = logging.getLogger(__name__) @@ -32,17 +32,34 @@ class CatboostRegressorMultiTarget(BaseRegressionModel): X = data_dictionary["train_features"] y = data_dictionary["train_labels"] - eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) + sample_weight = data_dictionary["train_weights"] - if self.continual_learning: - logger.warning('Continual learning not supported for MultiTarget models') - - model = MultiOutputRegressor(estimator=cbr) - model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) + eval_sets = [None] * y.shape[1] if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: - train_score = model.score(X, y) - test_score = model.score(*eval_set) - logger.info(f"Train score {train_score}, Test score {test_score}") + eval_sets = [None] * data_dictionary['test_labels'].shape[1] + + for i in range(data_dictionary['test_labels'].shape[1]): + eval_sets[i] = Pool( + 
data=data_dictionary["test_features"], + label=data_dictionary["test_labels"].iloc[:, i], + weight=data_dictionary["test_weights"], + ) + + init_model = self.get_init_model(dk.pair) + + if init_model: + init_models = init_model.estimators_ + else: + init_models = [None] * y.shape[1] + + fit_params = [] + for i in range(len(eval_sets)): + fit_params.append( + {'eval_set': eval_sets[i], 'init_model': init_models[i]}) + + model = FreqaiMultiOutputRegressor(estimator=cbr) + model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) + return model diff --git a/freqtrade/freqai/prediction_models/LightGBMClassifier.py b/freqtrade/freqai/prediction_models/LightGBMClassifier.py index 69867eae3..3eec516ba 100644 --- a/freqtrade/freqai/prediction_models/LightGBMClassifier.py +++ b/freqtrade/freqai/prediction_models/LightGBMClassifier.py @@ -3,8 +3,8 @@ from typing import Any, Dict from lightgbm import LGBMClassifier +from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel logger = logging.getLogger(__name__) diff --git a/freqtrade/freqai/prediction_models/LightGBMRegressor.py b/freqtrade/freqai/prediction_models/LightGBMRegressor.py index 99e9ff887..85c9b691c 100644 --- a/freqtrade/freqai/prediction_models/LightGBMRegressor.py +++ b/freqtrade/freqai/prediction_models/LightGBMRegressor.py @@ -3,8 +3,8 @@ from typing import Any, Dict from lightgbm import LGBMRegressor +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel logger = logging.getLogger(__name__) diff --git a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py index 
c34680dbe..7a9b5c36a 100644 --- a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py +++ b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py @@ -2,10 +2,10 @@ import logging from typing import Any, Dict from lightgbm import LGBMRegressor -from sklearn.multioutput import MultiOutputRegressor +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel logger = logging.getLogger(__name__) @@ -29,15 +29,36 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel): X = data_dictionary["train_features"] y = data_dictionary["train_labels"] - eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) sample_weight = data_dictionary["train_weights"] - if self.continual_learning: - logger.warning('Continual learning not supported for MultiTarget models') + eval_weights = None + eval_sets = [None] * y.shape[1] - model = MultiOutputRegressor(estimator=lgb) - model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) - train_score = model.score(X, y) - test_score = model.score(*eval_set) - logger.info(f"Train score {train_score}, Test score {test_score}") + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + eval_weights = [data_dictionary["test_weights"]] + eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore + for i in range(data_dictionary['test_labels'].shape[1]): + eval_sets[i] = ( # type: ignore + data_dictionary["test_features"], + data_dictionary["test_labels"].iloc[:, i] + ) + + init_model = self.get_init_model(dk.pair) + if init_model: + init_models = init_model.estimators_ + else: + init_models = [None] * y.shape[1] + + fit_params = [] + for i in range(len(eval_sets)): + 
fit_params.append( + {'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights, + 'init_model': init_models[i]}) + + model = FreqaiMultiOutputRegressor(estimator=lgb) + model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) + + # model = FreqaiMultiOutputRegressor(estimator=lgb) + # model.fit(X=X, y=y, sample_weight=sample_weight, init_models=init_models, + # eval_sets=eval_sets, eval_sample_weight=eval_weights) return model diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py index acc4386f5..c9be9ce74 100644 --- a/freqtrade/freqai/prediction_models/XGBoostRegressor.py +++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py @@ -3,8 +3,8 @@ from typing import Any, Dict from xgboost import XGBRegressor +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel logger = logging.getLogger(__name__) @@ -31,6 +31,7 @@ class XGBoostRegressor(BaseRegressionModel): eval_set = None else: eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])] + eval_weights = [data_dictionary['test_weights']] sample_weight = data_dictionary["train_weights"] @@ -38,6 +39,7 @@ class XGBoostRegressor(BaseRegressionModel): model = XGBRegressor(**self.model_training_parameters) - model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set, xgb_model=xgb_model) + model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set, + sample_weight_eval_set=eval_weights, xgb_model=xgb_model) return model diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py index 5283501d1..38c478c0b 100644 --- a/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py +++ 
b/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py @@ -1,11 +1,11 @@ import logging from typing import Any, Dict -from sklearn.multioutput import MultiOutputRegressor from xgboost import XGBRegressor +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor from freqtrade.freqai.data_kitchen import FreqaiDataKitchen -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel logger = logging.getLogger(__name__) @@ -29,15 +29,32 @@ class XGBoostRegressorMultiTarget(BaseRegressionModel): X = data_dictionary["train_features"] y = data_dictionary["train_labels"] - eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) sample_weight = data_dictionary["train_weights"] - if self.continual_learning: - logger.warning('Continual learning not supported for MultiTarget models') + eval_weights = None + eval_sets = [None] * y.shape[1] + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + eval_weights = [data_dictionary["test_weights"]] + for i in range(data_dictionary['test_labels'].shape[1]): + eval_sets[i] = [( # type: ignore + data_dictionary["test_features"], + data_dictionary["test_labels"].iloc[:, i] + )] + + init_model = self.get_init_model(dk.pair) + if init_model: + init_models = init_model.estimators_ + else: + init_models = [None] * y.shape[1] + + fit_params = [] + for i in range(len(eval_sets)): + fit_params.append( + {'eval_set': eval_sets[i], 'sample_weight_eval_set': eval_weights, + 'xgb_model': init_models[i]}) + + model = FreqaiMultiOutputRegressor(estimator=xgb) + model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) - model = MultiOutputRegressor(estimator=xgb) - model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) - train_score = model.score(X, y) - test_score = model.score(*eval_set) - logger.info(f"Train 
score {train_score}, Test score {test_score}") return model