From 2ad086dd7a3d9ba9af01edc2a2f8100c5972ee89 Mon Sep 17 00:00:00 2001
From: silur
Date: Mon, 10 Oct 2022 14:35:07 +0200
Subject: [PATCH] add XGBoost random forest predictors to freqai

---
 .../prediction_models/XGBoostRFClassifier.py  | 90 +++++++++++++++++++
 .../prediction_models/XGBoostRFRegressor.py   | 50 +++++++++++
 2 files changed, 140 insertions(+)
 create mode 100644 freqtrade/freqai/prediction_models/XGBoostRFClassifier.py
 create mode 100644 freqtrade/freqai/prediction_models/XGBoostRFRegressor.py

diff --git a/freqtrade/freqai/prediction_models/XGBoostRFClassifier.py b/freqtrade/freqai/prediction_models/XGBoostRFClassifier.py
new file mode 100644
index 000000000..1aba8df85
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/XGBoostRFClassifier.py
@@ -0,0 +1,90 @@
+import logging
+from typing import Any, Dict, Tuple
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+from pandas import DataFrame
+from pandas.api.types import is_integer_dtype
+from sklearn.preprocessing import LabelEncoder
+from xgboost import XGBRFClassifier
+
+from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+
+
+logger = logging.getLogger(__name__)
+
+
+class XGBoostRFClassifier(BaseClassifierModel):
+    """
+    User created prediction model built on xgboost's XGBRFClassifier.
+    The class overrides fit() and predict() of BaseClassifierModel: fit() encodes
+    non-integer labels before training and predict() maps predictions back to labels.
+    """
+
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
+        """
+        User sets up the training and test data to fit their desired model here
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+                                all the training and test data/labels.
+        :param dk: data kitchen of the current pair; dk.pair is used to look up the init model.
+        :return: the fitted XGBRFClassifier
+        """
+
+        X = data_dictionary["train_features"].to_numpy()
+        y = data_dictionary["train_labels"].to_numpy()[:, 0]
+
+        # Encode non-integer labels as integer class ids before handing them to XGBoost.
+        le = LabelEncoder()
+        if not is_integer_dtype(y):
+            y = pd.Series(le.fit_transform(y), dtype="int64")
+
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            # No test split configured, so there is nothing to evaluate on.
+            eval_set = None
+        else:
+            test_features = data_dictionary["test_features"].to_numpy()
+            test_labels = data_dictionary["test_labels"].to_numpy()[:, 0]
+
+            if not is_integer_dtype(test_labels):
+                test_labels = pd.Series(le.transform(test_labels), dtype="int64")
+
+            eval_set = [(test_features, test_labels)]
+
+        train_weights = data_dictionary["train_weights"]
+
+        init_model = self.get_init_model(dk.pair)
+
+        model = XGBRFClassifier(**self.model_training_parameters)
+
+        model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
+                  xgb_model=init_model)
+
+        return model
+
+    def predict(
+        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
+    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
+        """
+        Filter the prediction features data and predict with it.
+        :param unfiltered_df: Full dataframe for the current backtest period.
+        :param dk: data kitchen providing label_list and data['labels_std'].
+        :return:
+        :pred_df: dataframe containing the predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        (pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs)
+
+        # Rebuild the label encoding from the keys of dk.data['labels_std'] so that the
+        # encoded predictions and encoded column names can be mapped back to those keys.
+        le = LabelEncoder()
+        label = dk.label_list[0]
+        labels_before = list(dk.data['labels_std'].keys())
+        labels_after = le.fit_transform(labels_before).tolist()
+        pred_df[label] = le.inverse_transform(pred_df[label])
+        pred_df = pred_df.rename(columns=dict(zip(labels_after, labels_before)))
+
+        return (pred_df, dk.do_predict)
diff --git a/freqtrade/freqai/prediction_models/XGBoostRFRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
new file mode 100644
index 000000000..4c18d594d
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
@@ -0,0 +1,50 @@
+import logging
+from typing import Any, Dict
+
+from xgboost import XGBRFRegressor
+
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+
+
+logger = logging.getLogger(__name__)
+
+
+class XGBoostRFRegressor(BaseRegressionModel):
+    """
+    User created prediction model built on xgboost's XGBRFRegressor.
+    The class overrides fit() of BaseRegressionModel; features, labels, weights and
+    the optional evaluation set are all taken from the data dictionary.
+    """
+
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
+        """
+        User sets up the training and test data to fit their desired model here
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+                                all the training and test data/labels.
+        :param dk: data kitchen of the current pair; dk.pair is used to look up the init model.
+        :return: the fitted XGBRFRegressor
+        """
+
+        X = data_dictionary["train_features"]
+        y = data_dictionary["train_labels"]
+
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
+            # No test split configured: both evaluation inputs must still be defined,
+            # otherwise model.fit() below fails on an unbound eval_weights.
+            eval_set = None
+            eval_weights = None
+        else:
+            eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
+            eval_weights = [data_dictionary['test_weights']]
+
+        sample_weight = data_dictionary["train_weights"]
+
+        xgb_model = self.get_init_model(dk.pair)
+
+        model = XGBRFRegressor(**self.model_training_parameters)
+
+        model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
+                  sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
+
+        return model