Merge pull request #7569 from Silur/develop

Add XGBoost random forest predictors to freqai

commit 62ca822597

freqtrade/freqai/prediction_models/XGBoostRFClassifier.py (new file, 85 lines)
@@ -0,0 +1,85 @@
import logging
from typing import Any, Dict, Tuple

import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame
from pandas.api.types import is_integer_dtype
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRFClassifier

from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


logger = logging.getLogger(__name__)


class XGBoostRFClassifier(BaseClassifierModel):
    """
    User created prediction model. The class needs to override three necessary
    functions, predict(), train(), fit(). The class inherits ModelHandler which
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary constructed by DataHandler to hold
                                all the training and test data/labels.
        """

        X = data_dictionary["train_features"].to_numpy()
        y = data_dictionary["train_labels"].to_numpy()[:, 0]

        le = LabelEncoder()
        if not is_integer_dtype(y):
            y = pd.Series(le.fit_transform(y), dtype="int64")

        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
            eval_set = None
        else:
            test_features = data_dictionary["test_features"].to_numpy()
            test_labels = data_dictionary["test_labels"].to_numpy()[:, 0]

            if not is_integer_dtype(test_labels):
                test_labels = pd.Series(le.transform(test_labels), dtype="int64")

            eval_set = [(test_features, test_labels)]

        train_weights = data_dictionary["train_weights"]

        init_model = self.get_init_model(dk.pair)

        model = XGBRFClassifier(**self.model_training_parameters)

        model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
                  xgb_model=init_model)

        return model

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
                     data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        (pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs)

        le = LabelEncoder()
        label = dk.label_list[0]
        labels_before = list(dk.data['labels_std'].keys())
        labels_after = le.fit_transform(labels_before).tolist()
        pred_df[label] = le.inverse_transform(pred_df[label])
        pred_df = pred_df.rename(
            columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})

        return (pred_df, dk.do_predict)
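Note: the classifier encodes string targets to integer class ids before handing them to XGBoost, then decodes predictions back to the original labels. Below is a minimal standalone sketch of that LabelEncoder round-trip (not part of the PR; the example labels 'down'/'up' are hypothetical):

    import pandas as pd
    from sklearn.preprocessing import LabelEncoder

    # Example string targets, as a freqai classification label column might contain.
    y = pd.Series(["down", "up", "up", "down"])

    le = LabelEncoder()
    # Sorted classes are ['down', 'up'], so this yields 0, 1, 1, 0.
    y_enc = pd.Series(le.fit_transform(y), dtype="int64")

    # XGBRFClassifier predicts integer class ids; map them back to the original labels.
    preds = [1, 0, 1]
    print(le.inverse_transform(preds).tolist())  # ['up', 'down', 'up']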
freqtrade/freqai/prediction_models/XGBoostRFRegressor.py (new file, 45 lines)
@@ -0,0 +1,45 @@
import logging
from typing import Any, Dict

from xgboost import XGBRFRegressor

from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


logger = logging.getLogger(__name__)


class XGBoostRFRegressor(BaseRegressionModel):
    """
    User created prediction model. The class needs to override three necessary
    functions, predict(), train(), fit(). The class inherits ModelHandler which
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary constructed by DataHandler to hold
                                all the training and test data/labels.
        """

        X = data_dictionary["train_features"]
        y = data_dictionary["train_labels"]

        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
            eval_set = None
            eval_weights = None
        else:
            eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
            eval_weights = [data_dictionary['test_weights']]

        sample_weight = data_dictionary["train_weights"]

        xgb_model = self.get_init_model(dk.pair)

        model = XGBRFRegressor(**self.model_training_parameters)

        model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
                  sample_weight_eval_set=eval_weights, xgb_model=xgb_model)

        return model
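Note: fit() here simply forwards the data kitchen's arrays into xgboost's scikit-learn API. A minimal standalone sketch of the same XGBRFRegressor call, using synthetic data and assumed example hyperparameters in place of the user's model_training_parameters (not part of the PR):

    import numpy as np
    from xgboost import XGBRFRegressor

    rng = np.random.default_rng(42)
    X = rng.random((200, 5))      # stand-in for data_dictionary["train_features"]
    y = rng.random(200)           # stand-in for data_dictionary["train_labels"]
    weights = np.ones(200)        # stand-in for data_dictionary["train_weights"]

    # Assumed example values; in freqai these come from the user's
    # "model_training_parameters" configuration.
    model_training_parameters = {"n_estimators": 100, "max_depth": 4}

    model = XGBRFRegressor(**model_training_parameters)
    model.fit(X=X, y=y, sample_weight=weights, eval_set=[(X, y)])
    print(model.predict(X[:3]))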
tests/freqai/test_freqai_interface.py
@@ -30,6 +30,7 @@ def is_mac() -> bool:
 @pytest.mark.parametrize('model', [
     'LightGBMRegressor',
     'XGBoostRegressor',
+    'XGBoostRFRegressor',
     'CatboostRegressor',
     ])
 def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model):
@@ -113,6 +114,7 @@ def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model):
     'LightGBMClassifier',
     'CatboostClassifier',
     'XGBoostClassifier',
+    'XGBoostRFClassifier',
     ])
 def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model):
     if is_arm() and model == 'CatboostClassifier':