stable/freqtrade/freqai/prediction_models/CatboostClassifier.py

import logging
from typing import Any, Dict, Tuple
import pandas as pd
from pandas import DataFrame
from catboost import CatBoostClassifier, Pool
import numpy.typing as npt
import numpy as np
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen

logger = logging.getLogger(__name__)


class CatboostClassifier(BaseRegressionModel):
    """
    CatBoost based classification model for FreqAI.

    The class subclasses BaseRegressionModel and overrides two of its
    functions: fit(), which builds and trains the CatBoostClassifier, and
    predict(), which returns the predicted labels together with the per-class
    probabilities. Everything else is taken from the base class.
    """

    def fit(self, data_dictionary: Dict) -> Any:
        """
        Train a CatBoostClassifier on the training split.
        :param data_dictionary: dictionary holding the training data. Only the
        "train_features", "train_labels" and "train_weights" entries are read here.
        :return: the fitted CatBoostClassifier
        """

        train_data = Pool(
            data=data_dictionary["train_features"],
            label=data_dictionary["train_labels"],
            weight=data_dictionary["train_weights"],
        )

        # The defaults are listed first so that any key present in
        # model_training_parameters replaces them. Passing the defaults as
        # explicit keyword arguments next to **model_training_parameters raises
        # "TypeError: got multiple values for keyword argument" as soon as the
        # user parameters contain "loss_function" or "allow_writing_files".
        model_parameters = {
            "allow_writing_files": False,
            "loss_function": "MultiClass",
            **self.model_training_parameters,
        }

        cbr = CatBoostClassifier(**model_parameters)

        cbr.fit(train_data)

        return cbr

    def predict(
        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_dataframe: Full dataframe for the current backtest period.
        :param dk: FreqaiDataKitchen used to select and normalize the features; it also
        holds the "prediction_features" entry and the do_predict array.
        :param first: not used by this implementation.
        :return:
        :pred_df: dataframe containing the predicted labels (columns named after
        dk.label_list) followed by one probability column per entry of model.classes_
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        dk.find_features(unfiltered_dataframe)
        filtered_dataframe, _ = dk.filter_features(
            unfiltered_dataframe, dk.training_features_list, training_filter=False
        )
        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
        dk.data_dictionary["prediction_features"] = filtered_dataframe

        self.data_cleaning_predict(dk, filtered_dataframe)

        # Read the entry back from the data kitchen instead of reusing
        # filtered_dataframe: data_cleaning_predict() is handed dk and may have
        # replaced it (NOTE(review): presumably PCA / outlier handling - confirm
        # against the base class).
        prediction_features = dk.data_dictionary["prediction_features"]

        predictions = self.model.predict(prediction_features)
        pred_df = DataFrame(predictions, columns=dk.label_list)

        predictions_prob = self.model.predict_proba(prediction_features)
        pred_df_prob = DataFrame(predictions_prob, columns=self.model.classes_)

        # Place the class probabilities next to the predicted labels, row by row.
        pred_df = pd.concat([pred_df, pred_df_prob], axis=1)

        return (pred_df, dk.do_predict)
add classifier, improve model naming scheme 2022-07-09 08:13:33 +00:00			`import logging`
add predict_proba to base classifier, improve historic predictions handling 2022-08-09 15:31:38 +00:00			`from typing import Any, Dict, Tuple`
			`import pandas as pd`
			`from pandas import DataFrame`
add classifier, improve model naming scheme 2022-07-09 08:13:33 +00:00			`from catboost import CatBoostClassifier, Pool`
add predict_proba to base classifier, improve historic predictions handling 2022-08-09 15:31:38 +00:00			`import numpy.typing as npt`
			`import numpy as np`
add classifier, improve model naming scheme 2022-07-09 08:13:33 +00:00			`from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel`
add predict_proba to base classifier, improve historic predictions handling 2022-08-09 15:31:38 +00:00			`from freqtrade.freqai.data_kitchen import FreqaiDataKitchen`
add classifier, improve model naming scheme 2022-07-09 08:13:33 +00:00
			`logger = logging.getLogger(__name__)`


			`class CatboostClassifier(BaseRegressionModel):`
			`"""`
			`User created prediction model. The class needs to override three necessary`
			`functions, predict(), train(), fit(). The class inherits ModelHandler which`
			`has its own DataHandler where data is held, saved, loaded, and managed.`
			`"""`

			`def fit(self, data_dictionary: Dict) -> Any:`
			`"""`
			`User sets up the training and test data to fit their desired model here`
			`:params:`
			`:data_dictionary: the dictionary constructed by DataHandler to hold`
			`all the training and test data/labels.`
			`"""`

			`train_data = Pool(`
			`data=data_dictionary["train_features"],`
			`label=data_dictionary["train_labels"],`
			`weight=data_dictionary["train_weights"],`
			`)`

			`cbr = CatBoostClassifier(`
			`allow_writing_files=False,`
			`loss_function='MultiClass',`
			`**self.model_training_parameters,`
			`)`

			`cbr.fit(train_data)`

			`return cbr`
add predict_proba to base classifier, improve historic predictions handling 2022-08-09 15:31:38 +00:00
			`def predict(`
			`self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False`
			`) -> Tuple[DataFrame, npt.NDArray[np.int_]]:`
			`"""`
			`Filter the prediction features data and predict with it.`
			`:param: unfiltered_dataframe: Full dataframe for the current backtest period.`
			`:return:`
			`:pred_df: dataframe containing the predictions`
			`:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove`
			`data (NaNs) or felt uncertain about data (PCA and DI index)`
			`"""`

			`dk.find_features(unfiltered_dataframe)`
			`filtered_dataframe, _ = dk.filter_features(`
			`unfiltered_dataframe, dk.training_features_list, training_filter=False`
			`)`
			`filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)`
			`dk.data_dictionary["prediction_features"] = filtered_dataframe`

			`self.data_cleaning_predict(dk, filtered_dataframe)`

			`predictions = self.model.predict(dk.data_dictionary["prediction_features"])`
			`pred_df = DataFrame(predictions, columns=dk.label_list)`

			`predictions_prob = self.model.predict_proba(dk.data_dictionary["prediction_features"])`
			`pred_df_prob = DataFrame(predictions_prob, columns=self.model.classes_)`

			`pred_df = pd.concat([pred_df, pred_df_prob], axis=1)`

			`return (pred_df, dk.do_predict)`