import logging
from typing import Any, Dict, Tuple

import numpy as np
import numpy.typing as npt
import pandas as pd
from catboost import CatBoostClassifier, Pool
from pandas import DataFrame

from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


logger = logging.getLogger(__name__)


class CatboostClassifier(BaseRegressionModel):
    """
    CatBoost classification model.

    Inherits from BaseRegressionModel and overrides fit() (builds and trains a
    CatBoostClassifier) and predict() (returns the predicted labels together
    with the per-class probabilities).
    """

    def fit(self, data_dictionary: Dict) -> Any:
        """
        Build a CatBoostClassifier and train it on the supplied training data.

        :param data_dictionary: dictionary holding the training data. The keys
            read here are "train_features", "train_labels" and "train_weights".
        :return: the fitted CatBoostClassifier.
        """
        train_data = Pool(
            data=data_dictionary["train_features"],
            label=data_dictionary["train_labels"],
            weight=data_dictionary["train_weights"],
        )

        # Defaults and user parameters are merged into a single dict (user values
        # win). Passing the defaults as explicit keywords next to
        # **self.model_training_parameters would raise
        # "TypeError: got multiple values for keyword argument" as soon as the
        # user configures `loss_function` or `allow_writing_files` themselves.
        model_parameters = {
            "allow_writing_files": False,
            "loss_function": "MultiClass",
            **self.model_training_parameters,
        }

        cbr = CatBoostClassifier(**model_parameters)
        cbr.fit(train_data)

        return cbr

    def predict(
        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.

        :param unfiltered_dataframe: Full dataframe for the current backtest period.
        :param dk: data kitchen used to select, filter and normalize the features;
            its data_dictionary["prediction_features"] entry is overwritten here.
        :param first: not used by this implementation.
        :return:
            :pred_df: dataframe containing the predicted labels (columns named by
                dk.label_list) followed by one probability column per class
                (columns named by self.model.classes_)
            :do_predict: np.array of 1s and 0s to indicate places where freqai needed
                to remove data (NaNs) or felt uncertain about data (PCA and DI index)
        """
        dk.find_features(unfiltered_dataframe)
        filtered_dataframe, _ = dk.filter_features(
            unfiltered_dataframe, dk.training_features_list, training_filter=False
        )
        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
        dk.data_dictionary["prediction_features"] = filtered_dataframe

        self.data_cleaning_predict(dk, filtered_dataframe)

        # Read the features back from the data kitchen only after
        # data_cleaning_predict() has run, since that call receives `dk` and may
        # have replaced the stored entry.
        prediction_features = dk.data_dictionary["prediction_features"]

        predictions = self.model.predict(prediction_features)
        pred_df = DataFrame(predictions, columns=dk.label_list)

        predictions_prob = self.model.predict_proba(prediction_features)
        pred_df_prob = DataFrame(predictions_prob, columns=self.model.classes_)

        # Place the class labels and the class probabilities side by side.
        pred_df = pd.concat([pred_df, pred_df_prob], axis=1)

        return (pred_df, dk.do_predict)