add predict_proba to base classifier, improve historic predictions handling

2022-08-09 17:31:38 +02:00
parent d36da95941
commit 23cc21ce59
3 changed files with 54 additions and 8 deletions
@@ -1,10 +1,12 @@
 import logging
-from typing import Any, Dict
-
+from typing import Any, Dict, Tuple
+import pandas as pd
+from pandas import DataFrame
 from catboost import CatBoostClassifier, Pool
-
+import numpy.typing as npt
+import numpy as np
 from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
-
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen

 logger = logging.getLogger(__name__)

@@ -39,3 +41,34 @@ class CatboostClassifier(BaseRegressionModel):
        cbr.fit(train_data)

        return cbr
+
+    def predict(
+        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
+    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
+        """
+        Filter the prediction features, then predict class labels and class probabilities.
+        :param unfiltered_dataframe: Full dataframe for the current backtest period.
+        :param dk: FreqaiDataKitchen whose helpers are called here; supplies label_list/do_predict.
+        :param first: Not referenced anywhere in this method body.
+        :return: (pred_df, do_predict). pred_df has the dk.label_list columns followed by one
+        column per self.model.classes_ entry; do_predict is an np.array of 1s and 0s marking
+        where freqai removed data (NaNs) or felt uncertain about it (PCA and DI index).
+        """
+        dk.find_features(unfiltered_dataframe)
+        filtered_dataframe, _ = dk.filter_features(
+            unfiltered_dataframe, dk.training_features_list, training_filter=False
+        )
+        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
+        dk.data_dictionary["prediction_features"] = filtered_dataframe
+        # Return value is discarded; presumably this updates dk in place - TODO confirm.
+        self.data_cleaning_predict(dk, filtered_dataframe)
+        # Features are re-read from dk (not the local variable) for both model calls below.
+        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
+        pred_df = DataFrame(predictions, columns=dk.label_list)
+        # Probability output gets one column named after each entry of self.model.classes_.
+        predictions_prob = self.model.predict_proba(dk.data_dictionary["prediction_features"])
+        pred_df_prob = DataFrame(predictions_prob, columns=self.model.classes_)
+        # Column-wise join: the label column(s) first, then the probability columns.
+        pred_df = pd.concat([pred_df, pred_df_prob], axis=1)
+
+        return (pred_df, dk.do_predict)