add predict_proba to base classifier, improve historic predictions handling

robcaulk 2022-08-09 17:31:38 +02:00
parent d36da95941
commit 23cc21ce59
3 changed files with 54 additions and 8 deletions


@@ -358,10 +358,12 @@ class FreqaiDataDrawer:
         dk.find_features(dataframe)

-        if self.freqai_info.get('predict_proba', []):
-            full_labels = dk.label_list + self.freqai_info['predict_proba']
-        else:
-            full_labels = dk.label_list
+        added_labels = []
+        if dk.unique_classes:
+            for label in dk.unique_classes:
+                added_labels += dk.unique_classes[label]
+        full_labels = dk.label_list + added_labels

         for label in full_labels:
             dataframe[label] = 0
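
For context, a minimal runnable sketch of the label expansion performed by the drawer change above, using a plain dict and made-up label/class names in place of the real FreqaiDataKitchen attributes (all names below are illustrative assumptions, not part of the commit):

import pandas as pd

# Stand-ins for dk.label_list and dk.unique_classes (assumed shapes, not the real objects).
label_list = ["&s-up_or_down"]
unique_classes = {"&s-up_or_down": ["up", "down"]}

# Same expansion as the new code: every label plus every discovered class value
# becomes a zero-filled column in the fallback dataframe returned to the strategy.
added_labels = []
if unique_classes:
    for label in unique_classes:
        added_labels += list(unique_classes[label])
full_labels = label_list + added_labels

dataframe = pd.DataFrame(index=range(3))
for label in full_labels:
    dataframe[label] = 0

print(dataframe.columns.tolist())  # ['&s-up_or_down', 'up', 'down']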


@@ -90,6 +90,7 @@ class FreqaiDataKitchen:
         self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
         self.thread_count = self.freqai_config.get("data_kitchen_thread_count", -1)
         self.train_dates: DataFrame = pd.DataFrame()
+        self.unique_classes: Dict[str, list] = {}

     def set_paths(
         self,
@@ -977,6 +978,8 @@ class FreqaiDataKitchen:
                         informative=corr_dataframes[i][tf]
                     )

+        self.get_unique_classes_from_labels(dataframe)
+
         return dataframe

     def fit_labels(self) -> None:
@@ -1003,3 +1006,11 @@
             col for col in dataframe.columns if not col.startswith("%") or col.startswith("%%")
         ]
         return dataframe[to_keep]
+
+    def get_unique_classes_from_labels(self, dataframe: DataFrame) -> None:
+
+        self.find_features(dataframe)
+
+        for key in self.label_list:
+            if dataframe[key].dtype == object:
+                self.unique_classes[key] = dataframe[key].dropna().unique()
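
A short, self-contained sketch of what this new helper collects, run on a toy dataframe (the column names and values are illustrative assumptions; only object-dtype label columns contribute):

import pandas as pd

# Toy training dataframe: one string-valued (object dtype) label and one numeric label.
df = pd.DataFrame({
    "&s-up_or_down": ["up", "down", "up", None],
    "&s-close_change": [0.01, -0.02, 0.03, 0.00],
})
label_list = ["&s-up_or_down", "&s-close_change"]  # what find_features() would populate (assumed)

unique_classes = {}
for key in label_list:
    if df[key].dtype == object:
        unique_classes[key] = df[key].dropna().unique()

print(unique_classes)
# {'&s-up_or_down': array(['up', 'down'], dtype=object)}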


@@ -1,10 +1,12 @@
 import logging
-from typing import Any, Dict
+from typing import Any, Dict, Tuple

+import pandas as pd
+from pandas import DataFrame
 from catboost import CatBoostClassifier, Pool
+import numpy.typing as npt
+import numpy as np

 from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen

 logger = logging.getLogger(__name__)
@@ -39,3 +41,34 @@ class CatboostClassifier(BaseRegressionModel):
         cbr.fit(train_data)

         return cbr
+
+    def predict(
+        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
+    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
+        """
+        Filter the prediction features data and predict with it.
+        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
+        :return:
+        :pred_df: dataframe containing the predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        dk.find_features(unfiltered_dataframe)
+        filtered_dataframe, _ = dk.filter_features(
+            unfiltered_dataframe, dk.training_features_list, training_filter=False
+        )
+        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
+        dk.data_dictionary["prediction_features"] = filtered_dataframe
+
+        self.data_cleaning_predict(dk, filtered_dataframe)
+
+        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
+        pred_df = DataFrame(predictions, columns=dk.label_list)
+
+        predictions_prob = self.model.predict_proba(dk.data_dictionary["prediction_features"])
+        pred_df_prob = DataFrame(predictions_prob, columns=self.model.classes_)
+
+        pred_df = pd.concat([pred_df, pred_df_prob], axis=1)
+
+        return (pred_df, dk.do_predict)
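
To illustrate the shape of what this new predict() hands back, here is a standalone sketch that uses scikit-learn's LogisticRegression as a stand-in for the fitted CatBoost model (the label name and data are made up; only the assembly of pred_df mirrors the lines above):

import numpy as np
import pandas as pd
from pandas import DataFrame
from sklearn.linear_model import LogisticRegression  # stand-in for the fitted classifier

# Tiny made-up training set with a string-valued target.
X_train = np.array([[0.1], [0.2], [0.8], [0.9]])
y_train = np.array(["down", "down", "up", "up"])
model = LogisticRegression().fit(X_train, y_train)

X_pred = np.array([[0.15], [0.85]])
label_list = ["&s-up_or_down"]  # illustrative label name

# Same assembly as above: one column with the predicted class,
# plus one probability column per entry in model.classes_.
pred_df = DataFrame(model.predict(X_pred), columns=label_list)
pred_df_prob = DataFrame(model.predict_proba(X_pred), columns=model.classes_)
pred_df = pd.concat([pred_df, pred_df_prob], axis=1)

print(pred_df.columns.tolist())  # ['&s-up_or_down', 'down', 'up']

Those per-class probability columns are what the drawer change above pre-creates as zero-filled columns, so the fallback dataframe returned to the strategy keeps the same shape as a real prediction.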