add predict_proba to base classifier, improve historic predictions handling

robcaulk 2022-08-09 17:31:38 +02:00
parent d36da95941
commit 23cc21ce59
3 changed files with 54 additions and 8 deletions

freqtrade/freqai/data_drawer.py

@@ -358,10 +358,12 @@ class FreqaiDataDrawer:
         dk.find_features(dataframe)
-        if self.freqai_info.get('predict_proba', []):
-            full_labels = dk.label_list + self.freqai_info['predict_proba']
-        else:
-            full_labels = dk.label_list
+        added_labels = []
+        if dk.unique_classes:
+            for label in dk.unique_classes:
+                added_labels += dk.unique_classes[label]
+        full_labels = dk.label_list + added_labels

         for label in full_labels:
             dataframe[label] = 0
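
Note: a minimal sketch of what the new loop above produces, using a hypothetical classifier label '&s-up_or_down' with classes 'down'/'up' (names not taken from this commit). The per-class columns appended here are the ones the classifier's predict_proba output fills in later.

# Hypothetical label/class names; plain lists stand in for dk.label_list and dk.unique_classes.
label_list = ['&s-up_or_down']
unique_classes = {'&s-up_or_down': ['down', 'up']}

added_labels = []
if unique_classes:
    for label in unique_classes:
        added_labels += unique_classes[label]
full_labels = label_list + added_labels

print(full_labels)  # ['&s-up_or_down', 'down', 'up'] -> each gets a zeroed column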

freqtrade/freqai/data_kitchen.py

@@ -90,6 +90,7 @@ class FreqaiDataKitchen:
         self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
         self.thread_count = self.freqai_config.get("data_kitchen_thread_count", -1)
         self.train_dates: DataFrame = pd.DataFrame()
+        self.unique_classes: Dict[str, list] = {}

     def set_paths(
         self,
@@ -977,6 +978,8 @@ class FreqaiDataKitchen:
                     informative=corr_dataframes[i][tf]
                 )

+        self.get_unique_classes_from_labels(dataframe)
+
         return dataframe

     def fit_labels(self) -> None:
@@ -1003,3 +1006,11 @@ class FreqaiDataKitchen:
             col for col in dataframe.columns if not col.startswith("%") or col.startswith("%%")
         ]

         return dataframe[to_keep]
+
+    def get_unique_classes_from_labels(self, dataframe: DataFrame) -> None:
+        self.find_features(dataframe)
+
+        for key in self.label_list:
+            if dataframe[key].dtype == object:
+                self.unique_classes[key] = dataframe[key].dropna().unique()
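
Note: a self-contained sketch of what get_unique_classes_from_labels() collects, assuming one hypothetical object-typed label column and one float-typed label column (names invented for illustration).

import pandas as pd

df = pd.DataFrame({
    '&s-up_or_down': ['up', 'down', 'up', None],  # object dtype -> classes are recorded
    '&s-close_pred': [1.01, 0.99, 1.02, 1.00],    # float dtype -> skipped
})

unique_classes = {}
for key in ['&s-up_or_down', '&s-close_pred']:    # stand-in for dk.label_list
    if df[key].dtype == object:
        unique_classes[key] = df[key].dropna().unique()

print(unique_classes)  # {'&s-up_or_down': array(['up', 'down'], dtype=object)}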

freqtrade/freqai/prediction_models/CatboostClassifier.py

@@ -1,10 +1,12 @@
 import logging
-from typing import Any, Dict
+from typing import Any, Dict, Tuple

+import pandas as pd
+from pandas import DataFrame
 from catboost import CatBoostClassifier, Pool
+import numpy.typing as npt
+import numpy as np

+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel


 logger = logging.getLogger(__name__)
@@ -39,3 +41,34 @@ class CatboostClassifier(BaseRegressionModel):
         cbr.fit(train_data)

         return cbr
+
+    def predict(
+        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
+    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
+        """
+        Filter the prediction features data and predict with it.
+        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
+        :return:
+        :pred_df: dataframe containing the predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        dk.find_features(unfiltered_dataframe)
+        filtered_dataframe, _ = dk.filter_features(
+            unfiltered_dataframe, dk.training_features_list, training_filter=False
+        )
+        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
+        dk.data_dictionary["prediction_features"] = filtered_dataframe
+
+        self.data_cleaning_predict(dk, filtered_dataframe)
+
+        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
+        pred_df = DataFrame(predictions, columns=dk.label_list)
+
+        predictions_prob = self.model.predict_proba(dk.data_dictionary["prediction_features"])
+        pred_df_prob = DataFrame(predictions_prob, columns=self.model.classes_)
+
+        pred_df = pd.concat([pred_df, pred_df_prob], axis=1)
+
+        return (pred_df, dk.do_predict)
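
Note: a minimal sketch of the pred_df layout returned by this predict(), with hypothetical values and plain pandas standing in for a fitted CatBoost model: the class prediction column from dk.label_list is followed by one probability column per entry in model.classes_.

import pandas as pd
from pandas import DataFrame

label_list = ['&s-up_or_down']                 # dk.label_list
classes_ = ['down', 'up']                      # column order comes from model.classes_

predictions = ['up', 'down']                   # model.predict(...)
predictions_prob = [[0.2, 0.8], [0.7, 0.3]]    # model.predict_proba(...), one column per class

pred_df = DataFrame(predictions, columns=label_list)
pred_df_prob = DataFrame(predictions_prob, columns=classes_)
pred_df = pd.concat([pred_df, pred_df_prob], axis=1)

print(list(pred_df.columns))  # ['&s-up_or_down', 'down', 'up']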