stable/freqtrade/freqai/prediction_models/PyTorchClassifier.py

import logging
from typing import Dict, List, Tuple

import numpy as np
import numpy.typing as npt
import pandas as pd
import torch
from pandas import DataFrame
from torch.nn import functional as F

from freqtrade.exceptions import OperationalException
from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


logger = logging.getLogger(__name__)


class PyTorchClassifier(BasePyTorchModel):
    """
    A PyTorch implementation of a classifier.
    User must implement fit method

    The class keeps two lookup tables, ``class_name_to_index`` and
    ``index_to_class_name``, which translate between the string class names
    and the integer class indices used for encoding labels and decoding
    predictions.
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Both mappings stay None until init_class_names_to_index_mapping()
        # is called (from convert_label_column_to_int() or lazily in predict()).
        self.class_name_to_index = None
        self.index_to_class_name = None

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :param dk: data kitchen used to select, normalize and store the
            prediction features.
        :return:
        :pred_df: dataframe containing the predictions. The first column is named
        after dk.label_list[0] and holds the predicted class name, followed by one
        probability column per class name.
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        :raises ValueError: if 'class_names' doesn't exist in model meta_data.
        """

        class_names = self.model.model_meta_data.get("class_names", None)
        if not class_names:
            raise ValueError(
                "Missing class names. "
                "self.model.model_meta_data[\"class_names\"] is None."
            )

        # The mappings are only populated by convert_label_column_to_int();
        # rebuild them from the model meta data when they are still unset.
        if not self.class_name_to_index:
            self.init_class_names_to_index_mapping(class_names)

        dk.find_features(unfiltered_df)
        filtered_df, _ = dk.filter_features(
            unfiltered_df, dk.training_features_list, training_filter=False
        )
        filtered_df = dk.normalize_data_from_metadata(filtered_df)
        dk.data_dictionary["prediction_features"] = filtered_df

        self.data_cleaning_predict(dk)
        x = torch.from_numpy(dk.data_dictionary["prediction_features"].values)\
            .float()\
            .to(self.device)

        # Inference only: no autograd graph is needed for the forward pass.
        with torch.no_grad():
            logits = self.model.model(x)
            probs = F.softmax(logits, dim=-1)
            predicted_classes = torch.argmax(probs, dim=-1)

        predicted_classes_str = self.decode_class_names(predicted_classes)
        # Tensor.numpy() only works on CPU tensors, so move the probabilities
        # back to the CPU first (a no-op when self.device is already the CPU).
        pred_df_prob = DataFrame(probs.detach().cpu().numpy(), columns=class_names)
        pred_df = DataFrame(predicted_classes_str, columns=[dk.label_list[0]])
        pred_df = pd.concat([pred_df, pred_df_prob], axis=1)
        return (pred_df, dk.do_predict)

    def encode_class_names(
            self,
            data_dictionary: Dict[str, pd.DataFrame],
            dk: FreqaiDataKitchen,
            class_names: List[str],
    ):
        """
        encode class name, str -> int
        assuming first column of *_labels data frame to be the target column
        containing the class names

        The "train_labels" and "test_labels" data frames of data_dictionary are
        modified in place. self.class_name_to_index must already be initialized.
        :param data_dictionary: dictionary holding the "train_labels" and
            "test_labels" data frames.
        :param dk: data kitchen; dk.label_list[0] names the target column.
        :param class_names: the class names the target column may contain.
        :raises OperationalException: if the target column contains a label
            that is not listed in class_names.
        """

        target_column_name = dk.label_list[0]
        for split in ["train", "test"]:
            label_df = data_dictionary[f"{split}_labels"]
            self.assert_valid_class_names(label_df[target_column_name], class_names)
            label_df[target_column_name] = [
                self.class_name_to_index[name] for name in label_df[target_column_name]
            ]

    @staticmethod
    def assert_valid_class_names(
            target_column: pd.Series,
            class_names: List[str]
    ):
        """
        Verify that every label found in target_column is one of class_names.
        :param target_column: series holding the class name of each sample.
        :param class_names: the allowed class names.
        :raises OperationalException: if target_column contains a label that
            is not listed in class_names.
        """
        non_defined_labels = set(target_column) - set(class_names)
        if non_defined_labels:
            # Adjacent string literals are concatenated into ONE message; a comma
            # between them would pass two separate arguments to the exception.
            raise OperationalException(
                f"Found non defined labels: {non_defined_labels}, "
                f"expecting labels: {class_names}"
            )

    def decode_class_names(self, class_ints: torch.Tensor) -> List[str]:
        """
        decode class name, int -> str
        :param class_ints: tensor of class indices, one element per sample.
        :return: the class name of every element, in the same order.
        """

        return [self.index_to_class_name[class_int.item()] for class_int in class_ints]

    def init_class_names_to_index_mapping(self, class_names):
        """
        Build the name -> index and index -> name lookup tables.
        A class's index is its position in class_names.
        :param class_names: ordered list of class names.
        """
        self.class_name_to_index = {s: i for i, s in enumerate(class_names)}
        self.index_to_class_name = {i: s for i, s in enumerate(class_names)}
        logger.info(f"encoded class name to index: {self.class_name_to_index}")

    def convert_label_column_to_int(
            self,
            data_dictionary: Dict[str, pd.DataFrame],
            dk: FreqaiDataKitchen,
            class_names: List[str]
    ):
        """
        Initialize the class name mappings and encode the train/test label
        columns of data_dictionary from class names to integer indices.
        :param data_dictionary: dictionary holding the "train_labels" and
            "test_labels" data frames (modified in place).
        :param dk: data kitchen; dk.label_list[0] names the target column.
        :param class_names: ordered list of class names.
        :raises OperationalException: if a label is not listed in class_names.
        """
        self.init_class_names_to_index_mapping(class_names)
        self.encode_class_names(data_dictionary, dk, class_names)

    def get_class_names(self) -> List[str]:
        """
        Return the class names stored in self.class_names.
        :raises ValueError: if the attribute self.class_names has not been set.
        """
        if not hasattr(self, "class_names"):
            raise ValueError(
                "Missing attribute: self.class_names "
                "set self.freqai.class_names = [\"class a\", \"class b\", \"class c\"] "
                "inside IStrategy.set_freqai_targets method."
            )
        return self.class_names
initial commit 2023-03-05 14:59:24 +00:00			`import logging`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`from typing import Dict, List, Tuple`
initial commit 2023-03-05 14:59:24 +00:00
			`import numpy as np`
sort imports 2023-03-08 14:03:36 +00:00			`import numpy.typing as npt`
initial commit 2023-03-05 14:59:24 +00:00			`import pandas as pd`
			`import torch`
			`from pandas import DataFrame`
			`from torch.nn import functional as F`

add missing import 2023-03-08 14:11:51 +00:00			`from freqtrade.exceptions import OperationalException`
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel`
sort imports 2023-03-08 14:03:36 +00:00			`from freqtrade.freqai.data_kitchen import FreqaiDataKitchen`
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00
initial commit 2023-03-05 14:59:24 +00:00
			`logger = logging.getLogger(__name__)`


create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`class PyTorchClassifier(BasePyTorchModel):`
add documentation 2023-03-09 09:14:54 +00:00			`"""`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`A PyTorch implementation of a classifier.`
			`User must implement fit method`
add documentation 2023-03-09 09:14:54 +00:00			`"""`
initial commit 2023-03-05 14:59:24 +00:00			`def __init__(self, **kwargs):`
			`super().__init__(**kwargs)`
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00			`self.class_name_to_index = None`
			`self.index_to_class_name = None`
initial commit 2023-03-05 14:59:24 +00:00
			`def predict(`
			`self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs`
			`) -> Tuple[DataFrame, npt.NDArray[np.int_]]:`
			`"""`
			`Filter the prediction features data and predict with it.`
			`:param unfiltered_df: Full dataframe for the current backtest period.`
			`:return:`
			`:pred_df: dataframe containing the predictions`
			`:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove`
			`data (NaNs) or felt uncertain about data (PCA and DI index)`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`:raises ValueError: if 'class_names' doesn't exist in model meta_data.`
initial commit 2023-03-05 14:59:24 +00:00			`"""`
improve documentation 2023-03-09 12:55:52 +00:00
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00			`class_names = self.model.model_meta_data.get("class_names", None)`
			`if not class_names:`
			`raise ValueError(`
			`"Missing class names. "`
			`"self.model.model_meta_data[\"class_names\"] is None."`
			`)`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00
			`if not self.class_name_to_index:`
			`self.init_class_names_to_index_mapping(class_names)`
initial commit 2023-03-05 14:59:24 +00:00
			`dk.find_features(unfiltered_df)`
			`filtered_df, _ = dk.filter_features(`
			`unfiltered_df, dk.training_features_list, training_filter=False`
			`)`
			`filtered_df = dk.normalize_data_from_metadata(filtered_df)`
			`dk.data_dictionary["prediction_features"] = filtered_df`

			`self.data_cleaning_predict(dk)`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`x = torch.from_numpy(dk.data_dictionary["prediction_features"].values)\`
			`.float()\`
			`.to(self.device)`
initial commit 2023-03-05 14:59:24 +00:00
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`logits = self.model.model(x)`
initial commit 2023-03-05 14:59:24 +00:00			`probs = F.softmax(logits, dim=-1)`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`predicted_classes = torch.argmax(probs, dim=-1)`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`predicted_classes_str = self.decode_class_names(predicted_classes)`
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00			`pred_df_prob = DataFrame(probs.detach().numpy(), columns=class_names)`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`pred_df = DataFrame(predicted_classes_str, columns=[dk.label_list[0]])`
initial commit 2023-03-05 14:59:24 +00:00			`pred_df = pd.concat([pred_df, pred_df_prob], axis=1)`
			`return (pred_df, dk.do_predict)`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`def encode_class_names(`
			`self,`
			`data_dictionary: Dict[str, pd.DataFrame],`
			`dk: FreqaiDataKitchen,`
			`class_names: List[str],`
			`):`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`"""`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`encode class name, str -> int`
			`assuming first column of *_labels data frame to be the target column`
			`containing the class names`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`"""`
improve documentation 2023-03-09 12:55:52 +00:00
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`target_column_name = dk.label_list[0]`
			`for split in ["train", "test"]:`
			`label_df = data_dictionary[f"{split}_labels"]`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`self.assert_valid_class_names(label_df[target_column_name], class_names)`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`label_df[target_column_name] = list(`
			`map(lambda x: self.class_name_to_index[x], label_df[target_column_name])`
			`)`

create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`@staticmethod`
			`def assert_valid_class_names(`
			`target_column: pd.Series,`
			`class_names: List[str]`
			`):`
			`non_defined_labels = set(target_column) - set(class_names)`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`if len(non_defined_labels) != 0:`
			`raise OperationalException(`
change documentation and small bugfix 2023-03-08 13:38:22 +00:00			`f"Found non defined labels: {non_defined_labels}, ",`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`f"expecting labels: {class_names}"`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`)`

create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`def decode_class_names(self, class_ints: torch.Tensor) -> List[str]:`
change documentation and small bugfix 2023-03-08 13:38:22 +00:00			`"""`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`decode class name, int -> str`
change documentation and small bugfix 2023-03-08 13:38:22 +00:00			`"""`
improve documentation 2023-03-09 12:55:52 +00:00
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`return list(map(lambda x: self.index_to_class_name[x.item()], class_ints))`
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00
			`def init_class_names_to_index_mapping(self, class_names):`
			`self.class_name_to_index = {s: i for i, s in enumerate(class_names)}`
			`self.index_to_class_name = {i: s for i, s in enumerate(class_names)}`
create children class to PyTorchClassifier to implement the fit method where we initialize the trainer and model objects 2023-03-19 12:38:49 +00:00			`logger.info(f"encoded class name to index: {self.class_name_to_index}")`

			`def convert_label_column_to_int(`
			`self,`
			`data_dictionary: Dict[str, pd.DataFrame],`
			`dk: FreqaiDataKitchen,`
			`class_names: List[str]`
			`):`
			`self.init_class_names_to_index_mapping(class_names)`
			`self.encode_class_names(data_dictionary, dk, class_names)`

			`def get_class_names(self) -> List[str]:`
			`if not hasattr(self, "class_names"):`
			`raise ValueError(`
			`"Missing attribute: self.class_names "`
			`"set self.freqai.class_names = [\"class a\", \"class b\", \"class c\"] "`
			`"inside IStrategy.set_freqai_targets method."`
			`)`
			`return self.class_names`