stable/freqtrade/freqai/prediction_models/PyTorchClassifierMultiTarget.py

import logging
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import numpy.typing as npt
import pandas as pd
import torch
from pandas import DataFrame
from torch.nn import functional as F

from freqtrade.exceptions import OperationalException
from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.base_models.PyTorchModelTrainer import PyTorchModelTrainer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.prediction_models.PyTorchMLPModel import PyTorchMLPModel


logger = logging.getLogger(__name__)


class PyTorchClassifierMultiTarget(BasePyTorchModel):
    """
    A PyTorch implementation of a multi-target classifier.

    Class names (strings) found in the label column are encoded to integer
    indices for training with a cross-entropy loss, and decoded back to their
    string names at prediction time.
    """

    def __init__(self, **kwargs):
        """
        Read the hyper-parameters from ``freqai_info["model_training_parameters"]``.

        Recognised keys (all optional):

        :param n_hidden: int, number of nodes in the hidden layer of the neural
            network (default 1024). Only stored on the instance here; this class
            does not forward it to the model.
        :param max_iters: int, maximum number of iterations to run during
            training (default 100).
        :param batch_size: int, batch size to use during training (default 64).
        :param learning_rate: float, learning rate handed to the AdamW
            optimizer (default 3e-4).
        :param max_n_eval_batches: optional int, forwarded unchanged to
            PyTorchModelTrainer (default None).
        :param model_kwargs: dict of extra keyword arguments forwarded to
            PyTorchMLPModel (default: empty dict).
        """

        super().__init__(**kwargs)
        model_training_params = self.freqai_info.get("model_training_parameters", {})
        self.n_hidden: int = model_training_params.get("n_hidden", 1024)
        self.max_iters: int = model_training_params.get("max_iters", 100)
        self.batch_size: int = model_training_params.get("batch_size", 64)
        self.learning_rate: float = model_training_params.get("learning_rate", 3e-4)
        self.max_n_eval_batches: Optional[int] = model_training_params.get(
            "max_n_eval_batches", None
        )
        self.model_kwargs: Dict = model_training_params.get("model_kwargs", {})
        # Both mappings are populated by init_class_names_to_index_mapping(),
        # which fit() and predict() call before using them.
        self.class_name_to_index = None  # class name -> integer index
        self.index_to_class_name = None  # integer index -> class name

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary constructed by DataHandler to hold
                                all the training and test data/labels.
        :param dk: the data kitchen for the current pair; supplies the label
                   column name and the pair used to look up an initial model.
        :return: the PyTorchModelTrainer after fitting.
        :raises ValueError: If self.class_names is not defined in the parent class.
        """

        if not hasattr(self, "class_names"):
            raise ValueError(
                "Missing attribute: self.class_names "
                "set self.freqai.class_names = [\"class a\", \"class b\", \"class c\"] "
                "inside IStrategy.set_freqai_targets method."
            )

        self.init_class_names_to_index_mapping(self.class_names)
        # Replaces the string labels in the train/test label frames with ints
        # (in place), as required by CrossEntropyLoss.
        self.encode_classes_name(data_dictionary, dk)
        n_features = data_dictionary["train_features"].shape[-1]
        model = PyTorchMLPModel(
            input_dim=n_features,
            output_dim=len(self.class_names),
            **self.model_kwargs
        )
        model.to(self.device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
        criterion = torch.nn.CrossEntropyLoss()
        init_model = self.get_init_model(dk.pair)
        trainer = PyTorchModelTrainer(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            # Stored with the model so predict() can rebuild the index mapping.
            model_meta_data={"class_names": self.class_names},
            device=self.device,
            batch_size=self.batch_size,
            max_iters=self.max_iters,
            max_n_eval_batches=self.max_n_eval_batches,
            init_model=init_model
        )
        trainer.fit(data_dictionary)
        return trainer

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions: the first column (named
        after the first label) holds the predicted class name, followed by one
        probability column per class name.
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        :raises ValueError: if 'class_names' doesn't exist in model meta_data.
        """

        class_names = self.model.model_meta_data.get("class_names", None)
        if not class_names:
            raise ValueError(
                "Missing class names. "
                "self.model.model_meta_data[\"class_names\"] is None."
            )
        self.init_class_names_to_index_mapping(class_names)

        dk.find_features(unfiltered_df)
        filtered_df, _ = dk.filter_features(
            unfiltered_df, dk.training_features_list, training_filter=False
        )
        filtered_df = dk.normalize_data_from_metadata(filtered_df)
        dk.data_dictionary["prediction_features"] = filtered_df

        self.data_cleaning_predict(dk)
        dk.data_dictionary["prediction_features"] = torch.tensor(
            dk.data_dictionary["prediction_features"].values
        ).float().to(self.device)

        # NOTE(review): the network is not switched to eval() mode here; if the
        # model contains dropout/batch-norm layers, confirm the trainer leaves
        # it in the intended mode.
        # Inference only: no autograd graph is needed for the forward pass.
        with torch.no_grad():
            logits = self.model.model(dk.data_dictionary["prediction_features"])
            probs = F.softmax(logits, dim=-1)
            predicted_classes = torch.argmax(probs, dim=-1)
        predicted_classes_str = self.decode_classes_name(predicted_classes)
        # Tensor.numpy() only works for CPU tensors; the features (and hence
        # the outputs) live on self.device, so copy back to the CPU first.
        pred_df_prob = DataFrame(probs.detach().cpu().numpy(), columns=class_names)
        pred_df = DataFrame(predicted_classes_str, columns=[dk.label_list[0]])
        pred_df = pd.concat([pred_df, pred_df_prob], axis=1)
        return (pred_df, dk.do_predict)

    def encode_classes_name(self, data_dictionary: Dict[str, pd.DataFrame], dk: FreqaiDataKitchen):
        """
        encode class name str -> int
        assuming first column of *_labels data frame to contain class names

        The "train_labels" and "test_labels" frames are modified in place.
        :raises OperationalException: if a label is not one of self.class_names.
        """

        target_column_name = dk.label_list[0]
        for split in ["train", "test"]:
            label_df = data_dictionary[f"{split}_labels"]
            self.assert_valid_class_names(label_df[target_column_name])
            label_df[target_column_name] = [
                self.class_name_to_index[name] for name in label_df[target_column_name]
            ]

    def assert_valid_class_names(self, labels: pd.Series):
        """
        Verify that every value in `labels` is one of self.class_names.
        :param labels: the label column to validate.
        :raises OperationalException: if any label is not a defined class name.
        """
        non_defined_labels = set(labels) - set(self.class_names)
        if non_defined_labels:
            # Adjacent literals are concatenated into ONE message; separating
            # them with a comma would pass two arguments to the exception and
            # render the error as a tuple.
            raise OperationalException(
                f"Found non defined labels: {non_defined_labels}, "
                f"expecting labels: {self.class_names}"
            )

    def decode_classes_name(self, classes: torch.Tensor) -> List[str]:
        """
        decode class name int -> str
        :param classes: tensor of predicted class indices.
        :return: the class name for each index, in the same order.
        """

        return [self.index_to_class_name[index] for index in classes.tolist()]

    def init_class_names_to_index_mapping(self, class_names):
        """
        Build the name -> index and index -> name lookup tables.
        :param class_names: ordered class names; a name's position is its index.
        """
        self.class_name_to_index = {s: i for i, s in enumerate(class_names)}
        self.index_to_class_name = dict(enumerate(class_names))
        logger.info("class_name_to_index: %s", self.class_name_to_index)
initial commit 2023-03-05 14:59:24 +00:00			`import logging`
sort imports 2023-03-12 14:16:04 +00:00			`from typing import Any, Dict, List, Optional, Tuple`
initial commit 2023-03-05 14:59:24 +00:00
			`import numpy as np`
sort imports 2023-03-08 14:03:36 +00:00			`import numpy.typing as npt`
initial commit 2023-03-05 14:59:24 +00:00			`import pandas as pd`
			`import torch`
			`from pandas import DataFrame`
			`from torch.nn import functional as F`

add missing import 2023-03-08 14:11:51 +00:00			`from freqtrade.exceptions import OperationalException`
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel`
			`from freqtrade.freqai.base_models.PyTorchModelTrainer import PyTorchModelTrainer`
sort imports 2023-03-08 14:03:36 +00:00			`from freqtrade.freqai.data_kitchen import FreqaiDataKitchen`
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`from freqtrade.freqai.prediction_models.PyTorchMLPModel import PyTorchMLPModel`

initial commit 2023-03-05 14:59:24 +00:00
			`logger = logging.getLogger(__name__)`


use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`class PyTorchClassifierMultiTarget(BasePyTorchModel):`
add documentation 2023-03-09 09:14:54 +00:00			`"""`
			`A PyTorch implementation of a multi-target classifier.`
			`"""`
initial commit 2023-03-05 14:59:24 +00:00			`def __init__(self, **kwargs):`
add documentation 2023-03-09 09:14:54 +00:00			`"""`
			`int: The number of nodes in the hidden layer of the neural network.`
			`int: The maximum number of iterations to run during training.`
			`int: The batch size to use during training.`
			`float: The learning rate to use during training.`
			`int: The number of training iterations between each evaluation.`
			`dict: A dictionary mapping class names to their corresponding indices.`
			`dict: A dictionary mapping indices to their corresponding class names.`
			`"""`
improve documentation 2023-03-09 12:55:52 +00:00
initial commit 2023-03-05 14:59:24 +00:00			`super().__init__(**kwargs)`
revert to using model_training_parameters 2023-03-12 22:35:51 +00:00			`model_training_params = self.freqai_info.get("model_training_parameters", {})`
			`self.n_hidden: int = model_training_params.get("n_hidden", 1024)`
			`self.max_iters: int = model_training_params.get("max_iters", 100)`
			`self.batch_size: int = model_training_params.get("batch_size", 64)`
			`self.learning_rate: float = model_training_params.get("learning_rate", 3e-4)`
			`self.max_n_eval_batches: Optional[int] = model_training_params.get(`
			`"max_n_eval_batches", None`
			`)`
			`self.model_kwargs: Dict = model_training_params.get("model_kwargs", {})`
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00			`self.class_name_to_index = None`
			`self.index_to_class_name = None`
initial commit 2023-03-05 14:59:24 +00:00
			`def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:`
			`"""`
			`User sets up the training and test data to fit their desired model here`
add documentation 2023-03-09 09:14:54 +00:00			`:param data_dictionary: the dictionary constructed by DataHandler to hold`
initial commit 2023-03-05 14:59:24 +00:00			`all the training and test data/labels.`
add documentation 2023-03-09 09:14:54 +00:00			`:raises ValueError: If self.class_names is not defined in the parent class.`

initial commit 2023-03-05 14:59:24 +00:00			`"""`
improve documentation 2023-03-09 12:55:52 +00:00
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00			`if not hasattr(self, "class_names"):`
			`raise ValueError(`
			`"Missing attribute: self.class_names "`
			`"set self.freqai.class_names = [\"class a\", \"class b\", \"class c\"] "`
			`"inside IStrategy.set_freqai_targets method."`
			`)`

			`self.init_class_names_to_index_mapping(self.class_names)`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`self.encode_classes_name(data_dictionary, dk)`
convert single quotes to double quotes 2023-03-09 11:29:11 +00:00			`n_features = data_dictionary["train_features"].shape[-1]`
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`model = PyTorchMLPModel(`
initial commit 2023-03-05 14:59:24 +00:00			`input_dim=n_features,`
generalize mlp model 2023-03-12 12:31:08 +00:00			`output_dim=len(self.class_names),`
			`**self.model_kwargs`
initial commit 2023-03-05 14:59:24 +00:00			`)`
			`model.to(self.device)`
			`optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)`
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`criterion = torch.nn.CrossEntropyLoss()`
initial commit 2023-03-05 14:59:24 +00:00			`init_model = self.get_init_model(dk.pair)`
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`trainer = PyTorchModelTrainer(`
			`model=model,`
			`optimizer=optimizer,`
			`criterion=criterion,`
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00			`model_meta_data={"class_names": self.class_names},`
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`device=self.device,`
			`batch_size=self.batch_size,`
			`max_iters=self.max_iters,`
use one iteration on all test and train data for evaluation 2023-03-12 10:48:15 +00:00			`max_n_eval_batches=self.max_n_eval_batches,`
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`init_model=init_model`
			`)`
			`trainer.fit(data_dictionary)`
initial commit 2023-03-05 14:59:24 +00:00			`return trainer`

			`def predict(`
			`self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs`
			`) -> Tuple[DataFrame, npt.NDArray[np.int_]]:`
			`"""`
			`Filter the prediction features data and predict with it.`
			`:param unfiltered_df: Full dataframe for the current backtest period.`
			`:return:`
			`:pred_df: dataframe containing the predictions`
			`:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove`
			`data (NaNs) or felt uncertain about data (PCA and DI index)`
improve documentation 2023-03-09 12:55:52 +00:00			`:raises ValueError: if 'class_name' doesn't exist in model meta_data.`
initial commit 2023-03-05 14:59:24 +00:00			`"""`
improve documentation 2023-03-09 12:55:52 +00:00
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00			`class_names = self.model.model_meta_data.get("class_names", None)`
			`if not class_names:`
			`raise ValueError(`
			`"Missing class names. "`
			`"self.model.model_meta_data[\"class_names\"] is None."`
			`)`
			`self.init_class_names_to_index_mapping(class_names)`
initial commit 2023-03-05 14:59:24 +00:00
			`dk.find_features(unfiltered_df)`
			`filtered_df, _ = dk.filter_features(`
			`unfiltered_df, dk.training_features_list, training_filter=False`
			`)`
			`filtered_df = dk.normalize_data_from_metadata(filtered_df)`
			`dk.data_dictionary["prediction_features"] = filtered_df`

			`self.data_cleaning_predict(dk)`
			`dk.data_dictionary["prediction_features"] = torch.tensor(`
			`dk.data_dictionary["prediction_features"].values`
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`).float().to(self.device)`
initial commit 2023-03-05 14:59:24 +00:00
use data loader, add evaluation on epoch 2023-03-06 14:16:45 +00:00			`logits = self.model.model(dk.data_dictionary["prediction_features"])`
initial commit 2023-03-05 14:59:24 +00:00			`probs = F.softmax(logits, dim=-1)`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`predicted_classes = torch.argmax(probs, dim=-1)`
			`predicted_classes_str = self.decode_classes_name(predicted_classes)`
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00			`pred_df_prob = DataFrame(probs.detach().numpy(), columns=class_names)`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`pred_df = DataFrame(predicted_classes_str, columns=[dk.label_list[0]])`
initial commit 2023-03-05 14:59:24 +00:00			`pred_df = pd.concat([pred_df, pred_df_prob], axis=1)`
			`return (pred_df, dk.do_predict)`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00
			`def encode_classes_name(self, data_dictionary: Dict[str, pd.DataFrame], dk: FreqaiDataKitchen):`
			`"""`
			`encode class name str -> int`
			`assuming first column of *_labels data frame to contain class names`
			`"""`
improve documentation 2023-03-09 12:55:52 +00:00
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`target_column_name = dk.label_list[0]`
			`for split in ["train", "test"]:`
			`label_df = data_dictionary[f"{split}_labels"]`
			`self.assert_valid_class_names(label_df[target_column_name])`
			`label_df[target_column_name] = list(`
			`map(lambda x: self.class_name_to_index[x], label_df[target_column_name])`
			`)`

			`def assert_valid_class_names(self, labels: pd.Series):`
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00			`non_defined_labels = set(labels) - set(self.class_names)`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`if len(non_defined_labels) != 0:`
			`raise OperationalException(`
change documentation and small bugfix 2023-03-08 13:38:22 +00:00			`f"Found non defined labels: {non_defined_labels}, ",`
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00			`f"expecting labels: {self.class_names}"`
ad multiclass target names encoder to ints 2023-03-08 12:29:38 +00:00			`)`

fix tensor type hint 2023-03-08 14:16:49 +00:00			`def decode_classes_name(self, classes: torch.Tensor) -> List[str]:`
change documentation and small bugfix 2023-03-08 13:38:22 +00:00			`"""`
			`decode class name int -> str`
			`"""`
improve documentation 2023-03-09 12:55:52 +00:00
change documentation and small bugfix 2023-03-08 13:38:22 +00:00			`return list(map(lambda x: self.index_to_class_name[x.item()], classes))`
set class names in IStrategy.set_freqai_targets method, also save class name with model meta data 2023-03-08 16:36:44 +00:00
			`def init_class_names_to_index_mapping(self, class_names):`
			`self.class_name_to_index = {s: i for i, s in enumerate(class_names)}`
			`self.index_to_class_name = {i: s for i, s in enumerate(class_names)}`
			`logger.info(f"class_name_to_index: {self.class_name_to_index}")`