organize pytorch files

This commit is contained in:
robcaulk
2023-03-21 15:09:54 +01:00
parent 83a7d888bc
commit 1ba01746a0
7 changed files with 10 additions and 10 deletions

View File

@@ -1,148 +0,0 @@
import logging
from typing import Dict, List, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
import torch
from pandas import DataFrame
from torch.nn import functional as F
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class PyTorchClassifier(BasePyTorchModel):
"""
A PyTorch implementation of a classifier.
User must implement fit method
Important!
- User must declare the target class names in the strategy,
under IStrategy.set_freqai_targets method.
for example, in your strategy:
```
def set_freqai_targets(self, dataframe: DataFrame, metadata: Dict, **kwargs):
self.freqai.class_names = ["down", "up"]
dataframe['&s-up_or_down'] = np.where(dataframe["close"].shift(-100) >
dataframe["close"], 'up', 'down')
return dataframe
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.class_name_to_index = None
self.index_to_class_name = None
def predict(
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
:raises ValueError: if 'class_names' doesn't exist in model meta_data.
"""
class_names = self.model.model_meta_data.get("class_names", None)
if not class_names:
raise ValueError(
"Missing class names. "
"self.model.model_meta_data['class_names'] is None."
)
if not self.class_name_to_index:
self.init_class_names_to_index_mapping(class_names)
dk.find_features(unfiltered_df)
filtered_df, _ = dk.filter_features(
unfiltered_df, dk.training_features_list, training_filter=False
)
filtered_df = dk.normalize_data_from_metadata(filtered_df)
dk.data_dictionary["prediction_features"] = filtered_df
self.data_cleaning_predict(dk)
x = torch.from_numpy(dk.data_dictionary["prediction_features"].values)\
.float()\
.to(self.device)
logits = self.model.model(x)
probs = F.softmax(logits, dim=-1)
predicted_classes = torch.argmax(probs, dim=-1)
predicted_classes_str = self.decode_class_names(predicted_classes)
pred_df_prob = DataFrame(probs.detach().numpy(), columns=class_names)
pred_df = DataFrame(predicted_classes_str, columns=[dk.label_list[0]])
pred_df = pd.concat([pred_df, pred_df_prob], axis=1)
return (pred_df, dk.do_predict)
def encode_class_names(
self,
data_dictionary: Dict[str, pd.DataFrame],
dk: FreqaiDataKitchen,
class_names: List[str],
):
"""
encode class name, str -> int
assuming first column of *_labels data frame to be the target column
containing the class names
"""
target_column_name = dk.label_list[0]
for split in ["train", "test"]:
label_df = data_dictionary[f"{split}_labels"]
self.assert_valid_class_names(label_df[target_column_name], class_names)
label_df[target_column_name] = list(
map(lambda x: self.class_name_to_index[x], label_df[target_column_name])
)
@staticmethod
def assert_valid_class_names(
target_column: pd.Series,
class_names: List[str]
):
non_defined_labels = set(target_column) - set(class_names)
if len(non_defined_labels) != 0:
raise OperationalException(
f"Found non defined labels: {non_defined_labels}, ",
f"expecting labels: {class_names}"
)
def decode_class_names(self, class_ints: torch.Tensor) -> List[str]:
"""
decode class name, int -> str
"""
return list(map(lambda x: self.index_to_class_name[x.item()], class_ints))
def init_class_names_to_index_mapping(self, class_names):
self.class_name_to_index = {s: i for i, s in enumerate(class_names)}
self.index_to_class_name = {i: s for i, s in enumerate(class_names)}
logger.info(f"encoded class name to index: {self.class_name_to_index}")
def convert_label_column_to_int(
self,
data_dictionary: Dict[str, pd.DataFrame],
dk: FreqaiDataKitchen,
class_names: List[str]
):
self.init_class_names_to_index_mapping(class_names)
self.encode_class_names(data_dictionary, dk, class_names)
def get_class_names(self) -> List[str]:
if not self.class_names:
raise ValueError(
"self.class_names is empty, "
"set self.freqai.class_names = ['class a', 'class b', 'class c'] "
"inside IStrategy.set_freqai_targets method."
)
return self.class_names

View File

@@ -2,13 +2,13 @@ from typing import Any, Dict
import torch
from freqtrade.freqai.base_models.PyTorchModelTrainer import PyTorchModelTrainer
from freqtrade.freqai.base_models.BaseTorchClassifier import BaseTorchClassifier
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.prediction_models.PyTorchClassifier import PyTorchClassifier
from freqtrade.freqai.prediction_models.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.torch.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchModelTrainer
class PyTorchMLPClassifier(PyTorchClassifier):
class PyTorchMLPClassifier(BaseTorchClassifier):
"""
This class implements the fit method of IFreqaiModel.
in the fit method we initialize the model and trainer objects.

View File

@@ -1,95 +0,0 @@
import logging
import torch
import torch.nn as nn
logger = logging.getLogger(__name__)
class PyTorchMLPModel(nn.Module):
"""
A multi-layer perceptron (MLP) model implemented using PyTorch.
This class mainly serves as a simple example for the integration of PyTorch model's
to freqai. It is not optimized at all and should not be used for production purposes.
:param input_dim: The number of input features. This parameter specifies the number
of features in the input data that the MLP will use to make predictions.
:param output_dim: The number of output classes. This parameter specifies the number
of classes that the MLP will predict.
:param hidden_dim: The number of hidden units in each layer. This parameter controls
the complexity of the MLP and determines how many nonlinear relationships the MLP
can represent. Increasing the number of hidden units can increase the capacity of
the MLP to model complex patterns, but it also increases the risk of overfitting
the training data. Default: 256
:param dropout_percent: The dropout rate for regularization. This parameter specifies
the probability of dropping out a neuron during training to prevent overfitting.
The dropout rate should be tuned carefully to balance between underfitting and
overfitting. Default: 0.2
:param n_layer: The number of layers in the MLP. This parameter specifies the number
of layers in the MLP architecture. Adding more layers to the MLP can increase its
capacity to model complex patterns, but it also increases the risk of overfitting
the training data. Default: 1
:returns: The output of the MLP, with shape (batch_size, output_dim)
"""
def __init__(self, input_dim: int, output_dim: int, **kwargs):
super().__init__()
hidden_dim: int = kwargs.get("hidden_dim", 256)
dropout_percent: int = kwargs.get("dropout_percent", 0.2)
n_layer: int = kwargs.get("n_layer", 1)
self.input_layer = nn.Linear(input_dim, hidden_dim)
self.blocks = nn.Sequential(*[Block(hidden_dim, dropout_percent) for _ in range(n_layer)])
self.output_layer = nn.Linear(hidden_dim, output_dim)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(p=dropout_percent)
def forward(self, x: torch.Tensor) -> torch.Tensor:
x = self.relu(self.input_layer(x))
x = self.dropout(x)
x = self.blocks(x)
x = self.output_layer(x)
return x
class Block(nn.Module):
"""
A building block for a multi-layer perceptron (MLP).
:param hidden_dim: The number of hidden units in the feedforward network.
:param dropout_percent: The dropout rate for regularization.
:returns: torch.Tensor. with shape (batch_size, hidden_dim)
"""
def __init__(self, hidden_dim: int, dropout_percent: int):
super().__init__()
self.ff = FeedForward(hidden_dim)
self.dropout = nn.Dropout(p=dropout_percent)
self.ln = nn.LayerNorm(hidden_dim)
def forward(self, x: torch.Tensor) -> torch.Tensor:
x = self.ff(self.ln(x))
x = self.dropout(x)
return x
class FeedForward(nn.Module):
"""
A simple fully-connected feedforward neural network block.
:param hidden_dim: The number of hidden units in the block.
:return: torch.Tensor. with shape (batch_size, hidden_dim)
"""
def __init__(self, hidden_dim: int):
super().__init__()
self.net = nn.Sequential(
nn.Linear(hidden_dim, hidden_dim),
nn.ReLU(),
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
return self.net(x)

View File

@@ -2,13 +2,13 @@ from typing import Any, Dict
import torch
from freqtrade.freqai.base_models.PyTorchModelTrainer import PyTorchModelTrainer
from freqtrade.freqai.base_models.BaseTorchRegressor import BaseTorchRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.prediction_models.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.prediction_models.PyTorchRegressor import PyTorchRegressor
from freqtrade.freqai.torch.PyTorchMLPModel import PyTorchMLPModel
from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchModelTrainer
class PyTorchMLPRegressor(PyTorchRegressor):
class PyTorchMLPRegressor(BaseTorchRegressor):
"""
This class implements the fit method of IFreqaiModel.
in the fit method we initialize the model and trainer objects.

View File

@@ -1,50 +0,0 @@
import logging
from typing import Tuple
import numpy as np
import numpy.typing as npt
import torch
from pandas import DataFrame
from freqtrade.freqai.base_models.BasePyTorchModel import BasePyTorchModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class PyTorchRegressor(BasePyTorchModel):
"""
A PyTorch implementation of a regressor.
User must implement fit method
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
def predict(
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
dk.find_features(unfiltered_df)
filtered_df, _ = dk.filter_features(
unfiltered_df, dk.training_features_list, training_filter=False
)
filtered_df = dk.normalize_data_from_metadata(filtered_df)
dk.data_dictionary["prediction_features"] = filtered_df
self.data_cleaning_predict(dk)
x = torch.from_numpy(dk.data_dictionary["prediction_features"].values)\
.float()\
.to(self.device)
y = self.model.model(x)
pred_df = DataFrame(y.detach().numpy(), columns=[dk.label_list[0]])
return (pred_df, dk.do_predict)