add mlp documentation
This commit is contained in:
parent
ddd1b5c0ff
commit
ea08931ab3
@ -1,16 +1,42 @@
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from torch import Tensor
|
import torch
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class PyTorchMLPModel(nn.Module):
|
class PyTorchMLPModel(nn.Module):
|
||||||
|
"""
|
||||||
|
A multi-layer perceptron (MLP) model implemented using PyTorch.
|
||||||
|
|
||||||
|
:param input_dim: The number of input features.
|
||||||
|
:param output_dim: The number of output classes.
|
||||||
|
:param hidden_dim: The number of hidden units in each layer. Default: 256
|
||||||
|
:param dropout_percent: The dropout probability (passed as ``p`` to ``nn.Dropout``) used for regularization. Default: 0.2
|
||||||
|
:param n_layer: The number of layers in the MLP. Default: 1
|
||||||
|
|
||||||
|
:returns: The output of the MLP, with shape (batch_size, output_dim)
|
||||||
|
|
||||||
|
|
||||||
|
A neural network typically consists of input, output, and hidden layers, where the
|
||||||
|
information flows from the input layer through the hidden layers to the output layer.
|
||||||
|
In a feedforward neural network, such as a multilayer perceptron (MLP), the
|
||||||
|
information flows in one direction only. Each hidden layer contains multiple units
|
||||||
|
or nodes that take input from the previous layer and produce output that goes to the
|
||||||
|
next layer.
|
||||||
|
|
||||||
|
The hidden_dim parameter (also taken by the FeedForward class) refers to the number of units
|
||||||
|
(or nodes) in each hidden layer. This parameter controls the complexity of the neural
|
||||||
|
network and determines how many nonlinear relationships the network can represent.
|
||||||
|
A higher value of hidden_dim allows the network to represent more complex functions
|
||||||
|
but may also make the network more prone to overfitting, where the model memorizes
|
||||||
|
the training data instead of learning general patterns.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, input_dim: int, output_dim: int, **kwargs):
|
def __init__(self, input_dim: int, output_dim: int, **kwargs):
|
||||||
super(PyTorchMLPModel, self).__init__()
|
super(PyTorchMLPModel, self).__init__()
|
||||||
hidden_dim: int = kwargs.get("hidden_dim", 1024)
|
hidden_dim: int = kwargs.get("hidden_dim", 256)
|
||||||
dropout_percent: int = kwargs.get("dropout_percent", 0.2)
|
dropout_percent: int = kwargs.get("dropout_percent", 0.2)
|
||||||
n_layer: int = kwargs.get("n_layer", 1)
|
n_layer: int = kwargs.get("n_layer", 1)
|
||||||
self.input_layer = nn.Linear(input_dim, hidden_dim)
|
self.input_layer = nn.Linear(input_dim, hidden_dim)
|
||||||
@ -19,7 +45,7 @@ class PyTorchMLPModel(nn.Module):
|
|||||||
self.relu = nn.ReLU()
|
self.relu = nn.ReLU()
|
||||||
self.dropout = nn.Dropout(p=dropout_percent)
|
self.dropout = nn.Dropout(p=dropout_percent)
|
||||||
|
|
||||||
def forward(self, x: Tensor) -> Tensor:
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||||
x = self.relu(self.input_layer(x))
|
x = self.relu(self.input_layer(x))
|
||||||
x = self.dropout(x)
|
x = self.dropout(x)
|
||||||
x = self.blocks(x)
|
x = self.blocks(x)
|
||||||
@ -28,19 +54,35 @@ class PyTorchMLPModel(nn.Module):
|
|||||||
|
|
||||||
|
|
||||||
class Block(nn.Module):
|
class Block(nn.Module):
|
||||||
|
"""
|
||||||
|
A building block for a multi-layer perceptron (MLP) implemented using PyTorch.
|
||||||
|
|
||||||
|
:param hidden_dim: The number of hidden units in the feedforward network.
|
||||||
|
:param dropout_percent: The dropout probability (passed as ``p`` to ``nn.Dropout``) used for regularization.
|
||||||
|
|
||||||
|
:returns: A torch.Tensor with shape (batch_size, hidden_dim).
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, hidden_dim: int, dropout_percent: int):
|
def __init__(self, hidden_dim: int, dropout_percent: int):
|
||||||
super(Block, self).__init__()
|
super(Block, self).__init__()
|
||||||
self.ff = FeedForward(hidden_dim)
|
self.ff = FeedForward(hidden_dim)
|
||||||
self.dropout = nn.Dropout(p=dropout_percent)
|
self.dropout = nn.Dropout(p=dropout_percent)
|
||||||
self.ln = nn.LayerNorm(hidden_dim)
|
self.ln = nn.LayerNorm(hidden_dim)
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||||
x = self.ff(self.ln(x))
|
x = self.ff(self.ln(x))
|
||||||
x = self.dropout(x)
|
x = self.dropout(x)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
||||||
class FeedForward(nn.Module):
|
class FeedForward(nn.Module):
|
||||||
|
"""
|
||||||
|
A fully-connected feedforward neural network block.
|
||||||
|
|
||||||
|
:param hidden_dim: The number of hidden units in the block.
|
||||||
|
:returns: A torch.Tensor with shape (batch_size, hidden_dim).
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, hidden_dim: int):
|
def __init__(self, hidden_dim: int):
|
||||||
super(FeedForward, self).__init__()
|
super(FeedForward, self).__init__()
|
||||||
self.net = nn.Sequential(
|
self.net = nn.Sequential(
|
||||||
@ -48,5 +90,5 @@ class FeedForward(nn.Module):
|
|||||||
nn.ReLU(),
|
nn.ReLU(),
|
||||||
)
|
)
|
||||||
|
|
||||||
def forward(self, x):
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
||||||
return self.net(x)
|
return self.net(x)
|
||||||
|
Loading…
Reference in New Issue
Block a user