Source code for RADAR.time_series.algorithms.modelsTransformersTS.vanillaTransformer.embed

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math
    

[docs]
class PositionalEmbedding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model).float()
        pe.require_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)


[docs]
    def forward(self, x):
        return self.pe[:, :x.size(1)]


    

[docs]
class PositionalEncoding(nn.Module):
    """
    Positional Encoding class for Transformers.

    This module implements positional encoding for a Transformer model. Positional encoding helps
    maintain the temporal information of the data. The chosen implementation follows the vanilla
    method as originally defined in the "Attention Is All You Need" paper.

    Parameters
    ----------
    d_model : int
        The dimensionality of the model, representing the number of features or channels in
        the input and output embeddings.
    dropout : float, optional
        The dropout probability applied to the positional encodings, by default 0.1.
    max_len : int, optional
        The maximum sequence length for which positional encodings are calculated, by default 5000.

    Attributes
    ----------
    pe : torch.Tensor
        The positional encodings matrix.

    Methods
    -------
    forward(x, pos=0)
        Forward pass through the Positional Encoding layer.

    Examples
    --------
    Create a PositionalEncoding instance:

    >>> positional_encoder = PositionalEncoding(d_model=512, dropout=0.1, max_len=1000)
    >>> input_data = torch.rand(32, 1000, 512)
    >>> output_data = positional_encoder(input_data)

    Notes
    -----
    PositionalEncoding is a crucial component in Transformer models to capture the order of
    tokens in input sequences.

    References
    ----------
    Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., ... & Polosukhin, I. (2017).
    Attention is all you need. In Advances in neural information processing systems (pp. 30-31).

    """
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model).float() * (-math.log(10000.0) / d_model))
        pe += torch.sin(position * div_term)
        pe += torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        # Not registered as parameter (it is not) but saved along them
        self.register_buffer('pe', pe)


[docs]
    def forward(self, x, pos=0):
        """
        Apply positional encoding to the input data.

        Parameters
        ----------
        x : torch.Tensor
            The input tensor to which positional encoding is applied.
        pos : int, optional
            The starting position for adding positional encodings, by default 0.

        Returns
        -------
        torch.Tensor
            The input tensor with positional encoding added and dropout applied.

        """
        
        x = x + self.pe[pos:pos+x.size(0), :]
        return self.dropout(x)