Source code for qfeval_functions.functions.nancovar

import torch

from .nanmean import nanmean
from .nanmulmean import nanmulmean
from .nanones import nanones



[docs]
def nancovar(
    x: torch.Tensor,
    y: torch.Tensor,
    dim: int = -1,
    keepdim: bool = False,
    ddof: int = 1,
) -> torch.Tensor:
    r"""Compute covariance between two tensors, ignoring NaN values.

    This function calculates the covariance between tensors :attr:`x` and
    :attr:`y` along the specified dimension, excluding any pairs where either
    value is NaN. Covariance measures how much two variables change together.
    Unlike ``numpy.cov``, this function computes element-wise covariance for
    each batch index rather than producing a covariance matrix.

    The function is memory-efficient when broadcasting tensors but may have
    reduced precision (approximately half-precision) when dealing with many
    NaN values due to the prioritization of memory efficiency over numerical
    precision.

    The NaN-aware covariance is computed as:

    .. math::
        \text{Cov}(X, Y) =
            \frac{1}{N - \text{ddof}}
            \sum_{i \in V} (x_i - \bar{x}_V)(y_i - \bar{y}_V)

    where :math:`V` is the set of valid pairs (positions where neither
    :attr:`x` nor :attr:`y` is NaN), :math:`N = |V|`, and
    :math:`\bar{x}_V`, :math:`\bar{y}_V` are the means taken over those valid
    pairs only.

    Args:
        x (Tensor):
            The first input tensor.
        y (Tensor):
            The second input tensor. Must be broadcastable with :attr:`x`.
        dim (int, optional):
            The dimension along which to compute the covariance.
            Default is -1 (the last dimension).
        keepdim (bool, optional):
            Whether the output tensor has :attr:`dim`
            retained or not. Default is False.
        ddof (int, optional):
            Delta degrees of freedom. The divisor used in
            the calculation is ``N - ddof``, where ``N`` represents the number
            of valid (non-NaN) pairs. Default is 1.

    Returns:
        Tensor:
            The covariance values computed only over valid (non-NaN) pairs,
            converted to the dtype and device of :attr:`x`. The shape depends
            on the input dimensions and the :attr:`keepdim` parameter. Entries
            for which ``N - ddof`` is zero (e.g. a single valid pair with
            ``ddof=1``) are not finite.

    Example:

        >>> # Simple covariance with NaN values
        >>> # Valid pairs: (1, 2), (2, 4), (5, 10)
        >>> x = torch.tensor([1.0, 2.0, nan, 4.0, 5.0])
        >>> y = torch.tensor([2.0, 4.0, 6.0, nan, 10.0])
        >>> QF.nancovar(x, y, dim=0)
        tensor(8.6667)

        >>> # 2D tensors with NaN values
        >>> x = torch.tensor([[1.0, nan, 3.0, 4.0],
        ...                   [5.0, 6.0, nan, 8.0]])
        >>> y = torch.tensor([[2.0, 4.0, 6.0, nan],
        ...                   [10.0, nan, 14.0, 16.0]])
        >>> QF.nancovar(x, y, dim=1)
        tensor([4.0000, 9.0000])

        >>> # Population covariance (ddof=0)
        >>> x = torch.tensor([1.0, nan, 3.0, 4.0])
        >>> y = torch.tensor([2.0, 4.0, nan, 8.0])
        >>> QF.nancovar(x, y, dim=0, ddof=0)
        tensor(4.5000)

        >>> # With keepdim (only one valid pair per row, so N - ddof == 0)
        >>> x = torch.tensor([[1.0, 2.0, nan],
        ...                   [4.0, nan, 6.0]])
        >>> y = torch.tensor([[2.0, nan, 6.0],
        ...                   [8.0, 10.0, nan]])
        >>> QF.nancovar(x, y, dim=1, keepdim=True)
        tensor([[nan],
                [nan]])

    .. warning::
        The calculation may have reduced precision (approximately half-precision)
        when dealing with many NaN values due to memory efficiency optimizations.
        For higher precision with many NaNs, consider using CUDA kernels via
        PyTorch JIT compilation.

    .. seealso::
        - :func:`covar`: Covariance without NaN handling.
        - :func:`nancorrel`: NaN-aware correlation function.
        - :func:`nanmean`: NaN-aware mean function.
    """
    # Improve the precision by subtracting their averages first.  Covariance
    # is shift-invariant, so this does not change the result; it only keeps
    # the magnitudes of the terms below small.
    x = x - nanmean(x, dim=dim, keepdim=True)
    y = y - nanmean(y, dim=dim, keepdim=True)

    # The shifts above use each tensor's own valid entries, which can differ
    # from the set of valid *pairs*, so the pairwise first moments are not
    # necessarily zero and must still be subtracted.
    #
    # NOTE(review): assumes `_ddof` makes nanmulmean divide the pairwise sum
    # by (N - _ddof), N being the number of valid pairs -- confirm against
    # nanmulmean.
    #
    # With S_x, S_y, S_xy the sums over valid pairs:
    #   Cov = (S_xy - S_x * S_y / N) / (N - ddof)
    #       = S_xy / (N - ddof) - (S_x / (N - ddof)) * (S_y / N)
    # Hence exactly one of the two first-moment factors carries the ddof
    # correction.  Applying it to both would scale the cross term by
    # 1 / (N - ddof)^2 instead of 1 / (N * (N - ddof)) and bias the result
    # whenever ddof != 0.
    mx = nanmulmean(x, nanones(y), dim=dim, keepdim=keepdim, _ddof=ddof)
    my = nanmulmean(nanones(x), y, dim=dim, keepdim=keepdim, _ddof=0)
    mxy = nanmulmean(x, y, dim=dim, keepdim=keepdim, _ddof=ddof)
    # Cast back to the dtype/device of the (centered) x.
    return (mxy - mx * my).to(x)