Source code for qfeval_functions.functions.nancovar

import torch

from .nanmean import nanmean
from .nanmulmean import nanmulmean
from .nanones import nanones


def nancovar(
    x: torch.Tensor,
    y: torch.Tensor,
    dim: int = -1,
    keepdim: bool = False,
    ddof: int = 1,
) -> torch.Tensor:
    r"""Compute covariance between two tensors, ignoring NaN values.

    This function calculates the covariance between tensors :attr:`x` and
    :attr:`y` along the specified dimension, excluding any pairs where either
    value is NaN.  Covariance measures how much two variables change
    together.  Unlike ``numpy.cov``, this function computes element-wise
    covariance for each batch index rather than producing a covariance
    matrix.

    The function is memory-efficient when broadcasting tensors but may have
    reduced precision (approximately half-precision) when dealing with many
    NaN values due to the prioritization of memory efficiency over numerical
    precision.

    The NaN-aware covariance is computed as:

    .. math::
        \text{Cov}(X, Y) =
            \frac{1}{N - \text{ddof}}
            \sum_{i \in V} (X_i - \mu_X)(Y_i - \mu_Y)

    where :math:`V` is the set of valid pairs (neither value is NaN),
    :math:`N = |V|`, and :math:`\mu_X`, :math:`\mu_Y` are the means of
    :math:`X` and :math:`Y` over :math:`V`.

    Args:
        x (Tensor):
            The first input tensor.
        y (Tensor):
            The second input tensor.  Must be broadcastable with :attr:`x`.
        dim (int, optional):
            The dimension along which to compute the covariance.
            Default is -1 (the last dimension).
        keepdim (bool, optional):
            Whether the output tensor has :attr:`dim` retained or not.
            Default is False.
        ddof (int, optional):
            Delta degrees of freedom.  The divisor used in the calculation is
            ``N - ddof``, where ``N`` represents the number of valid
            (non-NaN) pairs.  Default is 1.

    Returns:
        Tensor:
            The covariance values computed only over valid (non-NaN) pairs.
            The shape depends on the input dimensions and the
            :attr:`keepdim` parameter.

    Example:

        >>> # Simple covariance with NaN values
        >>> x = torch.tensor([1.0, 2.0, nan, 4.0, 5.0])
        >>> y = torch.tensor([2.0, 4.0, 6.0, nan, 10.0])
        >>> QF.nancovar(x, y, dim=0)
        tensor(8.6667)

        >>> # 2D tensors with NaN values
        >>> x = torch.tensor([[1.0, nan, 3.0, 4.0],
        ...                   [5.0, 6.0, nan, 8.0]])
        >>> y = torch.tensor([[2.0, 4.0, 6.5, nan],
        ...                   [10.0, nan, 14.0, 16.0]])
        >>> QF.nancovar(x, y, dim=1)
        tensor([4.5000, 9.0000])

        >>> # Population covariance (ddof=0)
        >>> x = torch.tensor([1.0, nan, 3.0, 4.0])
        >>> y = torch.tensor([2.0, 4.0, nan, 8.0])
        >>> QF.nancovar(x, y, dim=0, ddof=0)
        tensor(4.5000)

        >>> # With keepdim (a single valid pair per row and ddof=1 -> nan)
        >>> x = torch.tensor([[1.0, 2.0, nan],
        ...                   [4.0, nan, 6.0]])
        >>> y = torch.tensor([[2.0, nan, 6.0],
        ...                   [8.0, 10.0, nan]])
        >>> QF.nancovar(x, y, dim=1, keepdim=True)
        tensor([[nan],
                [nan]])

    .. warning::
        The calculation may have reduced precision (approximately
        half-precision) when dealing with many NaN values due to memory
        efficiency optimizations.  For higher precision with many NaNs,
        consider using CUDA kernels via PyTorch JIT compilation.

    .. seealso::
        - :func:`covar`: Covariance without NaN handling.
        - :func:`nancorrel`: NaN-aware correlation function.
        - :func:`nanmean`: NaN-aware mean function.
    """
    # Improve the precision by subtracting their averages first.  These
    # averages are taken over each tensor's own non-NaN entries, not over the
    # jointly valid pairs, so the centered values can still have a non-zero
    # mean over the valid pairs; the correction term below removes it.
    x = x - nanmean(x, dim=dim, keepdim=True)
    y = y - nanmean(y, dim=dim, keepdim=True)

    # With N valid pairs (sums below run over valid pairs only):
    #   sxy = sum(x * y) / (N - ddof)
    #   sx  = sum(x)     / (N - ddof)
    #   my  = sum(y)     / N
    # NOTE(review): this relies on nanmulmean dividing the pairwise-valid sum
    # by (N - _ddof) -- confirm against nanmulmean.
    sxy = nanmulmean(x, y, dim=dim, keepdim=keepdim, _ddof=ddof)
    sx = nanmulmean(x, nanones(y), dim=dim, keepdim=keepdim, _ddof=ddof)
    # Deliberately a plain mean (_ddof=0): the correction term must be
    # sum(x) * sum(y) / (N * (N - ddof)).  Applying ddof to both marginal
    # means would divide by (N - ddof) ** 2 and bias the result whenever
    # ddof != 0.
    my = nanmulmean(nanones(x), y, dim=dim, keepdim=keepdim, _ddof=0)

    # NOTE: sum((x - mx)(y - my)) / (N - ddof)
    #       = sum(xy) / (N - ddof) - sum(x) * sum(y) / (N * (N - ddof))
    return (sxy - sx * my).to(x)