Source code for qfeval_functions.functions.nancorrel

import math
import typing

import torch

from .nanmean import nanmean
from .nanmulmean import nanmulmean
from .nansum import nansum


def nancorrel(
    x: torch.Tensor,
    y: torch.Tensor,
    dim: typing.Union[int, typing.Tuple[int, ...]] = (),
    keepdim: bool = False,
) -> torch.Tensor:
    r"""Pearson correlation of two tensors with pairwise NaN exclusion.

    Measures the linear relationship between :attr:`x` and :attr:`y` along
    :attr:`dim`.  A position contributes to the statistic only when *both*
    tensors hold a non-NaN value there; if either side is NaN the whole pair
    is dropped.  The coefficient lies between -1 (perfect negative
    correlation) and 1 (perfect positive correlation), with 0 meaning no
    linear correlation.  This is the missing-data-tolerant counterpart of
    :func:`correl`.

    The quantity computed is:

    .. math::
        r = \frac{\text{E}[(X - \mu_X)(Y - \mu_Y)]}{
            \sqrt{\text{E}[(X - \mu_X)^2]}\sqrt{\text{E}[(Y - \mu_Y)^2]}}

    where every expectation runs over the valid (non-NaN) pairs only.

    Args:
        x (Tensor):
            First input tensor.
        y (Tensor):
            Second input tensor, expected to have the same shape as
            :attr:`x`.
        dim (int or tuple of ints, optional):
            Dimension(s) along which the correlation is taken.  The value is
            forwarded unchanged to :func:`nanmean`, :func:`nanmulmean` and
            :func:`nansum`, so the meaning of the default ``()`` is whatever
            those helpers define.
            NOTE(review): presumably ``()`` reduces over every dimension, as
            with torch reductions - confirm against :func:`nanmean`.
        keepdim (bool, optional):
            Whether the output tensor retains :attr:`dim`.  Default is
            ``False``.

    Returns:
        Tensor:
            Correlation coefficient(s) computed from the valid pairs.  The
            shape follows from the input dimensions and :attr:`keepdim`.

    Example:

        >>> # Perfect positive correlation with some NaNs
        >>> x = torch.tensor([1.0, 2.0, nan, 4.0, 5.0])
        >>> y = torch.tensor([2.0, 4.0, 6.0, 8.0, nan])
        >>> QF.nancorrel(x, y, dim=0)
        tensor(1.)

        >>> # 2D tensors with NaN values
        >>> x = torch.tensor([[1.0, nan, 3.0],
        ...                   [4.0, 5.0, nan]])
        >>> y = torch.tensor([[2.0, 4.0, 5.0],
        ...                   [nan, 10.0, 12.0]])
        >>> QF.nancorrel(x, y, dim=1)
        tensor([1., nan])

        >>> # Comparison with regular correl (which would give NaN)
        >>> x_with_nan = torch.tensor([1.0, 2.0, nan, 4.0])
        >>> y_with_nan = torch.tensor([2.0, 4.0, 6.0, 8.0])
        >>> QF.nancorrel(x_with_nan, y_with_nan, dim=0)  # Ignores NaN
        tensor(1.)

        >>> # With keepdim
        >>> x = torch.tensor([[1.0, nan, 3.0],
        ...                   [4.0, 5.0, 6.0]])
        >>> y = torch.tensor([[2.0, 4.0, 6.0],
        ...                   [8.0, 10.0, 12.0]])
        >>> QF.nancorrel(x, y, dim=1, keepdim=True)
        tensor([[1.],
                [1.]])

    .. seealso::
        - :func:`correl`: Pearson correlation without NaN handling.
        - :func:`nancovar`: NaN-aware covariance function.
        - :func:`nanmean`: NaN-aware mean function.
    """
    # A pair is unusable as soon as one member is missing, so the same mask
    # is applied to both operands; otherwise the two means would be taken
    # over different sets of positions.
    invalid_pair = x.isnan() | y.isnan()
    nan_for_x = torch.as_tensor(math.nan).to(x)
    nan_for_y = torch.as_tensor(math.nan).to(y)
    x = torch.where(invalid_pair, nan_for_x, x)
    y = torch.where(invalid_pair, nan_for_y, y)

    # Deviations from the NaN-aware means.  keepdim=True keeps the means
    # broadcastable against the inputs.
    x_dev = x - nanmean(x, dim=dim, keepdim=True)
    y_dev = y - nanmean(y, dim=dim, keepdim=True)

    # Numerator (covariance) and the two variances of the Pearson formula.
    covariance = nanmulmean(x_dev, y_dev, dim=dim, keepdim=True)
    x_variance = nanmean(x_dev ** 2, dim=dim, keepdim=True)
    y_variance = nanmean(y_dev ** 2, dim=dim, keepdim=True)

    coefficient = covariance / x_variance.sqrt() / y_variance.sqrt()

    # Every intermediate was built with keepdim=True; the closing nansum over
    # the same dim is where the caller's keepdim choice takes effect.
    return nansum(coefficient, dim=dim, keepdim=keepdim)