Source code for stats_misc.meta_analysis

"""
Meta-analysis estimators for weighted averages and heterogeneity assessment.

This module provides methods for calculating weighted averages of point
estimates using fixed-effect and random-effects models. It includes
implementations of standard estimators for between-study variance
(tau-squared), Q-tests for heterogeneity, and I-squared statistics.
Iterative and non-iterative estimators are supported, including
DerSimonian-Laird, Cochrane ANOVA, Paule-Mandel, and method of moments.

Classes
-------
HeterogeneityResults
    A results container for heterogeneity estimates including Q-statistic,
    I-squared, and tau-squared values.

Functions
---------
fixed_effect(estimates, standard_errors)
    Computes a fixed-effect meta-analytic estimate and standard error.

random_effects(estimates, standard_errors, between_estimate_variance)
    Computes a random-effects meta-analytic estimate and standard error.

heterogeneity(estimates, standard_errors, overall_estimate, alpha=0.05,
              tau2=None)
    Computes heterogeneity measures: Q-test, I-squared, and tau-squared.

estimate_tau(estimates, standard_errors, method='mm-it', **kwargs)
    Estimates the between-study variance (tau-squared) using various methods.

Notes
-----
These implementations are partially adapted from `statsmodels`, with
adjustments for flexibility and reproducibility.
"""

import warnings
import numpy as np
from typing import Any, Literal
from scipy.stats import (
    chi2,
    norm,
)
from stats_misc.constants import (
    Error_MSG,
    NamesMetaAnalysis as NamesMA,
    CLASS_NAME,
)
from stats_misc.errors import (
    is_type,
    string_join_delimiter,
    same_len,
    check_limits,
)
from stats_misc.utils.helpers import (
    Results,
)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# constants
EXPECTED_TAU_METHODS = [NamesMA.TAU_METHOD_DL, NamesMA.TAU_METHOD_CA,
                        NamesMA.TAU_METHOD_MM_IT, NamesMA.TAU_METHOD_CA2,
                        NamesMA.TAU_METHOD_DL2, NamesMA.TAU_METHOD_PM_IT,
                        ]
MSG1 = Error_MSG.DIFF_LENGTHS
MSG2 = Error_MSG.INCORRECT_STRING_INPUT

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs] class HeterogeneityResults(Results): """ A results container for heterogeneity estimates. Attributes ---------- q_statistic : `float` The Q-statistic. q_pvalue : `float` The Q-statistic p-value. i_squared : `float` The I-squared heterogeneity statistic. i_squared_ci : `tuple` [`float`] The I-squared confidence interval. i_squared_ci_coverage : `float` The confidence level of the I-squared interval. tau_squared : `float` The tau-squared heterogeneity statistic. """ SET_ARGS = [ CLASS_NAME, NamesMA.QSTAT, NamesMA.QPVAL, NamesMA.ISQR, NamesMA.ISQR_CI, NamesMA.ISQR_CI_COV, NamesMA.TSQR, ] def __init__(self, **kwargs) -> None: super().__init__(set_args=self.SET_ARGS, **kwargs)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Tau-square estimates def _tau_iter_pm(estimates:list[float], standard_errors:list[float], tau_start:float=0, atol:float=1e-5, maxiter:int=50, ) -> tuple[float, bool]: """ The Paule-Mandel (PM) iterative estimator of the between-study variance. This method estimates tau-squared by iteratively solving the estimating equation until convergence. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] Corresponding standard errors for each estimate. tau_start : `float`, default 0 Starting value of the tau-squared. atol : `float`, default 1e-5 Convergence tolerance. Itteration will terminate when the change is smaller than `atol`. maxiter : `int`, default 50 Maximum number of iterations. Returns ------- float Estimated tau-squared. bool Whether the algorithm converged. Notes ----- See DerSimonian and Kacker 2007 [1]_. This code was addapted from `statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L628>`_. Hash: d5b11ca References ---------- .. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114. doi:10.1016/j.cct.2006.04.004 """ # #### check input is_type(estimates, list) is_type(standard_errors, list) is_type(tau_start, (int, float)) is_type(atol, (int, float)) is_type(maxiter, int) same_len(estimates, standard_errors, ['estimates', 'standard_errors']) tausquared = tau_start v = [s**2 for s in standard_errors] # #### iteration converged = False for _ in range(maxiter): w = np.array([1/(i + tausquared) for i in v]) m = w.dot(estimates) / w.sum(0) squared_res = [(e-m)**2 for e in estimates] q_w = w.dot(squared_res) # F(tausquard_[-i]) estimating equation ee = q_w - (len(estimates) - 1) # if negative set tau to zero if ee < 0: tausquared = 0 converged = True break # if close enough to 0 stop if np.allclose(ee, 0, atol=atol): converged = True break # update tausquared delta = ee / (w**2).dot(squared_res) tausquared += delta # return return tausquared, converged # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ def _tau_mm(estimates:list[float], standard_errors:list[float], weights:list[float] ) -> float: """ Method of moments estimator for the tau-squared (i.e. the between study variance) Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] Corresponding standard errors for each estimate. weights : `list` [`float`] The weights used to calculate the overall point estimates using a weighted average. Returns ------- float Estimated tau-squared. Notes ----- This code was addapted from `statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L713C1-L754C1>`_. Hash: d5b11ca The code is based on [1]_ and implements equation 6, where depending on the weights on can use the following tau estimators: Cochrane ANOVA (CA), DerSimonian and Laird (DL), CA2 (the two-stage CA estimator), and the DL2 (the two stage DL estimator). | Estimator | Weights | Notes | | --------- | -------------------------- | -------------------- | | CA | 1/k | for k studies | | DL | 1/(s_i)**2 | for standard error s | | CA2 | 1/(_tau_mm(CA) + (s_i)**2) | - | | DL2 | 1/(_tau_mm(DL) + (s_i)**2) | - | | --------- | -------------------------- | -------------------- | References ---------- .. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114. doi:10.1016/j.cct.2006.04.004 """ # #### test input is_type(estimates, list) is_type(standard_errors, list) is_type(weights, list) same_len(estimates, standard_errors, ['estimates', 'standard_errors']) same_len(weights, standard_errors, ['weights', 'standard_errors']) # ### calculations v = [s**2 for s in standard_errors] w = np.array(weights) # weighted estimate m = w.dot(estimates)/w.sum(0) # squared residuals resid_sq = [(e - m)**2 for e in estimates] q_w = w.dot(resid_sq) w_t = w.sum() expect = w.dot(v) - (w**2).dot(v)/w_t denom = w_t - (w**2).sum()/w_t # moment estimate from estimating equation tau2 = (q_w - expect)/denom # return tau return tau2 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ def _tau_iter_mm(estimates:list[float], standard_errors:list[float], tau_start:float=0, atol:float=1e-5, maxiter:int=50, ) -> tuple[float, bool]: """ General method-of-moments iterative estimator of tau-squared. This approach updates tau-squared until convergence using MM equations. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] Corresponding standard errors for each estimate. tau_start : `float`, default 0 Starting value of the tau-squared. atol : `float`, default 1e-5 Convergence tolerance for change in tau-squared estimate between iterations. maxiter : `int`, default 50 Maximum number of iterations. Returns ------- float Estimated tau-squared. bool Whether the algorithm converged. Notes ----- See DerSimonian and Kacker 2007 [1]_. This code was addapted from `statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L713>`_. Hash: d5b11ca References ---------- .. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114. doi:10.1016/j.cct.2006.04.004 """ # #### check input is_type(estimates, list) is_type(standard_errors, list) is_type(tau_start, (int, float)) is_type(atol, (int, float)) is_type(maxiter, int) same_len(estimates, standard_errors, ['estimates', 'standard_errors']) tausquared = tau_start v = [s**2 for s in standard_errors] # #### iterating converged = False for _ in range(maxiter): w = [1/(i + tausquared) for i in v] tau_new = _tau_mm(estimates, standard_errors, w) tau_new = max(0, tau_new) delta = tau_new - tausquared # if close enough to 0 stop if np.allclose(delta, 0, atol=atol): converged = True break # final result tausquared = tau_new # return return tausquared, converged # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs] def estimate_tau( estimates:list[float], standard_errors:list[float], method:Literal['dl', 'ca', 'dl2', 'ca2', 'mm-it', 'pm-it']='mm-it', **kwargs: Any, ) -> float: """ Calculate the between study/estimate variance, see DerSimonian and Kacker 2007 [1]_. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] Corresponding standard errors for each estimate. method : {'dl', 'ca', 'dl2', 'ca2', 'mm-it', 'pm-it'}, default `dl2` Which methods of moments (MM) estimator to use: - dl : DerSimonian and Laird (DL) estimator, - ca : Cochrane ANOVA (CA) estimator, - dl2 : The two-stage DL estimator, - ca2 : The two-stage CA estimator, - mm-it : The general MM iterative estimator, - pm-it : The Paule Mandel iterative estimator. **kwargs : `any` Keyword arguments supplied to `_tau_iter_mm` or `_tau_iter_pm`. Returns ------- float Estimated tau-squared. References ---------- .. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114. doi:10.1016/j.cct.2006.04.004 """ # #### confirm input is_type(estimates, list) is_type(standard_errors, list) is_type(method, str) same_len(estimates, standard_errors, ['estimates', 'standard_errors']) # ensure the same format method = method.lower() tau2 = None if method not in EXPECTED_TAU_METHODS: raise ValueError(Error_MSG.INCORRECT_STRING_INPUT.method('method', string_join_delimiter(EXPECTED_TAU_METHODS))) # #### perform calculations if method == NamesMA.TAU_METHOD_CA: w = [1/len(estimates)] * len(estimates) tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w) elif method == NamesMA.TAU_METHOD_DL: w = [1/s**2 for s in standard_errors] tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w) elif method == NamesMA.TAU_METHOD_CA2: w = [1/len(estimates)] * len(estimates) tau2_temp = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w) w2 = [1/(tau2_temp + s**2) for s in standard_errors] # the final tau estimate tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w2) elif method == NamesMA.TAU_METHOD_DL2: w = [1/s**2 for s in standard_errors] tau2_temp = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w) w2 = [1/(tau2_temp + s**2) for s in standard_errors] # the final tau estimate tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w2) elif method == NamesMA.TAU_METHOD_MM_IT: tau2, converged = _tau_iter_mm(estimates=estimates, standard_errors=standard_errors, **kwargs) if converged == False: warnings.warn( Error_MSG.NON_CONVERGENCE.format(NamesMA.TAU_METHOD_MM_IT)) elif method == NamesMA.TAU_METHOD_PM_IT: tau2, converged = _tau_iter_pm(estimates=estimates, standard_errors=standard_errors, **kwargs) if converged == False: warnings.warn( Error_MSG.NON_CONVERGENCE.format(NamesMA.TAU_METHOD_PM_IT)) # #### return return tau2
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Q-test
[docs] def heterogeneity(estimates:list[float], standard_errors:list[float], overall_estimate:float | int, alpha:float=0.05, tau2:float | None=None, ) -> HeterogeneityResults: """ Provides heterogeneity estimates (Q-test, I-squared, and Tau-square) [2]_, determining too what extent the individual `estimates` are distinct from the `overall_estimate`, accounting for difference in precision based on the supplied estimate specific standard_errors. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] Corresponding standard errors for each estimate. overall_estimate : `float` or `int` The overall estimate, for example based on a meta-analysis of the point estimates. alpha : `float`, default 0.05 The alpha for the (1-alpha/2)*100% confidence interval. tau2 : `float`, optional A possible external estimate of the tausquard used in the random effects estimator. If `NoneType` a non-iterative MM estimate will be used. Returns ------- HeterogeneityResults Object containing heterogeneity statistics. References ---------- .. [2] Higgins JP, Thompson SG. Quantifying heterogeneity in a meta-analysis. Stat Med. 2002;21(11):1539-1558. doi:10.1002/sim.1186 Notes ----- Heterogeneity will be internally evaluated using a Chi-square distribution with `len(estimates) - 1` degrees of freedom. The heterogeneity estimates are based on [2]_, specifically the tau-squared is estimated using the DerSimonian and Laird method of moments method (without iteration). """ # #### check input is_type(estimates, list) is_type(standard_errors, list) is_type(overall_estimate, (int, float)) is_type(alpha, float) same_len(estimates, standard_errors, ['estimates', 'standard_errors']) # is alpha between 0 and 1 check_limits(alpha, 0.0, 1.0) # #### Calculate the Q-test k = len(estimates) - 1 weights = [1/s**2 for s in standard_errors] qstatistic = sum( [w * (b-overall_estimate)**2 for w,b in zip(weights, estimates)] ) # if there is only one study set to 1 if k == 0: qpvalue = 1 else: qpvalue = 1-chi2.cdf(qstatistic, k) # #### Calcualte the I-squared if qstatistic <= k: isquared = 0 else: isquared = 100*(qstatistic-k)/qstatistic # get confidence intervals H = qstatistic/k H_se = 0.5*(np.log(qstatistic) - np.log(k))/\ (np.sqrt(2*qstatistic) - np.sqrt(2*k-2)) H_ub = np.exp(np.log(H) + norm.ppf(1-alpha/2)*H_se) H_lb = np.exp(np.log(H) - norm.ppf(1-alpha/2)*H_se) isquared_ub = 100*(H_ub-1)/H_ub isquared_lb = 100*(H_lb-1)/H_lb # truncate if need - this is really poor, but apparently standard if isquared_ub < 0: isquared_ub = 0 if isquared_lb < 0: isquared_lb = 0 if isquared_ub > 100: isquared_ub = 100 if isquared_lb > 100: isquared_lb = 100 # #### Calcualte the Tau-squared tausquared = tau2 if tau2 is None: ctau = sum(weights) - sum(w**2 for w in weights)/sum(weights) tausquared = (qstatistic - k)/ctau if tausquared < 0: tausquared = 0 # ### results results = { CLASS_NAME : 'heterogeneity', NamesMA.QSTAT: qstatistic, NamesMA.QPVAL: qpvalue, NamesMA.ISQR: isquared, NamesMA.ISQR_CI: (isquared_lb, isquared_ub), NamesMA.ISQR_CI_COV: 1-alpha, NamesMA.TSQR: tausquared, } return HeterogeneityResults(**results)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs] def fixed_effect(estimates:list[float], standard_errors:list[float], ) -> tuple[float, float]: """ Compute fixed-effect meta-analysis estimate. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] Corresponding standard errors for each estimate. Returns ------- estimate : `float` The average estimate, standard_error : `float` The standard error of the average estimate. """ # #### check input is_type(estimates, list) is_type(standard_errors, list) # #### Calculations w = [1/s**2 for s in standard_errors] # weighted average b_pool = sum(e*w for e,w in zip(estimates, w))/np.sum(w) # weighted standard error se_pool = np.sqrt(1/np.sum(w)) # return stuff return b_pool, se_pool
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs] def random_effects(estimates:list[float], standard_errors:list[float], between_estimate_variance: float, ) -> tuple[float, float]: """ Compute random-effects meta-analysis estimate. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] Corresponding standard errors for each estimate. between_estimate_variance : `float` The between estimate variance, often referred to as the tau-squared. Can be estimated using the `Heterogeneity` function. Returns ------- estimate : `float` The average estimate, standard_error : `float` The standard error of the average estimate. Notes ----- Setting `between_estimate_variance` to zero will results in a fixed effect estimate. """ # #### check input is_type(estimates, list) is_type(standard_errors, list) is_type(between_estimate_variance, (int, float)) # #### Calculations # updates the standard error to include the between_estimate_variance s_random = [np.sqrt(s**2 + between_estimate_variance)\ for s in standard_errors] # supply this to the regular fixed effect function estimate, standard_error = fixed_effect(estimates=estimates, standard_errors=s_random, ) # return return estimate, standard_error