Source code for stats_misc.meta_analysis

'''
A module to calculated weighted averages of point estimates, using fixed
effect or random effects methods. Includes various estimates of heterogeneity.
'''

# imports
import warnings
import numpy as np
from typing import Any, List, Union, Tuple, Optional, Dict, Optional
from scipy.stats import chi2
from scipy.stats import norm
from stats_misc.constants import (
    Error_MSG,
    is_type,
    InputValidationError,
    NamesMetaAnalysis as NamesMA,
    string_join_delimiter,
)

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# constants
EXPECTED_TAU_METHODS = [NamesMA.TAU_METHOD_DL, NamesMA.TAU_METHOD_CA,
                        NamesMA.TAU_METHOD_MM_IT, NamesMA.TAU_METHOD_CA2,
                        NamesMA.TAU_METHOD_DL2, NamesMA.TAU_METHOD_PM_IT,
                        ]
MSG1 = Error_MSG.DIFF_LENGTHS
MSG2 = Error_MSG.INCORRECT_STRING_INPUT

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs] class HeterogeneityResults(object): ''' A results objects for the heterogeneity function. Attributes ---------- q_statistic : `float` The Q-statistic. q_pvalue : `float` The Q-statistic p-value. i_squared : `float` The I-squared heterogeneity statistic. i_squared_ci : `tuple` [`float`] The I-squared confidence interval. i_squared_ci_coverage : `float` The coverage of the I-squared confidence interval. tau_squared : `float` The Tau-squared heterogeneity statistic. ''' SET_ARGS = [ NamesMA.QSTAT, NamesMA.QPVAL, NamesMA.ISQR, NamesMA.ISQR_CI, NamesMA.ISQR_CI_COV, NamesMA.TSQR, ] # Initiation the class def __init__(self, **kwargs): """ Initialise """ for k in kwargs.keys(): if k not in self.__class__.SET_ARGS: raise AttributeError("unrecognised argument '{0}'".format(k)) # Loops over `SET_ARGS`, assigns the kwargs content to name `s`. # if argument is missing in kwargs, print a warning. for s in self.__class__.SET_ARGS: try: setattr(self, s, kwargs[s]) except KeyError: warnings.warn("argument '{0}' is set to 'None'".format(s)) setattr(self, s, None) # ///////////////////////////////////////////////////////////////////////// def __str__(self): return f"An `Heterogeneity` results class."
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Tau-square estimates def _tau_iter_pm(estimates:List[float], standard_errors:List[float], tau_start:float=0, atol:float=1e-5, maxiter:int=50, ) -> Tuple[float, float]: """ The Paule-Mandel (PM) estimator of the between estimates/study variance [1]_. This repeatedly estimates tau-squared, stopping when there is less a than `atol` change in tau. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] A list of standard errors of equal length as `estimates`. tau_start : `float`, default 0 starting value of tau-squared for iteration. atol : `float`, default 1e-5 convergence tolerance for change in tau-squared estimate between iterations. maxiter : `int`, default 50 maximum number of iterations. Returns ------- tau-squared : `float` estimate of random effects variance tau squared converged : `bool` True if algorithm has converged. Notes ----- This code was addapted from `statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L628>`_. Hash: d5b11ca References ---------- .. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114. doi:10.1016/j.cct.2006.04.004 """ # #### check input is_type(estimates, list, 'estimates') is_type(standard_errors, list, 'standard_errors') is_type(tau_start, (int, float), 'tau_start') is_type(atol, (int, float), 'atol') is_type(maxiter, int, 'maxiter') if len(estimates) != len(standard_errors): raise InputValidationError(MSG1.format('estimates', 'standard_errors', len(estimates), len(standard_errors), )) tausquared = tau_start v = [s**2 for s in standard_errors] # #### iteration converged = False for _ in range(maxiter): w = np.array([1/(i + tausquared) for i in v]) m = w.dot(estimates) / w.sum(0) squared_res = [(e-m)**2 for e in estimates] q_w = w.dot(squared_res) # F(tausquard_[-i]) estimating equation ee = q_w - (len(estimates) - 1) # if negative set tau to zero if ee < 0: tausquared = 0 converged = True break # if close enough to 0 stop if np.allclose(ee, 0, atol=atol): converged = True break # update tausquared delta = ee / (w**2).dot(squared_res) tausquared += delta # return return tausquared, converged # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ def _tau_mm(estimates:List[float], standard_errors:List[float], weights:List[float] ) -> float: """ Method of moment estimator of the between study random effect variance. This can be used to implement multiple methods, see `Notes` and the `estimate_tau` function. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] A list of standard errors of equal length as `estimates`. weights : `list` [`float`] The weights used to calculate the overall point estimates using a weighted average. Returns ------- tausquared : `float` estimate of random effects variance tau squared Notes ----- This code was addapted from `statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L713C1-L754C1>`_. Hash: d5b11ca The code is based on [1]_ and implements equation 6, where depending on the weights on can use the following tau estimators: Cochrane ANOVA (CA), DerSimonian and Laird (DL), CA2 (the two-stage CA estimator), and the DL2 (the two stage DL estimator). | Estimator | Weights | Notes | | --------- | -------------------------- | -------------------- | | CA | 1/k | for k studies | | DL | 1/(s_i)**2 | for standard error s | | CA2 | 1/(_tau_mm(CA) + (s_i)**2) | - | | DL2 | 1/(_tau_mm(DL) + (s_i)**2) | - | | --------- | -------------------------- | -------------------- | References ---------- .. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114. doi:10.1016/j.cct.2006.04.004 """ # #### test input is_type(estimates, list, 'estimates') is_type(standard_errors, list, 'standard_errors') is_type(weights, list, 'weights') if len(estimates) != len(standard_errors): raise InputValidationError(MSG1.format('estimates', 'standard_errors', len(estimates), len(standard_errors), )) if len(weights) != len(standard_errors): raise InputValidationError(MSG1.format('weights', 'standard_errors', len(weights), len(standard_errors), )) # ### calculations v = [s**2 for s in standard_errors] w = np.array(weights) # weighted estimate m = w.dot(estimates)/w.sum(0) # squared residuals resid_sq = [(e - m)**2 for e in estimates] q_w = w.dot(resid_sq) w_t = w.sum() expect = w.dot(v) - (w**2).dot(v)/w_t denom = w_t - (w**2).sum()/w_t # moment estimate from estimating equation tau2 = (q_w - expect)/denom # return tau return tau2 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ def _tau_iter_mm(estimates:List[float], standard_errors:List[float], tau_start:float=0, atol:float=1e-5, maxiter:int=50, ) -> Tuple[float, float]: """ The general method of moment (MM) estimate of between estimates/study variance [1]_. This repeatedly estimates tau-squared, stopping when there is less a than `atol` change in tau. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] A list of standard errors of equal length as `estimates`. tau_start : `float`, default 0 starting value of tau-squared for iteration. atol : `float`, default 1e-5 convergence tolerance for change in tau-squared estimate between iterations. maxiter : `int`, default 50 maximum number of iterations. Returns ------- tau-squared : `float` estimate of random effects variance tau squared converged : `bool` True if algorithm has converged. Notes ----- This code was addapted from `statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L713>`_. Hash: d5b11ca References ---------- .. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114. doi:10.1016/j.cct.2006.04.004 """ # #### check input is_type(estimates, list, 'estimates') is_type(standard_errors, list, 'standard_errors') is_type(tau_start, (int, float), 'tau_start') is_type(atol, (int, float), 'atol') is_type(maxiter, int, 'maxiter') if len(estimates) != len(standard_errors): raise InputValidationError(MSG1.format('estimates', 'standard_errors', len(estimates), len(standard_errors), )) tausquared = tau_start v = [s**2 for s in standard_errors] # #### iterating converged = False for _ in range(maxiter): w = [1/(i + tausquared) for i in v] tau_new = _tau_mm(estimates, standard_errors, w) tau_new = max(0, tau_new) delta = tau_new - tausquared # if close enough to 0 stop if np.allclose(delta, 0, atol=atol): converged = True break # final result tausquared = tau_new # return return tausquared, converged # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs] def estimate_tau(estimates:List[float], standard_errors:List[float], method:str='mm-it', **kwargs:Optional[Any]) -> float: ''' Calculate the between study/estimate variance. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] A list of standard errors of equal length as `estimates`. method : `str`, default `dl2` Which methods of moments (MM) estimator to use: - dl : DerSimonian and Laird (DL) estimator, - ca : Cochrane ANOVA (CA) estimator, - dl2 : The two-stage DL estimator, - ca2 : The two-stage CA estimator, - mm-it : The general MM iterative estimator, - pm-it : The Paule Mandel iterative estimator. **kwargs : Optional keyword argument supplied to _tau_iter_mm or _tau_iter_pm . Returns ------- tausquared : `float` The tau-sqaured estimate. References ---------- .. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114. doi:10.1016/j.cct.2006.04.004 ''' # #### confirm input is_type(estimates, list) is_type(standard_errors, list) is_type(method, str) if len(estimates) != len(standard_errors): raise InputValidationError(MSG1.format('estimates', 'standard_errors', len(estimates), len(standard_errors), )) # ensure the same format method = method.lower() if method not in EXPECTED_TAU_METHODS: raise ValueError(Error_MSG.INCORRECT_STRING_INPUT.method('method', string_join_delimiter(EXPECTED_TAU_METHODS))) # #### perform calculations if method == NamesMA.TAU_METHOD_CA: w = [1/len(estimates)] * len(estimates) tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w) elif method == NamesMA.TAU_METHOD_DL: w = [1/s**2 for s in standard_errors] tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w) elif method == NamesMA.TAU_METHOD_CA2: w = [1/len(estimates)] * len(estimates) tau2_temp = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w) w2 = [1/(tau2_temp + s**2) for s in standard_errors] # the final tau estimate tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w2) elif method == NamesMA.TAU_METHOD_DL2: w = [1/s**2 for s in standard_errors] tau2_temp = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w) w2 = [1/(tau2_temp + s**2) for s in standard_errors] # the final tau estimate tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors, weights=w2) elif method == NamesMA.TAU_METHOD_MM_IT: tau2, converged = _tau_iter_mm(estimates=estimates, standard_errors=standard_errors, **kwargs) if converged == False: warnings.warn( Error_MSG.NON_CONVERGENCE.format(NamesMA.TAU_METHOD_MM_IT)) elif method == NamesMA.TAU_METHOD_PM_IT: tau2, converged = _tau_iter_pm(estimates=estimates, standard_errors=standard_errors, **kwargs) if converged == False: warnings.warn( Error_MSG.NON_CONVERGENCE.format(NamesMA.TAU_METHOD_PM_IT)) # #### return return tau2
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Q-test
[docs] def heterogeneity(estimates:List[float], standard_errors:List[float], overall_estimate:Union[float, int], alpha:float=0.05, tau2:Optional[Any]=None, ) -> HeterogeneityResults: ''' Provides heterogeneity estimates (Q-test, I-squared, and Tau-square) [2]_, determining too what extent the individual `estimates` are distinct from the `overall_estimate`, accounting for difference in precision based on the supplied estimate specific standard_errors. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] A list of standard errors of equal length as `estimates`. overall_estimate : `float` or `int` The overall estimate, for example based on a meta-analysis of the point estimates of `estimates`. alpha : `float`, default 0.05 The alpha for the (1-alpha/2)*100% confidence interval. tau2 : `float`, optional A possible external estimate of the tausquard used in the random effects estimator. If `NoneType` a non-iterative MM estimate will be used. Returns ------- results : HeterogeneityResults An instance of the heterogeneity results class. References ---------- .. [2] Higgins JP, Thompson SG. Quantifying heterogeneity in a meta-analysis. Stat Med. 2002;21(11):1539-1558. doi:10.1002/sim.1186 Notes ----- Heterogeneity will be internally evaluated using a Chi-square distribution with `len(estimates) - 1` degrees of freedom. The heterogeneity estimates are based on [2]_, specifically the tau-squared is estimated using the DerSimonian and Laird method of moments method (without iteration). ''' # #### check input is_type(estimates, list, 'estimates') is_type(standard_errors, list, 'standard_errors') is_type(overall_estimate, (int, float), 'overall_estimate') is_type(alpha, float, 'alpha') if len(estimates) != len(standard_errors): raise InputValidationError(MSG1.format('estimates', 'standard_errors', len(estimates), len(standard_errors), )) # is alpha between 0 and 1 if alpha <= 0 or alpha >= 1: raise ValueError(Error_MSG.FLOAT_LIMITS.format('alpha', 0, 1)) # #### Calculate the Q-test k = len(estimates) - 1 weights = [1/s**2 for s in standard_errors] qstatistic = sum( [w * (b-overall_estimate)**2 for w,b in zip(weights, estimates)] ) # if there is only one study set to 1 if k == 0: qpvalue = 1 else: qpvalue = 1-chi2.cdf(qstatistic, k) # #### Calcualte the I-squared if qstatistic <= k: isquared = 0 else: isquared = 100*(qstatistic-k)/qstatistic # get confidence intervals H = qstatistic/k H_se = 0.5*(np.log(qstatistic) - np.log(k))/\ (np.sqrt(2*qstatistic) - np.sqrt(2*k-2)) H_ub = np.exp(np.log(H) + norm.ppf(1-alpha/2)*H_se) H_lb = np.exp(np.log(H) - norm.ppf(1-alpha/2)*H_se) isquared_ub = 100*(H_ub-1)/H_ub isquared_lb = 100*(H_lb-1)/H_lb # truncate if need - this is really poor, but apparently standard if isquared_ub < 0: isquared_ub = 0 if isquared_lb < 0: isquared_lb = 0 if isquared_ub > 100: isquared_ub = 100 if isquared_lb > 100: isquared_lb = 100 # #### Calcualte the Tau-squared tausquared = tau2 if tau2 is None: ctau = sum(weights) - sum(w**2 for w in weights)/sum(weights) tausquared = (qstatistic - k)/ctau if tausquared < 0: tausquared = 0 # ### results results = {NamesMA.QSTAT: qstatistic, NamesMA.QPVAL: qpvalue, NamesMA.ISQR: isquared, NamesMA.ISQR_CI: (isquared_lb, isquared_ub), NamesMA.ISQR_CI_COV: 1-alpha, NamesMA.TSQR: tausquared, } return HeterogeneityResults(**results)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs] def fixed_effect(estimates:List[float], standard_errors:List[float], ) -> Tuple[float, float]: ''' Calculated an average, with the contribution of each element in estimates weighted by the inverse of the squared standard error of the estimate. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] A list of standard errors of equal length as `estimates`. Returns ------- estimate : `float` The average estimate, standard_error : `float` The standard error of estimate ''' # #### check input is_type(estimates, list, 'estimates') is_type(standard_errors, list, 'standard_errors') # #### Calculations w = [1/s**2 for s in standard_errors] # weighted average b_pool = sum(e*w for e,w in zip(estimates, w))/np.sum(w) # weighted standard error se_pool = np.sqrt(1/np.sum(w)) # return stuff return b_pool, se_pool
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs] def random_effects(estimates:List[float], standard_errors:List[float], between_estimate_variance: float, ) -> Tuple[float, float]: ''' Calculated an average, with the contribution of each element in estimates weighted by the inverse of the squared standard error of the estimate plus an estimate of the between `estimate` variance. This is equivalent to a fixed effect meta-analysis, which assumes the between `study` variance is zero. Parameters ---------- estimates : `list` [`float`] A list of point estimates (e.g., mean differences or log odds ratios). standard_errors : `list` [`float`] A list of standard errors of equal length as `estimates`. between_estimate_variance `float` The between estimate variance, often referred to as the tau-squared. Can be estimated using the `Heterogeneity` function. Returns ------- estimate : `float` The average estimate, standard_error : `float` The standard error of estimate ''' # #### check input is_type(estimates, list, 'estimates') is_type(standard_errors, list, 'standard_errors') is_type(between_estimate_variance, (int, float), 'between_estimate_variance') # #### Calculations # updates the standard error to include the between_estimate_variance s_random = [np.sqrt(s**2 + between_estimate_variance)\ for s in standard_errors] # supply this to the regular fixed effect function estimate, standard_error = fixed_effect(estimates=estimates, standard_errors=s_random, ) # return return estimate, standard_error