'''
A module to calculate weighted averages of point estimates, using fixed
effect or random effects methods. Includes various estimates of heterogeneity.
'''
# imports
import warnings
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
from scipy.stats import chi2
from scipy.stats import norm

from stats_misc.constants import (
    Error_MSG,
    is_type,
    InputValidationError,
    NamesMetaAnalysis as NamesMA,
    string_join_delimiter,
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# constants
EXPECTED_TAU_METHODS = [NamesMA.TAU_METHOD_DL, NamesMA.TAU_METHOD_CA,
NamesMA.TAU_METHOD_MM_IT, NamesMA.TAU_METHOD_CA2,
NamesMA.TAU_METHOD_DL2, NamesMA.TAU_METHOD_PM_IT,
]
MSG1 = Error_MSG.DIFF_LENGTHS
MSG2 = Error_MSG.INCORRECT_STRING_INPUT
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
class HeterogeneityResults(object):
'''
A results objects for the heterogeneity function.
Attributes
----------
q_statistic : `float`
The Q-statistic.
q_pvalue : `float`
The Q-statistic p-value.
i_squared : `float`
The I-squared heterogeneity statistic.
i_squared_ci : `tuple` [`float`]
The I-squared confidence interval.
i_squared_ci_coverage : `float`
The coverage of the I-squared confidence interval.
tau_squared : `float`
The Tau-squared heterogeneity statistic.
'''
SET_ARGS = [
NamesMA.QSTAT,
NamesMA.QPVAL,
NamesMA.ISQR,
NamesMA.ISQR_CI,
NamesMA.ISQR_CI_COV,
NamesMA.TSQR,
]
# Initiation the class
def __init__(self, **kwargs):
"""
Initialise
"""
for k in kwargs.keys():
if k not in self.__class__.SET_ARGS:
raise AttributeError("unrecognised argument '{0}'".format(k))
# Loops over `SET_ARGS`, assigns the kwargs content to name `s`.
# if argument is missing in kwargs, print a warning.
for s in self.__class__.SET_ARGS:
try:
setattr(self, s, kwargs[s])
except KeyError:
warnings.warn("argument '{0}' is set to 'None'".format(s))
setattr(self, s, None)
# /////////////////////////////////////////////////////////////////////////
def __str__(self):
return f"An `Heterogeneity` results class."
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Tau-square estimates
def _tau_iter_pm(estimates:List[float], standard_errors:List[float],
tau_start:float=0, atol:float=1e-5, maxiter:int=50,
) -> Tuple[float, float]:
"""
The Paule-Mandel (PM) estimator of the between estimates/study
variance [1]_. This repeatedly estimates tau-squared, stopping when there
is less a than `atol` change in tau.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
A list of standard errors of equal length as `estimates`.
tau_start : `float`, default 0
starting value of tau-squared for iteration.
atol : `float`, default 1e-5
convergence tolerance for change in tau-squared estimate between
iterations.
maxiter : `int`, default 50
maximum number of iterations.
Returns
-------
tau-squared : `float`
estimate of random effects variance tau squared
converged : `bool`
True if algorithm has converged.
Notes
-----
This code was addapted from
`statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L628>`_.
Hash: d5b11ca
References
----------
.. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of
clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114.
doi:10.1016/j.cct.2006.04.004
"""
# #### check input
is_type(estimates, list, 'estimates')
is_type(standard_errors, list, 'standard_errors')
is_type(tau_start, (int, float), 'tau_start')
is_type(atol, (int, float), 'atol')
is_type(maxiter, int, 'maxiter')
if len(estimates) != len(standard_errors):
raise InputValidationError(MSG1.format('estimates', 'standard_errors',
len(estimates),
len(standard_errors),
))
tausquared = tau_start
v = [s**2 for s in standard_errors]
# #### iteration
converged = False
for _ in range(maxiter):
w = np.array([1/(i + tausquared) for i in v])
m = w.dot(estimates) / w.sum(0)
squared_res = [(e-m)**2 for e in estimates]
q_w = w.dot(squared_res)
# F(tausquard_[-i]) estimating equation
ee = q_w - (len(estimates) - 1)
# if negative set tau to zero
if ee < 0:
tausquared = 0
converged = True
break
# if close enough to 0 stop
if np.allclose(ee, 0, atol=atol):
converged = True
break
# update tausquared
delta = ee / (w**2).dot(squared_res)
tausquared += delta
# return
return tausquared, converged
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def _tau_mm(estimates:List[float], standard_errors:List[float],
weights:List[float]
) -> float:
"""
Method of moment estimator of the between study random effect variance.
This can be used to implement multiple methods, see `Notes` and the
`estimate_tau` function.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
A list of standard errors of equal length as `estimates`.
weights : `list` [`float`]
The weights used to calculate the overall point estimates using
a weighted average.
Returns
-------
tausquared : `float`
estimate of random effects variance tau squared
Notes
-----
This code was addapted from
`statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L713C1-L754C1>`_.
Hash: d5b11ca
The code is based on [1]_ and implements equation 6, where depending on the
weights on can use the following tau estimators: Cochrane ANOVA (CA),
DerSimonian and Laird (DL), CA2 (the two-stage CA estimator), and the DL2
(the two stage DL estimator).
| Estimator | Weights | Notes |
| --------- | -------------------------- | -------------------- |
| CA | 1/k | for k studies |
| DL | 1/(s_i)**2 | for standard error s |
| CA2 | 1/(_tau_mm(CA) + (s_i)**2) | - |
| DL2 | 1/(_tau_mm(DL) + (s_i)**2) | - |
| --------- | -------------------------- | -------------------- |
References
----------
.. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of
clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114.
doi:10.1016/j.cct.2006.04.004
"""
# #### test input
is_type(estimates, list, 'estimates')
is_type(standard_errors, list, 'standard_errors')
is_type(weights, list, 'weights')
if len(estimates) != len(standard_errors):
raise InputValidationError(MSG1.format('estimates', 'standard_errors',
len(estimates),
len(standard_errors),
))
if len(weights) != len(standard_errors):
raise InputValidationError(MSG1.format('weights', 'standard_errors',
len(weights),
len(standard_errors),
))
# ### calculations
v = [s**2 for s in standard_errors]
w = np.array(weights)
# weighted estimate
m = w.dot(estimates)/w.sum(0)
# squared residuals
resid_sq = [(e - m)**2 for e in estimates]
q_w = w.dot(resid_sq)
w_t = w.sum()
expect = w.dot(v) - (w**2).dot(v)/w_t
denom = w_t - (w**2).sum()/w_t
# moment estimate from estimating equation
tau2 = (q_w - expect)/denom
# return tau
return tau2
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def _tau_iter_mm(estimates:List[float], standard_errors:List[float],
tau_start:float=0, atol:float=1e-5, maxiter:int=50,
) -> Tuple[float, float]:
"""
The general method of moment (MM) estimate of between estimates/study
variance [1]_.
This repeatedly estimates tau-squared, stopping when there is less a
than `atol` change in tau.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
A list of standard errors of equal length as `estimates`.
tau_start : `float`, default 0
starting value of tau-squared for iteration.
atol : `float`, default 1e-5
convergence tolerance for change in tau-squared estimate between
iterations.
maxiter : `int`, default 50
maximum number of iterations.
Returns
-------
tau-squared : `float`
estimate of random effects variance tau squared
converged : `bool`
True if algorithm has converged.
Notes
-----
This code was addapted from
`statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L713>`_.
Hash: d5b11ca
References
----------
.. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of
clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114.
doi:10.1016/j.cct.2006.04.004
"""
# #### check input
is_type(estimates, list, 'estimates')
is_type(standard_errors, list, 'standard_errors')
is_type(tau_start, (int, float), 'tau_start')
is_type(atol, (int, float), 'atol')
is_type(maxiter, int, 'maxiter')
if len(estimates) != len(standard_errors):
raise InputValidationError(MSG1.format('estimates', 'standard_errors',
len(estimates),
len(standard_errors),
))
tausquared = tau_start
v = [s**2 for s in standard_errors]
# #### iterating
converged = False
for _ in range(maxiter):
w = [1/(i + tausquared) for i in v]
tau_new = _tau_mm(estimates, standard_errors, w)
tau_new = max(0, tau_new)
delta = tau_new - tausquared
# if close enough to 0 stop
if np.allclose(delta, 0, atol=atol):
converged = True
break
# final result
tausquared = tau_new
# return
return tausquared, converged
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def estimate_tau(estimates:List[float], standard_errors:List[float],
method:str='mm-it', **kwargs:Optional[Any]) -> float:
'''
Calculate the between study/estimate variance.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
A list of standard errors of equal length as `estimates`.
method : `str`, default `dl2`
Which methods of moments (MM) estimator to use:
- dl : DerSimonian and Laird (DL) estimator,
- ca : Cochrane ANOVA (CA) estimator,
- dl2 : The two-stage DL estimator,
- ca2 : The two-stage CA estimator,
- mm-it : The general MM iterative estimator,
- pm-it : The Paule Mandel iterative estimator.
**kwargs : Optional
keyword argument supplied to _tau_iter_mm or _tau_iter_pm .
Returns
-------
tausquared : `float`
The tau-sqaured estimate.
References
----------
.. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of
clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114.
doi:10.1016/j.cct.2006.04.004
'''
# #### confirm input
is_type(estimates, list)
is_type(standard_errors, list)
is_type(method, str)
if len(estimates) != len(standard_errors):
raise InputValidationError(MSG1.format('estimates', 'standard_errors',
len(estimates),
len(standard_errors),
))
# ensure the same format
method = method.lower()
if method not in EXPECTED_TAU_METHODS:
raise ValueError(Error_MSG.INCORRECT_STRING_INPUT.method('method',
string_join_delimiter(EXPECTED_TAU_METHODS)))
# #### perform calculations
if method == NamesMA.TAU_METHOD_CA:
w = [1/len(estimates)] * len(estimates)
tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w)
elif method == NamesMA.TAU_METHOD_DL:
w = [1/s**2 for s in standard_errors]
tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w)
elif method == NamesMA.TAU_METHOD_CA2:
w = [1/len(estimates)] * len(estimates)
tau2_temp = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w)
w2 = [1/(tau2_temp + s**2) for s in standard_errors]
# the final tau estimate
tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w2)
elif method == NamesMA.TAU_METHOD_DL2:
w = [1/s**2 for s in standard_errors]
tau2_temp = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w)
w2 = [1/(tau2_temp + s**2) for s in standard_errors]
# the final tau estimate
tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w2)
elif method == NamesMA.TAU_METHOD_MM_IT:
tau2, converged = _tau_iter_mm(estimates=estimates,
standard_errors=standard_errors,
**kwargs)
if converged == False:
warnings.warn(
Error_MSG.NON_CONVERGENCE.format(NamesMA.TAU_METHOD_MM_IT))
elif method == NamesMA.TAU_METHOD_PM_IT:
tau2, converged = _tau_iter_pm(estimates=estimates,
standard_errors=standard_errors,
**kwargs)
if converged == False:
warnings.warn(
Error_MSG.NON_CONVERGENCE.format(NamesMA.TAU_METHOD_PM_IT))
# #### return
return tau2
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Q-test
[docs]
def heterogeneity(estimates:List[float], standard_errors:List[float],
overall_estimate:Union[float, int],
alpha:float=0.05,
tau2:Optional[Any]=None,
) -> HeterogeneityResults:
'''
Provides heterogeneity estimates (Q-test, I-squared, and Tau-square) [2]_,
determining too what extent the individual `estimates` are distinct from
the `overall_estimate`, accounting for difference in precision based on
the supplied estimate specific standard_errors.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
A list of standard errors of equal length as `estimates`.
overall_estimate : `float` or `int`
The overall estimate, for example based on a meta-analysis of the
point estimates of `estimates`.
alpha : `float`, default 0.05
The alpha for the (1-alpha/2)*100% confidence interval.
tau2 : `float`, optional
A possible external estimate of the tausquard used in the random effects
estimator. If `NoneType` a non-iterative MM estimate will be used.
Returns
-------
results : HeterogeneityResults
An instance of the heterogeneity results class.
References
----------
.. [2] Higgins JP, Thompson SG. Quantifying heterogeneity in a
meta-analysis. Stat Med. 2002;21(11):1539-1558. doi:10.1002/sim.1186
Notes
-----
Heterogeneity will be internally evaluated using a Chi-square distribution
with `len(estimates) - 1` degrees of freedom.
The heterogeneity estimates are based on [2]_, specifically the tau-squared
is estimated using the DerSimonian and Laird method of moments method
(without iteration).
'''
# #### check input
is_type(estimates, list, 'estimates')
is_type(standard_errors, list, 'standard_errors')
is_type(overall_estimate, (int, float), 'overall_estimate')
is_type(alpha, float, 'alpha')
if len(estimates) != len(standard_errors):
raise InputValidationError(MSG1.format('estimates', 'standard_errors',
len(estimates),
len(standard_errors),
))
# is alpha between 0 and 1
if alpha <= 0 or alpha >= 1:
raise ValueError(Error_MSG.FLOAT_LIMITS.format('alpha', 0, 1))
# #### Calculate the Q-test
k = len(estimates) - 1
weights = [1/s**2 for s in standard_errors]
qstatistic = sum(
[w * (b-overall_estimate)**2 for w,b in zip(weights, estimates)]
)
# if there is only one study set to 1
if k == 0:
qpvalue = 1
else:
qpvalue = 1-chi2.cdf(qstatistic, k)
# #### Calcualte the I-squared
if qstatistic <= k:
isquared = 0
else:
isquared = 100*(qstatistic-k)/qstatistic
# get confidence intervals
H = qstatistic/k
H_se = 0.5*(np.log(qstatistic) - np.log(k))/\
(np.sqrt(2*qstatistic) - np.sqrt(2*k-2))
H_ub = np.exp(np.log(H) + norm.ppf(1-alpha/2)*H_se)
H_lb = np.exp(np.log(H) - norm.ppf(1-alpha/2)*H_se)
isquared_ub = 100*(H_ub-1)/H_ub
isquared_lb = 100*(H_lb-1)/H_lb
# truncate if need - this is really poor, but apparently standard
if isquared_ub < 0:
isquared_ub = 0
if isquared_lb < 0:
isquared_lb = 0
if isquared_ub > 100:
isquared_ub = 100
if isquared_lb > 100:
isquared_lb = 100
# #### Calcualte the Tau-squared
tausquared = tau2
if tau2 is None:
ctau = sum(weights) - sum(w**2 for w in weights)/sum(weights)
tausquared = (qstatistic - k)/ctau
if tausquared < 0:
tausquared = 0
# ### results
results = {NamesMA.QSTAT: qstatistic, NamesMA.QPVAL: qpvalue,
NamesMA.ISQR: isquared,
NamesMA.ISQR_CI: (isquared_lb, isquared_ub),
NamesMA.ISQR_CI_COV: 1-alpha,
NamesMA.TSQR: tausquared,
}
return HeterogeneityResults(**results)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def fixed_effect(estimates:List[float], standard_errors:List[float],
) -> Tuple[float, float]:
'''
Calculated an average, with the contribution of each element in estimates
weighted by the inverse of the squared standard error of the estimate.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
A list of standard errors of equal length as `estimates`.
Returns
-------
estimate : `float`
The average estimate,
standard_error : `float`
The standard error of estimate
'''
# #### check input
is_type(estimates, list, 'estimates')
is_type(standard_errors, list, 'standard_errors')
# #### Calculations
w = [1/s**2 for s in standard_errors]
# weighted average
b_pool = sum(e*w for e,w in zip(estimates, w))/np.sum(w)
# weighted standard error
se_pool = np.sqrt(1/np.sum(w))
# return stuff
return b_pool, se_pool
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def random_effects(estimates:List[float], standard_errors:List[float],
between_estimate_variance: float,
) -> Tuple[float, float]:
'''
Calculated an average, with the contribution of each element in estimates
weighted by the inverse of the squared standard error of the estimate plus
an estimate of the between `estimate` variance.
This is equivalent to a fixed effect meta-analysis, which assumes
the between `study` variance is zero.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
A list of standard errors of equal length as `estimates`.
between_estimate_variance `float`
The between estimate variance, often referred to as the tau-squared.
Can be estimated using the `Heterogeneity` function.
Returns
-------
estimate : `float`
The average estimate,
standard_error : `float`
The standard error of estimate
'''
# #### check input
is_type(estimates, list, 'estimates')
is_type(standard_errors, list, 'standard_errors')
is_type(between_estimate_variance, (int, float),
'between_estimate_variance')
# #### Calculations
# updates the standard error to include the between_estimate_variance
s_random = [np.sqrt(s**2 + between_estimate_variance)\
for s in standard_errors]
# supply this to the regular fixed effect function
estimate, standard_error = fixed_effect(estimates=estimates,
standard_errors=s_random,
)
# return
return estimate, standard_error