"""
Meta-analysis estimators for weighted averages and heterogeneity assessment.
This module provides methods for calculating weighted averages of point
estimates using fixed-effect and random-effects models. It includes
implementations of standard estimators for between-study variance
(tau-squared), Q-tests for heterogeneity, and I-squared statistics.
Iterative and non-iterative estimators are supported, including
DerSimonian-Laird, Cochrane ANOVA, Paule-Mandel, and method of moments.
Classes
-------
HeterogeneityResults
A results container for heterogeneity estimates including Q-statistic,
I-squared, and tau-squared values.
Functions
---------
fixed_effect(estimates, standard_errors)
Computes a fixed-effect meta-analytic estimate and standard error.
random_effects(estimates, standard_errors, between_estimate_variance)
Computes a random-effects meta-analytic estimate and standard error.
heterogeneity(estimates, standard_errors, overall_estimate, alpha=0.05,
tau2=None)
Computes heterogeneity measures: Q-test, I-squared, and tau-squared.
estimate_tau(estimates, standard_errors, method='mm-it', **kwargs)
Estimates the between-study variance (tau-squared) using various methods.
Notes
-----
These implementations are partially adapted from `statsmodels`, with
adjustments for flexibility and reproducibility.
"""
import warnings
import numpy as np
from typing import Any, Literal
from scipy.stats import (
chi2,
norm,
)
from stats_misc.constants import (
Error_MSG,
NamesMetaAnalysis as NamesMA,
CLASS_NAME,
)
from stats_misc.errors import (
is_type,
string_join_delimiter,
same_len,
check_limits,
)
from stats_misc.utils.helpers import (
Results,
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# constants
EXPECTED_TAU_METHODS = [NamesMA.TAU_METHOD_DL, NamesMA.TAU_METHOD_CA,
NamesMA.TAU_METHOD_MM_IT, NamesMA.TAU_METHOD_CA2,
NamesMA.TAU_METHOD_DL2, NamesMA.TAU_METHOD_PM_IT,
]
MSG1 = Error_MSG.DIFF_LENGTHS
MSG2 = Error_MSG.INCORRECT_STRING_INPUT
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
class HeterogeneityResults(Results):
"""
A results container for heterogeneity estimates.
Attributes
----------
q_statistic : `float`
The Q-statistic.
q_pvalue : `float`
The Q-statistic p-value.
i_squared : `float`
The I-squared heterogeneity statistic.
i_squared_ci : `tuple` [`float`]
The I-squared confidence interval.
i_squared_ci_coverage : `float`
The confidence level of the I-squared interval.
tau_squared : `float`
The tau-squared heterogeneity statistic.
"""
SET_ARGS = [
CLASS_NAME,
NamesMA.QSTAT,
NamesMA.QPVAL,
NamesMA.ISQR,
NamesMA.ISQR_CI,
NamesMA.ISQR_CI_COV,
NamesMA.TSQR,
]
def __init__(self, **kwargs) -> None:
super().__init__(set_args=self.SET_ARGS, **kwargs)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Tau-square estimates
def _tau_iter_pm(estimates:list[float], standard_errors:list[float],
tau_start:float=0, atol:float=1e-5, maxiter:int=50,
) -> tuple[float, bool]:
"""
The Paule-Mandel (PM) iterative estimator of the between-study variance.
This method estimates tau-squared by iteratively solving the estimating
equation until convergence.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
Corresponding standard errors for each estimate.
tau_start : `float`, default 0
Starting value of the tau-squared.
atol : `float`, default 1e-5
Convergence tolerance. Itteration will terminate when the change is
smaller than `atol`.
maxiter : `int`, default 50
Maximum number of iterations.
Returns
-------
float
Estimated tau-squared.
bool
Whether the algorithm converged.
Notes
-----
See DerSimonian and Kacker 2007 [1]_.
This code was addapted from
`statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L628>`_.
Hash: d5b11ca
References
----------
.. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of
clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114.
doi:10.1016/j.cct.2006.04.004
"""
# #### check input
is_type(estimates, list)
is_type(standard_errors, list)
is_type(tau_start, (int, float))
is_type(atol, (int, float))
is_type(maxiter, int)
same_len(estimates, standard_errors, ['estimates', 'standard_errors'])
tausquared = tau_start
v = [s**2 for s in standard_errors]
# #### iteration
converged = False
for _ in range(maxiter):
w = np.array([1/(i + tausquared) for i in v])
m = w.dot(estimates) / w.sum(0)
squared_res = [(e-m)**2 for e in estimates]
q_w = w.dot(squared_res)
# F(tausquard_[-i]) estimating equation
ee = q_w - (len(estimates) - 1)
# if negative set tau to zero
if ee < 0:
tausquared = 0
converged = True
break
# if close enough to 0 stop
if np.allclose(ee, 0, atol=atol):
converged = True
break
# update tausquared
delta = ee / (w**2).dot(squared_res)
tausquared += delta
# return
return tausquared, converged
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def _tau_mm(estimates:list[float], standard_errors:list[float],
weights:list[float]
) -> float:
"""
Method of moments estimator for the tau-squared (i.e. the between study
variance)
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
Corresponding standard errors for each estimate.
weights : `list` [`float`]
The weights used to calculate the overall point estimates using
a weighted average.
Returns
-------
float
Estimated tau-squared.
Notes
-----
This code was addapted from
`statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L713C1-L754C1>`_.
Hash: d5b11ca
The code is based on [1]_ and implements equation 6, where depending on the
weights on can use the following tau estimators: Cochrane ANOVA (CA),
DerSimonian and Laird (DL), CA2 (the two-stage CA estimator), and the DL2
(the two stage DL estimator).
| Estimator | Weights | Notes |
| --------- | -------------------------- | -------------------- |
| CA | 1/k | for k studies |
| DL | 1/(s_i)**2 | for standard error s |
| CA2 | 1/(_tau_mm(CA) + (s_i)**2) | - |
| DL2 | 1/(_tau_mm(DL) + (s_i)**2) | - |
| --------- | -------------------------- | -------------------- |
References
----------
.. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of
clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114.
doi:10.1016/j.cct.2006.04.004
"""
# #### test input
is_type(estimates, list)
is_type(standard_errors, list)
is_type(weights, list)
same_len(estimates, standard_errors, ['estimates', 'standard_errors'])
same_len(weights, standard_errors, ['weights', 'standard_errors'])
# ### calculations
v = [s**2 for s in standard_errors]
w = np.array(weights)
# weighted estimate
m = w.dot(estimates)/w.sum(0)
# squared residuals
resid_sq = [(e - m)**2 for e in estimates]
q_w = w.dot(resid_sq)
w_t = w.sum()
expect = w.dot(v) - (w**2).dot(v)/w_t
denom = w_t - (w**2).sum()/w_t
# moment estimate from estimating equation
tau2 = (q_w - expect)/denom
# return tau
return tau2
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def _tau_iter_mm(estimates:list[float], standard_errors:list[float],
tau_start:float=0, atol:float=1e-5, maxiter:int=50,
) -> tuple[float, bool]:
"""
General method-of-moments iterative estimator of tau-squared.
This approach updates tau-squared until convergence using MM equations.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
Corresponding standard errors for each estimate.
tau_start : `float`, default 0
Starting value of the tau-squared.
atol : `float`, default 1e-5
Convergence tolerance for change in tau-squared estimate between
iterations.
maxiter : `int`, default 50
Maximum number of iterations.
Returns
-------
float
Estimated tau-squared.
bool
Whether the algorithm converged.
Notes
-----
See DerSimonian and Kacker 2007 [1]_.
This code was addapted from
`statsmodels <https://github.com/statsmodels/statsmodels/blob/main/statsmodels/stats/meta_analysis.py#L713>`_.
Hash: d5b11ca
References
----------
.. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of
clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114.
doi:10.1016/j.cct.2006.04.004
"""
# #### check input
is_type(estimates, list)
is_type(standard_errors, list)
is_type(tau_start, (int, float))
is_type(atol, (int, float))
is_type(maxiter, int)
same_len(estimates, standard_errors, ['estimates', 'standard_errors'])
tausquared = tau_start
v = [s**2 for s in standard_errors]
# #### iterating
converged = False
for _ in range(maxiter):
w = [1/(i + tausquared) for i in v]
tau_new = _tau_mm(estimates, standard_errors, w)
tau_new = max(0, tau_new)
delta = tau_new - tausquared
# if close enough to 0 stop
if np.allclose(delta, 0, atol=atol):
converged = True
break
# final result
tausquared = tau_new
# return
return tausquared, converged
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def estimate_tau(
estimates:list[float], standard_errors:list[float],
method:Literal['dl', 'ca', 'dl2', 'ca2', 'mm-it', 'pm-it']='mm-it',
**kwargs: Any,
) -> float:
"""
Calculate the between study/estimate variance, see DerSimonian and Kacker
2007 [1]_.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
Corresponding standard errors for each estimate.
method : {'dl', 'ca', 'dl2', 'ca2', 'mm-it', 'pm-it'}, default `dl2`
Which methods of moments (MM) estimator to use:
- dl : DerSimonian and Laird (DL) estimator,
- ca : Cochrane ANOVA (CA) estimator,
- dl2 : The two-stage DL estimator,
- ca2 : The two-stage CA estimator,
- mm-it : The general MM iterative estimator,
- pm-it : The Paule Mandel iterative estimator.
**kwargs : `any`
Keyword arguments supplied to `_tau_iter_mm` or `_tau_iter_pm`.
Returns
-------
float
Estimated tau-squared.
References
----------
.. [1] DerSimonian R, Kacker R. Random-effects model for meta-analysis of
clinical trials: an update. Contemp Clin Trials. 2007;28(2):105-114.
doi:10.1016/j.cct.2006.04.004
"""
# #### confirm input
is_type(estimates, list)
is_type(standard_errors, list)
is_type(method, str)
same_len(estimates, standard_errors, ['estimates', 'standard_errors'])
# ensure the same format
method = method.lower()
tau2 = None
if method not in EXPECTED_TAU_METHODS:
raise ValueError(Error_MSG.INCORRECT_STRING_INPUT.method('method',
string_join_delimiter(EXPECTED_TAU_METHODS)))
# #### perform calculations
if method == NamesMA.TAU_METHOD_CA:
w = [1/len(estimates)] * len(estimates)
tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w)
elif method == NamesMA.TAU_METHOD_DL:
w = [1/s**2 for s in standard_errors]
tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w)
elif method == NamesMA.TAU_METHOD_CA2:
w = [1/len(estimates)] * len(estimates)
tau2_temp = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w)
w2 = [1/(tau2_temp + s**2) for s in standard_errors]
# the final tau estimate
tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w2)
elif method == NamesMA.TAU_METHOD_DL2:
w = [1/s**2 for s in standard_errors]
tau2_temp = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w)
w2 = [1/(tau2_temp + s**2) for s in standard_errors]
# the final tau estimate
tau2 = _tau_mm(estimates=estimates, standard_errors=standard_errors,
weights=w2)
elif method == NamesMA.TAU_METHOD_MM_IT:
tau2, converged = _tau_iter_mm(estimates=estimates,
standard_errors=standard_errors,
**kwargs)
if converged == False:
warnings.warn(
Error_MSG.NON_CONVERGENCE.format(NamesMA.TAU_METHOD_MM_IT))
elif method == NamesMA.TAU_METHOD_PM_IT:
tau2, converged = _tau_iter_pm(estimates=estimates,
standard_errors=standard_errors,
**kwargs)
if converged == False:
warnings.warn(
Error_MSG.NON_CONVERGENCE.format(NamesMA.TAU_METHOD_PM_IT))
# #### return
return tau2
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Q-test
[docs]
def heterogeneity(estimates:list[float], standard_errors:list[float],
overall_estimate:float | int,
alpha:float=0.05,
tau2:float | None=None,
) -> HeterogeneityResults:
"""
Provides heterogeneity estimates (Q-test, I-squared, and Tau-square) [2]_,
determining too what extent the individual `estimates` are distinct from
the `overall_estimate`, accounting for difference in precision based on
the supplied estimate specific standard_errors.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
Corresponding standard errors for each estimate.
overall_estimate : `float` or `int`
The overall estimate, for example based on a meta-analysis of the
point estimates.
alpha : `float`, default 0.05
The alpha for the (1-alpha/2)*100% confidence interval.
tau2 : `float`, optional
A possible external estimate of the tausquard used in the random effects
estimator. If `NoneType` a non-iterative MM estimate will be used.
Returns
-------
HeterogeneityResults
Object containing heterogeneity statistics.
References
----------
.. [2] Higgins JP, Thompson SG. Quantifying heterogeneity in a
meta-analysis. Stat Med. 2002;21(11):1539-1558. doi:10.1002/sim.1186
Notes
-----
Heterogeneity will be internally evaluated using a Chi-square distribution
with `len(estimates) - 1` degrees of freedom.
The heterogeneity estimates are based on [2]_, specifically the tau-squared
is estimated using the DerSimonian and Laird method of moments method
(without iteration).
"""
# #### check input
is_type(estimates, list)
is_type(standard_errors, list)
is_type(overall_estimate, (int, float))
is_type(alpha, float)
same_len(estimates, standard_errors, ['estimates', 'standard_errors'])
# is alpha between 0 and 1
check_limits(alpha, 0.0, 1.0)
# #### Calculate the Q-test
k = len(estimates) - 1
weights = [1/s**2 for s in standard_errors]
qstatistic = sum(
[w * (b-overall_estimate)**2 for w,b in zip(weights, estimates)]
)
# if there is only one study set to 1
if k == 0:
qpvalue = 1
else:
qpvalue = 1-chi2.cdf(qstatistic, k)
# #### Calcualte the I-squared
if qstatistic <= k:
isquared = 0
else:
isquared = 100*(qstatistic-k)/qstatistic
# get confidence intervals
H = qstatistic/k
H_se = 0.5*(np.log(qstatistic) - np.log(k))/\
(np.sqrt(2*qstatistic) - np.sqrt(2*k-2))
H_ub = np.exp(np.log(H) + norm.ppf(1-alpha/2)*H_se)
H_lb = np.exp(np.log(H) - norm.ppf(1-alpha/2)*H_se)
isquared_ub = 100*(H_ub-1)/H_ub
isquared_lb = 100*(H_lb-1)/H_lb
# truncate if need - this is really poor, but apparently standard
if isquared_ub < 0:
isquared_ub = 0
if isquared_lb < 0:
isquared_lb = 0
if isquared_ub > 100:
isquared_ub = 100
if isquared_lb > 100:
isquared_lb = 100
# #### Calcualte the Tau-squared
tausquared = tau2
if tau2 is None:
ctau = sum(weights) - sum(w**2 for w in weights)/sum(weights)
tausquared = (qstatistic - k)/ctau
if tausquared < 0:
tausquared = 0
# ### results
results = {
CLASS_NAME : 'heterogeneity',
NamesMA.QSTAT: qstatistic, NamesMA.QPVAL: qpvalue,
NamesMA.ISQR: isquared,
NamesMA.ISQR_CI: (isquared_lb, isquared_ub),
NamesMA.ISQR_CI_COV: 1-alpha,
NamesMA.TSQR: tausquared,
}
return HeterogeneityResults(**results)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def fixed_effect(estimates:list[float], standard_errors:list[float],
) -> tuple[float, float]:
"""
Compute fixed-effect meta-analysis estimate.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
Corresponding standard errors for each estimate.
Returns
-------
estimate : `float`
The average estimate,
standard_error : `float`
The standard error of the average estimate.
"""
# #### check input
is_type(estimates, list)
is_type(standard_errors, list)
# #### Calculations
w = [1/s**2 for s in standard_errors]
# weighted average
b_pool = sum(e*w for e,w in zip(estimates, w))/np.sum(w)
# weighted standard error
se_pool = np.sqrt(1/np.sum(w))
# return stuff
return b_pool, se_pool
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def random_effects(estimates:list[float], standard_errors:list[float],
between_estimate_variance: float,
) -> tuple[float, float]:
"""
Compute random-effects meta-analysis estimate.
Parameters
----------
estimates : `list` [`float`]
A list of point estimates (e.g., mean differences or log odds ratios).
standard_errors : `list` [`float`]
Corresponding standard errors for each estimate.
between_estimate_variance : `float`
The between estimate variance, often referred to as the tau-squared.
Can be estimated using the `Heterogeneity` function.
Returns
-------
estimate : `float`
The average estimate,
standard_error : `float`
The standard error of the average estimate.
Notes
-----
Setting `between_estimate_variance` to zero will results in a fixed effect
estimate.
"""
# #### check input
is_type(estimates, list)
is_type(standard_errors, list)
is_type(between_estimate_variance, (int, float))
# #### Calculations
# updates the standard error to include the between_estimate_variance
s_random = [np.sqrt(s**2 + between_estimate_variance)\
for s in standard_errors]
# supply this to the regular fixed effect function
estimate, standard_error = fixed_effect(estimates=estimates,
standard_errors=s_random,
)
# return
return estimate, standard_error