"""
Tools to evaluate prediction models at a fixed time-point.
This module provides functions to assess the performance of binary classifiers,
with a focus on both discrimination (e.g. c-statistics) and calibration
(e.g. calibration slope and intercept). Recalibration tools for risk prediction
and agreement metrics for continuous outcomes are also included.
Classes
-------
CstatisticResults
A results container for c-statistic estimates and confidence intervals.
CalibrationResults
A results class for calibration metrics, including calibration slope,
intercept, and optionally a grouped observed vs predicted table.
RecalibrationResults
A results class for recalibrated slope and intercept estimates, and a
table of updated predictions.
ConcordanceCorrelationCoefficient
A class to compute Lin’s concordance correlation coefficient (CCC), with
options for delta-method and Fisher-based confidence intervals.
Functions
---------
cstatistic(observed, predicted, alpha=None)
Computes Harrell’s c-statistic and optionally its confidence interval.
format_roc(observed, predicted, **kwargs)
Formats ROC curve metrics as a table with sensitivity and false positive
rates at various thresholds.
get_calibration(data, observed, predicted, alpha=0.05, bins=None,
kwargs_ci=None)
Computes the calibration slope and intercept, and optionally a grouped
observed vs predicted risk table with confidence intervals.
recalibrate(data, score, observed, model='binomial')
Recalibrates a risk score using a binomial or Gaussian model.
"""
import warnings
import pandas as pd
import numpy as np
import statsmodels.api as sm
from stats_misc.utils.general import (
invlogit,
# logit,
)
from stats_misc.intervals import (
beta_confidence_interval,
)
from stats_misc.constants import (
NamesValidation as NamesVal,
NamesIntervals as NamesI,
Error_MSG,
CLASS_NAME,
)
from stats_misc.errors import (
is_type,
is_df,
are_columns_in_df,
same_len,
)
from stats_misc.utils.helpers import (
Results,
)
from sklearn.metrics import (
roc_curve,
)
from scipy import stats
from typing import (
Any, Literal,
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
class CstatisticResults(Results):
"""
A results objects containing c-statistic estimates.
Attributes
----------
c_statistic : `float`
The c-statistic.
lower_bound : `float`
The lower bound of the confidence interval.
upper_bound : `float`
The upper bound of the confidence interval.
confidence_interval: `tuple` [`float`, `float`]
The lower and upper bounds of the confidence interval.
coverage : `float`
The interval coverage.
standard_error : `float`
The standard error of the c-statistic, based on an asymptotic
approximation to the normal distribution.
"""
SET_ARGS = [
CLASS_NAME,
NamesVal.CSTAT,
NamesVal.CSTAT_LB,
NamesVal.CSTAT_UB,
NamesVal.CSTAT_INTERVAL,
NamesVal.CSTAT_COVERAGE,
NamesVal.CSTAT_SE,
]
# /////////////////////////////////////////////////////////////////////////
# Initiation the class
def __init__(self, **kwargs: Any) -> None:
"""Initialise a CstatisticResults instance."""
super().__init__(set_args=self.SET_ARGS, **kwargs)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
class CalibrationResults(CstatisticResults):
"""
A results objects containing calibration estimates.
Attributes
----------
calibration_slope : `float`
The calibration slope.
calibration_slope_se : `float`
The standard error of the calibration slope.
calibration_intercept : `float`
Calibration-in-the-large estimate.
calibration_intercept_se : `float`
The standard error of the calibration-in-the-large.
observed_predict_table : `pd.DataFrame`
an optional table with observed and predicted risks.
"""
SET_ARGS = [
CLASS_NAME,
NamesVal.CAL_SLOPE,
NamesVal.CAL_SLOPE_SE,
NamesVal.CAL_INTERCEPT,
NamesVal.CAL_INTERCEPT_SE,
NamesVal.CAL_TABLE,
]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
class RecalibrationResults(CstatisticResults):
"""
A results objects containing recalibration estimates.
Attributes
----------
slope : `float`
The slope estimate
intercept : `float`
The intercept estimate
table_recalibrated : `pd.DataFrame`
A table with the recalibrated estimates appended.
"""
SET_ARGS = [
CLASS_NAME,
NamesVal.RECAL_SLOPE,
NamesVal.RECAL_INTERCEPT,
NamesVal.RECAL_TABLE,
]
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# discrimination functions
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def cstatistic(
observed: pd.Series | np.ndarray,
predicted: pd.Series | np.ndarray,
alpha: float | None = None,
) -> CstatisticResults:
"""
Compute Harrell's concordance statistic (c-statistic) for a model
predicting a binary outcome.
Parameters
----------
observed : `pd.Series` or `np.ndarray`
a column vector with the observed class.
predicted : `pd.Series` or `np.ndarray`
a column vector with the predicted class or log/predicted probability.
alpha : `float` or `None`, default `None`
the (1-alpha)% confidence interval. Default `None` for no confidence
interval.
Returns
-------
`CstatisticResults`
Harrel's c-statistics with optional confidence interval.
Notes
-----
This function estimates the probability that, for a randomly chosen pair
of individuals (one with and one without the event), the predicted score is
higher for the individual with the event. Optionally, a confidence interval
can be calculated based on an asymptotic approximation.
Predictions (predicted column) can range between +- infinity, allowing for
evaluations of non-Prob scores.
The standard error are derived based on _[1] Hanley and McNeil.
References
----------
.. [1] JA Hanley and BJ McNeil, "The meaning and use of the area under a
receiver operating characteristic (ROC) curve, Radiology 1982; 29-36." see
(Table 2).
"""
# Check input
is_type(observed, (pd.Series, np.ndarray), 'observed')
is_type(predicted, (pd.Series, np.ndarray), 'predicted')
is_type(alpha, (type(None), float), 'alpha')
# is alpha between 0 and 1
if not alpha is None:
if alpha <= 0 or alpha >= 1:
raise ValueError(Error_MSG.FLOAT_LIMITS.format('alpha', 0, 1))
same_len(predicted, observed)
# set pd.Series to numpy array
if isinstance(predicted, pd.Series):
predicted = predicted.to_numpy()
if isinstance(observed, pd.Series):
observed = observed.to_numpy()
# normal versus abnormal
norm_ = predicted[observed == 0]
abnorm = predicted[observed == 1]
# outer subtraction
mat_out = np.subtract.outer(abnorm, norm_)
# cstat
if len(norm_) == 0 or len(abnorm) == 0:
cstat = np.nan
else:
cstat = np.mean( (mat_out > 0) + 0.5 * (mat_out == 0))
res_dict = {NamesVal.CSTAT: cstat}
# standard error
if alpha is not None:
# check type
is_type(alpha, float)
# do stuff
Q1 = cstat/(2-cstat)
Q2 = (2 * cstat**2)/(1+cstat)
nn = len(norm_)
na = len(abnorm)
num = ( cstat * (1 - cstat) + (na - 1)*(Q1-cstat**2) + (nn - 1)*(Q2 - cstat**2))
denom = na * nn
if denom == 0:
var = np.nan
else:
var = num/denom
# confidence interval (large sample size!!)
z = np.sqrt(stats.chi2.ppf(1-alpha, 1))
# results
res_dict[NamesVal.CSTAT_LB] = cstat - z * np.sqrt(var)
res_dict[NamesVal.CSTAT_UB] = cstat + z * np.sqrt(var)
res_dict[NamesVal.CSTAT_INTERVAL] =\
(res_dict[NamesVal.CSTAT_LB],
res_dict[NamesVal.CSTAT_UB]
)
res_dict[NamesVal.CSTAT_COVERAGE] = 1-alpha
res_dict[NamesVal.CSTAT_SE] = np.sqrt(var)
# ignoring class warnings due to potential missing results
warnings.filterwarnings("ignore", category=UserWarning)
res_dict[CLASS_NAME] = 'cstatistic'
results = CstatisticResults(**res_dict)
warnings.resetwarnings()
# return stuff
return results
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# calibration function
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def get_calibration(
data: pd.DataFrame,
observed: str,
predicted: str,
alpha: float = 0.05,
bins: int | None = None,
kwargs_ci: dict | None = None,
) -> CalibrationResults:
"""
Estimate the calibration slope and calibration-in-the-large assuming a
binomial data generating model. A binomial model is generally appropriate
if the predicted risk reflects an event occurring at a fixed moment in time,
e.g. after 1 year or 1 hour.
Parameters
----------
data : `pd.DataFrames`
A table with the columns `observed` and `predicted`.
observed : `str`
a column name in `data` referencing the binary outcome column.
predicted : `str`
a column name in `data` referencing the logit predicted risk.
bins : `int`, optional, default `NoneType`
an optional integer used to create equally sized bins of the predicted
logit risk and returns the average predicted and observed risk in
each bin (an observed vs expected table)
alpha: `float`, default 0.05
the (1-alpha)% confidence interval. Used for the observed risk when
bin is supplied.
kwargs_ci : `dict` [`str`, `any`] or `None`, default `None`.
Any keyword arguments for `beta_confidence_interval`. For example,
to supply `integer = False`.
Returns
-------
CalibrationResults
A calibration intercept and slope estimates, and an optional
observed and expected table.
Notes
-----
The model assumes the predicted value is on the log-odds scale (logit).
If your model outputs probabilities, use the `logit` transform beforehand.
The calibration intercept is computed using a GLM with an offset, and the
slope via standard logistic regression.
The grouped calibration table is based on quantile binning of predicted
risks.
The function DOES not presuppose the predicted risk is derived from a
logistic (binomial) regression model. ANY model predicting risk at a fixed
moment in time is acceptable, including models that typically provide
discrete predictions such a classification trees. Some/many models or rules
may only provide predicted risk not the logit risk, in such cases simply
call the `logit` function to derive the appropriate variable.
"""
# Constants
CS = NamesVal.CAL_SLOPE
CS_SE = NamesVal.CAL_SLOPE_SE
CIL = NamesVal.CAL_INTERCEPT
CIL_SE = NamesVal.CAL_INTERCEPT_SE
# Check input
is_type(observed, str, 'observed')
is_type(predicted, str, 'predicted')
is_type(alpha, float, 'alpha')
is_type(kwargs_ci, (type(None), dict))
# is alpha between 0 and 1
if alpha <= 0 or alpha >= 1:
raise ValueError(Error_MSG.FLOAT_LIMITS.format('alpha', 0, 1))
dt = data.copy()
is_df(dt)
are_columns_in_df(dt, [observed] + [predicted])
# empty kwargs
kwargs_ci = kwargs_ci or {}
# outcome and predictor
out = dt[observed]
s = sm.add_constant(dt[predicted])
# calculating the calibration slope with standard errors
cs_model = sm.Logit(out, s)
try:
fit_cs = cs_model.fit(disp=0)
except Exception as exc:
raise RuntimeError(f"Recalibration error: {exc}")
# extracting data
res_dict = {CS: fit_cs.params.iloc[1], CS_SE: fit_cs.bse.iloc[1]}
# calculating the calibration slope with standard errors
# NOTE the sm.Logit function ignored the offset argument
cil_model = sm.GLM(out, s[s.columns[0]],
offset=s[s.columns[1]], family=sm.families.Binomial())
try:
fit_cil = cil_model.fit(disp=0)
except Exception as exc:
raise RuntimeError(f"Recalibration error: {exc}")
# extracting data
res_dict[CIL] = fit_cil.params.iloc[0]
res_dict[CIL_SE] = fit_cil.bse.iloc[0]
# calculating average predicted and observed risk
if not bins == None:
# check input
is_type(bins, int)
if bins > dt.shape[0]:
raise ValueError('The number of requested bins is larger than the '
'number of observations ins `data`.')
# calculate required data
dt['bins'] = pd.qcut(dt[predicted], bins, labels=False)
dt[NamesVal.PREDICTED_RISK] = invlogit(dt[predicted].to_numpy())
### calculate means
observed_expect_table = dt.groupby('bins')[
[NamesVal.PREDICTED_RISK, observed]].agg(['mean', 'count'])
# remove duplicate col
observed_expect_table.drop((observed,'count'), axis=1, inplace=True)
# get confidence interval
lower, upper = [], []
for _, row in observed_expect_table.iterrows():
kwargs_ci_new = {**kwargs_ci, **{'alpha': alpha}}
res = beta_confidence_interval(
row.iloc[2], int(row.iloc[1]),
**kwargs_ci_new,
# integer=True,
# alpha=alpha,
)
l, u = res.__getattribute__(NamesI.VALUES)
lower.append(l)
upper.append(u)
observed_expect_table[NamesVal.AVG_OBSERVED_RISK_LB] = lower
observed_expect_table[NamesVal.AVG_OBSERVED_RISK_UB] = upper
# rename
observed_expect_table.columns = [
NamesVal.AVG_PREDICTED_RISK, NamesVal.NO_SUBJECTS,
NamesVal.AVG_OBSERVED_RISK, NamesVal.AVG_OBSERVED_RISK_LB,
NamesVal.AVG_OBSERVED_RISK_UB,
]
# reorder
observed_expect_table = observed_expect_table[
[NamesVal.AVG_PREDICTED_RISK, NamesVal.AVG_OBSERVED_RISK,
NamesVal.AVG_OBSERVED_RISK_LB, NamesVal.AVG_OBSERVED_RISK_UB,
NamesVal.NO_SUBJECTS]
]
res_dict[NamesVal.CAL_TABLE] = observed_expect_table
# ignoring class warnings due to potential missing results
warnings.filterwarnings("ignore", category=UserWarning)
res_dict[CLASS_NAME] = 'get_calibration'
results = CalibrationResults(**res_dict)
warnings.resetwarnings()
# returns results class
return results
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def recalibrate(
data: pd.DataFrame,
score: str,
observed: str,
model: str = 'binomial',
) -> RecalibrationResults:
"""
The original `score` will be re-trained on observed outcome data, improving
the agreement between the observed and predicted risk/outcome.
Parameters
----------
data : `pd.DataFrame`
A table with columns `score` and `observed`.
observed : `str`
a column name in `data`
score : `str`
a column name in `data`. Note that contrary to the calibration function
the score can be in any format depending on intended use. For binomial
model one would typically want to supply a `logit risk`.
model : `str`, default `binomial`
which model should be used (default: 'binomial', or 'gaussian')
Returns
-------
RecalibrationResults
The recalibration intercept and slope, as well as a table with the
recalibrated predictions.
"""
# Constants
BINOM = NamesVal.RECAL_BINOMIAL; GAUSS = NamesVal.RECAL_GAUSSIAN
SLOPE = NamesVal.RECAL_SLOPE; INTERCEPT = NamesVal.RECAL_INTERCEPT
# Check input
dt = data.copy()
is_df(dt)
are_columns_in_df(dt, [observed, score])
if not any([i in model for i in [BINOM, GAUSS]]):
raise NameError("Error: model should equal {0} or {1}.".format(
BINOM, GAUSS))
# Recalibrate the score
out = dt[observed]
s = sm.add_constant(dt[score])
if model == "binomial":
# recalibrating the model
recal_model = sm.Logit(out, s)
try:
fit = recal_model.fit(disp=0)
except Exception as exc:
raise RuntimeError(f"Recalibration error: {exc}")
if model == "gaussian":
# recalibrating the model
recal_model = sm.OLS(out, s)
try:
fit = recal_model.fit(disp=0)
except Exception as exc:
raise RuntimeError(f"Recalibration error {exc}")
# results
dt_recal = pd.DataFrame({score: fit.predict()})
results = {INTERCEPT: fit.params.iloc[0], SLOPE: fit.params.iloc[1],
NamesVal.RECAL_TABLE: dt_recal}
# returning stuff
results[CLASS_NAME] = 'recalibrate'
return RecalibrationResults(**results)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
class ConcordanceCorrelationCoefficient(object):
"""
Compute Lin's Concordance Correlation Coefficient (CCC) between two
continuous variables.
Parameters
----------
x : `np.ndarray`
First set of measurements.
y : `np.ndarray`
Second set of measurements.
Attributes
----------
ccc : `float`
Concordance correlation coefficient.
ccc_ci : `tuple` [`float`, `float`]
pearson_correlation : `float`
Pearson correlation coefficient between x and y.
bias_correction : `float`
Bias correction factor accounting for scale and location mismatch.
scale_factor : `float`
The vertical scaling constant (σ_X / σ_Y).
translation_constant : `float`
The vertical translation constant ((μ_X - μ_Y) / sqrt(σ_X σ_Y)).
Notes
-----
The CCC is defined as:
CCC = 2ρ σ_X σ_Y / (σ_X² + σ_Y² + (μ_X - μ_Y)²)
where ρ is the Pearson correlation, σ_X and σ_Y are the standard deviations
of x and y, and μ_X, μ_Y their respective means.
The CCC combines measures of precision and accuracy to assess agreement
between paired measurements. It is often used in method comparison studies,
biometrics, and reliability analysis.
It penalises for both lack of correlation and systematic bias (e.g. shifts
in location or scale), in contrast to the Pearson correlation which captures
only linear association.
The CCC is conceptually related to the Intraclass Correlation Coefficient
(ICC). Both are measures of agreement, but the CCC is specifically
designed for paired continuous data and includes a built-in bias penalty.
The ICC is more general and often used in the context of multiple raters or
repeated measurements under different conditions.
References
----------
.. [2] Lin, L. I.-K. (1989). A concordance correlation coefficient to
evaluate reproducibility. *Biometrics*, 45(1), 255–268.
"""
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def __init__(self, x: np.ndarray, y: np.ndarray) -> None:
"""Initialise a ConcordanceCorrelationCoefficient instance."""
# ### check input
is_type(x, np.ndarray)
is_type(y, np.ndarray)
# confirm length
same_len(x, y, ['x', 'y'])
# ### assign
self.x = np.asarray(x)
self.y = np.asarray(y)
# None
setattr(self, NamesVal.CCC_S, np.nan)
setattr(self, NamesVal.CCC_S_CI, (np.nan, np.nan))
setattr(self, NamesVal.CCC_S_COR, np.nan)
setattr(self, NamesVal.CCC_S_BIAS, np.nan)
setattr(self, NamesVal.CCC_S_SCALE, np.nan)
setattr(self, NamesVal.CCC_S_TRANS, np.nan)
setattr(self, NamesI.COVERAGE, np.nan)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def __call__(self, alpha: float = 0.05,
ci_method: Literal["delta", "fisher"] = 'delta',
) -> float:
"""
Calculates the concordance correlation coefficient (CCC) and its
confidence interval.
Parameters
----------
alpha : `float`, default `0.05`
Significance level for the confidence interval.
ci_method : {"delta", "fisher"}, default `delta`
Method to compute the confidence interval, either using the delta
method or Fisher's z-transformation
Returns
-------
float
The estimated concordance correlation coefficient.
"""
# ### check input
is_type(alpha, float)
is_type(ci_method, str)
# is alpha between 0 and 1
if alpha <= 0 or alpha >= 1:
raise ValueError(Error_MSG.FLOAT_LIMITS.format('alpha', 0, 1))
# confirm ci_method name
EXP_METHOD = [NamesVal.CCC_METHOD_F,
NamesVal.CCC_METHOD_D,
]
if not ci_method.lower() in EXP_METHOD:
raise ValueError(
Error_MSG.INCORRECT_STRING_INPUT.format(
'ci_method', EXP_METHOD))
# assign
self.alpha = alpha
self.ci_method = ci_method.lower()
# ### Get calculations
res_dict = self._compute_ccc()
# assign to self
setattr(self, NamesVal.CCC_S, res_dict[NamesVal.CCC],)
setattr(self, NamesVal.CCC_S_CI, res_dict[NamesVal.CCC_CI], )
setattr(self, NamesVal.CCC_S_COR, res_dict[NamesVal.CCC_COR], )
setattr(self, NamesVal.CCC_S_BIAS, res_dict[NamesVal.CCC_BIAS], )
setattr(self, NamesVal.CCC_S_SCALE, res_dict[NamesVal.CCC_SCALE], )
setattr(self, NamesVal.CCC_S_TRANS, res_dict[NamesVal.CCC_TRANS], )
setattr(self, NamesI.COVERAGE, 1-self.alpha)
# return
return res_dict[NamesVal.CCC]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def _compute_ccc(self, ddof: int = 1) -> dict[str, float]:
"""
Calculate the Concordance Correlation Coefficient.
Parameters
----------
ddof : `int`, default 1
The divisor in the (co-)variance calculations (N-ddof), passed to
np.var and np.cov.
Returns
-------
dict
A dictionary with the concordance correlation cofficients along
with confidence intervals, the Spearman correlation,
the bias factor, and the horizontal scale and translation factor.
"""
x, y = self.x, self.y
n = len(x)
var_scale = (n-1)/n
var_x = np.var(x, ddof=ddof)*var_scale
var_y = np.var(y, ddof=ddof)*var_scale
# s_x = np.sqrt(var_x)
# s_y = np.sqrt(var_y)
# Pearson correlation
rho = np.cov(x, y, ddof=ddof)[0, 1]/(
np.sqrt(np.var(x,ddof=ddof)) * np.sqrt(np.var(y, ddof=ddof)))
# the horizontal scale and translation factors
delta = np.mean(y) -np.mean(x)
scale = np.std(y, ddof=ddof)/np.std(x, ddof=ddof)
trans = delta / (var_x * var_y)**(1/4)
# The Pearson penalisation factor
bias_correction = (2 * np.sqrt(var_x * var_y))/(var_x + var_y + delta**2)
# getting the ccc
ccc = rho * bias_correction
# The critical value
z_crit = stats.norm.ppf(1 - self.alpha / 2)
# The confidence interval
if self.ci_method == NamesVal.CCC_METHOD_F:
lower, upper = self._fisher_ci(ccc, rho, trans, n, z_crit)
else:
lower, upper, _ = self._delta_method_ci(
ccc, rho, trans, n, z_crit,
)
# return
return {
NamesVal.CCC : ccc,
NamesVal.CCC_CI : (lower, upper),
NamesVal.CCC_COR : rho,
NamesVal.CCC_BIAS : bias_correction,
NamesVal.CCC_SCALE : scale,
NamesVal.CCC_TRANS : trans,
}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def _fisher_ci(self, ccc: float, rho: float, trans: float, n: int,
z_crit: float,
) -> tuple[float, float]:
"""
Calculates Fisher's z-transformation based confidence interval for the
CCC.
Parameters
----------
ccc : float
Point estimate of the CCC.
rho : float
Pearson correlation coefficient.
trans : float
Translation constant.
n : int
Number of observations.
z_crit : float
Critical value from a standard normal distribution for the desired
confidence level.
Returns
-------
tuple of float
Lower and upper bounds of the CCC confidence interval.
Notes
-----
Applies Fisher’s z-transformation to the CCC and computes symmetric
confidence limits on the z-scale, which are then back-transformed to
the original scale.
"""
# first get the delta method se
_, _, se_delta = self._delta_method_ci(ccc, rho, trans, n, z_crit)
# map ccc to the z-scale
z = 0.5 * np.log((1 + ccc) / (1 - ccc))
se_z = se_delta/(1-ccc**2)
z_lower = z - z_crit * se_z
z_upper = z + z_crit * se_z
# mapp to the original scale
ci_lower = (np.exp(2 * z_lower) - 1) / (np.exp(2 * z_lower) + 1)
ci_upper = (np.exp(2 * z_upper) - 1) / (np.exp(2 * z_upper) + 1)
# return
return ci_lower, ci_upper
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def _delta_method_ci(self, ccc: float, rho: float, trans: float, n: int,
z_crit: float,
) -> tuple[float, float, float]:
"""
Calculates the delta method based confidence interval for the CCC.
Parameters
----------
ccc : float
Point estimate of the concordance correlation coefficient.
rho : float
Pearson correlation coefficient.
trans : float
Translation constant.
n : int
Number of observations.
z_crit : float
Critical value from a standard normal distribution for the desired
confidence level.
Returns
-------
tuple of floats
Lower bound, upper bound, and standard error of the CCC.
Notes
-----
The delta method uses a Taylor expansion to approximate the standard
error of the CCC, from which symmetric confidence intervals are derived.
"""
# calculate the standard error
term1 = (1 - rho**2) * ccc**2 * (1-ccc**2)/rho**2
term2 = 2 * ccc**3 * (1-ccc)*trans**2/rho
term3 = 0.5 * ccc**4 * trans**4/rho**2
var_ccc = (term1 + term2 - term3)/(n-2)
se_ccc = np.sqrt(var_ccc)
# get the lower and upper bounds
ci_lower = ccc - z_crit * se_ccc
ci_upper = ccc + z_crit * se_ccc
# return
return ci_lower, ci_upper, se_ccc
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def __str__(self) -> str:
ci_ = getattr(self, NamesVal.CCC_S_CI)
return (
f"CCC: {getattr(self, NamesVal.CCC_S):.4f} "
f"(95%CI: {ci_[0]:.4f}; {ci_[1]:.4f})\n"
f"Pearson correlation: {getattr(self, NamesVal.CCC_S_COR):.4f}\n"
f"Bias correction factor: {getattr(self, NamesVal.CCC_S_BIAS):.4f}\n"
f"Scale factor: {getattr(self, NamesVal.CCC_S_SCALE):.4f}\n"
f"Translation constants: {getattr(self, NamesVal.CCC_S_TRANS):.4f}"
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def __repr__(self) -> str:
CLASS_NAME = type(self).__name__
x_preview = np.array2string(self.x, precision=2, threshold=5, edgeitems=2)
y_preview = np.array2string(self.y, precision=2, threshold=5, edgeitems=2)
return f"{CLASS_NAME}(x={x_preview}, y={y_preview})"