# Source code for stats_misc.machine_learning.validation

"""
A collection of metrics used to evaluate a prediction model, covering
discrimination and calibration statistics.

Currently the module is focussed on binary/classification tasks.
"""

import warnings
import pandas as pd
import numpy as np
import statsmodels.api as sm
from stats_misc.utils.general import (
    invlogit,
    # logit,
)
from stats_misc.intervals import (
    beta_confidence_interval,
)
from stats_misc.constants import (
    is_type,
    is_df,
    are_columns_in_df,
    NamesValidation as NamesVal,
    NamesIntervals as NamesI,
    same_len,
    Error_MSG,
)
from sklearn.metrics import (
    roc_curve,
)
from scipy.stats import chi2, norm as normal
from typing import Any, List, Type, Union, Tuple, Optional, Dict

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class CstatisticResults(object):
    '''
    A results object containing c-statistic estimates.

    Attributes
    ----------
    c_statistic: float
        The c-statistic.
    lower_bound: float
        The lower bound of the confidence interval.
    upper_bound: float
        The upper bound of the confidence interval.
    confidence_interval
        The lower and upper bounds of the confidence interval.
    coverage : float
        The interval coverage.
    standard_error : float
        The standard error of the c-statistic, based on an asymptotic
        approximation to the normal distribution.
    '''

    # The only keyword arguments `__init__` accepts; subclasses override
    # this list to re-use the same initialisation logic.
    SET_ARGS = [
        NamesVal.CSTAT,
        NamesVal.CSTAT_LB,
        NamesVal.CSTAT_UB,
        NamesVal.CSTAT_INTERVAL,
        NamesVal.CSTAT_COVERAGE,
        NamesVal.CSTAT_SE,
    ]

    # Initiation the class
    def __init__(self, **kwargs):
        """
        Initialise the results object.

        Parameters
        ----------
        **kwargs
            Keyword arguments restricted to the names listed in
            ``SET_ARGS``. An unrecognised name raises an
            `AttributeError`; a missing name is set to `None` with a
            `UserWarning`.
        """
        for k in kwargs.keys():
            if k not in self.__class__.SET_ARGS:
                raise AttributeError("unrecognised argument '{0}'".format(k))
        # Loops over `SET_ARGS`, assigns the kwargs content to name `s`.
        # if argument is missing in kwargs, print a warning.
        for s in self.__class__.SET_ARGS:
            try:
                setattr(self, s, kwargs[s])
            except KeyError:
                warnings.warn("argument '{0}' is set to 'None'".format(s))
                setattr(self, s, None)

    # /////////////////////////////////////////////////////////////////////////
    def __str__(self):
        # NOTE fixed copy-paste: previously described itself as an
        # `Interval` results class.
        return f"A `CstatisticResults` results class."
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class CalibrationResults(CstatisticResults):
    '''
    A results object containing calibration estimates.

    Attributes
    ----------
    calibration_slope: float
        The calibration slope.
    calibration_slope_se: float
        The standard error of the calibration slope.
    calibration_intercept: float
        Calibration-in-the-large estimate.
    calibration_intercept_se: float
        The standard error of the calibration-in-the-large.
    observed_predict_table: pd.DataFrame
        an optional table with observed and predicted risks.
    '''

    # Overrides the parent list; `__init__` is inherited unchanged.
    SET_ARGS = [
        NamesVal.CAL_SLOPE,
        NamesVal.CAL_SLOPE_SE,
        NamesVal.CAL_INTERCEPT,
        NamesVal.CAL_INTERCEPT_SE,
        NamesVal.CAL_TABLE,
    ]

    # /////////////////////////////////////////////////////////////////////////
    def __str__(self):
        # NOTE fixed grammar: "An Calibration" -> "A Calibration".
        return f"A `Calibration` results class."
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class RecalibrationResults(CstatisticResults):
    '''
    A results object containing recalibration estimates.

    Attributes
    ----------
    slope: float
        The slope estimate
    intercept: float
        The intercept estimate
    table_recalibrated: pd.DataFrame
        A table with the recalibrated estimates appended.
    '''

    # Overrides the parent list; `__init__` is inherited unchanged.
    SET_ARGS = [
        NamesVal.RECAL_SLOPE,
        NamesVal.RECAL_INTERCEPT,
        NamesVal.RECAL_TABLE,
    ]

    # /////////////////////////////////////////////////////////////////////////
    def __str__(self):
        # NOTE fixed copy-paste: previously described itself as a
        # `Calibration` results class.
        return f"A `Recalibration` results class."
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # discrimination functions # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def cstatistic(observed: Union[pd.Series, np.ndarray],
               predicted: Union[pd.Series, np.ndarray],
               alpha: Union[None, float] = None,
               ) -> CstatisticResults:
    '''
    Calculates Harell's "c-statistic" for models predicting a binary
    outcome

    Parameters
    ----------
    observed : pd.Series or np.array
        a column vector with the observed class.
    predicted : pd.Series or np.array
        a column vector with the predicted class or predicted probability.
    alpha: float, default `NoneType`
        the (1-alpha)% confidence interval. Default `None` for no
        confidence interval.

    Returns
    -------
    CstatisticResults
        Harrel's c-statistics with optional confidence interval.

    Raises
    ------
    ValueError
        If `alpha` does not lie strictly between 0 and 1.

    Notes
    -----
    Predictions (predicted column) can range between +- infinity,
    allowing for evaluations of non-Prob scores.

    The standard error are derived based on:

    .. [1] JA Hanley and BJ McNeil, "The meaning and use of the area under
        a receiver operating characteristic (ROC) curve, Radiology 1982;
        29-36." see (Table 2).
    '''
    # Check input
    is_type(observed, (pd.Series, np.ndarray), 'observed')
    is_type(predicted, (pd.Series, np.ndarray), 'predicted')
    is_type(alpha, (type(None), float), 'alpha')
    # is alpha between 0 and 1
    # NOTE `alpha is not None` replaces the `not alpha is None` anti-idiom
    if alpha is not None:
        if alpha <= 0 or alpha >= 1:
            raise ValueError(Error_MSG.FLOAT_LIMITS.format('alpha', 0, 1))
    same_len(predicted, observed)
    # set pd.Series to numpy array
    if isinstance(predicted, pd.Series):
        predicted = predicted.to_numpy()
    if isinstance(observed, pd.Series):
        observed = observed.to_numpy()
    # split predictions into the normal (observed == 0) and abnormal
    # (observed == 1) groups
    norm_ = predicted[observed == 0]
    abnorm = predicted[observed == 1]
    # pairwise differences: abnormal minus normal predictions
    mat_out = np.subtract.outer(abnorm, norm_)
    # c-statistic: proportion of concordant pairs, ties counted as 1/2;
    # NaN when either group is empty (no pairs exist)
    if len(norm_) == 0 or len(abnorm) == 0:
        cstat = np.nan
    else:
        cstat = np.mean((mat_out > 0) + 0.5 * (mat_out == 0))
    res_dict = {NamesVal.CSTAT: cstat}
    # standard error and confidence interval (Hanley & McNeil, 1982)
    if alpha is not None:
        # NOTE removed redundant `is_type(alpha, float)`; already checked
        # above with the argument name included in the error message.
        Q1 = cstat / (2 - cstat)
        Q2 = (2 * cstat ** 2) / (1 + cstat)
        nn = len(norm_)
        na = len(abnorm)
        num = (cstat * (1 - cstat) + (na - 1) * (Q1 - cstat ** 2)
               + (nn - 1) * (Q2 - cstat ** 2))
        denom = na * nn
        if denom == 0:
            var = np.nan
        else:
            var = num / denom
        # hoist the repeated sqrt of the variance
        se = np.sqrt(var)
        # confidence interval (large sample size!!);
        # sqrt of a chi2(1) quantile equals the two-sided normal quantile
        z = np.sqrt(chi2.ppf(1 - alpha, 1))
        # results
        res_dict[NamesVal.CSTAT_LB] = cstat - z * se
        res_dict[NamesVal.CSTAT_UB] = cstat + z * se
        res_dict[NamesVal.CSTAT_INTERVAL] = \
            (res_dict[NamesVal.CSTAT_LB],
             res_dict[NamesVal.CSTAT_UB],
             )
        res_dict[NamesVal.CSTAT_COVERAGE] = 1 - alpha
        res_dict[NamesVal.CSTAT_SE] = se
    # ignoring class warnings due to potential missing results
    warnings.filterwarnings("ignore", category=UserWarning)
    results = CstatisticResults(**res_dict)
    warnings.resetwarnings()
    # return stuff
    return results
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def format_roc(observed: Union[pd.Series, np.ndarray],
               predicted: Union[pd.Series, np.ndarray],
               **kwargs: Optional[Any],
               ) -> pd.DataFrame:
    '''
    Takes a binary `observed` column vector and a continuous `predicted`
    column vector, and returns a pd.DataFrame with the columns
    `false_positive`, `sensitivity` and `threshold`.

    Arguments
    ---------
    observed: numpy array
        A column vector of the observed binary outcomes.
    predicted: numpy array
        A column vector of the predicted outcome (should be continuous),
        e.g., representing the predicted probability.
    kwargs: Any
        Supplied to `sklearn.metrics.roc_curve`.

    Returns
    -------
    pd.DataFrame
        A table with columns: `false_positive`, `sensitivity` and
        `threshold`.
    '''
    # validate input types and matching lengths
    is_type(observed, (pd.Series, np.ndarray), 'observed')
    is_type(predicted, (pd.Series, np.ndarray), 'predicted')
    same_len(observed, predicted)
    # compute the ROC coordinates
    fpr, tpr, thresholds = roc_curve(
        y_true=observed,
        y_score=predicted,
        **kwargs,
    )
    # assemble and return the results table
    return pd.DataFrame(
        {
            NamesVal.FALSE_POSITIVE: fpr,
            NamesVal.SENSITIVITY: tpr,
            NamesVal.THRESHOLD: thresholds,
        }
    )
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # calibration function # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def get_calibration(data: pd.DataFrame,
                    observed: str,
                    predicted: str,
                    alpha: float = 0.05,
                    bins: None | int = None,
                    kwargs_ci: None | dict = None,
                    ) -> CalibrationResults:
    """
    Estimate the calibration slope and calibration-in-the-large assuming a
    binomial data generating model. A binomial model is generally
    appropriate if the predicted risk reflects an event occurring at a
    fixed moment in time, e.g. after 1 year or 1 hour.

    Parameters
    ----------
    data : pd.DataFrames
        A table with the columns `observed` and `predicted`.
    observed : str
        a column name in `data` referencing the binary outcome column.
    predicted : str
        a column name in `data` referencing the logit predicted risk.
    alpha: float, default 0.05
        the (1-alpha)% confidence interval. Used for the observed risk
        when bin is supplied.
    bins : int, optional, default `NoneType`
        an optional integer used to create equally sized bins of the
        predicted logit risk and returns the average predicted and
        observed risk in each bin (an observed vs expected table)
    kwargs_ci : dict, optional, default `NoneType`
        additional keyword arguments forwarded to
        `beta_confidence_interval` when computing the per-bin observed
        risk intervals; `alpha` is always supplied.

    Returns
    -------
    CalibrationResults
        A calibration intercept and slope estimates, and an optional
        observed and expected table.

    Raises
    ------
    ValueError
        If `alpha` is outside (0, 1), or `bins` exceeds the number of
        observations in `data`.
    RuntimeError
        If either of the underlying statsmodels fits fails.

    Notes
    -----
    The function DOES not presuppose the predicted risk is derived from a
    logistic (binomial) regression model. ANY model predicting risk at a
    fixed moment in time is acceptable, including models that typically
    provide interval predictions such a classification trees. Some/many
    models or rules may only provide predicted risk not the logit risk,
    in such cases simply call the `validation.logit` function to derive
    the appropriate variable.
    """
    # Constants
    CS = NamesVal.CAL_SLOPE
    CS_SE = NamesVal.CAL_SLOPE_SE
    CIL = NamesVal.CAL_INTERCEPT
    CIL_SE = NamesVal.CAL_INTERCEPT_SE
    # Check input
    is_type(observed, str, 'observed')
    is_type(predicted, str, 'predicted')
    is_type(alpha, float, 'alpha')
    # NOTE added the argument name, matching the other `is_type` calls
    is_type(kwargs_ci, (type(None), dict), 'kwargs_ci')
    # is alpha between 0 and 1
    if alpha <= 0 or alpha >= 1:
        raise ValueError(Error_MSG.FLOAT_LIMITS.format('alpha', 0, 1))
    dt = data.copy()
    is_df(dt)
    are_columns_in_df(dt, [observed] + [predicted])
    # empty kwargs
    kwargs_ci = kwargs_ci or {}
    # outcome and predictor (with an added intercept column)
    out = dt[observed]
    s = sm.add_constant(dt[predicted])
    # calculating the calibration slope with standard errors
    cs_model = sm.Logit(out, s)
    try:
        fit_cs = cs_model.fit(disp=0)
    except Exception as exc:
        # NOTE fixed copy-paste: this is a calibration, not a
        # recalibration, fit
        raise RuntimeError(f"Calibration error: {exc}")
    # extracting data
    res_dict = {CS: fit_cs.params.iloc[1], CS_SE: fit_cs.bse.iloc[1]}
    # calculating the calibration-in-the-large with standard errors
    # NOTE the sm.Logit function ignored the offset argument, hence sm.GLM
    cil_model = sm.GLM(out, s[s.columns[0]], offset=s[s.columns[1]],
                       family=sm.families.Binomial())
    try:
        fit_cil = cil_model.fit(disp=0)
    except Exception as exc:
        raise RuntimeError(f"Calibration error: {exc}")
    # extracting data
    res_dict[CIL] = fit_cil.params.iloc[0]
    res_dict[CIL_SE] = fit_cil.bse.iloc[0]
    # calculating average predicted and observed risk
    # NOTE `bins is not None` replaces the `not bins == None` anti-idiom
    if bins is not None:
        # check input (name included for a clear error message)
        is_type(bins, int, 'bins')
        if bins > dt.shape[0]:
            raise ValueError('The number of requested bins is larger than '
                             'the number of observations in `data`.')
        # calculate required data
        dt['bins'] = pd.qcut(dt[predicted], bins, labels=False)
        dt[NamesVal.PREDICTED_RISK] = invlogit(dt[predicted].to_numpy())
        # calculate per-bin means and counts
        observed_expect_table = dt.groupby('bins')[
            [NamesVal.PREDICTED_RISK, observed]].agg(['mean', 'count'])
        # remove duplicate col
        observed_expect_table.drop((observed, 'count'), axis=1,
                                   inplace=True)
        # get confidence interval for the observed risk in each bin;
        # the kwargs merge is loop-invariant and hence hoisted
        kwargs_ci_new = {**kwargs_ci, 'alpha': alpha}
        lower, upper = [], []
        for _, row in observed_expect_table.iterrows():
            # iloc[1] is the bin count, iloc[2] the mean observed risk
            res = beta_confidence_interval(
                row.iloc[2],
                int(row.iloc[1]),
                **kwargs_ci_new,
            )
            l, u = getattr(res, NamesI.VALUES)
            lower.append(l)
            upper.append(u)
        observed_expect_table[NamesVal.AVG_OBSERVED_RISK_LB] = lower
        observed_expect_table[NamesVal.AVG_OBSERVED_RISK_UB] = upper
        # rename (flattens the MultiIndex columns)
        observed_expect_table.columns = [
            NamesVal.AVG_PREDICTED_RISK,
            NamesVal.NO_SUBJECTS,
            NamesVal.AVG_OBSERVED_RISK,
            NamesVal.AVG_OBSERVED_RISK_LB,
            NamesVal.AVG_OBSERVED_RISK_UB,
        ]
        # reorder
        observed_expect_table = observed_expect_table[
            [NamesVal.AVG_PREDICTED_RISK,
             NamesVal.AVG_OBSERVED_RISK,
             NamesVal.AVG_OBSERVED_RISK_LB,
             NamesVal.AVG_OBSERVED_RISK_UB,
             NamesVal.NO_SUBJECTS]
        ]
        res_dict[NamesVal.CAL_TABLE] = observed_expect_table
    # ignoring class warnings due to potential missing results
    warnings.filterwarnings("ignore", category=UserWarning)
    results = CalibrationResults(**res_dict)
    warnings.resetwarnings()
    # returns results class
    return results
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def recalibrate(data: pd.DataFrame,
                score: str,
                observed: str,
                model: str = 'binomial',
                ) -> RecalibrationResults:
    """
    The original `score` will be re-trained on observed outcome data,
    improving the agreement between the observed and predicted
    risk/outcome.

    Parameters
    ----------
    data : pd.DataFrame
        A table with columns `score` and `observed`.
    observed : str
        a column name in `data`
    score : str
        a column name in `data`. Note that contrary to the calibration
        function the score can be in any format depending on intended
        use. For binomial model one would typically want to supply a
        `logit risk`.
    model : str, default `binomial`
        which model should be used (default: 'binomial', or 'gaussian')

    Returns
    -------
    RecalibrationResults
        The recalibration intercept and slope, as well as a table with
        the recalibrated predictions.

    Raises
    ------
    NameError
        If `model` is neither 'binomial' nor 'gaussian'.
    RuntimeError
        If the underlying statsmodels fit fails.
    """
    # Constants
    BINOM = NamesVal.RECAL_BINOMIAL
    GAUSS = NamesVal.RECAL_GAUSSIAN
    SLOPE = NamesVal.RECAL_SLOPE
    INTERCEPT = NamesVal.RECAL_INTERCEPT
    # Check input
    dt = data.copy()
    is_df(dt)
    are_columns_in_df(dt, [observed, score])
    # NOTE fixed: the previous substring test
    # (`any([i in model for i in [BINOM, GAUSS]])`) accepted strings such
    # as 'binomial_x' and then crashed later on an unbound `fit`; an
    # exact match is required.
    if model not in (BINOM, GAUSS):
        raise NameError("Error: model should equal {0} or {1}.".format(
            BINOM, GAUSS))
    # Recalibrate the score on the observed outcome
    out = dt[observed]
    s = sm.add_constant(dt[score])
    if model == BINOM:
        # logistic regression for a binary outcome
        recal_model = sm.Logit(out, s)
    else:
        # ordinary least squares for a continuous outcome
        recal_model = sm.OLS(out, s)
    try:
        fit = recal_model.fit(disp=0)
    except Exception as exc:
        raise RuntimeError(f"Recalibration error: {exc}")
    # results: recalibrated predictions plus intercept/slope estimates
    dt_recal = pd.DataFrame({score: fit.predict()})
    results = {INTERCEPT: fit.params.iloc[0],
               SLOPE: fit.params.iloc[1],
               NamesVal.RECAL_TABLE: dt_recal}
    # returning stuff
    return RecalibrationResults(**results)