"""
Statistical tests for null-hypothesis significance testing.
This module provides a set of tools for conducting null-hypothesis
significance tests, including classical approaches such as the
Kolmogorov–Smirnov test, Wald tests, and one-way ANOVA. These methods
operate on either raw or summarised data, and return structured
results encapsulated in specialised results classes.
Classes
-------
TestResults
A container for general hypothesis test results, including point estimate,
standard error, test statistic, p-value, and null value.
AnovaTestResults
A subclass of `TestResults` tailored for one-way ANOVA output, including
degrees of freedom and sums of squares.
Functions
---------
ks_test(data, group, values, nulldistribution='uniform')
Performs the Kolmogorov–Smirnov test for each group in the data.
wald_interaction_test(point, se, null_value=0.0)
Performs a Wald test comparing two estimates with associated standard
errors.
anova_one_way(means, variances, sizes)
Computes one-way ANOVA using only group-level summary statistics.
correlation_test(correlation, size, null_value=0.0)
Performs a Fisher z-test on a correlation coefficient.
"""
from typing import Any
import pandas as pd
import numpy as np
from scipy import stats as ss
from scipy.stats import f
from stats_misc.utils.general import (
calculate_pvalue,
calculate_pvalue_fdist,
)
from stats_misc.constants import (
Error_MSG,
NamesTest,
CLASS_NAME,
)
from stats_misc.errors import (
is_type,
same_len,
)
from stats_misc.utils.helpers import (
Results,
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
class TestResults(Results):
    """
    Hold the outcome of a general statistical test.

    Attributes
    ----------
    point_estimate : `float`
        The point estimate.
    standard_error : `float`
        The standard error belonging to the point estimate.
    test_statistic : `float`
        The value of the test statistic.
    p_value : `float`
        The p-value of the test statistic, evaluated against the null
        hypothesis value.
    null_value : `float`
        The value assumed under the null hypothesis.
    """

    # Field names forwarded to the `Results` initialiser as `set_args`.
    # Subclasses override this list to expose a different set of fields.
    SET_ARGS = [
        CLASS_NAME,
        NamesTest.STATISTIC,
        NamesTest.POINT,
        NamesTest.POINT_SE,
        NamesTest.PVALUE,
        NamesTest.NULL_VALUE,
    ]

    def __init__(self, **kwargs) -> None:
        """Pass `SET_ARGS` and the supplied keyword values to `Results`."""
        # `self.SET_ARGS` (not `TestResults.SET_ARGS`) so that a subclass
        # overriding the list is honoured without redefining `__init__`.
        super().__init__(set_args=self.SET_ARGS, **kwargs)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
class AnovaTestResults(TestResults):
    """
    Hold the outcome of a one-way ANOVA.

    The initialiser is inherited from `TestResults`; only the list of
    field names differs.

    Attributes
    ----------
    test_statistic : float
        The F-statistic.
    p_value : float
        The p-value belonging to the F-statistic.
    explained_sum_squares : float
        Between-groups sum of squares, i.e. the part explained by the model.
    residual_sum_squares : float
        Within-groups sum of squares, i.e. the residual part.
    df_numerator : int
        Numerator (between-groups) degrees of freedom.
    df_denominator : int
        Denominator (within-groups) degrees of freedom.
    """

    # Replaces the parent's field list; the inherited `__init__` reads it
    # through `self.SET_ARGS`.
    SET_ARGS = [
        CLASS_NAME,
        NamesTest.STATISTIC,
        NamesTest.PVALUE,
        NamesTest.ESS,
        NamesTest.RSS,
        NamesTest.DF_NUM,
        NamesTest.DF_DENUM,
    ]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def ks_test(data:pd.DataFrame, group:str, values:str,
            nulldistribution:str='uniform') -> dict[Any, Any]:
    """
    Perform the Kolmogorov–Smirnov test across grouped data.

    Applies a one-sample Kolmogorov–Smirnov test to subgroups within a
    DataFrame, assessing whether each group's distribution differs from a
    specified null distribution.

    Parameters
    ----------
    data : `pd.DataFrame`
        A table containing the columns that `group` and `values` refer to.
    group : `str`
        Column name in `data` used to define group membership. Rows with a
        missing group label are ignored.
    values : `str`
        Column name in `data` containing the values to be tested. Missing
        values are removed within each group before testing.
    nulldistribution : str, default `uniform`
        The reference distribution for the KS test. Must be a valid name
        from `scipy.stats`.

    Returns
    -------
    dict
        A dictionary mapping each group label (in order of first
        appearance) to the corresponding `scipy.stats.kstest` result.
    """
    ks_res = {}
    # A single groupby pass replaces one full boolean scan per label.
    # `sort=False` keeps the first-appearance order; `observed=True` limits
    # categorical columns to the labels actually present. Missing labels
    # are dropped by groupby instead of producing an empty sample.
    grouped = data.groupby(group, sort=False, observed=True)[values]
    for label, sample in grouped:
        # `dropna` also copes with object-dtype columns, unlike `np.isnan`.
        ks_res[label] = ss.kstest(sample.dropna().to_numpy(),
                                  nulldistribution)
    # return
    return ks_res
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def wald_interaction_test(point:tuple[float, float] | list[float],
                          se:tuple[float, float] | list[float],
                          null_value:float=0.0,
                          ) -> TestResults:
    """
    Wald test for the difference between two point estimates.

    Evaluates whether `point[0] - point[1]` deviates from `null_value`,
    using the square root of the summed squared standard errors as the
    standard error of the difference.

    Parameters
    ----------
    point : `tuple` or `list` [`float`, `float`]
        The two point estimates, for example mean differences or log odds
        ratios.
    se : `tuple` or `list` [`float`, `float`]
        The standard errors of the two point estimates.
    null_value : `float`, default 0.0
        The difference assumed under the null hypothesis.

    Returns
    -------
    TestResults
        A results object holding the estimated difference, its standard
        error, the test statistic, the p-value, and the null value.

    Raises
    ------
    ValueError
        If `point` or `se` does not contain exactly two entries.
    """
    # container and scalar types first
    is_type(point, (tuple, list, np.ndarray))
    is_type(se, (tuple, list, np.ndarray))
    is_type(null_value, (int, float))
    # each input must be a pair; `point` is checked before `se`
    for label, pair in (('point', point), ('se', se)):
        n_entries = len(pair)
        if n_entries != 2:
            raise ValueError(
                Error_MSG.INVALID_EXACT_LENGTH.format(label, 2, n_entries))
    # difference of the estimates and its standard error
    first_se, second_se = se[0], se[1]
    estimate_diff = point[0] - point[1]
    diff_se = np.sqrt(first_se**2 + second_se**2)
    # z-statistic and its p-value
    z_score = (estimate_diff - null_value)/diff_se
    p_val = calculate_pvalue(z_score)
    # collect and return
    outcome = {
        CLASS_NAME : 'wald_interaction_test',
        NamesTest.POINT : estimate_diff,
        NamesTest.POINT_SE : diff_se,
        NamesTest.STATISTIC : z_score,
        NamesTest.PVALUE : p_val,
        NamesTest.NULL_VALUE : null_value,
    }
    return TestResults(**outcome)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def anova_one_way(means:list[int|float], variances:list[int|float],
                  sizes:list[int],
                  ) -> AnovaTestResults:
    """
    Perform one-way ANOVA using summary statistics.

    Computes the F-statistic and corresponding p-value for a one-way
    analysis of variance, given group-level means, variances, and sample
    sizes.

    Parameters
    ----------
    means : `list` [`int` | `float`]
        Sample means for each group.
    variances : `list` [`int` | `float`]
        Sample variances for each group.
    sizes : `list` [`int`]
        Sample sizes of each group.

    Returns
    -------
    AnovaTestResults
        A results object containing the F-statistic, p-value, sum of
        squares (explained and residual), and degrees of freedom.

    Raises
    ------
    ValueError
        If fewer than two groups are supplied, or if the total sample size
        does not exceed the number of groups (no within-group degrees of
        freedom would remain).

    Notes
    -----
    This implementation is suitable when only group-level summary
    statistics are available, in contrast to the standard ANOVA
    method which requires individual-level observations.

    A residual sum of squares of exactly zero is not handled specially and
    still results in a division by zero.
    """
    # ### confirm input
    is_type(means, list)
    is_type(variances, list)
    is_type(sizes, list)
    # check length
    same_len(means, variances, ['means', 'variances'])
    same_len(means, sizes, ['means', 'sizes'])
    # ### get derived information
    n_groups = len(sizes)
    n_total = sum(sizes)
    # Both degrees of freedom end up as divisors below, so reject inputs
    # that would make either of them zero (this also covers empty lists).
    if n_groups < 2:
        raise ValueError(
            "`means`, `variances` and `sizes` must describe at least two "
            "groups.")
    if n_total <= n_groups:
        raise ValueError(
            "The total of `sizes` must exceed the number of groups.")
    grand_mean = sum(m*s for m, s in zip(means, sizes))/n_total
    df_num = n_groups - 1
    df_den = n_total - n_groups
    # explained (between-groups) sum of squares
    ess = sum(s*(m-grand_mean)**2 for m, s in zip(means, sizes))
    # residual (within-groups) sum of squares, recovered from the variances
    rss = sum(v*(s-1) for v, s in zip(variances, sizes))
    # ### calculate the test statistic and p-value
    fstat = (ess/df_num)/(rss/df_den)
    pval = calculate_pvalue_fdist(fstat, df_num, df_den,)
    # return
    return AnovaTestResults(**{
        CLASS_NAME : 'anova_one_way',
        NamesTest.STATISTIC : fstat,
        NamesTest.PVALUE : pval,
        NamesTest.ESS : ess,
        NamesTest.RSS : rss,
        NamesTest.DF_NUM : df_num,
        NamesTest.DF_DENUM : df_den,
    })
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[docs]
def correlation_test(
        correlation: float,
        size: int,
        null_value: float = 0.0,
) -> TestResults:
    """
    Perform a Fisher z-test on a correlation coefficient.

    Uses the Fisher z-transformation, which is asymptotically valid for
    both zero and non-zero null hypotheses.

    Parameters
    ----------
    correlation : `float`
        The correlation coefficient. Must lie in [-1, 1]; a value of
        exactly -1 or 1 yields an infinite test statistic.
    size : `int`
        Sample size. Must be >= 5.
    null_value : `float`, default 0.0
        Null hypothesis value for the correlation. Must lie strictly
        between -1 and 1.

    Returns
    -------
    TestResults
        A results object containing the test statistic, p-value,
        point estimate, and null value. The standard error is set to
        `None`.

    Raises
    ------
    ValueError
        If `size` is smaller than 5, if `correlation` lies outside
        [-1, 1], or if `null_value` lies outside (-1, 1).

    Notes
    -----
    The Fisher z-statistic is computed as:

    .. math::
        z = (\\text{arctanh}(r) - \\text{arctanh}(\\rho_0)) \\times \\sqrt{n - 3}

    and evaluated against a standard normal distribution.
    The results are asymptotically valid for reasonably large sample
    sizes of 20 or larger. For smaller sample sizes try a permutation test.
    """
    # input validation
    # integers such as 0 or 1 are accepted, matching the other arguments
    is_type(correlation, (float, int))
    is_type(size, (float, int))
    is_type(null_value, (float, int))
    if size < 5:
        raise ValueError("`size` must be >= 5 for the Fisher z-test.")
    # arctanh is only defined on [-1, 1]; outside it the statistic would
    # silently become nan (the negated comparison also rejects nan input).
    if not -1.0 <= correlation <= 1.0:
        raise ValueError("`correlation` must lie in the interval [-1, 1].")
    # a null value of +/-1 maps to +/-inf and can produce inf - inf = nan
    if not -1.0 < null_value < 1.0:
        raise ValueError(
            "`null_value` must lie in the open interval (-1, 1).")
    # the actual test
    z_stat = (np.arctanh(correlation) - np.arctanh(null_value)) *\
        np.sqrt(size - 3)
    pvalue = calculate_pvalue(z_stat)
    return TestResults(**{
        CLASS_NAME : 'correlation_test',
        NamesTest.POINT : correlation,
        NamesTest.POINT_SE : None,
        NamesTest.STATISTIC : z_stat,
        NamesTest.PVALUE : pvalue,
        NamesTest.NULL_VALUE : null_value,
    })