"""Inference for Statistical Models."""
from pyhf.infer import utils
from pyhf import get_backend
from pyhf import exceptions
def _check_hypotest_prerequisites(pdf, data, init_pars, par_bounds, fixed_params):
if pdf.config.poi_index is None:
raise exceptions.UnspecifiedPOI(
'No POI is defined. A POI is required to run a hypothesis test.'
)
if not utils.all_pois_floating(pdf, fixed_params):
raise exceptions.InvalidModel(
f'POI at index [{pdf.config.poi_index}] is set as fixed, which makes profile likelihood ratio based inference impossible. Please unfix the POI to continue.'
)
[docs]
def hypotest(
poi_test,
data,
pdf,
init_pars=None,
par_bounds=None,
fixed_params=None,
calctype="asymptotics",
return_tail_probs=False,
return_expected=False,
return_expected_set=False,
return_calculator=False,
**kwargs,
):
r"""
Compute :math:`p`-values and test statistics for a single value of the parameter of interest.
See :py:class:`~pyhf.infer.calculators.AsymptoticCalculator` and :py:class:`~pyhf.infer.calculators.ToyCalculator` on additional keyword arguments to be specified.
Example:
>>> import pyhf
>>> pyhf.set_backend("numpy")
>>> model = pyhf.simplemodels.uncorrelated_background(
... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
... )
>>> observations = [51, 48]
>>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata)
>>> mu_test = 1.0
>>> CLs_obs, CLs_exp_band = pyhf.infer.hypotest(
... mu_test, data, model, return_expected_set=True, test_stat="qtilde"
... )
>>> CLs_obs
array(0.05251497)
>>> CLs_exp_band
[array(0.00260626), array(0.01382005), array(0.06445321), array(0.23525644), array(0.57303621)]
Args:
poi_test (Number or Tensor): The value of the parameter of interest (POI)
data (Number or Tensor): The data considered
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``
init_pars (:obj:`tensor` of :obj:`float`): The starting values of the model parameters for minimization.
par_bounds (:obj:`tensor`): The extrema of values the model parameters
are allowed to reach in the fit.
The shape should be ``(n, 2)`` for ``n`` model parameters.
fixed_params (:obj:`tuple` or :obj:`list` of :obj:`bool`): The flag to set a parameter
constant to its starting value during minimization.
calctype (:obj:`str`): The calculator to create. Choose either 'asymptotics' (default) or 'toybased'.
return_tail_probs (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{s+b}` and :math:`\mathrm{CL}_{b}`
return_expected (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{\mathrm{exp}}`
return_expected_set (:obj:`bool`): Bool for returning the :math:`(-2,-1,0,1,2)\sigma` :math:`\mathrm{CL}_{\mathrm{exp}}` --- the "Brazil band"
return_calculator (:obj:`bool`): Bool for returning calculator.
Returns:
Tuple of Floats and lists of Floats and
a :py:class:`~pyhf.infer.calculators.AsymptoticCalculator`
or :py:class:`~pyhf.infer.calculators.ToyCalculator` instance:
- :math:`\mathrm{CL}_{s}`: The modified :math:`p`-value compared to
the given threshold :math:`\alpha`, typically taken to be :math:`0.05`,
defined in :xref:`arXiv:1007.1727` as
.. math::
\mathrm{CL}_{s} = \frac{\mathrm{CL}_{s+b}}{\mathrm{CL}_{b}} = \frac{p_{s+b}}{1-p_{b}}
to protect against excluding signal models in which there is little
sensitivity. In the case that :math:`\mathrm{CL}_{s} \leq \alpha`
the given signal model is excluded.
- :math:`\left[\mathrm{CL}_{s+b}, \mathrm{CL}_{b}\right]`: The
signal + background model hypothesis :math:`p`-value
.. math::
\mathrm{CL}_{s+b} = p_{s+b}
= p\left(q \geq q_{\mathrm{obs}}\middle|s+b\right)
= \int\limits_{q_{\mathrm{obs}}}^{\infty} f\left(q\,\middle|s+b\right)\,dq
= 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|\mu'\right)
and 1 minus the background only model hypothesis :math:`p`-value
.. math::
\mathrm{CL}_{b} = 1- p_{b}
= p\left(q \geq q_{\mathrm{obs}}\middle|b\right)
= 1 - \int\limits_{-\infty}^{q_{\mathrm{obs}}} f\left(q\,\middle|b\right)\,dq
= 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|0\right)
for signal strength :math:`\mu` and model hypothesis signal strength
:math:`\mu'`, where the cumulative density functions
:math:`F\left(q(\mu)\,\middle|\mu'\right)` are given by Equations (57)
and (65) of :xref:`arXiv:1007.1727` for upper-limit-like test
statistic :math:`q \in \{q_{\mu}, \tilde{q}_{\mu}\}`.
Only returned when ``return_tail_probs`` is ``True``.
.. note::
The definitions of the :math:`\mathrm{CL}_{s+b}` and
:math:`\mathrm{CL}_{b}` used are based on profile likelihood
ratio test statistics.
This procedure is common in the LHC-era, but differs from
procedures used in the LEP and Tevatron eras, as briefly
discussed in :math:`\S` 3.8 of :xref:`arXiv:1007.1727`.
- :math:`\mathrm{CL}_{s,\mathrm{exp}}`: The expected :math:`\mathrm{CL}_{s}`
value corresponding to the test statistic under the background
only hypothesis :math:`\left(\mu=0\right)`.
Only returned when ``return_expected`` is ``True``.
- :math:`\mathrm{CL}_{s,\mathrm{exp}}` band: The set of expected
:math:`\mathrm{CL}_{s}` values corresponding to the median
significance of variations of the signal strength from the
background only hypothesis :math:`\left(\mu=0\right)` at
:math:`(-2,-1,0,1,2)\sigma`.
That is, the :math:`p`-values that satisfy Equation (89) of
:xref:`arXiv:1007.1727`
.. math::
\mathrm{band}_{N\sigma} = \mu' + \sigma\,\Phi^{-1}\left(1-\alpha\right) \pm N\sigma
for :math:`\mu'=0` and :math:`N \in \left\{-2, -1, 0, 1, 2\right\}`.
These values define the boundaries of an uncertainty band sometimes
referred to as the "Brazil band".
Only returned when ``return_expected_set`` is ``True``.
- a calculator: The calculator instance used in the computation of the :math:`p`-values.
Either an instance of :py:class:`~pyhf.infer.calculators.AsymptoticCalculator`
or :py:class:`~pyhf.infer.calculators.ToyCalculator`,
depending on the value of ``calctype``.
Only returned when ``return_calculator`` is ``True``.
"""
init_pars = init_pars or pdf.config.suggested_init()
par_bounds = par_bounds or pdf.config.suggested_bounds()
fixed_params = fixed_params or pdf.config.suggested_fixed()
_check_hypotest_prerequisites(pdf, data, init_pars, par_bounds, fixed_params)
calc = utils.create_calculator(
calctype,
data,
pdf,
init_pars,
par_bounds,
fixed_params,
**kwargs,
)
teststat = calc.teststatistic(poi_test)
sig_plus_bkg_distribution, bkg_only_distribution = calc.distributions(poi_test)
tb, _ = get_backend()
CLsb_obs, CLb_obs, CLs_obs = tuple(
tb.astensor(pvalue)
for pvalue in calc.pvalues(
teststat, sig_plus_bkg_distribution, bkg_only_distribution
)
)
CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues(
sig_plus_bkg_distribution, bkg_only_distribution
)
is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0'
_returns = [CLsb_obs if is_q0 else CLs_obs]
if return_tail_probs:
if is_q0:
_returns.append([CLb_obs])
else:
_returns.append([CLsb_obs, CLb_obs])
pvalues_exp_band = [
tb.astensor(pvalue) for pvalue in (CLsb_exp if is_q0 else CLs_exp)
]
if return_expected_set:
if return_expected:
_returns.append(tb.astensor(pvalues_exp_band[2]))
_returns.append(pvalues_exp_band)
elif return_expected:
_returns.append(tb.astensor(pvalues_exp_band[2]))
if return_calculator:
_returns.append(calc)
# Enforce a consistent return type of the observed CLs
return tuple(_returns) if len(_returns) > 1 else _returns[0]
from pyhf.infer import intervals
__all__ = ["hypotest", "calculators", "intervals", "mle", "test_statistics", "utils"]
def __dir__():
return __all__