Source code for pyhf.infer

"""Inference for Statistical Models."""

from pyhf.infer import utils
from pyhf import get_backend
from pyhf import exceptions


def _check_hypotest_prerequisites(pdf, data, init_pars, par_bounds, fixed_params):
    if pdf.config.poi_index is None:
        raise exceptions.UnspecifiedPOI(
            'No POI is defined. A POI is required to run a hypothesis test.'
        )

    if not utils.all_pois_floating(pdf, fixed_params):
        raise exceptions.InvalidModel(
            f'POI at index [{pdf.config.poi_index}] is set as fixed, which makes profile likelihood ratio based inference impossible. Please unfix the POI to continue.'
        )


[docs] def hypotest( poi_test, data, pdf, init_pars=None, par_bounds=None, fixed_params=None, calctype="asymptotics", return_tail_probs=False, return_expected=False, return_expected_set=False, return_calculator=False, **kwargs, ): r""" Compute :math:`p`-values and test statistics for a single value of the parameter of interest. See :py:class:`~pyhf.infer.calculators.AsymptoticCalculator` and :py:class:`~pyhf.infer.calculators.ToyCalculator` on additional keyword arguments to be specified. Example: >>> import pyhf >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> observations = [51, 48] >>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata) >>> mu_test = 1.0 >>> CLs_obs, CLs_exp_band = pyhf.infer.hypotest( ... mu_test, data, model, return_expected_set=True, test_stat="qtilde" ... ) >>> CLs_obs array(0.05251497) >>> CLs_exp_band [array(0.00260626), array(0.01382005), array(0.06445321), array(0.23525644), array(0.57303621)] Args: poi_test (Number or Tensor): The value of the parameter of interest (POI) data (Number or Tensor): The data considered pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json`` init_pars (:obj:`tensor` of :obj:`float`): The starting values of the model parameters for minimization. par_bounds (:obj:`tensor`): The extrema of values the model parameters are allowed to reach in the fit. The shape should be ``(n, 2)`` for ``n`` model parameters. fixed_params (:obj:`tuple` or :obj:`list` of :obj:`bool`): The flag to set a parameter constant to its starting value during minimization. calctype (:obj:`str`): The calculator to create. Choose either 'asymptotics' (default) or 'toybased'. return_tail_probs (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{s+b}` and :math:`\mathrm{CL}_{b}` return_expected (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{\mathrm{exp}}` return_expected_set (:obj:`bool`): Bool for returning the :math:`(-2,-1,0,1,2)\sigma` :math:`\mathrm{CL}_{\mathrm{exp}}` --- the "Brazil band" return_calculator (:obj:`bool`): Bool for returning calculator. Returns: Tuple of Floats and lists of Floats and a :py:class:`~pyhf.infer.calculators.AsymptoticCalculator` or :py:class:`~pyhf.infer.calculators.ToyCalculator` instance: - :math:`\mathrm{CL}_{s}`: The modified :math:`p`-value compared to the given threshold :math:`\alpha`, typically taken to be :math:`0.05`, defined in :xref:`arXiv:1007.1727` as .. math:: \mathrm{CL}_{s} = \frac{\mathrm{CL}_{s+b}}{\mathrm{CL}_{b}} = \frac{p_{s+b}}{1-p_{b}} to protect against excluding signal models in which there is little sensitivity. In the case that :math:`\mathrm{CL}_{s} \leq \alpha` the given signal model is excluded. - :math:`\left[\mathrm{CL}_{s+b}, \mathrm{CL}_{b}\right]`: The signal + background model hypothesis :math:`p`-value .. math:: \mathrm{CL}_{s+b} = p_{s+b} = p\left(q \geq q_{\mathrm{obs}}\middle|s+b\right) = \int\limits_{q_{\mathrm{obs}}}^{\infty} f\left(q\,\middle|s+b\right)\,dq = 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|\mu'\right) and 1 minus the background only model hypothesis :math:`p`-value .. math:: \mathrm{CL}_{b} = 1- p_{b} = p\left(q \geq q_{\mathrm{obs}}\middle|b\right) = 1 - \int\limits_{-\infty}^{q_{\mathrm{obs}}} f\left(q\,\middle|b\right)\,dq = 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|0\right) for signal strength :math:`\mu` and model hypothesis signal strength :math:`\mu'`, where the cumulative density functions :math:`F\left(q(\mu)\,\middle|\mu'\right)` are given by Equations (57) and (65) of :xref:`arXiv:1007.1727` for upper-limit-like test statistic :math:`q \in \{q_{\mu}, \tilde{q}_{\mu}\}`. Only returned when ``return_tail_probs`` is ``True``. .. note:: The definitions of the :math:`\mathrm{CL}_{s+b}` and :math:`\mathrm{CL}_{b}` used are based on profile likelihood ratio test statistics. This procedure is common in the LHC-era, but differs from procedures used in the LEP and Tevatron eras, as briefly discussed in :math:`\S` 3.8 of :xref:`arXiv:1007.1727`. - :math:`\mathrm{CL}_{s,\mathrm{exp}}`: The expected :math:`\mathrm{CL}_{s}` value corresponding to the test statistic under the background only hypothesis :math:`\left(\mu=0\right)`. Only returned when ``return_expected`` is ``True``. - :math:`\mathrm{CL}_{s,\mathrm{exp}}` band: The set of expected :math:`\mathrm{CL}_{s}` values corresponding to the median significance of variations of the signal strength from the background only hypothesis :math:`\left(\mu=0\right)` at :math:`(-2,-1,0,1,2)\sigma`. That is, the :math:`p`-values that satisfy Equation (89) of :xref:`arXiv:1007.1727` .. math:: \mathrm{band}_{N\sigma} = \mu' + \sigma\,\Phi^{-1}\left(1-\alpha\right) \pm N\sigma for :math:`\mu'=0` and :math:`N \in \left\{-2, -1, 0, 1, 2\right\}`. These values define the boundaries of an uncertainty band sometimes referred to as the "Brazil band". Only returned when ``return_expected_set`` is ``True``. - a calculator: The calculator instance used in the computation of the :math:`p`-values. Either an instance of :py:class:`~pyhf.infer.calculators.AsymptoticCalculator` or :py:class:`~pyhf.infer.calculators.ToyCalculator`, depending on the value of ``calctype``. Only returned when ``return_calculator`` is ``True``. """ init_pars = init_pars or pdf.config.suggested_init() par_bounds = par_bounds or pdf.config.suggested_bounds() fixed_params = fixed_params or pdf.config.suggested_fixed() _check_hypotest_prerequisites(pdf, data, init_pars, par_bounds, fixed_params) calc = utils.create_calculator( calctype, data, pdf, init_pars, par_bounds, fixed_params, **kwargs, ) teststat = calc.teststatistic(poi_test) sig_plus_bkg_distribution, bkg_only_distribution = calc.distributions(poi_test) tb, _ = get_backend() CLsb_obs, CLb_obs, CLs_obs = tuple( tb.astensor(pvalue) for pvalue in calc.pvalues( teststat, sig_plus_bkg_distribution, bkg_only_distribution ) ) CLsb_exp, CLb_exp, CLs_exp = calc.expected_pvalues( sig_plus_bkg_distribution, bkg_only_distribution ) is_q0 = kwargs.get('test_stat', 'qtilde') == 'q0' _returns = [CLsb_obs if is_q0 else CLs_obs] if return_tail_probs: if is_q0: _returns.append([CLb_obs]) else: _returns.append([CLsb_obs, CLb_obs]) pvalues_exp_band = [ tb.astensor(pvalue) for pvalue in (CLsb_exp if is_q0 else CLs_exp) ] if return_expected_set: if return_expected: _returns.append(tb.astensor(pvalues_exp_band[2])) _returns.append(pvalues_exp_band) elif return_expected: _returns.append(tb.astensor(pvalues_exp_band[2])) if return_calculator: _returns.append(calc) # Enforce a consistent return type of the observed CLs return tuple(_returns) if len(_returns) > 1 else _returns[0]
from pyhf.infer import intervals __all__ = ["hypotest", "calculators", "intervals", "mle", "test_statistics", "utils"] def __dir__(): return __all__