Source code for pyhf.infer.calculators

"""
Calculators for Hypothesis Testing.

The role of the calculators is to compute test statistic and
provide distributions of said test statistic under various
hypotheses.

Using the calculators hypothesis tests can then be performed.
"""

from pyhf.infer.mle import fixed_poi_fit
from pyhf import get_backend
from pyhf.infer import utils
import tqdm

from dataclasses import dataclass
import logging

log = logging.getLogger(__name__)

__all__ = [
    "AsymptoticCalculator",
    "AsymptoticTestStatDistribution",
    "EmpiricalDistribution",
    "ToyCalculator",
    "generate_asimov_data",
]


def __dir__():
    return __all__


[docs] def generate_asimov_data( asimov_mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=False ): """ Compute Asimov Dataset (expected yields at best-fit values) for a given POI value. Example: >>> import pyhf >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> observations = [51, 48] >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> pyhf.infer.calculators.generate_asimov_data(mu_test, data, model, None, None, None) array([ 60.61229858, 56.52802479, 270.06832542, 48.31545488]) >>> pyhf.infer.calculators.generate_asimov_data( ... mu_test, data, model, None, None, None, return_fitted_pars=True ... ) (array([ 60.61229858, 56.52802479, 270.06832542, 48.31545488]), array([1. , 0.97224597, 0.87553894])) Args: asimov_mu (:obj:`float`): The value for the parameter of interest to be used. data (:obj:`tensor`): The observed data. pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``. init_pars (:obj:`tensor` of :obj:`float`): The starting values of the model parameters for minimization. par_bounds (:obj:`tensor`): The extrema of values the model parameters are allowed to reach in the fit. The shape should be ``(n, 2)`` for ``n`` model parameters. fixed_params (:obj:`tuple` or :obj:`list` of :obj:`bool`): The flag to set a parameter constant to its starting value during minimization. return_fitted_pars (:obj:`bool`): Return the best-fit parameter values for the given ``asimov_mu``. Returns: A Tensor or a Tuple of two Tensors: - The Asimov dataset. - The Asimov parameters. Only returned if ``return_fitted_pars`` is ``True``. """ bestfit_nuisance_asimov = fixed_poi_fit( asimov_mu, data, pdf, init_pars, par_bounds, fixed_params ) asimov_data = pdf.expected_data(bestfit_nuisance_asimov) if return_fitted_pars: return asimov_data, bestfit_nuisance_asimov return asimov_data
[docs] class AsymptoticTestStatDistribution: r""" The distribution the test statistic in the asymptotic case. Note: These distributions are in :math:`-\hat{\mu}/\sigma` space. In the ROOT implementation the same sigma is assumed for both hypotheses and :math:`p`-values etc are computed in that space. This assumption is necessarily valid, but we keep this for compatibility reasons. In the :math:`-\hat{\mu}/\sigma` space, the test statistic (i.e. :math:`\hat{\mu}/\sigma`) is normally distributed with unit variance and its mean at the :math:`-\mu'`, where :math:`\mu'` is the true poi value of the hypothesis. """
[docs] def __init__(self, shift, cutoff=float("-inf")): """ Asymptotic test statistic distribution. Args: shift (:obj:`float`): The displacement of the test statistic distribution. Returns: ~pyhf.infer.calculators.AsymptoticTestStatDistribution: The asymptotic distribution of test statistic. """ self.shift = shift self.cutoff = cutoff
[docs] def cdf(self, value): """ Compute the value of the cumulative distribution function for a given value of the test statistic. Example: >>> import pyhf >>> pyhf.set_backend("numpy") >>> bkg_dist = pyhf.infer.calculators.AsymptoticTestStatDistribution(0.0) >>> bkg_dist.cdf(0.0) 0.5 Args: value (:obj:`float`): The test statistic value. Returns: Float: The integrated probability to observe a test statistic less than or equal to the observed ``value``. """ tensorlib, _ = get_backend() return tensorlib.normal_cdf(value - self.shift)
[docs] def pvalue(self, value): r""" The :math:`p`-value for a given value of the test statistic corresponding to signal strength :math:`\mu` and Asimov strength :math:`\mu'` as defined in Equations (59) and (57) of :xref:`arXiv:1007.1727` .. math:: p_{\mu} = 1-F\left(q_{\mu}\middle|\mu'\right) = 1- \Phi\left(\sqrt{q_{\mu}} - \frac{\left(\mu-\mu'\right)}{\sigma}\right) with Equation (29) .. math:: \frac{(\mu-\mu')}{\sigma} = \sqrt{\Lambda}= \sqrt{q_{\mu,A}} given the observed test statistics :math:`q_{\mu}` and :math:`q_{\mu,A}`. Example: >>> import pyhf >>> pyhf.set_backend("numpy") >>> bkg_dist = pyhf.infer.calculators.AsymptoticTestStatDistribution(0.0) >>> bkg_dist.pvalue(0.0) array(0.5) Args: value (:obj:`float`): The test statistic value. Returns: Tensor: The integrated probability to observe a value at least as large as the observed one. """ tensorlib, _ = get_backend() # computing cdf(-x) instead of 1-cdf(x) for right-tail p-value for improved numerical stability return_value = tensorlib.normal_cdf(-(value - self.shift)) invalid_value = tensorlib.ones(tensorlib.shape(return_value)) * float("nan") return tensorlib.where( tensorlib.astensor(value >= self.cutoff, dtype="bool"), return_value, invalid_value, )
[docs] def expected_value(self, nsigma): """ Return the expected value of the test statistic. Example: >>> import pyhf >>> pyhf.set_backend("numpy") >>> bkg_dist = pyhf.infer.calculators.AsymptoticTestStatDistribution(0.0) >>> n_sigma = pyhf.tensorlib.astensor([-2, -1, 0, 1, 2]) >>> bkg_dist.expected_value(n_sigma) array([-2., -1., 0., 1., 2.]) Args: nsigma (:obj:`int` or :obj:`tensor`): The number of standard deviations. Returns: Float: The expected value of the test statistic. """ tensorlib, _ = get_backend() return tensorlib.where( tensorlib.astensor(self.shift + nsigma > self.cutoff, dtype="bool"), tensorlib.astensor(self.shift + nsigma), tensorlib.astensor(self.cutoff), )
[docs] @dataclass(frozen=True) class HypoTestFitResults: """ Fitted model parameters of the fits in :py:meth:`AsymptoticCalculator.teststatistic <pyhf.infer.calculators.AsymptoticCalculator.teststatistic>` """ # ignore "F821 undefined name 'Tensor'" so as to avoid typing.Any asimov_pars: 'Tensor' # noqa: F821 free_fit_to_data: 'Tensor' # noqa: F821 free_fit_to_asimov: 'Tensor' # noqa: F821 fixed_poi_fit_to_data: 'Tensor' # noqa: F821 fixed_poi_fit_to_asimov: 'Tensor' # noqa: F821
[docs] class AsymptoticCalculator: """The Asymptotic Calculator."""
[docs] def __init__( self, data, pdf, init_pars=None, par_bounds=None, fixed_params=None, test_stat="qtilde", calc_base_dist="normal", ): r""" Asymptotic Calculator. Args: data (:obj:`tensor`): The observed data. pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``. init_pars (:obj:`tensor` of :obj:`float`): The starting values of the model parameters for minimization. par_bounds (:obj:`tensor`): The extrema of values the model parameters are allowed to reach in the fit. The shape should be ``(n, 2)`` for ``n`` model parameters. fixed_params (:obj:`tuple` or :obj:`list` of :obj:`bool`): The flag to set a parameter constant to its starting value during minimization. test_stat (:obj:`str`): The test statistic to use as a numerical summary of the data: ``'qtilde'``, ``'q'``, or ``'q0'``. * ``'qtilde'``: (default) performs the calculation using the alternative test statistic, :math:`\tilde{q}_{\mu}`, as defined under the Wald approximation in Equation (62) of :xref:`arXiv:1007.1727` (:func:`~pyhf.infer.test_statistics.qmu_tilde`). * ``'q'``: performs the calculation using the test statistic :math:`q_{\mu}` (:func:`~pyhf.infer.test_statistics.qmu`). * ``'q0'``: performs the calculation using the discovery test statistic :math:`q_{0}` (:func:`~pyhf.infer.test_statistics.q0`). calc_base_dist (:obj:`str`): The statistical distribution, ``'normal'`` or ``'clipped_normal'``, to use for calculating the :math:`p`-values. * ``'normal'``: (default) use the full Normal distribution in :math:`\hat{\mu}/\sigma` space. Note that expected limits may correspond to unphysical test statistics from scenarios with the expected :math:`\hat{\mu} > \mu`. * ``'clipped_normal'``: use a clipped Normal distribution in :math:`\hat{\mu}/\sigma` space to avoid expected limits that correspond to scenarios with the expected :math:`\hat{\mu} > \mu`. This will properly cap the test statistic at ``0``, as noted in Equation (14) and Equation (16) in :xref:`arXiv:1007.1727`. The choice of ``calc_base_dist`` only affects the :math:`p`-values for expected limits, and the default value will be changed in a future release. Returns: ~pyhf.infer.calculators.AsymptoticCalculator: The calculator for asymptotic quantities. """ self.data = data self.pdf = pdf self.init_pars = init_pars or pdf.config.suggested_init() self.par_bounds = par_bounds or pdf.config.suggested_bounds() self.fixed_params = fixed_params or pdf.config.suggested_fixed() self.test_stat = test_stat self.calc_base_dist = calc_base_dist self.sqrtqmuA_v = None self.fitted_pars = None
[docs] def distributions(self, poi_test): r""" Probability distributions of the test statistic, as defined in :math:`\S` 3 of :xref:`arXiv:1007.1727` under the Wald approximation, under the signal + background and background-only hypotheses. Example: >>> import pyhf >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> observations = [51, 48] >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(data, model, test_stat="qtilde") >>> _ = asymptotic_calculator.teststatistic(mu_test) >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test) >>> sig_plus_bkg_dist.pvalue(mu_test), bkg_dist.pvalue(mu_test) (array(0.00219262), array(0.15865525)) Args: poi_test (:obj:`float` or :obj:`tensor`): The value for the parameter of interest. Returns: Tuple (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distributions under the hypotheses. """ if self.sqrtqmuA_v is None: raise RuntimeError("need to call .teststatistic(poi_test) first") if self.calc_base_dist == "normal": cutoff = float("-inf") elif self.calc_base_dist == "clipped_normal": cutoff = -self.sqrtqmuA_v else: raise ValueError( f"unknown base distribution for asymptotics {self.calc_base_dist}" ) sb_dist = AsymptoticTestStatDistribution(-self.sqrtqmuA_v, cutoff) b_dist = AsymptoticTestStatDistribution(0.0, cutoff) return sb_dist, b_dist
[docs] def teststatistic(self, poi_test): r""" Compute the test statistic for the observed data under the studied model. The fitted parameters of the five fits that are implicitly run for each call of this method are afterwards accessible through the ``fitted_pars`` attribute, which is a :py:class:`~pyhf.infer.calculators.HypoTestFitResults` instance. Example: >>> import pyhf >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> observations = [51, 48] >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator(data, model, test_stat="qtilde") >>> asymptotic_calculator.teststatistic(mu_test) array(0.14043184) >>> asymptotic_calculator.fitted_pars HypoTestFitResults(asimov_pars=array([0. , 1.0030482 , 0.96264534]), free_fit_to_data=array([0. , 1.0030512 , 0.96266961]), free_fit_to_asimov=array([0. , 1.00304893, 0.96263365]), fixed_poi_fit_to_data=array([1. , 0.97224597, 0.87553894]), fixed_poi_fit_to_asimov=array([1. , 0.97276864, 0.87142047])) >>> asymptotic_calculator.fitted_pars.free_fit_to_asimov # best-fit parameters to Asimov dataset array([0. , 1.00304893, 0.96263365]) Args: poi_test (:obj:`float` or :obj:`tensor`): The value for the parameter of interest. Returns: Tensor: The value of the test statistic. """ tensorlib, _ = get_backend() teststat_func = utils.get_test_stat(self.test_stat) qmu_v, (mubhathat, muhatbhat) = teststat_func( poi_test, self.data, self.pdf, self.init_pars, self.par_bounds, self.fixed_params, return_fitted_pars=True, ) sqrtqmu_v = tensorlib.sqrt(qmu_v) asimov_mu = 1.0 if self.test_stat == 'q0' else 0.0 asimov_data, asimov_mubhathat = generate_asimov_data( asimov_mu, self.data, self.pdf, self.init_pars, self.par_bounds, self.fixed_params, return_fitted_pars=True, ) qmuA_v, (mubhathat_A, muhatbhat_A) = teststat_func( poi_test, asimov_data, self.pdf, self.init_pars, self.par_bounds, self.fixed_params, return_fitted_pars=True, ) self.sqrtqmuA_v = tensorlib.sqrt(qmuA_v) self.fitted_pars = HypoTestFitResults( asimov_pars=asimov_mubhathat, free_fit_to_data=muhatbhat, free_fit_to_asimov=muhatbhat_A, fixed_poi_fit_to_data=mubhathat, fixed_poi_fit_to_asimov=mubhathat_A, ) if self.test_stat in ["q", "q0"]: # qmu or q0 teststat = sqrtqmu_v - self.sqrtqmuA_v else: # qtilde def _true_case(): teststat = sqrtqmu_v - self.sqrtqmuA_v return teststat def _false_case(): qmu = tensorlib.power(sqrtqmu_v, 2) qmu_A = tensorlib.power(self.sqrtqmuA_v, 2) teststat = (qmu - qmu_A) / (2 * self.sqrtqmuA_v) return teststat # Use '<=' rather than '<' to avoid Issue #1992 teststat = tensorlib.conditional( (sqrtqmu_v <= self.sqrtqmuA_v), _true_case, _false_case ) return tensorlib.astensor(teststat)
[docs] def pvalues(self, teststat, sig_plus_bkg_distribution, bkg_only_distribution): r""" Calculate the :math:`p`-values for the observed test statistic under the signal + background and background-only model hypotheses. Example: >>> import pyhf >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> observations = [51, 48] >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator( ... data, model, test_stat="qtilde" ... ) >>> q_tilde = asymptotic_calculator.teststatistic(mu_test) >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test) >>> CLsb, CLb, CLs = asymptotic_calculator.pvalues(q_tilde, sig_plus_bkg_dist, bkg_dist) >>> CLsb, CLb, CLs (array(0.02332502), array(0.4441594), array(0.05251497)) Args: teststat (:obj:`tensor`): The test statistic. sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distribution for the signal + background hypothesis. bkg_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distribution for the background-only hypothesis. Returns: Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic corresponding to the :math:`\mathrm{CL}_{s+b}`, :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. """ tensorlib, _ = get_backend() CLsb = sig_plus_bkg_distribution.pvalue(teststat) CLb = bkg_only_distribution.pvalue(teststat) CLs = tensorlib.astensor(CLsb / CLb) return CLsb, CLb, CLs
[docs] def expected_pvalues(self, sig_plus_bkg_distribution, bkg_only_distribution): r""" Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the median significance of variations of the signal strength from the background only hypothesis :math:`\left(\mu=0\right)` at :math:`(-2,-1,0,1,2)\sigma`. Example: >>> import pyhf >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> observations = [51, 48] >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> asymptotic_calculator = pyhf.infer.calculators.AsymptoticCalculator( ... data, model, test_stat="qtilde" ... ) >>> _ = asymptotic_calculator.teststatistic(mu_test) >>> sig_plus_bkg_dist, bkg_dist = asymptotic_calculator.distributions(mu_test) >>> CLsb_exp_band, CLb_exp_band, CLs_exp_band = asymptotic_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist) >>> CLs_exp_band [array(0.00260626), array(0.01382005), array(0.06445321), array(0.23525644), array(0.57303621)] Args: sig_plus_bkg_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distribution for the signal + background hypothesis. bkg_only_distribution (~pyhf.infer.calculators.AsymptoticTestStatDistribution): The distribution for the background-only hypothesis. Returns: Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic corresponding to the :math:`\mathrm{CL}_{s+b}`, :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. """ # Calling pvalues is easier then repeating the CLs calculation here tb, _ = get_backend() return list( map( list, zip( *( self.pvalues( test_stat, sig_plus_bkg_distribution, bkg_only_distribution ) for test_stat in [ bkg_only_distribution.expected_value(n_sigma) for n_sigma in [2, 1, 0, -1, -2] ] ) ), ) )
[docs] class EmpiricalDistribution: """ The empirical distribution of the test statistic. Unlike :py:class:`~pyhf.infer.calculators.AsymptoticTestStatDistribution` where the distribution for the test statistic is normally distributed, the :math:`p`-values etc are computed from the sampled distribution. """
[docs] def __init__(self, samples): """ Empirical distribution. Args: samples (:obj:`tensor`): The test statistics sampled from the distribution. Returns: ~pyhf.infer.calculators.EmpiricalDistribution: The empirical distribution of the test statistic. """ tensorlib, _ = get_backend() self.samples = tensorlib.ravel(samples)
[docs] def pvalue(self, value): """ Compute the :math:`p`-value for a given value of the test statistic. Examples: >>> import pyhf >>> import numpy.random as random >>> random.seed(0) >>> pyhf.set_backend("numpy") >>> mean = pyhf.tensorlib.astensor([5]) >>> std = pyhf.tensorlib.astensor([1]) >>> normal = pyhf.probability.Normal(mean, std) >>> samples = normal.sample((100,)) >>> dist = pyhf.infer.calculators.EmpiricalDistribution(samples) >>> dist.pvalue(7) array(0.02) >>> import pyhf >>> import numpy.random as random >>> random.seed(0) >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> init_pars = model.config.suggested_init() >>> par_bounds = model.config.suggested_bounds() >>> fixed_params = model.config.suggested_fixed() >>> mu_test = 1.0 >>> pdf = model.make_pdf(pyhf.tensorlib.astensor(init_pars)) >>> samples = pdf.sample((100,)) >>> test_stat_dist = pyhf.infer.calculators.EmpiricalDistribution( ... pyhf.tensorlib.astensor( ... [pyhf.infer.test_statistics.qmu_tilde(mu_test, sample, model, init_pars, par_bounds, fixed_params) for sample in samples] ... ) ... ) >>> test_stat_dist.pvalue(test_stat_dist.samples[9]) array(0.3) Args: value (:obj:`float`): The test statistic value. Returns: Tensor: The integrated probability to observe a value at least as large as the observed one. """ tensorlib, _ = get_backend() return tensorlib.astensor( tensorlib.sum( tensorlib.where( self.samples >= value, tensorlib.astensor(1), tensorlib.astensor(0) ) ) / tensorlib.shape(self.samples)[0] )
[docs] def expected_value(self, nsigma): """ Return the expected value of the test statistic. Examples: >>> import pyhf >>> import numpy.random as random >>> random.seed(0) >>> pyhf.set_backend("numpy") >>> mean = pyhf.tensorlib.astensor([5]) >>> std = pyhf.tensorlib.astensor([1]) >>> normal = pyhf.probability.Normal(mean, std) >>> samples = normal.sample((100,)) >>> dist = pyhf.infer.calculators.EmpiricalDistribution(samples) >>> dist.expected_value(nsigma=1) 6.15094381... >>> import pyhf >>> import numpy.random as random >>> random.seed(0) >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> init_pars = model.config.suggested_init() >>> par_bounds = model.config.suggested_bounds() >>> fixed_params = model.config.suggested_fixed() >>> mu_test = 1.0 >>> pdf = model.make_pdf(pyhf.tensorlib.astensor(init_pars)) >>> samples = pdf.sample((100,)) >>> dist = pyhf.infer.calculators.EmpiricalDistribution( ... pyhf.tensorlib.astensor( ... [ ... pyhf.infer.test_statistics.qmu_tilde( ... mu_test, sample, model, init_pars, par_bounds, fixed_params ... ) ... for sample in samples ... ] ... ) ... ) >>> n_sigma = pyhf.tensorlib.astensor([-2, -1, 0, 1, 2]) >>> dist.expected_value(n_sigma) array([0.00000000e+00, 0.00000000e+00, 5.53671231e-04, 8.29987137e-01, 2.99592664e+00]) Args: nsigma (:obj:`int` or :obj:`tensor`): The number of standard deviations. Returns: Float: The expected value of the test statistic. """ tensorlib, _ = get_backend() return tensorlib.percentile( self.samples, tensorlib.normal_cdf(nsigma) * 100, interpolation="linear" )
[docs] class ToyCalculator: """The Toy-based Calculator."""
[docs] def __init__( self, data, pdf, init_pars=None, par_bounds=None, fixed_params=None, test_stat="qtilde", ntoys=2000, track_progress=True, ): r""" Toy-based Calculator. Args: data (:obj:`tensor`): The observed data. pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``. init_pars (:obj:`tensor` of :obj:`float`): The starting values of the model parameters for minimization. par_bounds (:obj:`tensor`): The extrema of values the model parameters are allowed to reach in the fit. The shape should be ``(n, 2)`` for ``n`` model parameters. fixed_params (:obj:`tuple` or :obj:`list` of :obj:`bool`): The flag to set a parameter constant to its starting value during minimization. test_stat (:obj:`str`): The test statistic to use as a numerical summary of the data: ``'qtilde'``, ``'q'``, or ``'q0'``. * ``'qtilde'``: (default) performs the calculation using the alternative test statistic, :math:`\tilde{q}_{\mu}`, as defined under the Wald approximation in Equation (62) of :xref:`arXiv:1007.1727` (:func:`~pyhf.infer.test_statistics.qmu_tilde`). * ``'q'``: performs the calculation using the test statistic :math:`q_{\mu}` (:func:`~pyhf.infer.test_statistics.qmu`). * ``'q0'``: performs the calculation using the discovery test statistic :math:`q_{0}` (:func:`~pyhf.infer.test_statistics.q0`). ntoys (:obj:`int`): Number of toys to use (how many times to sample the underlying distributions). track_progress (:obj:`bool`): Whether to display the `tqdm` progress bar or not (outputs to `stderr`). Returns: ~pyhf.infer.calculators.ToyCalculator: The calculator for toy-based quantities. """ self.ntoys = ntoys self.data = data self.pdf = pdf self.init_pars = init_pars or pdf.config.suggested_init() self.par_bounds = par_bounds or pdf.config.suggested_bounds() self.fixed_params = fixed_params or pdf.config.suggested_fixed() self.test_stat = test_stat self.track_progress = track_progress
[docs] def distributions(self, poi_test, track_progress=None): """ Probability distributions of the test statistic value under the signal + background and background-only hypotheses. These distributions are produced by generating pseudo-data ("toys") with the nuisance parameters set to their conditional maximum likelihood estimators at the corresponding value of the parameter of interest for each hypothesis, following the joint recommendations of the ATLAS and CMS experiments in |LHC Higgs search combination procedure|_. .. _LHC Higgs search combination procedure: https://inspirehep.net/literature/1196797 .. |LHC Higgs search combination procedure| replace:: *Procedure for the LHC Higgs boson search combination in Summer 2011* Example: >>> import pyhf >>> import numpy.random as random >>> random.seed(0) >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> observations = [51, 48] >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> toy_calculator = pyhf.infer.calculators.ToyCalculator( ... data, model, ntoys=100, track_progress=False ... ) >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test) >>> sig_plus_bkg_dist.pvalue(mu_test), bkg_dist.pvalue(mu_test) (array(0.14), array(0.79)) Args: poi_test (:obj:`float` or :obj:`tensor`): The value for the parameter of interest. track_progress (:obj:`bool`): Whether to display the `tqdm` progress bar or not (outputs to `stderr`) Returns: Tuple (~pyhf.infer.calculators.EmpiricalDistribution): The distributions under the hypotheses. """ tensorlib, _ = get_backend() sample_shape = (self.ntoys,) signal_pars = fixed_poi_fit( poi_test, self.data, self.pdf, self.init_pars, self.par_bounds, self.fixed_params, ) signal_pdf = self.pdf.make_pdf(signal_pars) signal_sample = signal_pdf.sample(sample_shape) bkg_pars = fixed_poi_fit( 1.0 if self.test_stat == 'q0' else 0.0, self.data, self.pdf, self.init_pars, self.par_bounds, self.fixed_params, ) bkg_pdf = self.pdf.make_pdf(bkg_pars) bkg_sample = bkg_pdf.sample(sample_shape) teststat_func = utils.get_test_stat(self.test_stat) tqdm_options = dict( total=self.ntoys, leave=False, disable=not ( track_progress if track_progress is not None else self.track_progress ), unit='toy', ) signal_teststat = [] for sample in tqdm.tqdm(signal_sample, **tqdm_options, desc='Signal-like'): signal_teststat.append( teststat_func( poi_test, sample, self.pdf, self.init_pars, self.par_bounds, self.fixed_params, ) ) bkg_teststat = [] for sample in tqdm.tqdm(bkg_sample, **tqdm_options, desc='Background-like'): bkg_teststat.append( teststat_func( poi_test, sample, self.pdf, self.init_pars, self.par_bounds, self.fixed_params, ) ) s_plus_b = EmpiricalDistribution(tensorlib.astensor(signal_teststat)) b_only = EmpiricalDistribution(tensorlib.astensor(bkg_teststat)) return s_plus_b, b_only
[docs] def pvalues(self, teststat, sig_plus_bkg_distribution, bkg_only_distribution): r""" Calculate the :math:`p`-values for the observed test statistic under the signal + background and background-only model hypotheses. Example: >>> import pyhf >>> import numpy.random as random >>> random.seed(0) >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> observations = [51, 48] >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> toy_calculator = pyhf.infer.calculators.ToyCalculator( ... data, model, ntoys=100, track_progress=False ... ) >>> q_tilde = toy_calculator.teststatistic(mu_test) >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test) >>> CLsb, CLb, CLs = toy_calculator.pvalues(q_tilde, sig_plus_bkg_dist, bkg_dist) >>> CLsb, CLb, CLs (array(0.03), array(0.37), array(0.08108108)) Args: teststat (:obj:`tensor`): The test statistic. sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the signal + background hypothesis. bkg_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the background-only hypothesis. Returns: Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic corresponding to the :math:`\mathrm{CL}_{s+b}`, :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. """ tensorlib, _ = get_backend() CLsb = sig_plus_bkg_distribution.pvalue(teststat) CLb = bkg_only_distribution.pvalue(teststat) CLs = tensorlib.astensor(CLsb / CLb) return CLsb, CLb, CLs
[docs] def expected_pvalues(self, sig_plus_bkg_distribution, bkg_only_distribution): r""" Calculate the :math:`\mathrm{CL}_{s}` values corresponding to the median significance of variations of the signal strength from the background only hypothesis :math:`\left(\mu=0\right)` at :math:`(-2,-1,0,1,2)\sigma`. Example: >>> import pyhf >>> import numpy.random as random >>> random.seed(0) >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> observations = [51, 48] >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> toy_calculator = pyhf.infer.calculators.ToyCalculator( ... data, model, ntoys=100, track_progress=False ... ) >>> sig_plus_bkg_dist, bkg_dist = toy_calculator.distributions(mu_test) >>> CLsb_exp_band, CLb_exp_band, CLs_exp_band = toy_calculator.expected_pvalues(sig_plus_bkg_dist, bkg_dist) >>> CLs_exp_band [array(0.), array(0.), array(0.08403955), array(0.21892596), array(0.86072977)] Args: sig_plus_bkg_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the signal + background hypothesis. bkg_only_distribution (~pyhf.infer.calculators.EmpiricalDistribution): The distribution for the background-only hypothesis. Returns: Tuple (:obj:`tensor`): The :math:`p`-values for the test statistic corresponding to the :math:`\mathrm{CL}_{s+b}`, :math:`\mathrm{CL}_{b}`, and :math:`\mathrm{CL}_{s}`. """ tb, _ = get_backend() pvalues = tb.astensor( [ self.pvalues( test_stat, sig_plus_bkg_distribution, bkg_only_distribution ) for test_stat in bkg_only_distribution.samples ] ) # percentiles for -2, -1, 0, 1, 2 standard deviations of the Normal distribution normal_percentiles = tb.astensor( [2.27501319, 15.86552539, 50.0, 84.13447461, 97.72498681] ) pvalues_exp_band = tb.transpose( tb.percentile(pvalues, normal_percentiles, axis=0) ) return [[tb.astensor(pvalue) for pvalue in band] for band in pvalues_exp_band]
[docs] def teststatistic(self, poi_test): """ Compute the test statistic for the observed data under the studied model. Example: >>> import pyhf >>> import numpy.random as random >>> random.seed(0) >>> pyhf.set_backend("numpy") >>> model = pyhf.simplemodels.uncorrelated_background( ... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0] ... ) >>> observations = [51, 48] >>> data = observations + model.config.auxdata >>> mu_test = 1.0 >>> toy_calculator = pyhf.infer.calculators.ToyCalculator( ... data, model, ntoys=100, track_progress=False ... ) >>> toy_calculator.teststatistic(mu_test) array(3.93824492) Args: poi_test (:obj:`float` or :obj:`tensor`): The value for the parameter of interest. Returns: Tensor: The value of the test statistic. """ teststat_func = utils.get_test_stat(self.test_stat) teststat = teststat_func( poi_test, self.data, self.pdf, self.init_pars, self.par_bounds, self.fixed_params, ) return teststat