{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GoF from binned cost functions\n",
    "\n",
    "The builtin cost functions for binned data, `BinnedNLL` and `ExtendedBinnedNLL` have a minimum value which is asymptotically chi2-distributed and thus can be used as a goodness-of-fit statistic. This example shows, that one still needs a large number of entries in each bin to reach the asymptotic regime."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%config InlineBackend.figure_formats = ['svg']\n",
    "from iminuit import Minuit\n",
    "from iminuit.cost import BinnedNLL, ExtendedBinnedNLL\n",
    "import numpy as np\n",
    "from numba_stats import norm, expon\n",
    "import matplotlib.pyplot as plt\n",
    "import joblib\n",
    "from scipy.stats import chi2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate(n, seed):\n",
    "    rng = np.random.default_rng(seed)\n",
    "    s = rng.normal(1, 0.1, size=rng.poisson(n))\n",
    "    b = rng.exponential(size=rng.poisson(n))\n",
    "    x = np.append(s, b)\n",
    "    return x[(x > 0) & (x < 2)]\n",
    "\n",
    "\n",
    "x = generate(1000, 1)\n",
    "plt.hist(x, bins=20, range=(0, 2));"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@joblib.delayed\n",
    "def run(n, seed):\n",
    "    x = generate(n, seed)\n",
    "    xrange = (0, 2)\n",
    "    w, xe = np.histogram(x, bins=20, range=xrange)\n",
    "\n",
    "    def model1(x, z, mu, sigma, tau):\n",
    "        return z * norm.cdf(x, mu, sigma) / np.diff(norm.cdf(xrange, mu, sigma)) + (\n",
    "            1 - z\n",
    "        ) * expon.cdf(x, 0, tau) / np.diff(expon.cdf(xrange, 0, tau))\n",
    "\n",
    "    def model2(x, s, b, mu, sigma, tau):\n",
    "        return s * n * norm.cdf(x, mu, sigma) + b * n * expon.cdf(x, 0, tau)\n",
    "\n",
    "    m = [\n",
    "        Minuit(BinnedNLL(w, xe, model1), z=0.5, mu=0.5, sigma=0.5, tau=0.5),\n",
    "        Minuit(ExtendedBinnedNLL(w, xe, model2), s=1, b=1, mu=0.5, sigma=0.5, tau=0.5),\n",
    "    ]\n",
    "    for mi in m:\n",
    "        mi.limits[\"mu\"] = (0, 2)\n",
    "        mi.limits[\"sigma\", \"tau\"] = (0.1, None)\n",
    "    m[0].limits[\"z\"] = (0, 1)\n",
    "    m[1].limits[\"s\", \"b\"] = (0, None)\n",
    "    r = []\n",
    "    for mi in m:\n",
    "        mi.migrad()\n",
    "        if mi.valid:\n",
    "            pvalue = chi2(mi.fcn._fcn.ndata - mi.nfit).sf(mi.fval)\n",
    "            r.append(pvalue)\n",
    "        else:\n",
    "            r.append(np.nan)\n",
    "    return r\n",
    "\n",
    "\n",
    "pvalues = {}\n",
    "for n in (20, 100, 1000, 10000):\n",
    "    pvalues[n] = np.array(joblib.Parallel(-1)(run(n, i) for i in range(500)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(2, 4, figsize=(8, 4), constrained_layout=True)\n",
    "for i, (ni, vi) in enumerate(pvalues.items()):\n",
    "    ax[0, i].hist(vi[:, 0])\n",
    "    ax[1, i].hist(vi[:, 1])\n",
    "    ax[0, i].set_title(\n",
    "        f\"n = {ni}, failed fits = {np.sum(np.isnan(vi[:, 0]))}\", fontsize=\"small\"\n",
    "    )\n",
    "    ax[1, i].set_title(\n",
    "        f\"n = {ni}, failed fits = {np.sum(np.isnan(vi[:, 1]))}\", fontsize=\"small\"\n",
    "    )\n",
    "fig.supxlabel(\"pvalue\");"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The top row shows the p-values of the test statistic for `BinnedNLL` and the bottom row for `ExtendedBinnedNLL`. If the test statistic was perfectly chi-square-distributed, the p-value distribution should be uniform."
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "bdbf20ff2e92a3ae3002db8b02bd1dd1b287e934c884beb29a73dced9dbd0fa3"
  },
  "kernelspec": {
   "display_name": "Python 3.8.12 ('venv': venv)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}