All Comparisons No Model Uncertainty

Model All Comparisons No Model Uncertainty

Imports
import hist
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from hist import Hist

import mplhep as mh
Setup
# Set seed for reproducible demo
np.random.seed(42)

# Create demo histograms directly
bins = np.linspace(-9, 12, 51)

# Background components with different shapes
bkg1_data = np.random.normal(0, 2.5, 4000)  # Broad background
bkg2_data = np.random.normal(3, 1.2, 2000)  # Narrower peak
bkg3_data = np.random.normal(-1, 1.8, 1500)  # Another component

# Data = backgrounds + signal peak + some deficit
data_data = np.concatenate(
    [
        np.random.normal(0, 2.5, 3500),  # Less background here
        np.random.normal(3, 1.2, 1800),  # Similar background
        np.random.normal(-1, 1.8, 1400),  # Similar background
        np.random.normal(5, 0.8, 800),  # Clear signal peak
        np.random.normal(-3, 0.5, 200),  # Some deficit here (under-predicted)
    ]
)

# Create histograms
data_hist = Hist(hist.axis.Regular(50, -8, 8), storage=hist.storage.Weight())
data_hist.fill(data_data)

background_hists = [
    hist.new.Regular(50, -8, 8).Weight().fill(bkg1_data),
    hist.new.Regular(50, -8, 8).Weight().fill(bkg2_data),
    hist.new.Regular(50, -8, 8).Weight().fill(bkg3_data),
]

# Scale backgrounds to match data
total_bkg = sum(background_hists).sum().value
data_total = data_hist.sum().value
scale_factor = data_total / total_bkg
background_hists = [scale_factor * h for h in background_hists]

# Labels and colors
background_labels = ["Broad BG", "Peak BG", "Offset BG"]
background_colors = sns.color_palette("cubehelix", 3)

# Scale backgrounds to match data
total_bkg = sum(background_hists).sum().value
data_total = data_hist.sum().value
scale_factor = data_total / total_bkg
background_hists = [scale_factor * h for h in background_hists]

# Labels and colors
background_labels = ["Background 1", "Background 2", "Background 3"]
background_colors = sns.color_palette("cubehelix", 3)

Code

# Create comparison plots
fig, axes = plt.subplots(
    nrows=6,
    figsize=(6, 13),
    gridspec_kw={"height_ratios": [3, 1, 1, 1, 1, 1]},
)
fig.subplots_adjust(hspace=0.3)
for ax in axes[:-1]:
    ax.xaxis.set_ticklabels([])
    ax.set_xlabel(" ")
background_sum = sum(background_hists)

mh.plot_data_model_comparison(
    data_hist=data_hist,
    stacked_components=background_hists,
    stacked_labels=background_labels,
    stacked_colors=background_colors,
    xlabel="",
    ylabel="Entries",
    model_uncertainty=False,  # <--
    comparison="ratio",
    fig=fig,
    ax_main=axes[0],
    ax_comparison=axes[1],
)

mh.add_text(
    r"Multiple data-model comparisons, $\mathbf{without}$ model uncertainty",
    ax=axes[0],
    loc="over left",
    fontsize="small",
)
mh.add_text(
    r'  $\mathbf{→}$ comparison = "ratio"', ax=axes[1], loc="over left", fontsize=13
)

for k_comp, comparison in enumerate(
    ["split_ratio", "pull", "relative_difference", "difference"], start=2
):
    ax_comparison = axes[k_comp]

    # Copy the original histogram and set the uncertainties of the copy to 0.
    background_sum_copy = background_sum.copy()
    background_sum_copy[:] = np.c_[
        background_sum_copy.values(), np.zeros_like(background_sum_copy.values())
    ]

    mh.plot_comparison(
        data_hist,
        background_sum_copy,
        ax=ax_comparison,
        comparison=comparison,
        xlabel="",
        h1_label="Data",
        h2_label="Pred.",
        h1_w2method="poisson",
    )
    if comparison == "pull":
        # Since the uncertainties of the model are neglected, the pull label is "(Data - Pred.)/sigma_Data"
        ax_comparison.set_ylabel(r"$\frac{Data-Pred.}{\sigma_{Data}}$")
    mh.add_text(
        rf'  $\mathbf{{}}$ comparison = "{comparison}"',
        ax=ax_comparison,
        loc="over left",
        fontsize=13,
    )
    mh.set_fitting_ylabel_fontsize(ax_comparison)

axes[-1].set_xlabel("Observable")
Full code
import hist
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from hist import Hist

import mplhep as mh


# Set seed for reproducible demo
np.random.seed(42)

# Create demo histograms directly
bins = np.linspace(-9, 12, 51)

# Background components with different shapes
bkg1_data = np.random.normal(0, 2.5, 4000)  # Broad background
bkg2_data = np.random.normal(3, 1.2, 2000)  # Narrower peak
bkg3_data = np.random.normal(-1, 1.8, 1500)  # Another component

# Data = backgrounds + signal peak + some deficit
data_data = np.concatenate(
    [
        np.random.normal(0, 2.5, 3500),  # Less background here
        np.random.normal(3, 1.2, 1800),  # Similar background
        np.random.normal(-1, 1.8, 1400),  # Similar background
        np.random.normal(5, 0.8, 800),  # Clear signal peak
        np.random.normal(-3, 0.5, 200),  # Some deficit here (under-predicted)
    ]
)

# Create histograms
data_hist = Hist(hist.axis.Regular(50, -8, 8), storage=hist.storage.Weight())
data_hist.fill(data_data)

background_hists = [
    hist.new.Regular(50, -8, 8).Weight().fill(bkg1_data),
    hist.new.Regular(50, -8, 8).Weight().fill(bkg2_data),
    hist.new.Regular(50, -8, 8).Weight().fill(bkg3_data),
]

# Scale backgrounds to match data
total_bkg = sum(background_hists).sum().value
data_total = data_hist.sum().value
scale_factor = data_total / total_bkg
background_hists = [scale_factor * h for h in background_hists]

# Labels and colors
background_labels = ["Broad BG", "Peak BG", "Offset BG"]
background_colors = sns.color_palette("cubehelix", 3)

# Scale backgrounds to match data
total_bkg = sum(background_hists).sum().value
data_total = data_hist.sum().value
scale_factor = data_total / total_bkg
background_hists = [scale_factor * h for h in background_hists]

# Labels and colors
background_labels = ["Background 1", "Background 2", "Background 3"]
background_colors = sns.color_palette("cubehelix", 3)

# Create comparison plots
fig, axes = plt.subplots(
    nrows=6,
    figsize=(6, 13),
    gridspec_kw={"height_ratios": [3, 1, 1, 1, 1, 1]},
)
fig.subplots_adjust(hspace=0.3)
for ax in axes[:-1]:
    ax.xaxis.set_ticklabels([])
    ax.set_xlabel(" ")
background_sum = sum(background_hists)

mh.plot_data_model_comparison(
    data_hist=data_hist,
    stacked_components=background_hists,
    stacked_labels=background_labels,
    stacked_colors=background_colors,
    xlabel="",
    ylabel="Entries",
    model_uncertainty=False,  # <--
    comparison="ratio",
    fig=fig,
    ax_main=axes[0],
    ax_comparison=axes[1],
)

mh.add_text(
    r"Multiple data-model comparisons, $\mathbf{without}$ model uncertainty",
    ax=axes[0],
    loc="over left",
    fontsize="small",
)
mh.add_text(
    r'  $\mathbf{→}$ comparison = "ratio"', ax=axes[1], loc="over left", fontsize=13
)

for k_comp, comparison in enumerate(
    ["split_ratio", "pull", "relative_difference", "difference"], start=2
):
    ax_comparison = axes[k_comp]

    # Copy the original histogram and set the uncertainties of the copy to 0.
    background_sum_copy = background_sum.copy()
    background_sum_copy[:] = np.c_[
        background_sum_copy.values(), np.zeros_like(background_sum_copy.values())
    ]

    mh.plot_comparison(
        data_hist,
        background_sum_copy,
        ax=ax_comparison,
        comparison=comparison,
        xlabel="",
        h1_label="Data",
        h2_label="Pred.",
        h1_w2method="poisson",
    )
    if comparison == "pull":
        # Since the uncertainties of the model are neglected, the pull label is "(Data - Pred.)/sigma_Data"
        ax_comparison.set_ylabel(r"$\frac{Data-Pred.}{\sigma_{Data}}$")
    mh.add_text(
        rf'  $\mathbf{{}}$ comparison = "{comparison}"',
        ax=ax_comparison,
        loc="over left",
        fontsize=13,
    )
    mh.set_fitting_ylabel_fontsize(ax_comparison)

axes[-1].set_xlabel("Observable")