# Source code for higgs_dna.tools.photonid_mva

import warnings
from typing import List, Optional, Tuple

import awkward
import numpy
import xgboost


def load_photonid_mva(fname: str) -> Optional[xgboost.Booster]:
    """Load the photon ID MVA model stored in ``fname``.

    Args:
        fname: Path of the model file handed to ``xgboost.Booster.load_model``.

    Returns:
        The loaded booster, or ``None`` when xgboost raises an
        ``XGBoostError`` while creating the booster or loading the model.
        In that case a warning is issued instead of propagating the error.
    """
    try:
        booster = xgboost.Booster()
        booster.load_model(fname)
    except xgboost.core.XGBoostError:
        # Best effort: callers receive None and are expected to skip the MVA.
        warnings.warn(f"SKIPPING photonid_mva, could not find: {fname}")
        return None
    return booster


def calculate_photonid_mva(
    mva: Tuple[Optional[xgboost.Booster], List[str]],
    photon: awkward.Array,
) -> awkward.Array:
    """Recompute PhotonIDMVA on-the-fly.

    This step is necessary considering that the inputs have to be corrected
    with the QRC process. Following is the list of features (barrel has 12,
    endcap two more):

    EB:
        events.Photon.energyRaw
        events.Photon.r9
        events.Photon.sieie
        events.Photon.etaWidth
        events.Photon.phiWidth
        events.Photon.sieip
        events.Photon.s4
        events.Photon.pfPhoIso03
        events.Photon.pfChargedIsoPFPV
        events.Photon.pfChargedIsoWorstVtx
        events.Photon.eta
        events.fixedGridRhoAll

    EE: EB +
        events.Photon.esEffSigmaRR
        events.Photon.esEnergyOverRawE

    Args:
        mva: Pair ``(booster, var_order)``. ``booster`` is the loaded xgboost
            model, or ``None`` when no model is available. ``var_order`` is
            the list of field names read from ``photon``, in the column order
            passed to the model.
        photon: Photon collection providing ``pt`` and every field named in
            ``var_order``. Each field is converted with ``awkward.to_numpy``
            and stacked column-wise, so this presumably has to be a flat
            (non-jagged) array -- TODO confirm against the caller.

    Returns:
        ``awkward.ones_like(photon.pt)`` when ``booster`` is ``None``.
        Otherwise the output of ``booster.predict`` rescaled as
        ``-1 + 2 * score``; this is returned as produced by xgboost (a numpy
        array according to the xgboost documentation) and is not wrapped back
        into an awkward array.
    """
    photonid_mva, var_order = mva

    if photonid_mva is None:
        # No model available: return a placeholder of ones shaped like pt.
        return awkward.ones_like(photon.pt)

    # One row per photon, one column per input feature, in var_order order.
    bdt_inputs = numpy.column_stack(
        [awkward.to_numpy(photon[name]) for name in var_order]
    )
    tempmatrix = xgboost.DMatrix(bdt_inputs, feature_names=var_order)

    mvaID = photonid_mva.predict(tempmatrix)

    # Only needed to compare to TMVA
    # mvaID = 1.0 - 2.0 / (1.0 + numpy.exp(2.0 * mvaID))

    # The previous transformation was not working correctly, peaking at about 0.7.
    # Since we can't really remember why that functional form was picked in the
    # first place, we decided to switch to a simpler stretch of the output that
    # works better, even though not perfectly.
    # Open for changes/ideas.
    mvaID = -1 + 2 * mvaID

    return mvaID