Basic Example
This short notebook shows how to get started with HiggsDNA and Coffea.
[1]:
from higgs_dna.utils.logger_utils import setup_logger
from higgs_dna.workflows import DYStudiesProcessor
from coffea import processor
import json
from importlib import resources
[6]:
# Input definition: one entry per dataset, keyed by the dataset name and
# holding the list of ROOT files that belong to it. The path is relative,
# so it is resolved against the directory the notebook is run from.
fileset = {
    "DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8": [
        "samples/skimmed_nano/DYJetsToLL--UL17_v6-v2_109_Skim.root",
    ],
}
[7]:
# Read the metaconditions JSON file that is shipped as a package resource
# inside `higgs_dna.metaconditions` and parse it into a Python object.
with resources.open_text(
    "higgs_dna.metaconditions",
    "Era2017_legacy_xgb_v1.json",
) as f:
    metaconditions = json.load(f)
[8]:
# Configure the processor that will be run over the input files.
# NOTE(review): the option meanings below are inferred from their names;
# confirm against the DYStudiesProcessor documentation.
processor_instance = DYStudiesProcessor(
    metaconditions=metaconditions,    # parsed JSON loaded in the previous cell
    do_systematics=False,             # presumably skips systematic variations
    apply_trigger=True,               # presumably enables the trigger selection
    output_location="output/basics",  # relative path handed to the processor
)
[9]:
# Build a coffea Runner that uses the iterative executor (compression
# disabled) and interprets the input files with the NanoAOD schema.
iterative_run = processor.Runner(
    executor=processor.IterativeExecutor(compression=None),
    schema=processor.NanoAODSchema,
)

# Run the processor over every file in `fileset`, reading the "Events" tree,
# and keep whatever the runner returns in `out`.
out = iterative_run(
    fileset,
    treename="Events",
    processor_instance=processor_instance,
)
/work/gallim/devel/HiggsDNA/higgs_dna/workflows/base.py:216: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
output[f"{prefix}_{subfield}"] = awkward.to_numpy(
/work/gallim/devel/HiggsDNA/higgs_dna/workflows/base.py:220: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
output[field] = awkward.to_numpy(diphotons[field])