Previous page
b-Jet charge ID using DNN
Overview
This page provides useful information about the implementation and training of the DNN used to identify the charge of jets.
On this page:
Useful Links
Production of inputs
DeepNTuples
The following code is used to produce the flat ntuples used for training the neural net:
cmsrel CMSSW_10_6_16
cd CMSSW_10_6_16/src/
cmsenv
git cms-init
git clone https://github.com/CMSDeepFlavour/DeepNTuples
cd DeepNTuples
git checkout 94X
# Add JetToolBox
git submodule init
git submodule update
#compile
scram b -j 4
cd CMSSW_10_6_16/src
cmsenv
- To run local tests, execute:
file=/store/mc/RunIISummer19UL17MiniAOD/TTJets_TuneCP5_13TeV-amcatnloFXFX-pythia8/MINIAODSIM/106X_mc2017_realistic_v6-v2/60000/F566340F-0440-D94D-B710-BA694EFFB6FB.root
cmsRun $CMSSW_BASE/src/DeepNTuples/DeepNtuplizer/production/DeepNtuplizer.py inputFiles=$file maxEvents=2000 skipEvents=1000 outputFile=output1
The full list of files can be found
here
. If the file doesn't load, copy it locally:
xrdcp root://cms-xrd-global.cern.ch/$file .
#OR
site=T1_IT_CNAF_MSS
lfn=$file
pfl=`curl -ks "https://cmsweb.cern.ch/phedex/datasvc/perl/prod/lfn2pfn?node=${site}&lfn=${lfn}&protocol=srmv2" | grep PFN | cut -d "'" -f4`
env -i X509_USER_PROXY=/tmp/x509up_u58751 gfal-copy -n 1 $pfl "file:///`pwd`/miniAOD.root"
file="file:///`pwd`/miniAOD.root"
Before running the code, add extra variables (such as jet/hadron pdgId) to the package:
update DeepNtuplizer_cff.py
jetFlavourInfos = cms.InputTag("slimmedGenJetsFlavourInfos"),
update DeepNtuplizer.cc
jetinfo->setJetInfoToken(
consumes<reco::JetFlavourInfoMatchingCollection>(
iConfig.getParameter<edm::InputTag>("jetFlavourInfos")));
update ntuple_JetInfo.h
#include "SimDataFormats/JetMatching/interface/JetFlavourInfo.h"
#include "SimDataFormats/JetMatching/interface/JetFlavourInfoMatching.h"
void setJetInfoToken(edm::EDGetTokenT<reco::JetFlavourInfoMatchingCollection> jetFlavourInfosToken) {
jetFlavourInfosToken_ = jetFlavourInfosToken;
}
edm::EDGetTokenT<reco::JetFlavourInfoMatchingCollection> jetFlavourInfosToken_;
edm::Handle<reco::JetFlavourInfoMatchingCollection> jetFlavourInfos;
// labels (MC truth)
int gen_parton_pdgid_;
int gen_hadron_pdgid_;
float gen_hadron_pt_;
update ntuple_JetInfo.cc
// truth labels
addBranch(tree,"gen_parton_pdgid" ,&gen_parton_pdgid_ ,"gen_parton_pdgid_/I" );
addBranch(tree,"gen_hadron_pdgid" ,&gen_hadron_pdgid_ ,"gen_hadron_pdgid_/I" );
addBranch(tree,"gen_hadron_pt" ,&gen_hadron_pt_ ,"gen_hadron_pt_/f" );
iEvent.getByToken(jetFlavourInfosToken_, jetFlavourInfos);
genDecay_ = -1.;
// jet charge implementation:
gen_parton_pdgid_ = 0; gen_hadron_pdgid_ = 0; gen_hadron_pt_ = 0;
for (const reco::JetFlavourInfoMatching & jetFlavourInfoMatching : *jetFlavourInfos) {
if (deltaR(jet.p4(), jetFlavourInfoMatching.first->p4()) > 0.4) continue;
gen_parton_pdgid_ = jetFlavourInfoMatching.second.getPartonFlavour();
const reco::GenParticleRefVector & bHadrons = jetFlavourInfoMatching.second.getbHadrons();
if (bHadrons.size()==0) continue;
gen_hadron_pdgid_= bHadrons.at(0)->pdgId();
gen_hadron_pt_= bHadrons.at(0)->pt();
break;
}
//if(jet.genParton()) gen_parton_pdgid_ = int(jet.genParton()->pdgId());
//if(jet.jetFlavourInfo().getbHadrons().size()) {
// gen_hadron_pdgid_ = jet.jetFlavourInfo().getbHadrons().at(0)->pdgId();
// gen_hadron_pt_ = jet.jetFlavourInfo().getbHadrons().at(0)->pt();
// }
slim ntuples:
To slim the ntuples, modify the
ntuple_JetInfo::fillBranches()
function to skip unwanted jets. For example:
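- Skim only leptonic b-jets, i.e. reject every jet that is neither isPhysLeptonicB_ nor isPhysLeptonicB_C_ (NOTE: the condition below appears to be missing the `!` negations on both flags; please verify before use):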
if(isPhysLeptonicB_ && isPhysLeptonicB_C_) returnval=false;
- Skim only hadronic b-jets:
if(isPhysB_==0 && isPhysBB_==0) returnval=false;
running on condor:
To use the local condor batch system to analyze files located at remote sites, add
use_x509userproxy = true
to the condor jdl file and set up the proxy in your run file (it is recommended to set the proxy path first):
export X509_USER_PROXY=${HOME}/private/.x509up_${UID}
echo YOURPASSWORD | voms-proxy-init -voms cms -rfc -out ${HOME}/private/.x509up_${UID} -valid 192:00
Output files
A small portion of the ntuplized ttbar sample is available in:
/eos/cms/store/cmst3/group/top/bjetcharge/TTJets_TuneCP5_13TeV-amcatnloFXFX-pythia8
Training
The training is performed using the
DeepJetCore
package, which can be exported with the
Singularity
container:
env -i PATH=/usr/bin/ SINGULARITY_CACHEDIR="/tmp/$(whoami)/singularity" singularity run -B /home -B /eos -B /afs --bind /etc/krb5.conf:/etc/krb5.conf --bind /proc/fs/openafs/afs_ioctl:/proc/fs/openafs/afs_ioctl --bind /usr/vice/etc:/usr/vice/etc /eos/home-j/jkiesele/singularity/images/deepjetcore3_latest.sif
Build a new subpackage:
createSubpackage.py BJetChargeID --data
cd BJetChargeID; source env.sh
Once you have the data in the
example_data
folder, convert it to the format that will be used by the ML algorithm (modify
modules/datastructure/TrainData_example.py
to set up the correct inputs and labels):
# labels
truth = np.expand_dims((urfile.array("gen_parton_pdgid")/5+1)/2, axis=1)
# inputs:
lep_pt=[]; lep_charge=[]
pt=charge=0
muons_pt = urfile.array("muons_pt")
electrons_pt = urfile.array("electrons_pt")
muons_charge = urfile.array("muons_charge")
electrons_charge = urfile.array("electrons_charge")
for i in range(urfile.numentries):
nmu=len(muons_pt[i]); nel=len(electrons_pt[i])
if nmu:
pt=muons_pt[i][0]
charge=muons_charge[i][0]
if nel and (pt < electrons_pt[i][0]):
pt=electrons_pt[i][0]
charge=electrons_charge[i][0]
elif nel:
pt=electrons_pt[i][0]
charge=electrons_charge[i][0]
lep_pt.append(pt)
lep_charge.append(charge)
feature_array = np.concatenate([
np.expand_dims(lep_pt, axis=1),
np.expand_dims(lep_charge, axis=1)
],axis=1)
convertFromSource.py -i example_data/train_files.txt -o train_data -c TrainData_example
The training is done using the following script (modify the
Train/training_example.py
script to set your own model):
python3 Train/training_example.py train_data/dataCollection.djcdc result_train --gpu none
The results will appear in
result_train
. To predict the output for a new sample, run:
predict.py result_train/KERAS_check_best_model.h5 result_train/trainsamples.djcdc example_data/test_files.txt result_test --gpu none
The output file in
result_test/
will contain the predicted values.
To compare the predicted values to the truth, you can do the following:
root -l example_data/out_bjet_lep_10.root
tree->AddFriend("tree","result_test/pred_out_bjet_lep_10.root")
tree->Draw("gen_parton_pdgid : tree.prob_isA","","box")
--
MichaelPitt - 2020-01-23