Difference: CRAB3AdvancedTutorial (19 vs. 20)

Revision 20 2015-07-07 - AndresTanasijczuk

Line: 1 to 1
 
META TOPICPARENT name="SWGuideCrab"
<!-- /ActionTrackerPlugin -->
Line: 342 to 342
  # Production Info process.configurationMetadata = cms.untracked.PSet(
Changed:
<
<
version = cms.untracked.string('$Revision: 1.19 $'),
>
>
version = cms.untracked.string('$Revision: 1.20 $'),
  annotation = cms.untracked.string('MinBias_8TeV_cfi nevts:10'), name = cms.untracked.string('Applications') )
Line: 390 to 390
  'PARP(77)=1.016 ! CR', 'PARP(78)=0.538 ! CR', 'PARP(80)=0.1 ! Prob. colored parton from BBR',
Changed:
<
<
'PARP(83)=0.356 ! Mult
>
>
'PARP(83)=0.356 ! Multiple interactions: matter distribution parameter', 'PARP(84)=0.651 ! Multiple interactions: matter distribution parameter', 'PARP(62)=1.025 ! ISR cutoff', 'MSTP(91)=1 ! Gaussian primordial kT', 'PARP(93)=10.0 ! primordial kT-max', 'MSTP(81)=21 ! multiple parton interactions 1 is Pythia default', 'MSTP(82)=4 ! Defines the multi-parton model'), processParameters = cms.vstring('MSEL=0 ! User defined processes', 'MSUB(11)=1 ! Min bias process', 'MSUB(12)=1 ! Min bias process', 'MSUB(13)=1 ! Min bias process', 'MSUB(28)=1 ! Min bias process', 'MSUB(53)=1 ! Min bias process', 'MSUB(68)=1 ! Min bias process', 'MSUB(92)=1 ! Min bias process, single diffractive', 'MSUB(93)=1 ! Min bias process, single diffractive', 'MSUB(94)=1 ! Min bias process, double diffractive', 'MSUB(95)=1 ! Min bias process'), parameterSets = cms.vstring('pythiaUESettings', 'processParameters') ) )

# Path and EndPath definitions process.generation_step = cms.Path(process.pgen) process.simulation_step = cms.Path(process.psim) process.genfiltersummary_step = cms.EndPath(process.genFilterSummary) process.endjob_step = cms.EndPath(process.endOfProcess) process.RAWSIMoutput_step = cms.EndPath(process.RAWSIMoutput)

# Schedule definition process.schedule = cms.Schedule( process.generation_step, process.genfiltersummary_step, process.simulation_step, process.endjob_step, process.RAWSIMoutput_step )

# Filter all path with the production filter sequence for path in process.paths: getattr(process,path)._seq = process.generator * getattr(process,path)._seq </>

<!--/twistyPlugin-->
Show Hide CRAB configuration file.
<!--/twistyPlugin twikiMakeVisibleInline-->
# CRAB3 configuration for exercise 4.A: a private MC generation task whose
# jobs use the user script named in JobType.scriptExe.
from CRABClient.UserUtilities import config
config = config()

config.General.requestName = 'CRAB3_Advanced_Tutorial_May2015_Exercise4A'

config.JobType.pluginName = 'PrivateMC'
config.JobType.psetName = 'pset_tutorial_MC_generation.py'
# User script for the jobs (shown in the "user script" section of this exercise).
config.JobType.scriptExe = 'myscript.sh'

config.Data.primaryDataset = 'MinBias'
# With EventBased splitting the units below are events: 10 per job, 30 in total.
config.Data.splitting = 'EventBased'
config.Data.unitsPerJob = 10
config.Data.totalUnits = 30
config.Data.publication = True
# Reuse the request name as the name of the published dataset.
config.Data.publishDataName = config.General.requestName

# Placeholder: replace it with the name of your storage site, as a string.
config.Site.storageSite = <site where the user has permission to write>
<!--/twistyPlugin-->
Show Hide user script.
<!--/twistyPlugin twikiMakeVisibleInline-->
# Print a marker message before and after running cmsRun.
echo "================= CMSRUN starting ===================="
# -j FrameworkJobReport.xml makes cmsRun write its job report to that file.
cmsRun -j FrameworkJobReport.xml -p PSet.py
echo "================= CMSRUN finished ===================="
<!--/twistyPlugin-->

4.B) Run a task as in 4.A, but save the messages into a text file. Consider this text file as an additional output file that should be transferred to the destination storage. Once it has been transferred, retrieve it with crab getoutput and check that the messages are there.

Help:

Show Hide CRAB configuration file.
<!--/twistyPlugin twikiMakeVisibleInline-->
# CRAB3 configuration for exercise 4.B: same task as 4.A, plus an additional
# output file written by the user script.
from CRABClient.UserUtilities import config
config = config()

config.General.requestName = 'CRAB3_Advanced_Tutorial_May2015_Exercise4B'

config.JobType.pluginName = 'PrivateMC'
config.JobType.psetName = 'pset_tutorial_MC_generation.py'
config.JobType.scriptExe = 'myscript.sh'
# Text file written by the user script; it has to be transferred to the
# destination storage together with the other job outputs.
config.JobType.outputFiles = ['output.txt']

config.Data.primaryDataset = 'MinBias'
# With EventBased splitting the units below are events: 10 per job, 30 in total.
config.Data.splitting = 'EventBased'
config.Data.unitsPerJob = 10
config.Data.totalUnits = 30
config.Data.publication = True
# Reuse the request name as the name of the published dataset.
config.Data.publishDataName = config.General.requestName

# Placeholder: replace it with the name of your storage site, as a string.
config.Site.storageSite = <site where the user has permission to write>
<!--/twistyPlugin-->
Show Hide user script.
<!--/twistyPlugin twikiMakeVisibleInline-->
# Append the marker messages to output.txt (the file listed in
# JobType.outputFiles) instead of printing them.
echo "================= CMSRUN starting ====================" >> output.txt
cmsRun -j FrameworkJobReport.xml -p PSet.py
echo "================= CMSRUN finished ====================" >> output.txt
<!--/twistyPlugin-->

4.C) Run a task as in 4.A, but write the messages in a local text file, include this file in the input sandbox, and make your script read the messages from that file.

Help:

Show Hide CRAB configuration file.
<!--/twistyPlugin twikiMakeVisibleInline-->
from CRABClient.UserUtilities import config
config = config()

config.General.requestName = 'CRAB3_Advanced_Tutorial_May2015_Exercise4B'

config.JobType.pluginName = 'PrivateMC'
config.JobType.psetName = 'pset_tutorial_MC_generation.py'
config.JobType.scriptExe = 'myscript.sh'
config.JobType.inputFiles = ['input.txt']

config.Data.primaryDataset = 'MinBias'
config.Data.splitting = 'EventBased'
config.Data.unitsPerJob = 10
config.Data.totalUnits = 30
config.Data.publication = True
config.Data.publishDataName = config.General.requestName

config.Site.storageSite = <site where the user has permission to write>
<!--/twistyPlugin-->
Show Hide user script.
<!--/twistyPlugin twikiMakeVisibleInline-->
# Read input.txt (shipped in the input sandbox) line by line.
# "Before" and "After" are control lines; every other line is a message.
# -r stops read from interpreting backslashes, and the "|| [ -n ... ]" part
# makes sure a last line without a trailing newline is still processed.
while read -r line || [ -n "$line" ]
do
    # A single '=' is the portable string comparison inside [ ].
    if [ "$line" = "Before" ]; then
        # Marker only: the message that follows is printed on the next iteration.
        continue
    elif [ "$line" = "After" ]; then
        # cmsRun is executed at the point where the "After" marker appears.
        cmsRun -j FrameworkJobReport.xml -p PSet.py
    else
        # Quoted, so that the message is printed as written (no word splitting
        # or file name expansion).
        echo "$line"
    fi
done < input.txt
<!--/twistyPlugin-->
Show Hide local text file.
<!--/twistyPlugin twikiMakeVisibleInline-->
Before
================= CMSRUN starting ====================
After
================= CMSRUN finished ====================
<!--/twistyPlugin-->

4.D) Run a task as in 4.A, but pass the messages as arguments to your script.

Help:

Show Hide CRAB configuration file.
<!--/twistyPlugin twikiMakeVisibleInline-->
from CRABClient.UserUtilities import config
config = config()

config.General.requestName = 'CRAB3_Advanced_Tutorial_May2015_Exercise4B'

config.JobType.pluginName = 'PrivateMC'
config.JobType.psetName = 'pset_tutorial_MC_generation.py'
config.JobType.scriptExe = 'myscript.sh'
# Arguments have to be in the form param=value, without white spaces, quotation marks nor additional equal signs (=).
config.JobType.scriptArgs = ['Before=CMSRUN-starting', 'After=CMSRUN-finished']

config.Data.primaryDataset = 'MinBias'
config.Data.splitting = 'EventBased'
config.Data.unitsPerJob = 10
config.Data.totalUnits = 30
config.Data.publication = True
config.Data.publishDataName = config.General.requestName

config.Site.storageSite = <site where the user has permission to write>
<!--/twistyPlugin-->
Show Hide user script.
<!--/twistyPlugin twikiMakeVisibleInline-->
# Pick the two messages out of the param=value arguments given to the script
# (JobType.scriptArgs in the CRAB configuration). Arguments with any other
# parameter name are ignored.
msgBefore=""
msgAfter=""
for arg in "$@"; do
    case "$arg" in
        # ${arg#*=} drops everything up to and including the first '='.
        Before=*) msgBefore="${arg#*=}" ;;
        After=*)  msgAfter="${arg#*=}" ;;
    esac
done

echo "================= $msgBefore ===================="
cmsRun -j FrameworkJobReport.xml -p PSet.py
echo "================= $msgAfter ===================="
<!--/twistyPlugin-->

4.E) Run a task as in 4.A, but defining the exit code of your script as 80500 (pass the exit code to CRAB; don't do exit 80500 in the script). Once the task finishes (it should fail) check the status to see if the jobs are indeed reported as failed with exit code 80500.

Help:

Show Hide user script.
<!--/twistyPlugin twikiMakeVisibleInline-->
# Run cmsRun between two marker messages, then record a custom exit code in
# the framework job report instead of calling "exit" with it.
echo "================= CMSRUN starting ===================="
cmsRun -j FrameworkJobReport.xml -p PSet.py
echo "================= CMSRUN finished ===================="

# Exit code and message to be reported through the job report.
exitCode=80500
exitMessage="This is a test to see if I can pass exit code 80500 to CRAB."
errorType=""

if [ -e FrameworkJobReport.xml ]
then
    # A report already exists: start a new file with the root tag followed by
    # the FrameworkError element ...
    cat << EOF > FrameworkJobReport.xml.tmp
<FrameworkJobReport>
<FrameworkError ExitStatus="$exitCode" Type="$errorType" >
$exitMessage
</FrameworkError>
EOF
    # ... then append the existing report without its first line (assumed to
    # be the opening <FrameworkJobReport> tag) and replace the original file.
    tail -n+2 FrameworkJobReport.xml >> FrameworkJobReport.xml.tmp
    mv FrameworkJobReport.xml.tmp FrameworkJobReport.xml
else
    # No report was produced: write a minimal one that only holds the error.
    cat << EOF > FrameworkJobReport.xml
<FrameworkJobReport>
<FrameworkError ExitStatus="$exitCode" Type="$errorType" >
$exitMessage
</FrameworkError>
</FrameworkJobReport>
EOF
fi
<!--/twistyPlugin-->

Exercise 5 - LHE

5.A) The objective of this exercise is to run a MC generation on LHE files. Using the Running MC generation on LHE files twiki as a pointer, prepare (but do not submit yet!) a CRAB3 configuration to run on the Grid using the pset and the LHE file you can find here /afs/cern.ch/cms/ccs/wm/scripts/Crab/CRAB3/AdvTutorial/Exercise_5/. Use config.JobType.inputFiles to pass the LHE file to the jobs. The target is to have 10 jobs that will run for 8 hours. Run your job locally on 1000 events before submitting it with CRAB (it should take less than a minute), and use the Timing service of CMSSW to get a time per event estimation. Everything should use CMSSW_5_3_22. Publication should be enabled in your configuration.

Help:

Show Hide Adding Timing service to the PSet.
<!--/twistyPlugin twikiMakeVisibleInline-->
##### These are the lines you can add to your pset to get the estimates in the FrameworkJobReport file
process.Timing = cms.Service("Timing",
    summaryOnly = cms.untracked.bool(True)
)

##### For your information, CRAB3 also adds the following two lines in addition to the previous three:
#process.CPU = cms.Service("CPU")

#process.SimpleMemoryCheck = cms.Service("SimpleMemoryCheck")
<!--/twistyPlugin-->
Show Hide Running cmsRun locally to estimate the time per event.
<!--/twistyPlugin twikiMakeVisibleInline-->
cmsRun -j FrameworkJobReport.xml -p pset_MC_generation_LHE.py
You can now open the FrameworkJobReport.xml file and look for AvgEventTime, which is the value CRAB3 currently uses to estimate the walltimes.
<!--/twistyPlugin-->
Show Hide CRAB configuration file.
<!--/twistyPlugin twikiMakeVisibleInline-->
# CRAB3 configuration for exercise 5.A: MC generation from an LHE file that
# is passed to the jobs through JobType.inputFiles.
from CRABClient.UserUtilities import config
config = config()

config.General.requestName = 'CRAB3_Advanced_Tutorial_May2015_Exercise5A'

config.JobType.pluginName = 'PrivateMC'
config.JobType.generator = 'lhe'
config.JobType.psetName = 'pset_MC_generation_LHE.py'
# The LHE file is passed to the jobs via the input files list.
config.JobType.inputFiles = ['dynlo.lhe']

config.Data.primaryDataset = 'TutorialMay2015Exercise5A'
config.Data.splitting = 'EventBased'
# Target running time of each job, in seconds (8 hours).
JOB_WALLTIME = 8*3600
# Time per event, in seconds; presumably the estimate from the local cmsRun
# test with the Timing service -- replace it with your own measurement.
TIME_PER_EVENT = 0.25
# Number of events a job has to process to run for about JOB_WALLTIME.
config.Data.unitsPerJob = int(JOB_WALLTIME / TIME_PER_EVENT)
# The exercise asks for 10 jobs.
NJOBS = 10
config.Data.totalUnits = config.Data.unitsPerJob * NJOBS
config.Data.publication = True
config.Data.publishDataName = 'MC_generation_LHE'

# Placeholder: replace it with the name of your storage site, as a string.
config.Site.storageSite = <storage-site>
<!--/twistyPlugin-->

5.B) Try to submit the task. Is there a problem? Try to explain what is going on.

Help:

Show Hide answer.
<!--/twistyPlugin twikiMakeVisibleInline-->
You are probably getting the following error, because passing the LHE file directly in the input sandbox causes the sandbox to go over the 
allowed limit of 10 MB (10485760 bytes):
Will use CRAB configuration file /afs/cern.ch/user/m/mmascher/tutorial/Exercise_5/crabConfig.py

Error contacting the server.
Server answered with: Invalid input parameter
Reason is: File is bigger then allowed limit of 10485760B
<!--/twistyPlugin-->

5.C) Copy the dynlo.lhe file to your destination storage under /store/user/<your-username>/dynlo.lhe and try to submit the task again removing the JobType.inputFiles parameter and accessing the file locally. Remember to modify the LHEInputSource and whitelist your site in the CRAB configuration so that your jobs will only run there.

Help:

Show Hide Get the PFN for the LHEInputSource
<!--/twistyPlugin twikiMakeVisibleInline-->
/afs/cern.ch/user/b/belforte/public/mybin/lfn2pfn.sh <site> [<LFN>]
<!--/twistyPlugin-->
Show Hide CRAB configuration file
<!--/twistyPlugin twikiMakeVisibleInline-->
#config.JobType.inputFiles = ['dynlo.lhe']
config.Site.whitelist = ['T2_CH_CERN']
<!--/twistyPlugin-->
Show Hide PSet configuration file
<!--/twistyPlugin twikiMakeVisibleInline-->
# Read the LHE file from the user's storage area through its gsiftp PFN.
# Replace <username> with your own username. The URL has to be a single
# string literal, without embedded line breaks or trailing blanks.
process.source = cms.Source("LHESource",
    fileNames = cms.untracked.vstring('gsiftp://eoscmsftp.cern.ch//eos/cms/store/user/<username>/dynlo.lhe')
)
<!--/twistyPlugin-->

5.D) Relax the whitelist so that the jobs can run at all the sites in the same country as your storage site. For example, if you are storing files at T2_IT_Legnaro, your jobs should run only at Italian sites (T2_IT*). Can you identify the differences between the log files of exercise 5.C and those of this exercise?

Help:

Show Hide CRAB configuration file
<!--/twistyPlugin twikiMakeVisibleInline-->
#config.JobType.inputFiles = ['dynlo.lhe']
config.Site.whitelist = ['T2_CH*']
<!--/twistyPlugin-->

Exercise 6 - CRABAPI library

6) Using the same CMSSW parameter-set configuration as in exercise 2, submit four identical tasks to analyze the following four input datasets:

  • /DoubleMuParked/Run2012A-22Jan2013-v1/AOD
  • /DoubleMuParked/Run2012B-22Jan2013-v1/AOD
  • /DoubleMuParked/Run2012C-22Jan2013-v1/AOD
  • /DoubleMuParked/Run2012D-22Jan2013-v1/AOD
Don't do crab submit four times; instead use the crabCommand API from the CRABAPI library in a script. There is no need to analyze the whole datasets; just a few lumis or files is enough. Use the CRABAPI library to check the status, resubmit failed jobs, get the report and retrieve the output files, from the tasks you submitted.

Help:

Show Hide suggestion.
<!--/twistyPlugin twikiMakeVisibleInline-->
Create a python script named "multicrab" (with permissions 744) which you should be able to run in the following way:
     ./multicrab --crabCmd CMD [--workArea WAD --crabCmdOpts OPTS]
where CMD is the crab command, WAD is a work area directory with many CRAB project directories inside and OPTS are options for the crab 
command. 
<!--/twistyPlugin-->
Show Hide multicrab script skeleton.
<!--/twistyPlugin twikiMakeVisibleInline-->
#!/usr/bin/env python
"""
This is a small script that does the equivalent of multicrab.
"""
import os
from optparse import OptionParser


def getOptions():
    """
    Parse and return the arguments provided by the user.

    Returns the optparse options object with the attributes ``crabCmd``,
    ``workArea`` and ``crabCmdOpts`` (each one defaults to an empty string).

    The parser exits with a usage error (via ``parser.error``) when:
      * positional arguments are given;
      * the --crabCmd option is missing;
      * the command is not 'submit' and the --workArea option is missing or
        does not point to an existing directory.
    """
    usage = ("Usage: %prog --crabCmd CMD [--workArea WAD --crabCmdOpts OPTS]"
             "\nThe multicrab command executes 'crab CMD OPTS' for each project directory contained in WAD"
             "\nUse multicrab -h for help")

    parser = OptionParser(usage=usage)

    parser.add_option('-c', '--crabCmd',
                      dest = 'crabCmd',
                      default = '',
                      help = "crab command",
                      metavar = 'CMD')

    parser.add_option('-w', '--workArea',
                      dest = 'workArea',
                      default = '',
                      help = "work area directory (only if CMD != 'submit')",
                      metavar = 'WAD')

    parser.add_option('-o', '--crabCmdOpts',
                      dest = 'crabCmdOpts',
                      default = '',
                      help = "options for crab command CMD",
                      metavar = 'OPTS')

    (options, arguments) = parser.parse_args()

    if arguments:
        parser.error("Found positional argument(s): %s." % (arguments))
    if not options.crabCmd:
        parser.error("(-c CMD, --crabCmd=CMD) option not provided.")
    if options.crabCmd != 'submit':
        # Every command other than 'submit' acts on existing project
        # directories, so it needs the work area that contains them.
        if not options.workArea:
            # The metavar of this option is WAD, as in the usage and help text.
            parser.error("(-w WAD, --workArea=WAD) option not provided.")
        if not os.path.isdir(options.workArea):
            parser.error("'%s' is not a valid directory." % (options.workArea))

    return options


def main():
    """
    Entry point of the multicrab skeleton.

    Only the parsing of the command-line options is in place; what to do
    with them is left to be filled in.
    """

    options = getOptions()

    # Do something with the parsed options here.

 
if __name__ == '__main__':
    main()
<!--/twistyPlugin-->
Show Hide multicrab script.
<!--/twistyPlugin twikiMakeVisibleInline-->
#!/usr/bin/env python
"""
This is a small script that does the equivalent of multicrab.
"""
import os
from optparse import OptionParser

from CRABAPI.RawCommand import crabCommand
from CRABClient.ClientExceptions import ClientException
from httplib import HTTPException


def getOptions():
    """
    Parse and return the arguments provided by the user.

    Returns the optparse options object with the attributes ``crabCmd``,
    ``workArea`` and ``crabCmdOpts`` (each one defaults to an empty string).

    The parser exits with a usage error (via ``parser.error``) when:
      * positional arguments are given;
      * the --crabCmd option is missing;
      * the command is not 'submit' and the --workArea option is missing or
        does not point to an existing directory.
    """
    usage = ("Usage: %prog --crabCmd CMD [--workArea WAD --crabCmdOpts OPTS]"
             "\nThe multicrab command executes 'crab CMD OPTS' for each project directory contained in WAD"
             "\nUse multicrab -h for help")

    parser = OptionParser(usage=usage)

    parser.add_option('-c', '--crabCmd',
                      dest = 'crabCmd',
                      default = '',
                      help = "crab command",
                      metavar = 'CMD')

    parser.add_option('-w', '--workArea',
                      dest = 'workArea',
                      default = '',
                      help = "work area directory (only if CMD != 'submit')",
                      metavar = 'WAD')

    parser.add_option('-o', '--crabCmdOpts',
                      dest = 'crabCmdOpts',
                      default = '',
                      help = "options for crab command CMD",
                      metavar = 'OPTS')

    (options, arguments) = parser.parse_args()

    if arguments:
        parser.error("Found positional argument(s): %s." % (arguments))
    if not options.crabCmd:
        parser.error("(-c CMD, --crabCmd=CMD) option not provided.")
    if options.crabCmd != 'submit':
        # Every command other than 'submit' acts on existing project
        # directories, so it needs the work area that contains them.
        if not options.workArea:
            # The metavar of this option is WAD, as in the usage and help text.
            parser.error("(-w WAD, --workArea=WAD) option not provided.")
        if not os.path.isdir(options.workArea):
            parser.error("'%s' is not a valid directory." % (options.workArea))

    return options


def main():
    """
    Run a crab command on several tasks, as multicrab would do.

    For the 'submit' command, one task is submitted for each dataset of a
    fixed list, all of them sharing the same base configuration. For any
    other command, the command is executed on every directory found inside
    the work area given by the user.
    """
    opts = getOptions()
    # Extra options for the crab command, one list element per option.
    extraArgs = opts.crabCmdOpts.split()

    if opts.crabCmd != 'submit':
        # All commands other than submit can be simply executed on each
        # project directory of the work area.
        if not opts.workArea:
            return
        for entry in os.listdir(opts.workArea):
            taskDir = os.path.join(opts.workArea, entry)
            if os.path.isdir(taskDir):
                banner = "Executing (the equivalent of): crab %s --dir %s %s" % (opts.crabCmd, taskDir, opts.crabCmdOpts)
                rule = "-"*len(banner)
                print(rule)
                print(banner)
                print(rule)
                try:
                    crabCommand(opts.crabCmd, dir = taskDir, *extraArgs)
                except HTTPException as hte:
                    print("Failed executing command %s for task %s: %s" % (opts.crabCmd, taskDir, hte.headers))
                except ClientException as cle:
                    print("Failed executing command %s for task %s: %s" % (opts.crabCmd, taskDir, cle))
        return

    # From here on: the submit command, which needs special treatment.

    #--------------------------------------------------------
    # Base configuration shared by all the tasks. The parameters set to None
    # are filled in for each dataset inside the loop below.
    #--------------------------------------------------------
    from CRABClient.UserUtilities import config
    config = config()

    config.General.requestName = None
    config.General.workArea = 'CRAB3_Advanced_Tutorial_May2015_Exercise6'

    config.JobType.pluginName = 'Analysis'
    config.JobType.psetName = 'pset_tutorial_analysis.py'

    config.Data.inputDataset = None
    config.Data.splitting = 'LumiBased'
    config.Data.unitsPerJob = 10
    config.Data.totalUnits = 30
    config.Data.publishDataName = None

    config.Site.storageSite = None # Choose your site.
    #--------------------------------------------------------

    # One task is submitted for each of these input datasets.
    inputDatasets = [
        '/DoubleMuParked/Run2012A-22Jan2013-v1/AOD',
        '/DoubleMuParked/Run2012B-22Jan2013-v1/AOD',
        '/DoubleMuParked/Run2012C-22Jan2013-v1/AOD',
        '/DoubleMuParked/Run2012D-22Jan2013-v1/AOD',
    ]

    for dataset in inputDatasets:
        # A dataset name has the form /A/B/C. B is different for each of the
        # datasets above, so it is used as the CRAB request name.
        secondField = dataset.split('/')[2]
        config.General.requestName = secondField
        config.Data.inputDataset = dataset
        config.Data.publishDataName = '%s_%s' % (config.General.workArea, config.General.requestName)
        # Submit the task; a failure is reported and the loop moves on to the
        # next dataset.
        try:
            print("Submitting for input dataset %s" % (dataset))
            crabCommand(opts.crabCmd, config = config, *extraArgs)
        except HTTPException as hte:
            print("Submission for input dataset %s failed: %s" % (dataset, hte.headers))
        except ClientException as cle:
            print("Submission for input dataset %s failed: %s" % (dataset, cle))


if __name__ == '__main__':
    main()
<!--/twistyPlugin-->

-- AndresTanasijczuk - 2015-05-19

 
This site is powered by the TWiki collaboration platform. Powered by Perl. Copyright © 2008-2019 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback