Sample SLURM script for a serial job

For most applications, you should only need to change items indicated in red. Items shown in amber may also warrant your attention.
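
Once you have edited the script to suit your job, you would typically submit
and monitor it with commands along the following lines (a sketch; the file
name "serial_job.sh" is just an example):

    sbatch serial_job.sh     # submit the script; SLURM prints the job id
    squeue -u $USER          # check the state of your queued and running jobs
    scancel <jobid>          # cancel the job if necessary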

#!/bin/bash

###############################################################
#                                                             #
#    Bourne shell script for submitting a serial job to the   #
#    SLURM queue using the sbatch command.                    #
#                                                             #
###############################################################

#     Remarks: A line beginning with # is a comment.
#              A line beginning with #SBATCH is a SLURM directive.
#              SLURM directives must come first.
#
   
##########################
#                        #
# The SLURM directives   #
#                        #
##########################

#          Set the name of the job.

#SBATCH -J JobName

#          Specify the queue (in SLURM, a queue is called a "partition",
#          hence the -p option).  The CMU cluster currently has three
#          queues: "green", "blue" and "red".

#SBATCH -p blue

#          Specify the maximum wall-clock run time.  Format:
#                  hhhh:mm:ss   hours:minutes:seconds
#          Be sure to specify a reasonable value here.
#          If the job has not finished when this limit is reached,
#          it is terminated.
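#          Other accepted time formats include  mm, mm:ss, dd-hh,
#          dd-hh:mm and dd-hh:mm:ss  (e.g. 2-00:00:00 for two days).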

#SBATCH -t 0:40:00

#          Specify the number of nodes requested and the
#          number of processes (tasks) per node. 

#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1

#          By default, the standard output and error streams are sent
#          to the same file with a default name.  Use the directives
#          below to change the files to which the standard output and
#          error streams are sent.  Include the --error directive only
#          if you want errors to go to a different file.

#SBATCH --output=log22
#SBATCH --error=errlog22
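
#          To avoid overwriting these files between runs, note that SLURM
#          replaces %j with the job id in these options, e.g.
#          --output=log_%j  ("log_%j" is just an example name).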

#          Specify the maximum amount of physical memory required per node.
#          Use the suffix K for kilobytes, M for megabytes, G for gigabytes.
#          Take some care in setting this value.  Setting it too large
#          can result in your job waiting in the queue for sufficient
#          resources to become available.  Setting it too small
#          can cause your job to be killed if it uses more memory
#          than it requested.

#SBATCH --mem=1400M

#          Specify whether the job should notify you by email.  Common event
#          types are NONE, BEGIN, END, FAIL, REQUEUE and ALL.

#SBATCH --mail-type=ALL
#SBATCH --mail-user=someuser@somedomain

#  If you have threaded (OpenMP) code, you might need the line below.
#  Set it to the number of CPUs you have requested for the task.

export OMP_NUM_THREADS=3
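
#  For example (a sketch; not needed for this single-CPU serial job): if you
#  added  #SBATCH --cpus-per-task=3  to the directives above, you could set
#
#      export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
#
#  so that the thread count always matches the CPUs actually allocated.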

##########################################
#                                        #
#   Output some useful job information.  #
#                                        #
##########################################

echo ------------------------------------------------------
echo Job identifier is $SLURM_JOB_ID
echo Job name is $SLURM_JOB_NAME
echo -n 'Job is running on node '; echo $SLURM_JOB_NODELIST
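#   A couple of extra, optional lines in the same vein (a sketch; these use
#   only standard SLURM variables and Unix commands):
echo Job was submitted from directory $SLURM_SUBMIT_DIR
echo Job started on $(hostname) at $(date)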
echo ------------------------------------------------------

##############################################################
#                                                            #
#   The prologue script automatically makes a directory      #
#   on the local disks for you.  The name of this directory  #
#   depends on the job id, but you need only refer to it     #
#   using ${WORKDIR}.                                        #
#                                                            #
##############################################################

WORKDIR=/scratch/slurm_$SLURM_JOBID
SCP=/usr/bin/scp
SSH=/usr/bin/ssh
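
#   If your cluster's prologue does not create ${WORKDIR} automatically
#   (an assumption worth checking against your local documentation),
#   uncomment the next line so the script creates it itself:
#
# mkdir -p ${WORKDIR}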

###############################################################
#                                                             #
#    Transfer files from server to local disk.  Use scp       #
#    or if NFS mounted, just cp.                              #
#                                                             #
###############################################################

stagein()
{
 echo ' '
 echo Transferring files from server to compute node
 echo Writing files in node directory  ${WORKDIR}
 cd ${WORKDIR}

  ${SCP} /path/input_file .
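  # If the input files live on an NFS-mounted file system that is also
  # visible from the compute node, a plain cp works just as well, e.g.
  # (same placeholder path as above):
  #  cp /path/input_file .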

 echo Files in node work directory are as follows:
 ls -l
}

############################################################
#                                                          #
#    Execute the run.  Do not run in the background.       #
#                                                          #
############################################################

runprogram()
{
 program_executable < input_file > output_file


 # NOTE: to run multiple serial executables in a single job,
 # use "srun" in the background, followed by "wait".  For example,
 # the (commented-out) lines below would replace the single command above:
 #
 # srun program_executable < input_file1 > output_file1 &
 # srun program_executable < input_file2 > output_file2 &
 # srun program_executable < input_file3 > output_file3 &
 # wait

}

###########################################################
#                                                         #
#   Copy necessary files back to permanent directory.     #
#                                                         #
###########################################################

stageout()
{
 echo ' '
 echo Transferring files from compute nodes to server
 echo Writing files in permanent directory
 cd ${WORKDIR}

 ${SCP} output_file  /path/outputfiles

 echo Final files in permanent data directory:
 ls -l /path/outputfiles
}

#####################################################################
#                                                                   #
#  The "scancel" command is used to kill a running job.  It first   #
#  sends a SIGTERM signal, then after a delay, it sends a SIGKILL   #
#  signal which eradicates the job.  During the time between the    #
#  SIGTERM and SIGKILL signals, the "cleanup" function below is     #
#  run. You should include in this function commands to copy files  #
#  from the local disk back to your home directory.  Note: if you   #
#  need to transfer very large files, which may take longer than    #
#  60 seconds (the SIGTERM-to-SIGKILL delay, set by the cluster     #
#  configuration), check with the system administrators first.      #
#                                                                   #
#####################################################################

early()
{
 echo ' '
 echo ' ############ WARNING:  EARLY TERMINATION #############'
 echo ' '
}

#   Note: SIGKILL (9) cannot be trapped, so only SIGINT (2) and
#   SIGTERM (15) are handled here.

trap 'early; stageout' 2 15


##################################################
#                                                #
#   Staging in, running the job, and staging out #
#   were specified above as functions.  Now      #
#   call these functions to perform the actual   #
#   file transfers and program execution.        #
#                                                #
##################################################

stagein
runprogram
stageout 

###############################################################
#                                                             #
#   The epilogue script automatically deletes the directory   #
#   created on the local disk (including all files contained  #
#   therein).                                                 #
#                                                             #
###############################################################

exit
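
After the job has finished, you can review how it ran with SLURM's accounting
command, for example (replace <jobid> with the id that sbatch printed):

    sacct -j <jobid> --format=JobID,JobName,Elapsed,MaxRSS,State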