Sample SLURM script for a serial job.
For most applications, you should only need to change the items marked as
required below. A few other items may also warrant your attention.
#!/bin/bash
###############################################################
# #
# Bourne shell script for submitting a serial job to the #
# SLURM queue using the sbatch command. #
# #
###############################################################
# Remarks: A line beginning with # is a comment.
# A line beginning with #SBATCH is a SLURM directive.
# SLURM directives must come first.
#
##########################
# #
# The SLURM directives #
# #
##########################
# Set the name of the job.
#SBATCH -J JobName
# Specify the queue. The CMU cluster currently has three queues:
# "green", "blue" and "red".
#SBATCH -p blue
# Specify the maximum run wall time. Format:
# hhhh:mm:ss hours:minutes:seconds
# Be sure to specify a reasonable value here.
# If the job does not finish by the time reached,
# the job is terminated.
#SBATCH -t 0:40:00
# Specify the number of nodes requested and the
# number of processes (tasks) per node.
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
# By default, the standard output and error streams are sent
# to the same file with a default name. Use the directives
# below to change the files to which the standard output and
# error streams are sent.
#SBATCH --output=log22
# Include the --error directive only if you want errors to go to a
# different file; otherwise errors are merged into the --output file.
# (Trailing prose on a #SBATCH line is parsed by sbatch and causes an
# error, so any explanation must live on a comment line like this one.)
#SBATCH --error=errlog22
# Specify the maximum amount of physical memory required.
# kb for kilobytes, mb for megabytes, gb for gigabytes.
# Take some care in setting this value. Setting it too large
# can result in your job waiting in the queue for sufficient
# resources to become available. Setting it too small
# can cause your job to be killed if too much memory
# is requested by the job.
#SBATCH --mem=1400mb
# Specify if the job should notify you by email. Common event
# types are NONE, BEGIN, END, FAIL, REQUEUE, ALL
#SBATCH --mail-type=ALL
#SBATCH --mail-user=someuser@somedomain
# If you have threaded code with OpenMP, you might need the line below.
export OMP_NUM_THREADS=3
##########################################
# #
# Output some useful job information. #
# #
##########################################
echo ------------------------------------------------------
echo Job identifier is "$SLURM_JOB_ID"
echo Job name is "$SLURM_JOB_NAME"
# SLURM_JOB_NODELIST is an environment variable containing the node
# list itself (unlike PBS's PBS_NODEFILE, it is not a file name), so
# it must be echoed rather than passed to cat.
echo -n 'Job is running on node '; echo "$SLURM_JOB_NODELIST"
echo ------------------------------------------------------
##############################################################
# #
# The prologue script automatically makes a directory #
# on the local disks for you. The name of this directory #
# depends on the job id, but you need only refer to it #
# using ${WORKDIR}. #
# #
##############################################################
# Per-job scratch directory; the prologue creates it on the local
# disk, and its name is derived from the SLURM job id.
WORKDIR="/scratch/slurm_${SLURM_JOBID}"
# Absolute paths of the transfer tools used by stagein/stageout.
SCP="/usr/bin/scp"
SSH="/usr/bin/ssh"
###############################################################
# #
# Transfer files from server to local disk. Use scp #
# or if NFS mounted, just cp. #
# #
###############################################################
#######################################
# Copy input files from the server to the node-local work directory.
# Globals: WORKDIR (job scratch dir, read), SCP (transfer command, read).
# Exits the job if the work directory cannot be entered, so files are
# never copied into the wrong directory.
#######################################
stagein()
{
  echo ' '
  echo Transferring files from server to compute node
  echo Writing files in node directory "${WORKDIR}"
  cd "${WORKDIR}" || { echo "stagein: cannot cd to ${WORKDIR}" >&2; exit 1; }
  ${SCP} /path/input_file .
  echo Files in node work directory are as follows:
  ls -l
}
############################################################
# #
# Execute the run. Do not run in the background. #
# #
############################################################
# Execute the computation in the foreground (do not background the
# main run).
runprogram()
{
program_executable < input_file > output_file
# NOTE: to run multiple serial executables in a single job,
# use "srun" in the background, followed by "wait"
# NOTE(review): as written, this sample runs BOTH the foreground
# command above and the three srun lines below; a real job would
# normally keep only one of the two approaches — confirm intent.
srun program_executable < input_file1 > output_file1 &
srun program_executable < input_file2 > output_file2 &
srun program_executable < input_file3 > output_file3 &
# Barrier: wait for all backgrounded srun tasks to finish before
# returning, so stageout sees complete output files.
wait
}
###########################################################
# #
# Copy necessary files back to permanent directory. #
# #
###########################################################
#######################################
# Copy result files from the node-local work directory back to
# permanent storage. Globals: WORKDIR (read), SCP (read).
# Also invoked from the termination trap below, so it must be safe
# to run at any point after stagein.
#######################################
stageout()
{
  echo ' '
  echo Transferring files from compute nodes to server
  echo Writing files in permanent directory
  cd "${WORKDIR}" || { echo "stageout: cannot cd to ${WORKDIR}" >&2; exit 1; }
  ${SCP} output_file /path/outputfiles
  echo Final files in permanent data directory:
  # List the destination so the heading above actually shows something
  # (mirrors the "ls -l" done in stagein).
  ls -l /path/outputfiles
}
#####################################################################
# #
# The "scancel" command is used to kill a running job. It first #
# sends a SIGTERM signal, then after a delay, it sends a SIGKILL #
# signal which eradicates the job. During the time between the #
# SIGTERM and SIGKILL signals, the "cleanup" function below is #
# run. You should include in this function commands to copy files #
# from the local disk back to your home directory. Note: if you #
# need to transfer very large files which make take longer than #
# 60 seconds, be sure to use the -W option of scancel. #
# #
#####################################################################
# Print a banner warning that the job is being terminated early.
# Called from the signal trap before stageout runs.
early()
{
  printf '%s\n' ' '
  printf '%s\n' ' ############ WARNING: EARLY TERMINATION #############'
  printf '%s\n' ' '
}
# Register the cleanup handler for early termination. Signal 9
# (SIGKILL) cannot be caught, so trapping it is a no-op at best and an
# error in some shells; trap only INT (2) and TERM (15). scancel
# sends SIGTERM first, giving this handler time to stage files out
# before the follow-up SIGKILL.
trap 'early; stageout' 2 15
##################################################
# #
# Staging in, running the job, and staging out #
# were specified above as functions. Now #
# call these functions to perform the actual #
# file transfers and program execution. #
# #
##################################################
# Perform the actual workflow: stage inputs in, run the program,
# then stage results out.
stagein
runprogram
stageout
###############################################################
# #
# The epilogue script automatically deletes the directory #
# created on the local disk (including all files contained #
# therein). #
# #
###############################################################
# Explicit exit; the job's exit status is that of the last command.
exit