#!/bin/bash
###############################################################
#                                                             #
#   Bourne shell script for submitting a serial job to the    #
#   SLURM queue using the sbatch command.                     #
#                                                             #
###############################################################
# Remarks: A line beginning with # is a comment.
#          A line beginning with #SBATCH is a SLURM directive.
#          SLURM directives must come first.
#
##########################
#                        #
#  The SLURM directives  #
#                        #
##########################

# Set the name of the job.
#SBATCH -J JobName

# Specify the queue.  The CMU cluster currently has three queues:
# "green", "blue" and "red".
#SBATCH -p blue

# Specify the maximum run wall time.  Format:
#   hhhh:mm:ss   hours:minutes:seconds
# Be sure to specify a reasonable value here.
# If the job does not finish by the time reached,
# the job is terminated.
#SBATCH -t 0:40:00

# Specify the number of nodes requested and the
# number of processes (tasks) per node.
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1

# By default, the standard output and error streams are sent
# to the same file with a default name.  Use the directives
# below to change the files to which the standard output and
# error streams are sent.
#SBATCH --output=log22
# Include the --error directive only if you want errors to go
# to a different file.  (The note must live on its own comment
# line: trailing text on an #SBATCH line is parsed by sbatch as
# part of the directive.)
#SBATCH --error=errlog22

# Specify the maximum amount of physical memory required.
# kb for kilobytes, mb for megabytes, gb for gigabytes.
# Take some care in setting this value.  Setting it too large
# can result in your job waiting in the queue for sufficient
# resources to become available.  Setting it too small
# can cause your job to be killed if too much memory
# is requested by the job.
#SBATCH --mem=1400mb

# Specify if the job should notify you by email.  Common event
# types are NONE, BEGIN, END, FAIL, REQUEUE, ALL
#SBATCH --mail-type=ALL
#SBATCH --mail-user=someuser@somedomain

# NOTE(review): OMP_NUM_THREADS=2 but the allocation above asks
# for a single task per node — confirm the intended CPU count
# (e.g. add --cpus-per-task=2) before relying on 2 threads.
export OMP_NUM_THREADS=2

##########################################
#                                        #
#   Output some useful job information.  #
#                                        #
##########################################
echo ------------------------------------------------------
echo "Job identifier is ${SLURM_JOB_ID}"
echo "Job name is ${SLURM_JOB_NAME}"
echo "Job is running on node ${SLURM_JOB_NODELIST}"
echo ------------------------------------------------------

##############################################################
#                                                            #
#   The prologue script automatically makes a directory      #
#   on the local disks for you.  The name of this directory  #
#   depends on the job id, but you need only refer to it     #
#   using ${WORKDIR}.                                        #
#                                                            #
##############################################################
# Use SLURM_JOB_ID consistently ($SLURM_JOBID is a deprecated
# alias and the rest of this script already uses SLURM_JOB_ID).
WORKDIR=/scratch/slurm_${SLURM_JOB_ID}
SCP=/usr/bin/scp
SSH=/usr/bin/ssh

###############################################################
#                                                             #
#   Transfer files from server to local disk.  Use scp        #
#   or if NFS mounted, just cp.                               #
#                                                             #
###############################################################
# stagein: copy the input file(s) from the server into the
# per-job scratch directory on the compute node.
stagein() {
  echo ' '
  echo "Transferring files from server to compute node"
  echo "Writing files in node directory ${WORKDIR}"
  # Abort rather than scp into the wrong directory if the
  # scratch directory is missing.
  cd "${WORKDIR}" || exit 1
  ${SCP} /path/input_file .
  echo "Files in node work directory are as follows:"
  ls -l
}

############################################################
#                                                          #
#   Execute the run.  Do not run in the background.        #
#                                                          #
############################################################
# runprogram: run the serial executable in the foreground.
runprogram() {
  program_executable < input_file > output_file
  # NOTE: to run multiple serial executables in a single job,
  # use "srun" in the background, followed by "wait", e.g.:
  #   srun program_executable < input_file1 > output_file1 &
  #   srun program_executable < input_file2 > output_file2 &
  #   srun program_executable < input_file3 > output_file3 &
  #   wait
  # (Example only — uncomment these instead of the serial line
  # above; running both would execute the program four times.)
}
#                                                         #
###########################################################
# stageout: copy result files from the compute node's
# scratch directory back to the permanent data directory.
stageout() {
  echo ' '
  echo "Transferring files from compute nodes to server"
  echo "Writing files in permanent directory"
  # Abort the copy rather than scp from the wrong directory
  # if the scratch directory is missing.
  cd "${WORKDIR}" || exit 1
  ${SCP} output_file /path/outputfiles
  echo "Final files in permanent data directory:"
}

#####################################################################
#                                                                   #
#   The "scancel" command is used to kill a running job.  It first  #
#   sends a SIGTERM signal, then after a delay, it sends a SIGKILL  #
#   signal which eradicates the job.  During the time between the   #
#   SIGTERM and SIGKILL signals, the "cleanup" function below is    #
#   run.  You should include in this function commands to copy      #
#   files from the local disk back to your home directory.  Note:   #
#   if you need to transfer very large files which make take longer #
#   than 60 seconds, be sure to use the -W option of scancel.       #
#                                                                   #
#####################################################################
# early: print a warning banner before staging files out on
# premature termination.
early() {
  echo ' '
  echo ' ############ WARNING: EARLY TERMINATION #############'
  echo ' '
}

# Stage out on HUP(1), INT(2) and TERM(15).  SIGKILL(9) cannot be
# caught, so trapping it (as the original "2 9 15" did) has no
# effect — only the catchable signals are listed here.
trap 'early; stageout' 1 2 15

##################################################
#                                                #
#   Staging in, running the job, and staging     #
#   out were specified above as functions.  Now  #
#   call these functions to perform the actual   #
#   file transfers and program execution.        #
#                                                #
##################################################
stagein
runprogram
stageout

###############################################################
#                                                             #
#   The epilogue script automatically deletes the directory   #
#   created on the local disk (including all files contained  #
#   therein).                                                 #
#                                                             #
###############################################################
exit