Here are a set of scripts I wrote to manage jobs on a Sun Grid Engine cluster. They allow one-line job submission, including looping of repetitive jobs. They also allow a user to set up her or his own queue on top of the SGE queue to limit the number of jobs submitted at one time (requires running a perl script on a node that can qalter jobs). All scripts are released under the GNU Public License V. 2.

ezsub: submit one job at a time: "ezsub program datafile"
ezloop: submit multiple repetitive jobs: "ezloop number_of_reps program datafile". Note that any instance of "REP" [all caps] in the datafile will be replaced by the replicate number: "log file=runREP.log" is changed to "log file=run3.log" on the third replicate.
ezdel: delete all queued and held jobs
nicesub, niceloop: same as ezsub and ezloop, but starts jobs with user holds on them. Nicestart then removes the user holds
nicestart: converts held jobs to running jobs: "nicestart minnum maxnum freenodes". Minnum (maxnum) specifies the minimum (maximum) number of jobs to be actively queued or run regardless of cluster activity. Freenodes specifies the number of nodes to remain free: jobs will be submitted until there are no more than freenodes still free (currently written for an 88 node cluster) or until maxnum is hit.
nicewatch: a script launched by nicestart
niceqstat: greps qstat output to strip user-held jobs
nicecount: uses qstat to count the number of running jobs of various types



ezsub
#! /bin/bash
# Shell script to automatically create a simple shell script
# and submit it to sge using qsub. To use this, just type
# ezsub followed by the name and options for your job. I.e.,

#
# ezsub paup -n batchfile.nex
# ezsub ./configure --disable-shared
# ezsub mb adh.nex
# ezsub tar -xvf gsl-1.8.tar
#
# The one constraint is that you can't have pipes or redirects ( >, <, |), as
# they would use the output of the ezsub command (which is just your job number)

# instead of the output of whatever program you're calling
#
# Brian O'Meara 17 Nov 2006
# http://www.brianomeara.info
# Released under GPL v2

# Build a throw-away SGE job script for the command given on the command
# line, submit it with qsub, and remove the script again.
#
# Arguments: the program to run followed by its options, exactly as you
#            would type them at a prompt (no pipes or redirects).
# Output:    whatever qsub prints (your job number).
# Exit code: 1 if no command was given or the temporary script could not be
#            created; otherwise the exit status of qsub.

if [ "$#" -eq 0 ]; then
    echo "usage: ezsub command [options]" >&2
    exit 1
fi

# A uniquely named file in the current directory, so that two submissions
# started from the same directory cannot overwrite each other's job script.
jobscript=$(mktemp ./tempqrun.XXXXXX) || exit 1
trap 'rm -f "$jobscript"' EXIT   # remove the script however this shell exits

# Single quotes keep the shell from interpreting "$" and "!" in the lines
# that are written into the job script.
{
    echo '#!/bin/bash'
    echo '#$ -cwd'                  # use current directory as working directory

    ########### Change email settings #################
    echo '#$ -M me@mycollege.edu'   # use your own email address. Please.
    echo '#$ -m as'                 # send email about the job.
    # "b"=when job begins
    # "e"=when job ends
    # "a"=when job aborts
    # "s"=when job suspended (someone kicks you off)
    # "n" alone means don't send mail
    ##################################################

    ##################### Job name ############################
    echo '#$ -N EZsub'              # job name, currently EZsub
    ###########################################################

    ########### Don't modify this bit #################
    echo '#$ -r y'                  # makes job rerunable
    echo '#$ -S /bin/bash'
    echo ''
    # Write every argument followed by a space; newlines inside an argument
    # become spaces so the whole command stays on one line of the job script.
    # The arguments are written unquoted, so the job's shell splits them again.
    printf '%s ' "$@" | tr '\n' ' '
    echo                            # end the command line so later lines stay separate
    ###################################################

    ############ Cleanup #########################
    #delete the "#" at the beginning of the following line if you want to remove the output files
    #echo 'rm EZsub.*'
    #############################################
} >"$jobscript"

###### Don't modify this bit ###########
chmod u+x "$jobscript"   # owner-only; the script does not need to be world-writable
sleep 1                  # pause kept from the original before handing the file to qsub
qsub "$jobscript"
status=$?
sleep 1                  # pause kept from the original before the script is removed
exit "$status"
########################################

########################################



ezloop
#!/usr/bin/perl -w
# Perl script to automatically start multiple repetitive jobs.
# Examples would be doing 100 bootstrap replicates by sending
# five jobs of 20 reps each or doing 4 mrbayes runs at once.
# Basically, the script expects there to be a batch file that
# you're using; it will replace any instance of REP (all caps)

# in that batch file with the replicate number. To use this
# script, you type ezloop followed by the number of loops,
# then the program you want to run, any options, and the
# name of the batch file. For example,
#
# ezloop 5 paup -n bootbatch.nex
# ezloop 4 mb adh.nex
#
# A sample batch file for something like paup might be:

#
# #nexus
# begin paup;
# log start file=bootREP.log;
# execute primates.nex;
# bootstrap nreps=20 treefile=bootREP.tre brlen=yes / start=nj;
# quit;
# end;
#

# Then, if submitted using the command "ezloop 5 paup -n bootbatch.nex",
# the output would be boot1.log, boot1.tre, boot2.log, boot2.tre,...
# boot5.log, boot5.tre. Each tree file would have trees from twenty
# bootstrap replicates; you could then load them all into paup,
# making sure to store tree weights, and get a majority rule
# consensus tree using tree weights to compute the bootstrap tree
# from 100 bootstrap replicates.

# The one constraint is that you can't have pipes or redirects ( >, <, |), as

# they would use the output of the ezloop command (which is just your job number)
# instead of the output of whatever program you're calling
#
# Brian O'Meara 17 Nov 2006
# http://www.brianomeara.info
# Released under GPL v2

use diagnostics;
use strict;

# ezloop: for each replicate 1..N, write a copy of the batch file in which
# every "REP" is replaced by the replicate number, then submit the command
# with that copy as its last argument through ezsub.
#
# Arguments: <reps> <command> [options] <batchfile>
# Files:     creates <rep>.<batchfile> next to the batch file for each rep.
# Exits with status 1 after printing the usage line if the arguments are
# malformed; dies if the batch file cannot be read or a copy cannot be written.

# The replicate count must be digits only: "a1" or "5x" would otherwise be
# accepted and silently treated as a different number.
if (@ARGV < 3 || $ARGV[0] !~ m/\A\d+\z/) {
    print "usage: ezloop #reps command [options] filename\n";
    exit 1;
}

my ($reps, @command) = @ARGV;
my $batch_file = pop @command;    # the last argument is the batch file

# Split off any directory part so the replicate number is prefixed to the
# file name itself ("data/boot.nex" -> "data/1.boot.nex").
my ($dir, $name) = $batch_file =~ m{\A(.*/)?([^/]*)\z}s;
$dir = '' unless defined $dir;

# Read the template once; it is the same for every replicate.
open(my $in, '<', $batch_file) or die("where is $batch_file ?");
my @template = <$in>;
close($in);
chomp @template;

for my $rep (1 .. $reps) {
    my $copy = "$dir$rep.$name";

    open(my $out, '>', $copy) or die("cannot write $copy: $!");
    for my $line (@template) {
        (my $filled = $line) =~ s/REP/$rep/g;    #convert REP to $rep. Case sensitive
        print {$out} "$filled\n";
    }
    close($out) or die("cannot finish writing $copy: $!");

    sleep(5);    #just to give the cluster a break, give us time to abort if something's wrong, etc.

    # List form: the arguments reach ezsub exactly as given, without being
    # re-interpreted by an intermediate shell.
    system('ezsub', @command, $copy) == 0
        or warn("ezsub failed for replicate $rep\n");
}

ezdel
#!/usr/bin/perl -w
#A quick script to delete all your queued and held jobs.
#Not often useful, but handy when you do need it (something has gone terribly wrong)

use diagnostics;
use strict;

# ezdel: delete every job of the current user whose qstat state contains
# "qw" (queued, including held "hqw" jobs). Running jobs are left alone.
# Takes no arguments. Stops when qstat lists no more such jobs, or when a
# whole pass removes nothing (so a job qdel cannot delete does not make the
# script spin forever).

my $username = `whoami`;    # act on whoever runs the script
chomp $username;

# Return the distinct ids of $owner's jobs whose state column contains "qw".
# Only lines shaped like "jobid prior name user state ..." are considered,
# so the qstat header and separator lines are skipped.
sub pending_job_ids {
    my ($owner) = @_;
    my %seen;
    my @ids;
    for my $line (`qstat`) {
        my ($jobid, $user, $state) =
            $line =~ m/^\s*(\d+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)/
            or next;
        next unless $user eq $owner && $state =~ m/qw/;
        push @ids, $jobid unless $seen{$jobid}++;
    }
    return @ids;
}

while (my @pending = pending_job_ids($username)) {
    my $removed = 0;
    for my $jobid (@pending) {
        $removed++ if system('qdel', $jobid) == 0;
        sleep(1);
    }
    last unless $removed;    # nothing could be deleted this pass; give up
}


nicesub
#! /bin/bash
# Shell script to automatically create a simple shell script
# and submit it to sge using qsub. To use this, just type
# nicesub followed by the name and options for your job. I.e.,
#
# nicesub paup -n batchfile.nex
# nicesub ./configure --disable-shared
# nicesub mb adh.nex

# nicesub tar -xvf gsl-1.8.tar
#
# The one constraint is that you can't have pipes or redirects ( >, <, |), as
# they would use the output of the nicesub command (which is just your job number)
# instead of the output of whatever program you're calling
#
# Brian O'Meara 17 Nov 2006
# http://www.brianomeara.info

# Released under GPL v2

# Build a throw-away SGE job script for the command given on the command
# line, submit it with qsub with a hold on it ("#$ -h"), and remove the
# script again.
#
# Arguments: the program to run followed by its options, exactly as you
#            would type them at a prompt (no pipes or redirects).
# Output:    whatever qsub prints (your job number).
# Exit code: 1 if no command was given or the temporary script could not be
#            created; otherwise the exit status of qsub.

if [ "$#" -eq 0 ]; then
    echo "usage: nicesub command [options]" >&2
    exit 1
fi

# A uniquely named file in the current directory, so that two submissions
# started from the same directory cannot overwrite each other's job script.
jobscript=$(mktemp ./tempqrun.XXXXXX) || exit 1
trap 'rm -f "$jobscript"' EXIT   # remove the script however this shell exits

# Single quotes keep the shell from interpreting "$" and "!" in the lines
# that are written into the job script.
{
    echo '#!/bin/bash'
    echo '#$ -cwd'                  # use current directory as working directory

    ########### Change email settings #################
    echo '#$ -M me@mycollege.edu'   # use your own email address. Please.
    echo '#$ -m as'                 # send email about the job.
    # "b"=when job begins
    # "e"=when job ends
    # "a"=when job aborts
    # "s"=when job suspended (someone kicks you off)
    # "n" alone means don't send mail
    ##################################################

    ##################### Job name ############################
    echo '#$ -N niceWAIT'           # job name
    ###########################################################

    ########### Don't modify this bit #################
    echo '#$ -r y'                  # makes job rerunable
    echo '#$ -S /bin/bash'
    echo '#$ -h'                    # submit the job with a hold on it
    echo ''
    # Write every argument followed by a space; newlines inside an argument
    # become spaces so the whole command stays on one line of the job script.
    # The arguments are written unquoted, so the job's shell splits them again.
    printf '%s ' "$@" | tr '\n' ' '
    echo                            # end the command line so later lines stay separate
    ###################################################

    ############ Cleanup #########################
    #delete the "#" at the beginning of the following line if you want to remove the output files
    # NOTE(review): jobs from this script are named niceWAIT, not EZsub, so
    # this pattern probably needs adjusting before use -- confirm.
    #echo 'rm EZsub.*'
    #############################################
} >"$jobscript"

###### Don't modify this bit ###########
chmod u+x "$jobscript"   # owner-only; the script does not need to be world-writable
sleep 1                  # pause kept from the original before handing the file to qsub
qsub "$jobscript"
status=$?
sleep 1                  # pause kept from the original before the script is removed
exit "$status"
########################################

########################################



niceloop
#!/usr/bin/perl -w
# Perl script to automatically start multiple repetitive jobs.
# Examples would be doing 100 bootstrap replicates by sending
# five jobs of 20 reps each or doing 4 mrbayes runs at once.
# Basically, the script expects there to be a batch file that

# you're using; it will replace any instance of REP (all caps)
# in that batch file with the replicate number. To use this
# script, you type niceloop followed by the number of loops,
# then the program you want to run, any options, and the
# name of the batch file. For example,
#
# niceloop 5 paup -n bootbatch.nex
# niceloop 4 mb adh.nex
#

# A sample batch file for something like paup might be:
#
# #nexus
# begin paup;
# log start file=bootREP.log;
# execute primates.nex;
# bootstrap nreps=20 treefile=bootREP.tre brlen=yes / start=nj;
# quit;
# end;

#
# Then, if submitted using the command "niceloop 5 paup -n bootbatch.nex",
# the output would be boot1.log, boot1.tre, boot2.log, boot2.tre,...
# boot5.log, boot5.tre. Each tree file would have trees from twenty
# bootstrap replicates; you could then load them all into paup,
# making sure to store tree weights, and get a majority rule
# consensus tree using tree weights to compute the bootstrap tree
# from 100 bootstrap replicates.

# The one constraint is that you can't have pipes or redirects ( >, <, |), as
# they would use the output of the niceloop command (which is just your job number)
# instead of the output of whatever program you're calling
#
# Brian O'Meara 17 Nov 2006
# http://www.brianomeara.info
# Released under GPL v2

use diagnostics;
use strict;

# niceloop: for each replicate 1..N, write a copy of the batch file in which
# every "REP" is replaced by the replicate number, then submit the command
# with that copy as its last argument through nicesub.
#
# Arguments: <reps> <command> [options] <batchfile>
# Files:     creates <rep>.<batchfile> next to the batch file for each rep.
# Exits with status 1 after printing the usage line if the arguments are
# malformed; dies if the batch file cannot be read or a copy cannot be written.

# The replicate count must be digits only: "a1" or "5x" would otherwise be
# accepted and silently treated as a different number.
if (@ARGV < 3 || $ARGV[0] !~ m/\A\d+\z/) {
    print "usage: niceloop #reps command [options] filename\n";
    exit 1;
}

my ($reps, @command) = @ARGV;
my $batch_file = pop @command;    # the last argument is the batch file

# Split off any directory part so the replicate number is prefixed to the
# file name itself ("data/boot.nex" -> "data/1.boot.nex").
my ($dir, $name) = $batch_file =~ m{\A(.*/)?([^/]*)\z}s;
$dir = '' unless defined $dir;

# Read the template once; it is the same for every replicate.
open(my $in, '<', $batch_file) or die("where is $batch_file ?");
my @template = <$in>;
close($in);
chomp @template;

for my $rep (1 .. $reps) {
    my $copy = "$dir$rep.$name";

    open(my $out, '>', $copy) or die("cannot write $copy: $!");
    for my $line (@template) {
        (my $filled = $line) =~ s/REP/$rep/g;    #convert REP to $rep. Case sensitive
        print {$out} "$filled\n";
    }
    close($out) or die("cannot finish writing $copy: $!");

    sleep(rand(4));    #just to give the cluster a break, give us time to abort if something's wrong, etc.

    # List form: the arguments reach nicesub exactly as given, without being
    # re-interpreted by an intermediate shell.
    system('nicesub', @command, $copy) == 0
        or warn("nicesub failed for replicate $rep\n");
}

nicestart
#!/usr/bin/perl -w

# nicestart: start submitting your nicely-submitted jobs to SGE. It takes three numbers as arguments.
# MinNum: You will have at least MinNum jobs running or in the official queue at a time,
# even if this leaves no nodes free for other users
# MaxNum: You will have no more than MaxNum jobs running or in the official queue at a time,
# even if there are oodles of other nodes free.
# FreeNum: The number of nodes you will leave free for other users.
#
# usage: nicestart <MinNum> <MaxNum> <FreeNum>

# example: nicestart 2 30 40
# will keep between 2 and 30 of your jobs running or actively queued, while keeping at least
# 40 nodes unused, regardless of how many other people are using the cluster
#
# Niceloop and nicesub effectively put jobs in your own queue. Nicestart starts a script that moves jobs
# from your own queue into the general queue. You submit jobs until there are only FreeNum nodes available
# (but you keep at least MinNum jobs in the general queue). The general idea is that if the cluster isn't
# being heavily used, you submit many jobs, but if the cluster is getting fuller, you submit jobs at a slower
# rate to make sure to leave nodes available for other users. This is most useful for cases where you have

# many jobs (>50) to submit but don't want to block cluster use. It can be especially appropriate if the
# jobs complete very quickly, so your number of active jobs can change very quickly as cluster use changes.
# Setting MinNum>0 means that you will always have MinNum jobs submitted or running, even if this means that
# fewer than FreeNum nodes are left available.
#
# If you want to change your limits, just run nicestart again with the new limits.
#
# Jobs that are held in your own separate queue will have state "hqw" and job name "niceWAIT" -- when
# they are submitted to the general queue, their state will be "qw" or "r" and their name changed to

# "niceRUN".
#
# Brian O'Meara
# http://www.brianomeara.info
# Nov. 16, 2006
# Released under GNU Public License V. 2
use diagnostics;
use strict;

# Stop any nicewatch process the current user already has running, then
# launch a new one in the background with the three limits given here.
#
# Arguments: <MinNum> <MaxNum> <FreeNum>, each a non-negative integer.
# Dies with the usage line if they are missing or not plain integers, so
# that nicewatch is never started with empty or non-numeric limits.

my $usage = "usage: nicestart <MinNum> <MaxNum> <FreeNum>\n";
die $usage if @ARGV < 3;
my ($minnum, $maxnum, $freenum) = @ARGV[0 .. 2];
for my $limit ($minnum, $maxnum, $freenum) {
    die $usage unless $limit =~ m/\A\d+\z/;
}

my $username = `whoami`;
chomp $username;

# Ask ps for this user's processes as "pid command-name" pairs. Selecting by
# user in ps (rather than grepping a listing for the name) avoids missing
# processes when the listing truncates the user name, and comparing the
# command name exactly avoids killing anything that merely contains
# "nicewatch" somewhere on its line.
for my $line (`ps -u $username -o pid= -o comm=`) {
    my ($pid, $command) = $line =~ m/^\s*(\d+)\s+(.*?)\s*$/
        or next;
    next unless $command eq 'nicewatch';
    kill('TERM', $pid);
}

# The limits were checked to be digits only, so they are safe to put into a
# shell command line.
system("nohup nicewatch $minnum $maxnum $freenum > /dev/null &");

nicewatch
#!/usr/bin/perl -w
#takes settings minnum_active_queued_or_running, maxnum_active_queued_or_running (both for individual user), number of slots to leave free
# Brian O'Meara
# http://www.brianomeara.info
# Nov. 27, 2006
# Released under GNU Public License V. 2

use diagnostics;
use strict;

# nicewatch: while the current user still has held ("hqw") jobs, repeatedly
# look at the counts printed by nicecount and, when the limits allow,
# release one held job with qalter (removing the user hold and renaming the
# job to niceRUN).
#
# Arguments: <MinNum> <MaxNum> <FreeNum> [TotalSlots]
#   MinNum     release a job whenever fewer than this many of your jobs are
#              running or queued
#   MaxNum     never release a job once this many of your jobs are running
#              or queued (unless you are below MinNum)
#   FreeNum    only release above MinNum while more than this many slots
#              are unused
#   TotalSlots size of the cluster used to work out unused slots; optional,
#              defaults to 88
# Dies with the usage line if an argument is missing or not a plain integer.

my $usage = "usage: nicewatch <MinNum> <MaxNum> <FreeNum> [TotalSlots]\n";
die $usage if @ARGV < 3;
my ($minnumqueuedtorun, $maxnumqueuedtorun, $freeslots, $totalslots) = @ARGV;
$totalslots = 88 unless defined $totalslots;
for my $setting ($minnumqueuedtorun, $maxnumqueuedtorun, $freeslots, $totalslots) {
    die $usage unless $setting =~ m/\A\d+\z/;
}

sleep(3);
my $username = `whoami`;
chomp $username;

# Return the ids of $owner's jobs whose qstat state column contains "hqw",
# in the order qstat lists them.
sub held_job_ids {
    my ($owner) = @_;
    my @ids;
    for my $line (`qstat`) {
        my ($jobid, $user, $state) =
            $line =~ m/^\s*(\d+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)/
            or next;
        push @ids, $jobid if $user eq $owner && $state =~ m/hqw/;
    }
    return @ids;
}

while (my @held = held_job_ids($username)) {    #while there are still queued, held jobs
    # nicecount prints eleven numbers: total, then running / queued / held /
    # suspended / error counts for all users, then the same five for this user.
    my @counts = split(' ', `nicecount`);

    # Only act on a complete set of counts. A short or empty answer would
    # otherwise be read as "nothing is running" and release jobs wrongly.
    if (@counts >= 11) {
        my ($countRall, $countQWall) = @counts[1, 2];
        my ($countRme,  $countQWme)  = @counts[6, 7];
        my $mine_active = $countRme + $countQWme;
        my $unused      = $totalslots - ($countQWall + $countRall);

        my $reactivate = 0;
        $reactivate = 1 if $mine_active < $minnumqueuedtorun;
        $reactivate = 1
            if $unused > $freeslots && $mine_active < $maxnumqueuedtorun;

        if ($reactivate) {
            # Release the first held job: drop the user hold and rename it.
            system('qalter', '-h', 'U', '-N', 'niceRUN', $held[0]);
        }
    }

    sleep(rand(10));    #Don't change this. This tells the script how long to wait before
                        # reactivating. If you get rid of this line, the script will
                        # keep rerunning, taking up far too much time on the head node.
}

niceqstat
#!/bin/bash
# niceqstat: print qstat's usual listing, leaving out every line that
# contains "hqw" (jobs still waiting in an individual user's held queue).
# Brian O'Meara
# http://www.brianomeara.info
# Nov. 16, 2006
# Released under GNU Public Licence v 2
qstat | grep -v -F -e 'hqw'

nicecount
#!/usr/bin/perl -w
use diagnostics;
use strict;

# nicecount: run qstat once and print eleven space-separated numbers on one
# line (no trailing newline):
#
#   total  Rall QWall Hall Sall Eall  Rme QWme Hme Sme Eme
#
# "total" is the number of job lines recognised. The "all" counts cover every
# user and the "me" counts only the user running the script. A job is counted
# under every category its state matches, so one job can add to several.

my $username = `whoami`;
chomp $username;

# Categories in output order, each with the test applied to the state column.
# QW must be exactly "qw", so held "hqw" jobs are not counted as queued.
my @categories = (
    [ 'R',  qr/r/i     ],
    [ 'QW', qr/^qw$/i  ],
    [ 'H',  qr/h/i     ],
    [ 'S',  qr/s/i     ],
    [ 'E',  qr/e/i     ],
);
my @labels = map { $_->[0] } @categories;

my $countall = 0;
my %count_all = map { $_ => 0 } @labels;
my %count_me  = map { $_ => 0 } @labels;

for my $qstatline (`qstat`) {
    # A job line starts with the job id. Leading blanks are optional, because
    # an id that fills the whole column has none in front of it.
    my ($user, $state) = $qstatline =~ m{
        ^ \s* \d+       # job id
        \s+ [\d.]+      # priority
        \s+ \S+         # job name
        \s+ (\S+)       # user
        \s+ (\S+)       # state
        \s+ [\d/]+      # submit or start date
        \s+ [\d:]+      # submit or start time
        \s+ \S*         # queue, empty for jobs that are not running
        \s+ \d+         # slots
    }x or next;

    $countall++;
    for my $category (@categories) {
        my ($label, $pattern) = @$category;
        next unless $state =~ $pattern;
        $count_all{$label}++;
        $count_me{$label}++ if $user eq $username;
    }
}

print join(' ',
    $countall,
    (map { $count_all{$_} } @labels),
    (map { $count_me{$_} } @labels),
);