#!/bin/sh
# dgblast-ll.sh
# BLAST a bunch of genomes against one another.
# Sample script for bioinformatics data-grid methods (parallelize by data splitting),
# using IBM LoadLeveler.

# Installation directories and the scratch area used by this script.
#   blbin    - directory holding dgfasplit.pl (the fasta splitter run below)
#   ncbibin  - directory holding the NCBI blastall binary named in each job
#   mpiblast - parallel-blast install; not referenced elsewhere in this script
#   SHARED   - scratch root; inputs are read from chrs/, results go to out/chrs/
blbin="${HOME}/bio/blast/bin"
ncbibin="${HOME}/bio/ncbi/bin"
mpiblast="${HOME}/bio/iuparblast/ncbi/bin"
SHARED="${HOME}/scratch"

## Subject species: each one is used as the blast database (-d) below.
## Note: dyak results are huge, about 10x the others -- all due to the
## repetitive parts of chrU?
## Alternative subject lists (uncomment exactly one to use it instead):
#ssplist='dana dere dgri dpse dvir'
#ssplist='dpse'
#ssplist='dmoj'
#ssplist='dwil'
#ssplist='dmel'
#ssplist='dsim'
#ssplist='dsec'
#ssplist='dper'
ssplist='dyak'

## Query species: each one is split into parts and blasted against every
## subject species other than itself.
qsplist='dana dere  dgri  dmel  dmoj  dper  dpse  dsec  dsim  dvir  dwil  dyak'
## To redo only the ones that had bad-blast errors (or all?), use instead:
#qsplist='dvir dsec dgri'

for sspp in $ssplist
{
for qspp in $qsplist
{
 [ $qspp == $sspp ] && continue; 
 
 $blbin/dgfasplit.pl -n 10 -in $SHARED/chrs/$qspp/${qspp}_*.fa
 
 for qpart in $SHARED/chrs/$qspp/${qspp}_*.fa-??
 {
   qp=`echo  $qpart| sed -e's/.*.fa.//;'`
   jobn=run-${qspp}-${sspp}.$qp 
   outn=$SHARED/out/chrs/${qspp}-${sspp}c.$qp
   cat > $jobn <<EOJ
#!/bin/sh 
## IBM LoadLeveler job, llsubmit
# @ class = MED
# @ error   = $outn.err
# @ output  = $outn.out
# @ queue

$ncbibin/blastall \\
  -a 1 -p blastn -m 9 -e 0.001 -b 10 -v 10 \\
  -o $outn.blout \\
  -d $SHARED/chrs/$sspp/$sspp-chromosome \\
  -i $qpart

EOJ

  llsubmit $jobn
  }
  # break
}
}
