#!/usr/bin/perl
# ll_augjob
#
# Reads a tab-separated partitions list and, for every partition directory,
# writes an IBM LoadLeveler job script (augrun.N in the current directory)
# that runs augustus on that directory's genome file, then hands the script
# to `llsubmit`. With --debug the llsubmit command is only printed.

use strict;
use warnings;
use Cwd;
use Getopt::Long ; #qw(:config no_ignore_case bundling);

use constant NASV1 => 0;

my $ACCOUNT="none";
my $CLOCKTIME="06:00:00";
my $aug_bin="augustus"; # fixme: param?
my $debug=0;

# All path/name options start out as empty strings so that later string
# tests and interpolations never see undef.
my ( $base_dir, $oldbase, $aug_dir, $extrinsicCfgFile,
     $species, $hintsfile, $genome, $output_file_name,
     $partitions,
   ) = ("") x 9;

$output_file_name="augustus.gff";
$partitions="partitions.list";
$extrinsicCfgFile="config/augrun.cfg";

# if(NASV1) {
# $species="nasonia_vitripennis";
# $hintsfile="nasv_all.hints";
# $genome="nasv_genome.fa";
# $partitions="nasv_partitions.list";
# $oldbase="/export/home/bio/bio-grid/mb/EVidenceModeler/nasv1/";
# $base_dir="/N/gpfsbr/gilbertd/chrs/aug/nasv1/"; # FIX
# $aug_dir="/N/u/gilbertd/BigRed/bio/augustus";
# }

my $optok = GetOptions(
    "hintsfile=s"        => \$hintsfile,
    "extrinsicCfgFile=s" => \$extrinsicCfgFile,
    "species=s"          => \$species,
    "genome=s"           => \$genome,
    "aug_dir=s"          => \$aug_dir,
    "aug_bin=s"          => \$aug_bin,
    "partitions=s"       => \$partitions,
    "output_file_name=s" => \$output_file_name,
    "ACCOUNT=s"          => \$ACCOUNT,
    "CLOCKTIME=s"        => \$CLOCKTIME,
    "base_dir=s"         => \$base_dir,
    "oldbase=s"          => \$oldbase,
    "debug!"             => \$debug,
);

my $USAGE= <<"USAGE";
usage: ll_augjob --species=nasonia_vitripennis \\
  --aug_dir=/path/to/augustus \\
  --partitions=partitions.list \\
  --genome nasv_genome.fa \\
  --hintsfile=nasv_all.hints --extrinsicCfgFile=aug_dir/config/augrun.cfg \\
  --output augustus.gff \\
optional: --ACCOUNT=$ACCOUNT --CLOCKTIME=$CLOCKTIME
USAGE

### need these only if partion on one path, move to another path
## --base_dir=/path/to/genome --oldbase=/old/path
#
# NOTE(review): the flattened source does not make clear whether the cwd()
# default below was live or commented out; it is kept live because it is
# the only use of the Cwd import -- confirm.
$base_dir = cwd() unless($base_dir);

# Bad options are reported first so the user sees the usage text rather
# than a secondary "missing ..." error.
die $USAGE unless($optok);
die "missing --aug_dir=$aug_dir\n".$USAGE unless (-d $aug_dir);

# Fall back to a "partitions" file under the base directory.
$partitions= "$base_dir/partitions" unless (-f $partitions);
die "missing --partitions=$partitions\n".$USAGE unless (-f $partitions);
die $USAGE unless($species and $genome and $output_file_name);

$aug_dir =~ s,/$,,;

if($hintsfile) {
    # A relative extrinsic config path is taken relative to the augustus dir.
    $extrinsicCfgFile="${aug_dir}/$extrinsicCfgFile"
        unless($extrinsicCfgFile =~ m,^/,);
    die "missing --extrinsicCfgFile=$extrinsicCfgFile\n".$USAGE
        unless (-f $extrinsicCfgFile);
}

# Walk the partitions list; columns are
#   accession <TAB> accession_dir <TAB> is_partitioned(Y/N) <TAB> partition_dir
my $partn=0;
open (my $fh, '<', $partitions)
    or die "Error, cannot open $partitions: $!\n";
while (my $line = <$fh>) {
    next unless($line =~ /^\w/);   # skip blank and comment lines
    chomp $line;
    my ($accession, $acc_dir, $is_partitioned, $partition_dir)
        = split (/\t/, $line);
    next unless($acc_dir);
    $partn++;

    my $part_dir = $acc_dir;
    if (defined $is_partitioned and $is_partitioned eq "Y") {
        die "Error, partitioned entry without partition dir at $partitions line $.\n"
            unless (defined $partition_dir and length $partition_dir);
        $part_dir = $partition_dir;
    }

    # Relocate paths recorded under the old base directory. \Q..\E keeps
    # path characters such as '.' or '+' from acting as regex operators,
    # and ^ limits the rewrite to a leading prefix.
    $part_dir =~ s,^\Q$oldbase\E,$base_dir, if($oldbase); # die if fail?
    $part_dir =~ s,/$,,;

    if (-d $part_dir) {
        submit_augustus($partn, $part_dir);
    } else {
        die "Error, cannot locate data directory $part_dir \n";
    }
}
close($fh);

# generalize this in package? or instead use ergatis, other workflow tool?
# submit_job_augustus_llsubmit
# submit_job_snap_gridrun

# submit_augustus($partn, $part_dir)
#   $partn    - running partition number; used to name the job file augrun.$partn
#   $part_dir - partition directory holding the genome (and hints) file;
#               the augustus output and the job's .err file are written there
# Writes the LoadLeveler job script into the current directory and submits
# it with `llsubmit -q` (or only prints that command when --debug is set).
# Dies if the job script cannot be written.
sub submit_augustus {
    my($partn, $part_dir)= @_;

    my $jobn="augrun.$partn";

    # Hints options are only added when --hintsfile was given.
    my $addhints= ($hintsfile)
        ? " --hintsfile=${part_dir}/$hintsfile --extrinsicCfgFile=$extrinsicCfgFile "
        : "";

    my $job = <<"EOF";
#! /bin/bash -l
## $jobn: IBM LoadLeveler job, llsubmit
# @ class = NORMAL
# @ account_no = $ACCOUNT
# @ wall_clock_limit = $CLOCKTIME
# @ error = $part_dir/$jobn.err
### output = $part_dir/$jobn.out
# @ notification = always
# @ environment=COPY_ALL;
# @ queue

${aug_dir}/bin/${aug_bin} --gff3=on --uniqueGeneId=true \\
  --species=$species $addhints \\
  --AUGUSTUS_CONFIG_PATH=$aug_dir/config/ \\
  ${part_dir}/$genome \\
  > ${part_dir}/$output_file_name

EOF

    # The close is checked so a truncated job script is never submitted.
    open(my $jobfh, '>', $jobn) or die "cannot write job file $jobn: $!\n";
    print {$jobfh} $job;
    close($jobfh) or die "cannot close job file $jobn: $!\n";

    if($debug) { # DEBUG off
        print "llsubmit -q $jobn\n";
    } else {
        my $res=`llsubmit -q $jobn`;
        # Backticks return undef when the command could not be run at all.
        $res = "" unless defined $res;
        chomp($res);
        warn "llsubmit failed for $jobn (status $?)\n" if ($? != 0);
        print "# llsubmit $jobn : $res\n";
    }
    # unlink $jobn;
}

__END__

parts list
SCAFFOLD20 /export/home/bio/bio-grid/mb/EVidenceModeler/nasv1/SCAFFOLD20 Y /export/home/bio/bio-grid/mb/EVidenceModeler/nasv1/SCAFFOLD20/SCAFFOLD20_1-2000000
SCAFFOLD20 /export/home/bio/bio-grid/mb/EVidenceModeler/nasv1/SCAFFOLD20 Y /export/home/bio/bio-grid/mb/EVidenceModeler/nasv1/SCAFFOLD20/SCAFFOLD20_1950001-2035339
SCAFFOLD33 /export/home/bio/bio-grid/mb/EVidenceModeler/nasv1/SCAFFOLD33 N

ll_augjob --species=nasonia_vitripennis \
  --extrinsicCfgFile=$aug_dir/config/augrun.cfg \
  --hintsfile=$base_dir/SCAFFOLD33/nasv_all.hints \
  $base_dir/SCAFFOLD33/nasv_genome.fa \
  > $base_dir/SCAFFOLD33/augustus.gff

>>>>>

ll_augjob \
  --base_dir=$base_dir \
  --species=nasonia_vitripennis \
  --extrinsicCfgFile=$aug_dir/config/augrun.cfg \
  --hintsfile=SCAFFOLD33/nasv_all.hints \
  --genome SCAFFOLD33/nasv_genome.fa \
  --output SCAFFOLD33/augustus.gff