#!/usr/local/bin/perl # sfsectionlib.pl # # generate subsection libs for use w/ fasta # ### Server specific vars $tmpdir= "/tmp/"; # this temp must lie in Gopher kingdom (below $gopath) $SRSROOT = "/b4/srs"; ## path to SRS software #$SRSROOT = "/b6/tmp/srst"; $subpath = "$SRSROOT/tmp/"; # path to where .indices & .names files are $subsetlib = "srslib$$"; # temp name # # SRS Databanks to use # @nadatabanks=('genbank','gbnew', 'gbest'); $nadatalabel= "GenBank, Genbank EST & GenBank updates"; #@nadatabanks=('genbank'); ## add 'gbest' #$nadatalabel= "GenBank"; @aadatabanks=('pir','swissprot', 'swissnew'); $aadatalabel= "PIR, SwissProt & SwissProt updates"; # # SRS queries to generate predefined subset libraries # # format is 'data-field=srs-query=index-file-name=description' @queries = (); push(@queries, 'org=arabidopsis=arabidopsis'); push(@queries, 'org=caenorhabditis=caenorhabditis'); push(@queries, 'org=drosophila=drosophila=fruit fly'); #push(@queries, 'org=homo sapiens=human=human'); ## too big! ~2/3 of genbank push(@queries, 'org=oryza=oryza=rice'); push(@queries, 'org=poaceae=poaceae=wheat,corn,rice and related'); push(@queries, 'org=rattus=rattus=rat'); push(@queries, 'org=murinae=murinae=rat,mouse and related'); push(@queries, 'org=saccharomyces=saccharomyces=yeast'); ### external subroutines #require "getopts.pl"; require "ctime.pl"; ### static vars $p = "'"; $q = '"'; $doqueries = 0; $isamino= 0; $progname= 'sfsectionlib.pl'; $wc = 'wc'; ## local wordcount function # write counts for html dialog in this form: # 6415 drosophila $outfmt= "%7d %-15s %s\n"; $tabletop = "
\nentries section from $datalabel\n"; $tableend = "\n"; ## ## MAIN ## ### read command-line ##&Getopts('ah:q'); --DAMN THIS IS BOMBING w/ Perl5 ! ! ! $nocommand= 1; foreach $i ( 0 .. $#ARGV) { $_= $ARGV[$i]; if (/^-a/) { $opt_a= 1; $nocommand= 0; } elsif (/^-h/) { $opt_h= $ARGV[$i+1]; $i++; $nocommand= 0; } elsif (/^-q/) { $opt_q= 1; $nocommand= 0; } } if ($opt_q) { $doqueries = 1; } if ($opt_a) { $isamino= 1; } &usage() if $nocommand; ####### # # Do analysis # if ($isamino) { @databanks= @aadatabanks; $datalabel = $aadatalabel; $tabsec= "aa"; } else { @databanks= @nadatabanks; $datalabel = $nadatalabel; $tabsec= "na"; } if ($doqueries) { # ! 1st remove old indices, as getz -fse will APPEND to any current indices system( "/bin/rm $subpath/*-$tabsec.indices"); system( "/bin/rm $subpath/*-$tabsec.names"); } @htable= (); foreach $qry (@queries) { $srsq= ''; $desc=''; $filen=''; $query=''; $field=''; ($field,$query,$filen, $desc) = split(/=/,$qry); $srsq = "-l $p" . join(' ',@databanks) . "$p [SQ-$field:$query*]"; $subsetlib= $filen . '-' . $tabsec; if ($doqueries) { &SRSQuery( $srsq, $subsetlib, "/dev/null"); } $subfile= "$subpath$subsetlib.indices"; $indexcount = `$wc $subfile`; if ($indexcount > 0) { push(@htable, sprintf( $outfmt, $indexcount, $filen, $subsetlib, $desc)); } } ($def,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = stat($subfile); $subfiletime= &ctime($mtime); if ($opt_h) { #print STDERR "edit file $opt_h\n"; $htemp = "$tmpdir/hf$$"; open(HF, $opt_h) || die "Can't open $opt_h"; open(OUT, "> $htemp") || die "Can't create $htemp"; $doout= 1; while (
\nentries section from $datalabel \n";
print OUT @htable;
}
elsif (/<\!$tabsec-table-end>/) {
$doout= 1;
print OUT "Subset indices updated on $subfiletime";
print OUT $tableend, $_;
}
}
close(OUT);
close(HF);
system("/bin/mv $htemp $opt_h");
}
else {
print @htable;
}
exit(0);
#-------------------------------
sub usage
{
print < $outfile";
print("$cmd\n");
system("$cmd");
}
# try wgetz instead of getz... this is input form for wgetz -
# libList=GENBANK&retrieveFieldName1=Organism&retrieveStr1=drosophilidae
# &retrieveFieldName2=AllText&retrieveStr2=esterase&retrieveFieldName3=AllText
# &retrieveStr3=
# &retrieveFieldName4=AllText&retrieveStr4=&makeWild=on&operator=AND
# &userId%7C%7CdoQuery=810692647_129.79.18.11%7C%7Cyes&seqFormat=PIR
# &entryType=entries&listEntriesChunkSize=50&viewEntriesChunkSize=10