#!/usr/local/bin/perl # sfsubsearch.pl # # form variable names & values # liblist: genbank gbnew pir swissprot swissnew # query1 : words # class1 : AllText, ID, Accession, Date, Definition, Keywords, # Organism, Authors, Title, Reference, Comment, Features, SeqLength # or1 : or and butnot # query2 : words # class2 : like class 1 # searchapp: fasta tfasta # ktup : number # seqtype: nucleic protein # inputseq: text block # seqname: words # ### Server specific vars $maxload = '2'; # 2, whatever, system load level $maxlib = '100000'; # max subset library entries (for test service) $tmpdir= "/tmp/"; # this temp must lie in Gopher kingdom (below $gopath) $SRSROOT = "/b4/srs"; #$SRSROOT = "/b6/tmp/srst"; $subpath = "$SRSROOT/tmp/"; $subsetlib = "srslib$$"; $logfile = "/usr/tmp/srsfasta.log"; $sysloadaverage = "/usr/ucb/uptime"; #$seqsearchpath = 'nice /b4/work/fasta/bin/'; $seqsearchpath = 'nice /b4/srsfasta/bin/'; $SRSserver = "http://iubio.bio.indiana.edu:81/srs/srsc"; $srsIdLinkFmt = "%s"; $libname= "genbank"; # genbank gbnew pir swissprot swissnew ### Data-specific vars $title = "SRS-FastA Sequence Subset Search"; #$title = "GenBank Subset Search"; #$title = "Sequence Databank Subset Similarity Search"; ### static vars $p = "'"; $q = '"'; $dohtml= 1; $goplus = 0; $isdialog= 1; $or1='or'; $srsq= ""; ### derived vars $seqfile = $tmpdir . "gbsub$$.seq"; $subsetoutput = $tmpdir . "gbsub$$.list"; ### external subroutines require "getopts.pl"; require "ctime.pl"; ## ## MAIN ## ### read command-line &Getopts('dhv:'); if ($opt_d) { $isdialog = 1; } if ($ENV{'CONTENT_TYPE'} ne "") { $opt_v= $ENV{'CONTENT_TYPE'};} if ($ENV{'CONTENT_LENGTH'} ne "") { $conlen= $ENV{'CONTENT_LENGTH'};} if ($opt_v eq "application/gopher+-menu") { $goplus= 1; } if ($opt_v eq "text/html") { $dohtml= 1; } if ($0 =~ "html" || $opt_h) { $dohtml = 3; } if ($dohtml) { print "Content-type: text/html\n\n
\n";
  #print "Content-type: text/plain\n\n";

  if ($isdialog) {
    ## all parameters are on commandline w/ switch names
    ## !! POST method puts them in STDIN
    if ($#ARGV>-1) { $inputs= join(' ',@ARGV); }
    else { $inputs= ""; }

    if ($conlen) {
       # ! for n-httpd
       #print "reading $conlen bytes\n";
	     read (STDIN, $inputs, $conlen);
       @parms= split(/[ &]/, $inputs);
	    }
    else {
      while () { 
       chop($_); 
       if ($_ ne "") { $inputs= join(' ',$inputs,$_); }
       }
     @parms= split(/[ ]/, $inputs);
     }
    #print "

params= ",join(' ',@parms),"

\n"; foreach (@parms) { if (/=/) { ($name,$data)= split(/=/); if ($data ne "") { $data= &unescapeUrl( $data); if ($flds{$name} ne "") { $s= " "; } else { $s = ""; } $flds{$name} .= $s . $data; } } ##elsif ($_ ne "") { $flds{$name} .= $_ . " "; } } $liblist = $flds{"liblist"}; $query1 = $flds{"query1"}; $class1 = $flds{"class1"}; $or1 = $flds{"or1"}; $query2 = $flds{"query2"}; $class2 = $flds{"class2"}; $sublib= $flds{"sublib"}; # gbsectionsearch $searchapp= $flds{"searchapp"}; $ktup = $flds{"ktup"}; $seqtype = $flds{"seqtype"}; $seqname = $flds{"seqname"}; $inputseq = $flds{"inputseq"}; } else { #$query= join(' ',@ARGV); die "Must use dialog entry format"; } } else { if ($isdialog) { #chop( $query = ); die "Gopher+ dialog format not ready"; } else { #$query= join(' ',@ARGV); die "Must use dialog entry format"; } } if ( length($inputseq) < 1) { print "No input sequence.\n"; die; } if ( length($liblist) + length( $sublib) < 1) { print "No libraries to search.\n"; die; }; # # Check system load & refuse if too high... # &CheckLoad(); if ($sublib) { $subsetlib= $sublib; print <$title Databank Section: $sublib Sequence name: $seqname Similarity search function: $searchapp TEOF ; if (open(LOG,">> $logfile")) { chop( $date = &ctime(time)); $caller= $ENV{"REMOTE_HOST"}; print LOG "$date\t$caller\t$sublib\t-9\n"; close(LOG); } goto SEQSEARCH; } $srsq= &BuildQuery( $liblist, $query1, $or1, $query2); if (! $inputseq) { die "No input sequence." } if (! $srsq) { die "No subset query." } $slen= length($inputseq); # # Do analysis # $_= $srsq; s/\\//g; ## strip \ for display print <$title Subset selection query: $_ Sequence name: $seqname Similarity search function: $searchapp TEOF ; # call srs/getz w/ -fse subset list option &SRSQuery( $srsq, $subsetlib, $subsetoutput); ## ?? Test $subsetoutput for line count - is it too big to allow on exp. server? ($libcount, $rest) = split(' ', `wc $subsetoutput`); if (open(LOG,">> $logfile")) { chop( $date = &ctime(time)); $caller= $ENV{"REMOTE_HOST"}; print LOG "$date\t$caller\t$srsq\t$libcount\n"; close(LOG); } if ($libcount > $maxlib) { print <$seqfile"); print SEQ ">$seqname\n"; print SEQ "$inputseq\n"; close(SEQ); # $seqtype -- do what with it? # call (t)fasta search app $ENV{'LIBTYPE'} = "12"; # select subset libtype $fopts = "-Q -h"; $cmd= "$seqsearchpath$searchapp $fopts $seqfile $p$subpath$subsetlib 12$p $ktup"; #print "$cmd\n"; @searchresult = `$cmd`; # # Output results # if (!$dohtml) { print @searchresult; } else { # do href markup of searchresult to link sequence matches to databanks $bestmark= 0; foreach (@searchresult) { if ($bestmark < 1) { if (/The best scores are:/) { $bestmark= 1; print "


\n"; } print; } elsif ($bestmark == 1) { if (/^\w/) { ($name, $rest) = split(/ /,$_,2); ($lib,$id) = split(/:/,$name,2); if (!$lib) { $lib= $libname; $id= $name; } $href= sprintf("$srsIdLinkFmt ", $lib, $id, $name); print $href, $rest; $links{$name} = $href; } else { $bestmark= 2; print; } } else { if (/^\w+\:\w+ /) { ($name, $rest) = split(/ /,$_,2); $href= $links{$name}; if ($href) { print "
", $href, $rest; } else { print;} } else { print;} } } } # optionally provide $subsetoutput if ($dohtml) { print "\n
\n"; } unlink($seqfile); unlink($subsetoutput); unlink("$subpath$subsetlib"); exit(0); #------------------------------- sub unescapeUrl { ## convert "%7Cbob%3c+%5Cjoe%3a" to "|bob< \joe:"; local($_) = @_; s/\+/ /g; # undo space-escape before hexes s/\%(..)/pack(H2,$1)/eg; $_; } sub CheckLoad { $_ = `$sysloadaverage`; if (/load average: ([\d\.]+), ([\d\.]+), ([\d\.]+)/) { $load1= $1; $load5= $2; $load15= $3; ##print "sys loads: $load1 -- $load5 -- $load15 \n"; if ($load1 > $maxload) { print < $outfile\n"); system("set noglob; nice $SRSEXE/$srsapp -fse $subsetlib $params > $outfile"); } # try wgetz instead of getz... this is input form for wgetz - # libList=GENBANK&retrieveFieldName1=Organism&retrieveStr1=drosophilidae # &retrieveFieldName2=AllText&retrieveStr2=esterase&retrieveFieldName3=AllText # &retrieveStr3= # &retrieveFieldName4=AllText&retrieveStr4=&makeWild=on&operator=AND # &userId%7C%7CdoQuery=810692647_129.79.18.11%7C%7Cyes&seqFormat=PIR # &entryType=entries&listEntriesChunkSize=50&viewEntriesChunkSize=10