#!/usr/local/bin/perl # sfsubsearch.pl # # form variable names & values # liblist: genbank gbnew pir swissprot swissnew # query1 : words # class1 : AllText, ID, Accession, Date, Definition, Keywords, # Organism, Authors, Title, Reference, Comment, Features, SeqLength # or1 : or and butnot # query2 : words # class2 : like class 1 # searchapp: fasta tfasta # ktup : number # seqtype: nucleic protein # inputseq: text block # seqname: words # ### Server specific vars $maxload = '2'; # 2, whatever, system load level $maxlib = '100000'; # max subset library entries (for test service) $tmpdir= "/tmp/"; # this temp must lie in Gopher kingdom (below $gopath) $SRSROOT = "/b4/srs"; #$SRSROOT = "/b6/tmp/srst"; $subpath = "$SRSROOT/tmp/"; $subsetlib = "srslib$$"; $logfile = "/usr/tmp/srsfasta.log"; $sysloadaverage = "/usr/ucb/uptime"; #$seqsearchpath = 'nice /b4/work/fasta/bin/'; $seqsearchpath = 'nice /b4/srsfasta/bin/'; $SRSserver = "http://iubio.bio.indiana.edu:81/srs/srsc"; $srsIdLinkFmt = "%s"; $libname= "genbank"; # genbank gbnew pir swissprot swissnew ### Data-specific vars $title = "SRS-FastA Sequence Subset Search"; #$title = "GenBank Subset Search"; #$title = "Sequence Databank Subset Similarity Search"; ### static vars $p = "'"; $q = '"'; $dohtml= 1; $goplus = 0; $isdialog= 1; $or1='or'; $srsq= ""; ### derived vars $seqfile = $tmpdir . "gbsub$$.seq"; $subsetoutput = $tmpdir . "gbsub$$.list"; ### external subroutines require "getopts.pl"; require "ctime.pl"; ## ## MAIN ## ### read command-line &Getopts('dhv:'); if ($opt_d) { $isdialog = 1; } if ($ENV{'CONTENT_TYPE'} ne "") { $opt_v= $ENV{'CONTENT_TYPE'};} if ($ENV{'CONTENT_LENGTH'} ne "") { $conlen= $ENV{'CONTENT_LENGTH'};} if ($opt_v eq "application/gopher+-menu") { $goplus= 1; } if ($opt_v eq "text/html") { $dohtml= 1; } if ($0 =~ "html" || $opt_h) { $dohtml = 3; } if ($dohtml) { print "Content-type: text/html\n\n
\n";
#print "Content-type: text/plain\n\n";
if ($isdialog) {
## all parameters are on commandline w/ switch names
## !! POST method puts them in STDIN
if ($#ARGV>-1) { $inputs= join(' ',@ARGV); }
else { $inputs= ""; }
if ($conlen) {
# ! for n-httpd
#print "reading $conlen bytes\n";
read (STDIN, $inputs, $conlen);
@parms= split(/[ &]/, $inputs);
}
else {
while () {
chop($_);
if ($_ ne "") { $inputs= join(' ',$inputs,$_); }
}
@parms= split(/[ ]/, $inputs);
}
#print "params= ",join(' ',@parms),"
\n";
foreach (@parms) {
if (/=/) {
($name,$data)= split(/=/);
if ($data ne "") {
$data= &unescapeUrl( $data);
if ($flds{$name} ne "") { $s= " "; } else { $s = ""; }
$flds{$name} .= $s . $data;
}
}
##elsif ($_ ne "") { $flds{$name} .= $_ . " "; }
}
$liblist = $flds{"liblist"};
$query1 = $flds{"query1"};
$class1 = $flds{"class1"};
$or1 = $flds{"or1"};
$query2 = $flds{"query2"};
$class2 = $flds{"class2"};
$sublib= $flds{"sublib"}; # gbsectionsearch
$searchapp= $flds{"searchapp"};
$ktup = $flds{"ktup"};
$seqtype = $flds{"seqtype"};
$seqname = $flds{"seqname"};
$inputseq = $flds{"inputseq"};
}
else {
#$query= join(' ',@ARGV);
die "Must use dialog entry format";
}
}
else {
if ($isdialog) {
#chop( $query = );
die "Gopher+ dialog format not ready";
}
else {
#$query= join(' ',@ARGV);
die "Must use dialog entry format";
}
}
if ( length($inputseq) < 1) { print "No input sequence.\n"; die; }
if ( length($liblist) + length( $sublib) < 1) { print "No libraries to search.\n"; die; };
#
# Check system load & refuse if too high...
#
&CheckLoad();
if ($sublib) {
$subsetlib= $sublib;
print <$title
Databank Section:
$sublib
Sequence name:
$seqname
Similarity search function:
$searchapp
TEOF
;
if (open(LOG,">> $logfile")) {
chop( $date = &ctime(time));
$caller= $ENV{"REMOTE_HOST"};
print LOG "$date\t$caller\t$sublib\t-9\n";
close(LOG);
}
goto SEQSEARCH;
}
$srsq= &BuildQuery( $liblist, $query1, $or1, $query2);
if (! $inputseq) { die "No input sequence." }
if (! $srsq) { die "No subset query." }
$slen= length($inputseq);
#
# Do analysis
#
$_= $srsq; s/\\//g; ## strip \ for display
print <$title
Subset selection query:
$_
Sequence name:
$seqname
Similarity search function:
$searchapp
TEOF
;
# call srs/getz w/ -fse subset list option
&SRSQuery( $srsq, $subsetlib, $subsetoutput);
## ?? Test $subsetoutput for line count - is it too big to allow on exp. server?
($libcount, $rest) = split(' ', `wc $subsetoutput`);
if (open(LOG,">> $logfile")) {
chop( $date = &ctime(time));
$caller= $ENV{"REMOTE_HOST"};
print LOG "$date\t$caller\t$srsq\t$libcount\n";
close(LOG);
}
if ($libcount > $maxlib) {
print <$seqfile");
print SEQ ">$seqname\n";
print SEQ "$inputseq\n";
close(SEQ);
# $seqtype -- do what with it?
# call (t)fasta search app
$ENV{'LIBTYPE'} = "12"; # select subset libtype
$fopts = "-Q -h";
$cmd= "$seqsearchpath$searchapp $fopts $seqfile $p$subpath$subsetlib 12$p $ktup";
#print "$cmd\n";
@searchresult = `$cmd`;
#
# Output results
#
if (!$dohtml) {
print @searchresult;
}
else {
# do href markup of searchresult to link sequence matches to databanks
$bestmark= 0;
foreach (@searchresult) {
if ($bestmark < 1) {
if (/The best scores are:/) { $bestmark= 1; print "
\n"; }
print;
}
elsif ($bestmark == 1) {
if (/^\w/) {
($name, $rest) = split(/ /,$_,2);
($lib,$id) = split(/:/,$name,2);
if (!$lib) { $lib= $libname; $id= $name; }
$href= sprintf("$srsIdLinkFmt ", $lib, $id, $name);
print $href, $rest;
$links{$name} = $href;
}
else { $bestmark= 2; print; }
}
else {
if (/^\w+\:\w+ /) {
($name, $rest) = split(/ /,$_,2);
$href= $links{$name};
if ($href) { print "
", $href, $rest; }
else { print;}
}
else { print;}
}
}
}
# optionally provide $subsetoutput
if ($dohtml) { print "\n
\n"; }
unlink($seqfile);
unlink($subsetoutput);
unlink("$subpath$subsetlib");
exit(0);
#-------------------------------
sub unescapeUrl
{
## convert "%7Cbob%3c+%5Cjoe%3a" to "|bob< \joe:";
local($_) = @_;
s/\+/ /g; # undo space-escape before hexes
s/\%(..)/pack(H2,$1)/eg;
$_;
}
sub CheckLoad
{
$_ = `$sysloadaverage`;
if (/load average: ([\d\.]+), ([\d\.]+), ([\d\.]+)/) {
$load1= $1; $load5= $2; $load15= $3;
##print "sys loads: $load1 -- $load5 -- $load15 \n";
if ($load1 > $maxload) {
print <