#!/usr/local/bin/perl # sfsectionlib.pl # # generate subsection libs for use w/ fasta # ### Server specific vars $tmpdir= "/tmp/"; # this temp must lie in Gopher kingdom (below $gopath) $SRSROOT = "/b4/srs"; ## path to SRS software #$SRSROOT = "/b6/tmp/srst"; $subpath = "$SRSROOT/tmp/"; # path to where .indices & .names files are $subsetlib = "srslib$$"; # temp name # # SRS Databanks to use # @nadatabanks=('genbank','gbnew', 'gbest'); $nadatalabel= "GenBank, Genbank EST & GenBank updates"; #@nadatabanks=('genbank'); ## add 'gbest' #$nadatalabel= "GenBank"; @aadatabanks=('pir','swissprot', 'swissnew'); $aadatalabel= "PIR, SwissProt & SwissProt updates"; # # SRS queries to generate predefined subset libraries # # format is 'data-field=srs-query=index-file-name=description' @queries = (); push(@queries, 'org=arabidopsis=arabidopsis'); push(@queries, 'org=caenorhabditis=caenorhabditis'); push(@queries, 'org=drosophila=drosophila=fruit fly'); #push(@queries, 'org=homo sapiens=human=human'); ## too big! ~2/3 of genbank push(@queries, 'org=oryza=oryza=rice'); push(@queries, 'org=poaceae=poaceae=wheat,corn,rice and related'); push(@queries, 'org=rattus=rattus=rat'); push(@queries, 'org=murinae=murinae=rat,mouse and related'); push(@queries, 'org=saccharomyces=saccharomyces=yeast'); ### external subroutines #require "getopts.pl"; require "ctime.pl"; ### static vars $p = "'"; $q = '"'; $doqueries = 0; $isamino= 0; $progname= 'sfsectionlib.pl'; $wc = 'wc'; ## local wordcount function # write counts for html dialog in this form: # 6415 drosophila $outfmt= "%7d %-15s %s\n"; $tabletop = "
\nentries  section from $datalabel\n";
$tableend = "
\n"; ## ## MAIN ## ### read command-line ##&Getopts('ah:q'); --DAMN THIS IS BOMBING w/ Perl5 ! ! ! $nocommand= 1; foreach $i ( 0 .. $#ARGV) { $_= $ARGV[$i]; if (/^-a/) { $opt_a= 1; $nocommand= 0; } elsif (/^-h/) { $opt_h= $ARGV[$i+1]; $i++; $nocommand= 0; } elsif (/^-q/) { $opt_q= 1; $nocommand= 0; } } if ($opt_q) { $doqueries = 1; } if ($opt_a) { $isamino= 1; } &usage() if $nocommand; ####### # # Do analysis # if ($isamino) { @databanks= @aadatabanks; $datalabel = $aadatalabel; $tabsec= "aa"; } else { @databanks= @nadatabanks; $datalabel = $nadatalabel; $tabsec= "na"; } if ($doqueries) { # ! 1st remove old indices, as getz -fse will APPEND to any current indices system( "/bin/rm $subpath/*-$tabsec.indices"); system( "/bin/rm $subpath/*-$tabsec.names"); } @htable= (); foreach $qry (@queries) { $srsq= ''; $desc=''; $filen=''; $query=''; $field=''; ($field,$query,$filen, $desc) = split(/=/,$qry); $srsq = "-l $p" . join(' ',@databanks) . "$p [SQ-$field:$query*]"; $subsetlib= $filen . '-' . $tabsec; if ($doqueries) { &SRSQuery( $srsq, $subsetlib, "/dev/null"); } $subfile= "$subpath$subsetlib.indices"; $indexcount = `$wc $subfile`; if ($indexcount > 0) { push(@htable, sprintf( $outfmt, $indexcount, $filen, $subsetlib, $desc)); } } ($def,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = stat($subfile); $subfiletime= &ctime($mtime); if ($opt_h) { #print STDERR "edit file $opt_h\n"; $htemp = "$tmpdir/hf$$"; open(HF, $opt_h) || die "Can't open $opt_h"; open(OUT, "> $htemp") || die "Can't create $htemp"; $doout= 1; while () { #print STDERR $_; #debug if ($doout) { print OUT $_; } if (/<\!$tabsec-table-start>/) { $doout= 0; ##print OUT $tabletop, @htable; print OUT "
\nentries  section from $datalabel \n";
		print OUT @htable;
		}

		elsif (/<\!$tabsec-table-end>/) { 
		$doout= 1; 
		print OUT "Subset indices updated on $subfiletime";
	  	print OUT $tableend, $_; 
	  	}
		}
	close(OUT);
	close(HF);
	system("/bin/mv $htemp $opt_h");
	}
else {
	print @htable;
	}


exit(0);

#-------------------------------

sub usage
{
	print <