/* fffasta.c 12-Feb-1984, 11-Mar-1985, 14-Oct-1985 copyright (c) 1985,1986,1987,1988,1991,1992 William R. Pearson */ /* Sept-1991 release 1.6 uses W. Miller l_band, g_band for optimization 25-Sept-1991 fixed RLOCAL_ALIGN() call in zzlgmata.c 14-Mar-87 added BIGMEM for large memory machines Mar-87 added TFASTA for translation of DNA 1-Jun-87 added LFASTA for lfasta generation from same file 3-Jun-87 added SFASTA for sfasta generation from same file 28-Jun-87 made modifications for speed a la Warren Gish changed name to flfasta.c 24-Aug-87 no more modulus counting in dhash(), 2X as fast on the SUN, now MAXDIAG = MAXTST + MAXLIB; (ALLOCN0 for modulus counting in LFASTA) 19-Oct-87 modified behavior for nbest > MAXBEST, now resorts nbest and throws out bottom 25% also modifying bestcut in the process 12-Nov-87 Added automatic detection of protein/DNA sequences 28-Feb-88 added getopt for options 21-Mar-88 added PAM scores for kfact 25-Mar-88 flag -k for using PAM scores for kfact 30-Mar-88 added menu for libraries 30-Mar-88 combine fasta/fastgb, tfasta/tfastgb 19-May-88 added options for fasta mail processing -Q - quiet, does not ask for any input with -Q, has heuristic for number of scores to display, that number will be less than mshow, which is set with the -o option. 4-Feb-89 modified libchoice for a variety of different library formats - no more automatic library setting 20-Nov-89 1.3a fixed bug in pamfact that prevented -k option from working by default with DNA 5-Jun-90 added optall option 3-Sept-90 use select() instead of sortbest() allow individual library letters to be concatenated 13-Dec-90 fixed bug in select() due to lack of sentinel. 23-Jan-91 fixed bug in aatran() for short sequences 27-Jan-91 made certain than showbest() is called with nbest>0. 20-May-91 fixed ashow to reflect nshow 7-Dec-92 added -h option to suppress histogram 13-Dec-92 added -e option to scale scores by length 26-Aug-95 added -O option to provide filename for output */ /* fastn is a derivative of dfastp for DNA sequences fastn is designed to search a DNA sequence database very rapidly. It first looks for diagonals with homology using a hashing algorithm. General structure: Read in the test DNA sequence, build a hash table using a hash length of ktup. DNA bases will be given values between 0 and 3 (2 bits) and ktup of 1 to 6 will be allowed (hash table of 4 to 1024 entries, on larger machines, 4096 (ktup=6) would be acceptable). Start reading the library. Reading the library, looking up the hash table and accumulating diagonals will all be done in one loop. The max n diagonals will be identified and scanned with the PAM matrix for each library sequence. The max PAM score and diagonal positions are then saved for comparison against the whole library. After all sequences have been examined, a mean and s.d. are calculated and the max matches displayed. */ #include #include #include #include char *refstr="\nPlease cite:\n W.R. Pearson & D.J. Lipman PNAS (1988) 85:2444-2448\n"; #define TRUE 1 #define FALSE 0 #ifndef max(a,b) #define max(a,b) (((a)>(b))?(a):(b)) #define min(a,b) (((a)<(b))?(a):(b)) #endif #ifndef BIGMEM #define BIGNUM 32000 #ifdef TFASTA #define MAXTST 1000 /* longest test sequence */ #define MAXTRN 4000 /* MAXTRN must be (MAXTST*3+MAXLIB)/3 */ #define MAXLIB 8000 #define MAXDIAG (MAXTST+MAXTRN) #else #define MAXTST 2000 /* longest test sequence */ #define MAXLIB 8000 #define MAXDIAG (MAXTST+MAXLIB) #endif #else #define BIGNUM 1000000000 #define MAXTST 10000 #define MAXLIB 50000 #define MAXDIAG (MAXTST+MAXLIB) #ifdef TFASTA #define MAXTRN 30000 #endif #endif #ifdef LFASTA #define MAXSAV 250 #endif #ifndef MAXSAV #define MAXSAV 20 /* number of best diagonals saved */ #endif #define MAXHIST 51 /* number of histogram divisions */ #ifndef BIGMEM #define MAXBEST 5000 /* number of good matches remembered */ #else #define MAXBEST 20000 #endif FILE *outfd; /* fd for output file */ int smark[4]; FILE *tmpfd; char tmpfname[80]; int dataflg=0; int revflg = 0; char *compstr="\0"; int lnsflg=0; int optwid=32, optwidf=0; /* width for band optimization */ /* changed in initparm() */ /* globals for matching */ long lmark; /* position in library file from ftell() */ long nlib, onlib; long ntt, ontt; /* number of library sequences, number of residues scanned*/ #ifdef LFASTA int oneseq; #endif #define BINARYGB 9 /* this must agree with altlib.h */ #ifndef LFASTA extern int (*getlib)(), (*ranlib)(); extern int sfnum; #define GETLIB (*getlib) #define RANLIB (*ranlib) #else #define GETLIB getlib #define RANLIB ranlib #endif char libstr[81]; /* partial title from library sequence */ char name0[11], name1[11]; /* for labeling output */ int ixstat; /* >0 if annotations displayed */ #define MAXLF 200 /* number of library names */ #ifdef BIGMEM #define MAXLN 40 /* size of a library name */ #else #define MAXLN 20 #endif char *lbnarr; /* name array of libraries to be opened in list */ char *lbnames[MAXLF]; /* names of libraries to be opened */ int nln; /* number of library files */ #ifndef LFASTA int deftype=0; /* default library type */ #endif int libfn; /* current library file being searched */ char iname[80]; /* index file name */ char ldname[80]; char *aa0, *aa1; /* amino acid sequence data */ #ifdef TFASTA char *aa10; int nframe=6; #endif int maxn, maxt; /* max space for lib sequence */ int n0, n1, nd, noff; /* length of aa0, length of aa1, n0+n1, diagonal offset */ long sq0off=1, sq1off=1; long loffset = 0l; /* offset into sequence */ struct dstruct { /* diagonal structure for saving current run */ int score; /* hash score of current match */ int start; /* start of current match */ int stop; /* end of current match */ struct beststr *dmax; /* location in vmax[] where best score data saved */ } *diag; #ifdef I86BUG #define L2DSTR 3 /* log(2) of sizeof dstruct for I86 bug in many 'C' compilers */ #endif struct beststr { int score; /* pam score with segment optimization*/ int score0; /* pam score of best single segment */ int gscore; /* opt score */ int sscore; /* score used for sort */ float zscore; float escore; int n1; /* length of library sequence */ long lseek; /* position in library file */ int dp; /* diagonal of match */ int start; /* start of match in lib seq */ int stop; /* end of match in lib seq */ int cont; /* offset into sequence */ int frame; int lib; /* library for current sequence */ } #ifndef FAR_PTR *bbp, /* pointer for fbest */ *bestptr, /* temp pointer */ **bptr, /* array of pointers for sorting */ *best, /* array of best score data */ #else huge * bbp, huge * best, huge * bestptr, /* temp pointer */ huge * huge * bptr, #endif vmax[MAXSAV], /* best matches saved for one sequence */ *vptr[MAXSAV]; /* delete this: float *Evalue; */ #ifdef LFASTA int lcrc0[MAXSAV]; int lcrc1[MAXSAV]; int ncrc; #endif int iscore, gscore; /* for displaying scores without showbest */ int cgap; /* gap threshold */ int pgap; /* gap penalty for optimized alignment of diagonals */ int nbest; /* number of sequences better than bestcut in best */ int bestcut=1; /* cut off for getting into MAXBEST */ int optcut=0; /* cut off for optimization */ /* the following are defaults for values that are read by pam.c from *.mat if SMATRIX is defined */ int histint; int bestscale=300; /* values for calculating bestcut */ int bestoff=36; /* these values are for BLOSUM50 */ int bkfact=6; int scfact=3; int bktup=2; int ktmax=2; int bestmax=50; int pamfact= 1; /* flag for using pam values for kfact */ int dnaseq = 0; /* true if DNA query sequence */ int dnasw_flg = 0; int ldnaseq = 0; int bestfull=0; int igncnt=0; int optall=1; long optcount=0l; int init1flg=0; int nsav, lowscor; /* number of saved runs, worst saved run, score of worst saved run */ long *hist; /* histogram of all score */ int min_hist, max_hist, maxh; int histflg=1; int dohist=0; #ifndef LFASTA #ifndef TFASTA int zsflag=1; #else int zsflag=1; #endif extern long num_db_entries, z_calls; float zs_to_E(), zs_to_Ec(), find_zm(), find_z(); extern float ks_dev; extern int ks_df; #else int zsflag=0; float zs_to_E() {} float zs_to_Ec() {} long num_db_entries; #endif int fact, gm; /* scoring factors */ int hmask, hmax; /* hash constants */ int *pamh2; /* pam based kfact array */ int *link, *harr; /* hash arrays */ int ktup, kshft, kt1; /* ktuple constants */ int nshow=20, mshow=50, ashow= -1; int mshow_flg=0; float e_cut=10.0; /* threshold for E value display */ int e_cut_set=0; /* flag for set */ char rline[20],sline[20]; char resfile[40]; /* output options */ int showall, llen, markx; /* show all of both sequences */ char ttitle[60]; char ltitle[60]; long tstart, tscan, tdone, stime(); extern int optind; int optcnt; #ifndef LFASTA int outtty; #endif char *libenv, *aaenv, *smptr; char smstr[40], sdstr[40]; char flstr[40]; #ifdef TPLOT char lvstr[40]; #endif #include "upam.gbl" /* includes pam array */ main(argc, argv) int argc; char **argv; { char tname[40], lname[80], llname[80], qline[40]; int itemp, iln; char *getenv(), *cptr, *bp; #ifndef LFASTA #ifdef UNIX outtty=isatty(1); #else outtty=1; #endif #endif initenv(argc,argv); if (dataflg && (tmpfd=fopen(tmpfname,"w"))==NULL) { fprintf(stderr," cannot open temp file: %s\n",tmpfname); dataflg=0; } #ifdef TFASTA aainit(); dnaseq = -1; /* force to protein */ ldnaseq = 1; if (sqtype[0]=='D') { fprintf(stderr," tfasta compares a protein to a translated\n\ DNA sequence library. Do not use a DNA scoring matrix.\n"); exit(1); } #endif if ((aa0=calloc((size_t)MAXTST+MAXLIB,sizeof(char)))==0) { fprintf(stderr," cannot allocate sequence array\n"); exit(1); } maxn = MAXTST+MAXLIB; #ifdef TFASTA if ((aa1=calloc((size_t)MAXTRN,sizeof(char)))==0) { fprintf(stderr," cannot allocate translation array\n"); exit(1); } #endif if (argc-optind < 3) { #ifndef LFASTA if (!outtty) { fprintf(stderr," too few command line arguments with -q\n"); exit(1); } #endif #ifdef LFASTA printf(" lfasta 2.0x3 [Aug, 1995] compares two sequences\n%s\n",refstr); l1: printf(" first sequence file name: "); #else #ifdef TFASTA printf(" tfasta 2.0x3 [Aug, 1995] translates and searches a sequence data bank\n%s\n",refstr); l1: printf(" test sequence file name: "); #else printf(" fasta 2.0x3 [Aug, 1995] searches a sequence data bank\n%s\n",refstr); l1: printf(" test sequence file name: "); #endif #endif fflush(stdout); if (fgets(tname,40,stdin)==NULL) exit(0); if (tname[strlen(tname)-1]=='\n') tname[strlen(tname)-1]='\0'; if (tname[0]=='\0') goto l1; if ((n0=getseq(tname,aa0,maxn,&dnaseq))==0) { fprintf(stderr," %s : %s sequence not found\n",tname,sqtype); goto l1; } resetp(dnaseq); if (revflg) { if (sqtype[0]=='D') revcomp(aa0,n0); else { fprintf(stderr," can only reverse complement DNA\n"); compstr="\0"; revflg = 0; } } #ifndef TFASTA if (dnaseq==1 && n0>(MAXTST+MAXLIB)/3) { fprintf(stderr," query sequence is too long %d %s\n",n0,sqnam); exit(1); } else if (dnaseq==0 && n0>(MAXTST+MAXLIB)/2) { fprintf(stderr," query sequence is too long %d %s\n",n0,sqnam); exit(1); } #else if (n0 > MAXTST) { fprintf(stderr," query sequence is too long %d %s\n",n0,sqnam); exit(1); } #endif #ifdef LFASTA l2: fprintf(stderr," second sequence file name: "); fflush(stdout); if (fgets(lname,40,stdin)==NULL) exit(0); if (lname[strlen(lname)-1]=='\n') lname[strlen(lname)-1]='\0'; if (*lname==0) goto l2; addfile(lname,""); #else libchoice(lname,sizeof(lname),aaenv); libselect(lname); #endif fprintf(stderr," ktup? (1 to %d) [%d] ",ktmax,ktmax); if (fgets(qline,40,stdin)==NULL) exit(0); ktup = ktmax; if (qline[0]!='\0' && qline[0]!='\n') { sscanf(qline,"%d",&ktup); if (ktup < 1 || ktup>ktmax ) { printf(" warning ktup = %d out of range, reset to %d\n",ktup,ktmax); ktup = ktmax; } } #ifndef LFASTA fprintf(stderr," use optimized scores? [%s]: ",(optall?"yes":"no")); if (fgets(qline,40,stdin)==NULL) exit(0); if (optall==1 && (*qline=='n' || *qline=='N')) optall = 0; if (optall==0 && (*qline=='y' || *qline=='Y')) optall = 1; #endif } else { strncpy(tname,argv[optind+1],sizeof(tname)); if ((n0=getseq(tname,aa0,maxn,&dnaseq))==0) { fprintf(stderr," %s : %s sequence not found\n",tname,sqtype); exit(1); } resetp(dnaseq); if (revflg) { if (dnaseq) revcomp(aa0,n0); else { fprintf(stderr," cannot reverse complement protein sequence\n"); compstr="\0"; revflg = 0; } } #ifndef TFASTA if (dnaseq==1 && n0>(MAXTST+MAXLIB)/3) { fprintf(stderr," query sequence is too long %d %s\n",n0,sqnam); exit(1); } else if (dnaseq==0 && n0>(MAXTST+MAXLIB)/2) { fprintf(stderr," query sequence is too long %d %s\n",n0,sqnam); exit(1); } #else if (n0 > MAXTST) { fprintf(stderr," query sequence is too long %d %s\n",n0,sqnam); exit(1); } #endif strncpy(lname,argv[optind+2],sizeof(lname)); #ifndef LFASTA libselect(lname); #else addfile(lname,""); #endif if (argc-optind==4) sscanf(argv[optind+3],"%d",&ktup); else ktup=ktmax; if (ktup < 1 || ktup>ktmax ) { fprintf(stderr," warning ktup = %d out of range, reset to %d\n", ktup,ktmax); ktup = ktmax; } } #ifndef LFASTA if (!outtty) fprintf(stdout," %s%s : %4d %-s\n",tname, compstr, n0, sqnam); #endif gettitle(tname,ttitle,50); if (strlen(ttitle)>0) if (*ttitle=='>') strncpy(name0,&ttitle[1],6); else strncpy(name0,ttitle,6); else strncpy(name0,tname,6); name0[6]='\0'; if (revflg) name0[5]='-'; #ifdef LFASTA if (strcmp(tname,lname)==0) oneseq=1; #else if (strlen(ttitle)>0) printf(" %s%s: %d %s\n vs %s library\n",ttitle,compstr,n0,sqnam,ltitle); else printf(" %s%s: %d %s vs %s library\n",tname,compstr,n0,sqnam,ltitle); if (dataflg) { if (strlen(ttitle)>0) fprintf(tmpfd,"; %s%s: %d %s\n; vs %s library\n", ttitle,compstr,n0,sqnam,ltitle); else fprintf(tmpfd,"; %s%s: %d %s vs %s library\n", tname,compstr,n0,sqnam,ltitle); } #endif #ifndef TFASTA aa1 = aa0 + n0 + 2; #else aa10 = aa0 + n0 + 2; #endif maxn -= n0 + 3; initpam2(); /* convert 1-d pam to 2-d pam2 */ initparm(); if (dataflg) { fprintf(tmpfd,"; ktup = %d\n",ktup); fprintf(tmpfd,"; cutoff = %d; optcut = %d; ggapval %d gdelval %d cgap %d\n", bestcut,optcut,ggapval,gdelval,cgap); if (lnsflg) fprintf(tmpfd,"; ln scaling"); } tstart = stime(); fact = ktup*scfact; hashaa(aa0,n0,ktup); /* hash test sequence */ #ifndef ALLOCN0 allocdiag(MAXDIAG); #else allocdiag(n0); #endif /* inithist(); */ /* initialize histogram, mean, sd */ initbest(MAXBEST+1); /* +1 required for select() */ for (nbest=0; nbest0) { printf(" searching %s library\n",lbnames[iln]); dhash(); } if (itemp== -9) { printf(" %8ld %s in %5ld sequences\n",ntt-ontt, sqnam,nlib-onlib); ontt=ntt; onlib=nlib; continue; } if (itemp<0) break; #ifndef LFASTA closelib(); #endif } tscan = stime(); #ifndef LFASTA if (!dohist) { if (nbest < 20) { zsflag = 0; histflg = 0; } else process_hist(n0,bptr,nbest); } if (dataflg) { fprintf(tmpfd,"; %8ld %s in %5ld sequences; scan time: ", ntt,sqnam,nlib); ptime(tmpfd,tscan-tstart); fputs("\n",tmpfd); if (optall) fprintf(tmpfd,"; %d optimizations performed\n",optcount); } prhist(stdout); /* print histogram, statistics */ #endif free(diag); freehash(); #ifndef LFASTA outfd = stdout; l3: if (outtty) {printf(" Enter filename for results : "); fflush(stdout);} rline[0]='\0'; if (outtty && fgets(rline,20,stdin)==NULL) exit(0); if ((bp=strchr(rline,'\n'))!=NULL) *bp = '\0'; if (rline[0]!='\0') strncpy(resfile,rline,sizeof(resfile)); if (resfile[0]!='\0') { if ((outfd=fopen(resfile,"w"))==0) { printf(" could not open %s\n",resfile); goto l3; } fprintf(outfd," %s%s, %d %s vs %s library\n", tname, compstr, n0, sqnam, lname); prhist(outfd); } #else #ifdef TPLOT outfd = stderr; #else outfd = stdout; #endif /* LFASTA */ fprintf(outfd," Comparison of:\n(A) %-10s %-50s%s - %d %s\n", tname,ttitle,compstr,n0,sqnam); fprintf(outfd,"(B) %-10s %-50s - %ld %s\n",lname,ltitle,ntt,sqnam); if (strlen(smptr)>0) fprintf(outfd," using matrix file %s\n",smptr); else fprintf(outfd," using %s matrix\n",sqtype); if (nbest<=0) { fprintf(outfd, " No similar regions with scores greater %d than found\n",optcut); exit(0);} #endif if (zsflag) { sortbestz(); for (itemp=0; itempescore = zs_to_E(bptr[itemp]->zscore,bptr[itemp]->n1); #else bptr[itemp]->escore = zs_to_E(bptr[itemp]->zscore,bptr[itemp]->n1); #endif } sortbeste(); } else sortbest(); #ifdef LFASTA nshow = nbest; #ifdef TPLOT openplt((long)n0,(long)ntt); if (oneseq) drawdiag((long)n0,(long)ntt); showlocal(nshow); closeplt(); #else showlocal(nshow); #endif #else /* !LFASTA */ if (nbest <= 0) { fprintf(outfd," no sequences with scores greater than %d found\n",bestcut); if (outfd != stdout) fprintf(outfd," no sequences with scores greater than %d found\n",bestcut); exit(0); } showbest(); /* display best matches */ rline[0]='Y'; if (outtty) { printf(" Display alignments also? "); fflush(stdout); if (fgets(rline,20,stdin)==NULL) exit(0); } if (toupper(rline[0])=='Y') { if (outtty) { printf(" number of alignments [%d]? ",nshow); fflush(stdout); if (fgets(rline,20,stdin)==NULL) exit(0); if (rline[0]!=0) sscanf(rline,"%d",&nshow); ashow=nshow; } showalign(nshow); } tdone = stime(); printf("Library scan: "); ptime(stdout,tscan-tstart); printf(" total CPU time: "); ptime(stdout,tdone-tstart); printf("\n"); if (outfd!=stdout) { fprintf(outfd,"Library scan: "); ptime(outfd,tscan-tstart); fprintf(outfd," total CPU time: "); ptime(outfd,tdone-tstart); fprintf(outfd,"\n"); } #endif exit(0); } extern int *sascii, nascii[], aascii[]; initenv(argc,argv) int argc; char **argv; { char *cptr, *getenv(); int copt, getopt(); extern char *optarg; int i; libenv="\0"; if ((aaenv=getenv("AABANK"))==NULL) aaenv="\0"; if ((cptr=getenv("FASTLIBS"))!=NULL) strncpy(flstr,cptr,sizeof(flstr)); else flstr[0]='\0'; sascii = aascii; strncpy(sdstr,"BLOSUM50",sizeof(sdstr)); smptr = sdstr; pam = abl50; sq = aa; hsq = haa; nsq = naa; dnaseq = 0; ldnaseq = 0; #ifdef TFASTA gdelval = -16; ggapval = -4; #endif showall = 0; if ((cptr=getenv("SHOWALL"))!=NULL) if (sscanf(cptr,"%d",&showall)!=1) showall = 1; if ((cptr=getenv("LINLEN"))!=NULL) sscanf(cptr,"%d",&llen); else llen = 60; if (llen>=200) llen=200; markx=0; if ((cptr=getenv("MARKX"))==NULL) markx=0; else sscanf(cptr,"%d",&markx); if ((cptr=getenv("GAPCUT"))!=NULL) sscanf(cptr,"%d",&cgap); else cgap=0; if ((cptr=getenv("OPTCUT"))!=NULL) sscanf(cptr,"%d",&optcut); else optcut = 0; if ((cptr=getenv("PAMFACT"))!=NULL) { sscanf(cptr,"%d",&pamfact); if (pamfact==1) pamfact= -2; else pamfact = 0; } #ifndef LFASTA if ((cptr=getenv("LIBTYPE"))!=NULL) sscanf(cptr,"%d",&deftype); if (deftype<0 || deftype>BINARYGB) deftype= 0; #endif while ((copt= getopt(argc,argv,"QqaAb:c:d:eE:f:g:hik:l:m:noO:r:s:v:w:x:y:z13"))!=EOF) switch(copt) { #ifndef LFASTA case 'q': case 'Q': outtty=0; break; case 'a': showall=1; break; case 'A': dnasw_flg = 1; break; #endif case 'b': sscanf(optarg,"%d",&mshow); mshow_flg = 1; if (mshow<1) mshow=1; break; case 'c': sscanf(optarg,"%d",&optcut); break; case 'd': sscanf(optarg,"%d",&ashow); if (ashow<0) ashow=1; break; case 'e': /* lnsflg = 1; */ fprintf(stderr," ln() normalization not available\n"); break; case 'E': sscanf(optarg,"%g",&e_cut); e_cut_set=1; break; case 'f': sscanf(optarg,"%d",&gdelval); del_set=1; break; case 'g': sscanf(optarg,"%d",&ggapval); gap_set=1; break; case 'k': sscanf(optarg,"%d",&cgap); break; case 'h': histflg = 0; break; case 'i': revflg = 1; compstr=" (rev-comp)"; break; case 'l': strncpy(flstr,optarg,sizeof(flstr)); break; case 'm': sscanf(optarg,"%d",&markx); if (markx == 4) llen=50; if (markx > 4 ) markx = 0; break; case 'n': dnaseq=1; sascii = nascii; sq = nt; nsq = nnt; hsq = hnt; strcpy(sqnam,"nt"); strcpy(sqtype,"DNA"); resetp(dnaseq); break; case 's': strncpy(smstr,optarg,sizeof(smstr)); if (initpam(smstr)) { dnaseq= -1; ldnaseq = (sqtype[0]=='D')?1:0; smptr = smstr; } break; case 'w': sscanf(optarg,"%d",&llen); break; case 'x': sscanf(optarg,"%ld %ld",&sq0off,&sq1off); break; case 'y': sscanf(optarg,"%d",&optwid); optwidf = 1; break; case 'z': zsflag = 0; break; case 'O': strncpy(resfile,optarg,sizeof(resfile)); break; #ifndef LFASTA case '1': init1flg=1; break; case 'o': optall=0; break; case 'r': dataflg=1; strncpy(tmpfname,optarg,sizeof(tmpfname)); break; #else case '1': case 'Q': case 'o': case 'R':fprintf(stderr," illegal option -%c\n",copt); break; #endif #ifdef TPLOT case 'v': strncpy(lvstr,optarg,sizeof(lvstr)); break; #endif #ifdef TFASTA case '3': nframe=3; break; #else case '3': #endif default : fprintf(stderr," illegal option -%c\n",copt); } optind--; if (dnaseq>=0) { if ((smptr=getenv("SMATRIX"))!=NULL && initpam(smptr)) { dnaseq = -1; ldnaseq = (sqtype[0]=='D')?1:0; } else smptr = sdstr; } ktmax = bktup; if (strlen(smptr)>0) fprintf(stderr," using matrix file %s\n",smptr); } resetp(dnaseq) int dnaseq; { if (dnaseq==1) { bestscale=80; bkfact=5; scfact=1; bktup=6; ktmax=6; bestmax=80; bestoff=45; pam = npam; if (!del_set) gdelval = -16; if (!gap_set) ggapval = -4; if (strlen(smstr)>0) fprintf(stderr," resetting matrix to DNA\n"); strncpy(sdstr,"DNA",sizeof(smstr)); smptr = sdstr; ldnaseq=1; if (pamfact>=0) pamfact = 0; if (!e_cut_set) e_cut = 2.0; } } initparm() { char *getenv(), *cptr; int itemp, btemp; if (!optwidf) { if (dnaseq!=1 && ktup==1) optwid = 32; else optwid = 16; } btemp = 2*bestoff/3 + n0/bestscale + bkfact*(bktup-ktup); if (btemp>bestmax) btemp = bestmax; if (btemp > 3*n0) btemp = 3*shscore(aa0,n0)/5; bestfull = 0; if (cgap<=0) cgap=btemp+bestoff/3; if (optcut<=0) optcut=btemp; #ifdef TFASTA optcut = (optcut*3)/2; #endif pgap = gdelval+ggapval; } /* hashaa - hash sequence 0 for rapid lookup of seq 1 (library) */ hashaa(aa0, n0, ktup) char *aa0; int n0, ktup; { int mhv,phv; int i0, hv; if (pamfact == -1) pamfact=0; else if (pamfact== -2) pamfact=1; for (i0=0, mhv= -1; i0mhv) mhv=hsq[i0]; if (mhv<=0) { fprintf(stderr," maximum hsq <=0 %d\n",mhv); exit(1); } for (kshft=0; mhv>0; mhv/=2) { kshft++; } /* kshft = 2; */ kt1 = ktup-1; hv = 1; for (i0 = 0; i0>kshft)-1; allochash(n0,hmax); for (i0=0; i00) { nlib++; ntt += n1; if (n1==1 || n1 < ktup) { if (igncnt++ <10) fprintf(stderr,"Ignoring: %s\n",libstr); goto loop; } if (aa1!=aa1ptr) {n1 += n0; nlib--;} #else maxt = maxn-loff-3; maxt -= maxt%3; maxt++; while ((n10=GETLIB(aa1ptr,maxt,libstr,&lmark,&lcont))>0) { nlib++; ntt += n10; if (n10==1 || n10 < 3*ktup) { if (igncnt++ <10) fprintf(stderr,"Ignoring: %s\n",libstr); goto loop; } if (aa10!=aa1ptr) {n10 += 3*n0; nlib--;} #endif #ifdef TFASTA for (itt=0; itt 5) lnscale = (log((float)n0)/log((float)n1)); else lnscale = 1.0; dpmax = &diag[nd]; for (dptr= &diag[ndo]; dptrstop = -1; dptr->dmax = NULL; dptr++->score = 0; } for (vmptr=vmax; vmptr<&vmax[MAXSAV]; vmptr++) vmptr->score = 0; lowscor = 0; /* start hashing */ lhval = 0; for (lpos=0; lpos=tlim; tpos=link[tpos]) { if ((tscor = (dptr = &diagp[-tpos])->stop)>=0) { #else lposn0 = noff + lpos; for ( ; lpos=tlim; tpos=link[tpos]) { dpos = lposn0 - tpos; if ((tscor = (dptr = &diag[dpos%nd])->stop)>=0) { #endif tscor += ktup; if ((tscor -=lpos)<=0) { scor = dptr->score; #ifdef FKFACT if ((tscor += kfact)<0 && lowscor=kfact) { dptr->score = tscor; dptr->stop=lpos; } else { dptr->score = kfact; dptr->start = (dptr->stop = lpos) - kt1; } } else { #ifdef FKFACT dptr->score += fact; #else dptr->score += pamh1[aa0[tpos]]; #endif dptr->stop = lpos; } } else { #ifdef FKFACT dptr->score = kfact; #else dptr->score = pamh2[lhval]; #endif dptr->start = (dptr->stop=lpos) - kt1; } } /* end tpos */ #ifdef LFASTA if (oneseq) tlim++; #endif #ifdef ALLOCN0 /* reinitialize diag structure */ if ((dptr= &diag[lpos%nd])->score>lowscor) savemax(dptr,lpos); dptr->stop = -1; dptr->dmax = NULL; dptr->score = 0; #endif } /* end lpos */ #ifdef ALLOCN0 for (tpos=0, dpos = noff+n1-1; tpos < n0; tpos++,dpos--) { if ((dptr= &diag[dpos%nd])->score>lowscor) savemax(dptr,dpos); } #else for (dptr=diag; dptr < dpmax; ) { if (dptr->score>lowscor) savemax(dptr); dptr->stop = -1; dptr->dmax = NULL; dptr++->score = 0; } ndo = nd; #endif /* at this point all of the elements of aa1[lpos] have been searched for elements of aa0[tpos] with the results in diag[dpos] */ for (ib= nsave =0,vmptr=vmax; vmptr < &vmax[MAXSAV]; vmptr++) { #ifdef LFASTA if (oneseq && vmptr->start==0 && vmptr->stop==n1-1) continue; #endif if (vmptr->score>0) { vmptr->score=spam(vmptr); vptr[ib++]= vmptr; nsave++; } } if (nsave>0) { #ifndef LFASTA scor = sconn(vptr,nsave); for (vmptr=vptr[0],ib=1; ibscore > vmptr->score) vmptr=vptr[ib]; /* kssort(vptr,nsave); */ scor = max(scor,vmptr->score); cscor = scor = (int)((float)scor*lnscale +0.5); scor0 = (int)((float)vmptr->score*lnscale + 0.5); if (init1flg) cscor = scor0; #else kssort(vptr,nsave); #endif #ifdef LFASTA for (ib=0; ibscore) > optcut) { vmptr=vptr[ib]; #else ib=0; gscor = scor; if (optall && scor>optcut) { cscor = gscor = dmatch(noff-vmptr->dp,FALSE); optcount++; } if (dohist) { addhistz(zscor=find_zm(cscor,n1),n1); } if (dataflg) fprintf(tmpfd,"%-10s %4d %4d %4d %4d %4d %8ld\n", libstr,sfnum,n1,gscor,scor,scor0,lmark); if (cscor > bestcut) { #endif if (nbest >= MAXBEST) { #ifndef LFASTA if (!dohist) { process_hist(n0,bptr,nbest); addhistz(zscor=find_zm(cscor,n1),n1); dohist=1; } #endif bestfull = nbest-MAXBEST/4; selectz(bestfull-1,nbest); bestcut = (int)(bptr[bestfull-1]->zscore + 0.5); nbest = bestfull; } bestptr = bptr[nbest]; bestptr->dp = vmptr->dp; bestptr->start = vmptr->start; bestptr->stop = vmptr->stop; bestptr->score0 = scor0; bestptr->score = scor; bestptr->sscore = cscor; bestptr->zscore = zscor; bestptr->gscore = gscor; bestptr->lseek = lmark; bestptr->cont = ocont; bestptr->lib = libfn; bestptr->n1 = n1; bestptr->frame = itt; nbest++; } } #ifndef LFASTA else { /* nsave <= 0 */ if (dataflg) fprintf(tmpfd,"%-10s %4d %4d %4d %4d %8d\n", libstr,sfnum,0,0,0,lmark); } #endif #ifdef TFASTA } /* end of for (itt ... ) */ #endif loop: if (lcont) { #ifndef TFASTA loff = n0; memcpy(aa1,&aa1[n1-n0],n0); aa1ptr= &aa1[loff]; #else loff = 3*n0; memcpy(aa10,&aa10[n10-loff],loff); aa1ptr= &aa10[loff]; maxt = maxn-loff-3; maxt -= maxt%3; maxt++; #endif ocont = lcont; } else { loff = 0; #ifndef TFASTA aa1ptr=aa1; #else aa1ptr = aa10; maxt = maxn-loff-3; maxt -= maxt%3; maxt++; #endif ocont = lcont; } } } #ifdef ALLOCN0 savemax(dptr,dpos) register struct dstruct *dptr; int dpos; { register struct beststr *vmptr; register int i; #else savemax(dptr) register struct dstruct *dptr; { register int dpos; register struct beststr *vmptr; register int i; #endif register struct beststr *lmp; static struct beststr *lowmax=vmax; #ifndef ALLOCN0 dpos = (int)(dptr-diag); #endif /* check to see if this is the continuation of a run that is already saved */ if ((vmptr=dptr->dmax)!=NULL && vmptr->dp== dpos && vmptr->start==dptr->start) { vmptr->stop = dptr->stop; if ((i=dptr->score)<=vmptr->score) return; vmptr->score = i; if (vmptr!=lowmax) return; } else { i=lowmax->score = dptr->score; lowmax->dp = dpos; lowmax->start = dptr->start; lowmax->stop = dptr->stop; dptr->dmax = lowmax; } lmp = lowmax; for (vmptr = vmax; vmptr < &vmax[MAXSAV]; vmptr++) if (vmptr->score < i) { i = vmptr->score; lmp = vmptr; } lowscor = i; lowmax = lmp; } initpam2() { int i, j, k; k=0; for (i=0; istart]; aa0p= &aa0[lpos - dmax->dp + noff]; curv.start = lpos; tot = curv.score = maxv.score = 0; for ( ; lpos <= dmax->stop; lpos++) { tot += pam2[*aa0p++][*aa1p++]; if (tot > curv.score) { curv.stop = lpos; curv.score = tot; } else if (tot < 0) { if (curv.score > maxv.score) { maxv.start = curv.start; maxv.stop = curv.stop; maxv.score = curv.score; } tot = curv.score = 0; curv.start = lpos; } } if (curv.score > maxv.score) { maxv.start = curv.start; maxv.stop = curv.stop; maxv.score = curv.score; } /* if (maxv.start != dmax->start || maxv.stop != dmax->stop) printf(" new region: %3d %3d %3d %3d\n",maxv.start, dmax->start,maxv.stop,dmax->stop); */ dmax->start = maxv.start; dmax->stop = maxv.stop; return maxv.score; } sconn(v,n) struct beststr *v[]; int n; { int i,si,cmpp(); struct slink { int score; struct beststr *vp; struct slink *next; } *start, *sl, *sj, *so, sarr[MAXSAV]; int lstart, tstart, plstop, ptstop; /* sort the score left to right in lib pos */ kpsort(v,n); start = NULL; /* for the remaining runs, see if they fit */ for (i=0,si=0; iscore < cgap) continue; lstart=v[i]->start; tstart=lstart-v[i]->dp+noff; /* put the run in the group */ sarr[si].vp = v[i]; sarr[si].score = v[i]->score; sarr[si].next = NULL; /* if it fits, then increase the score */ for (sl=start; sl!= NULL; sl = sl->next) { plstop = sl->vp->stop; ptstop = plstop - sl->vp->dp + noff; if (plstopscore+v[i]->score+pgap; break; } } /* now recalculate where the score fits */ if (start==NULL) start= &sarr[si]; else for (sj=start, so=NULL; sj!=NULL; sj = sj->next) { if (sarr[si].score>sj->score) { sarr[si].next = sj; if (so!=NULL) so->next= &sarr[si]; else start= &sarr[si]; break; } so=sj; } si++; } if (start!=NULL) return (start->score); else return (0); } shscore(aa0,n0) /* calculate the 100% identical score */ char *aa0; int n0; { int i, sum; for (i=0,sum=0; i maxval) maxval = hist[i]; if (i >= mht && hist[i]>maxvalt) maxvalt = hist[i]; } max_dev = 0.0; n_chi_sq = 0; cum_hl = -hist[0]; dotsiz = (maxval-1)/60+1; ddotsiz = (maxvalt-1)/50+1; doinset = (ddotsiz < dotsiz && dotsiz > 2); if (mh1 > 0) { fprintf(fd,"\n one = represents %d library sequences\n",dotsiz); if (doinset) fprintf(fd," for inset = represents %d library sequences\n",ddotsiz); if (optall) dstr="z-opt"; else if (init1flg) dstr="z-init1"; else dstr="z-initn"; fprintf(fd,"\n %s E()\n",dstr); } prev_e = 0.0; for (i=0; i<=mh1; i++) { pch = (i==mh1) ? '>' : ' '; pch = (i==0) ? '<' : pch; hll = hl = hist[i]; if (zsflag) { f_int = (float)(i*histint + min_hist) + (float)histint/2.0; cur_e = zs_to_Ec(f_int); cum_hl += hl; ev = el = ell = (long)(cur_e - prev_e + 0.5); if (hl > 0 && i>5 && i < (90-min_hist)/histint) { x_tmp = fabs((float)cum_hl - cur_e); if (x_tmp > max_dev) { max_dev = x_tmp; max_i = i; } n_chi_sq++; } if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60; if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40; fprintf(fd,"%c%3d %5ld %5ld :", pch,(i 60) hl = 60; if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40; for (j=0; j 0) hline[el-1]='*'; hline[hl]='\0'; } else { for (j = hl; j < el; j++) hline[j]=' '; hline[el-1]='*'; hline[el]='\0'; } } else hline[hl]='\0'; if (i >= mht&& doinset ) { for (j = max(el,hl); j < 10; j++) hline[j]=' '; hline[10]=':'; for (j = 11; j<11+hll; j++) hline[j]='='; hline[11+hll]='\0'; if (zsflag) { if (ell <= hll) hline[10+ell]='*'; else { for (j = 11+hll; j < 10+ell; j++) hline[j]=' '; hline[10+ell] = '*'; hline[11+ell] = '\0'; } } } fprintf(fd,"%s\n",hline); prev_e = cur_e; } } fprintf(fd, "%7ld residues in %5ld sequences\n",ntt,nlib); if (zsflag) { fprintf(fd," statistics extrapolated from %ld to %ld sequences\n", min(MAXBEST,num_db_entries),num_db_entries); if (histflg) fprintf(fd," Kolmogorov-Smirnov statistic: %6.4f (N=%3d) at %d\n", max_dev/(float)(cum_hl),n_chi_sq,max_i*histint+min_hist); } if (optall) fprintf(fd," results sorted and z-values calculated from opt score\n"); else if (init1flg) fprintf(fd," results sorted and z-values calculated from init1 score\n"); else fprintf(fd," results sorted and z-values calculated from initn score\n"); if (pamfact) strncpy(hline,"variable pamfact",sizeof(hline)); else sprintf(hline,"fact: %d",fact); if (lnsflg) fprintf(fd," scores scaled by ln(n0)/ln(n1)\n"); fprintf(fd," %4d scores better than %d saved, ktup: %d, %s\n", nbest,bestcut,ktup,hline); fprintf(fd," %s matrix,",smptr); fprintf(fd," gap penalties: %d,%d\n",gdelval,ggapval); if (optall) fprintf(fd, " joining threshold: %d, optimization threshold: %d, width: %d\n", cgap,optcut,optwid); else fprintf(fd," joining threshold: %d, opt. width: %d",cgap,optwid); if (dataflg) { if (lnsflg) fprintf(fd,"; scores scaled by ln(n0)/ln(n1)\n"); } fprintf(fd," scan time: "); ptime(fd,tscan-tstart); fprintf(fd,"\n"); fflush(fd); } allocdiag(dsize) /* allocates diagonal structures */ int dsize; { #ifdef I86BUG if ((int)sizeof(struct dstruct) != (1<lib!=olib) { closelib(); if (openlib(lbnames[bbp->lib],"\0")<=0) exit(0); olib=bbp->lib; } RANLIB(bline,llen-5,bbp->lseek); if (strlen(bline)==0) { bline[0]='>'; strncpy(&bline[1],lbnames[bbp->lib],llen-5); } #ifndef TFASTA aa1ptr=aa1; #else aa1ptr = aa10; #endif loff=0; loffset = 0l; lcont=0; for (ccont=0; ccont<=bbp->cont; ccont++) { #ifndef TFASTA n1=GETLIB(aa1ptr,maxn-loff,libstr,&lmark,&lcont); if (aa1ptr!=aa1) n1 += n0; #else maxt = maxn-loff-3; maxt -= maxt%3; maxt++; n10 = GETLIB(aa1ptr,maxt,libstr,&lmark,&lcont); if (aa1ptr!=aa10) n10 += 3*n0; #endif if (lcont>bbp->cont) break; #ifndef TFASTA if (lcont) { loff = n0; memcpy(aa1,&aa1[n1-n0],n0); aa1ptr= &aa1[loff]; loffset += n1-n0; } else { loff = 0; aa1ptr=aa1; } #else if (lcont) { loff = 3*n0; memcpy(aa10,&aa10[n10-loff],loff); aa1ptr= &aa10[loff]; loffset += n10-loff; maxt = maxn-loff-3; maxt -= maxt%3; maxt++; } else { loff = 0; aa1ptr = aa10; maxt = maxn-loff-3; maxt -= maxt%3; maxt++; } #endif } #ifdef TFASTA n1 = aatran(aa10,aa1,n10,bbp->frame); loffset /= 3; #endif if (markx != 4) { fprintf(outfd,fmt,bline,bbp->n1,sqnam); #ifndef TFASTA if (zsflag) fprintf(outfd, "initn: %4d init1: %4d opt: %4d z-score: %4.1f E(): %6.2g\n", bbp->score,bbp->score0,bbp->gscore,bbp->zscore, bbp->escore); else fprintf(outfd,"initn: %4d init1: %4d opt: %4d\n", bbp->score,bbp->score0,bbp->gscore); if (lnsflg) { if (n1 > 5) lnscale = log((double)n1)/log((double)n0); else lnscale = 1.0; fprintf(outfd," Unnormalized scores, initn: %4d init1: %4d opt %4d\n", (int)((double)bbp->score*lnscale+0.5), (int)((double)bbp->score0*lnscale+0.5), (int)((double)bbp->gscore*lnscale+0.5)); } #else if (zsflag) fprintf(outfd, "Frame: %1d init1: %4d initn: %4d opt: %4d z-score: %4.1f E(): %6.2g\n", bbp->frame+1,bbp->score,bbp->score0,bbp->gscore, bbp->zscore,bbp->escore); else fprintf(outfd, "Frame: %1d init1: %4d initn: %4d opt: %4d\n", bbp->frame+1,bbp->score,bbp->score0,bbp->gscore); if (lnsflg) { if (n1 > 5) lnscale = log((double)n1)/log((double)n0); else lnscale = 1.0; fprintf(outfd," Unnormalized scores, initn: %4d init1: %4d opt %4d\n", (int)((double)bbp->score*lnscale+0.5), (int)((double)bbp->score0*lnscale+0.5), (int)((double)bbp->gscore*lnscale+0.5)); } #endif } strncpy(name1,bline,6); name1[6]='\0'; if ((bp = strchr(name1,' '))!=NULL) *bp = '\0'; smark[2]=bbp->start; smark[3]=bbp->stop; smark[0]=noff+smark[2]-bbp->dp; smark[1]=noff+smark[3]-bbp->dp; iscore=bbp->score0; if (!ldnaseq || dnasw_flg) swscore=smatch(aa0,n0,aa1,n1,TRUE); else dmatch(noff-bbp->dp,TRUE); if (markx !=4) fprintf(outfd,"\n"); fflush(outfd); } } #endif #ifdef LFASTA showlocal(nshow) int nshow; { int ib, istart, istop; char bline[200], *bp; int lcont, olcont, ccont, loff; char *aa1ptr; int olib; olcont = -1; istart = 0; istop = nbest; bbp = bptr[0]; for (ib=istart; ibcont!=olcont) { RANLIB(bline,llen-5,bbp->lseek); if (strlen(bline)==0) { bline[0]='>'; strncpy(&bline[1],lbnames[bbp->lib],llen-5); } aa1ptr=aa1; loff=0; loffset = 0l; lcont=0; for (ccont=0; ccont<=bbp->cont; ccont++) { n1=GETLIB(aa1ptr,maxn-loff,libstr,&lmark,&lcont); if (aa1ptr!=aa1) n1 += n0; if (lcont>bbp->cont) break; if (lcont) { loff = n0; memcpy(aa1,&aa1[n1-n0],n0); aa1ptr= &aa1[loff]; loffset += n1-n0; } else { loff = 0; aa1ptr=aa1; } } olcont = bbp->cont; } strncpy(name1,bline,6); name1[6]='\0'; if ((bp = strchr(name1,' '))!=NULL) *bp = '\0'; smark[2]=bbp->start; smark[3]=bbp->stop; smark[0]=noff+smark[2]-bbp->dp; smark[1]=noff+smark[3]-bbp->dp; iscore=bbp->score0; #ifndef TPLOT if (dmatch(smark[1],smark[3],TRUE)>=0) fprintf(outfd,"\n----------\n"); #else /* !TPLOT */ dmatch(smark[1],smark[3],FALSE); #endif /* TPLOT */ } } #endif #ifdef FAR_PTR #ifdef __TURBOC__ #define FCALLOC farcalloc #define MTYPE unsigned long #define FFREE farfree #endif #endif initbest(nbest) /* allocate arrays for best sort */ int nbest; { #ifndef FAR_PTR if ((best=(struct beststr *)calloc((size_t)nbest,sizeof(struct beststr))) == NULL) {fprintf(stderr,"cannot allocate best struct\n"); exit(1);} if ((bptr=(struct beststr **)calloc((size_t)nbest,sizeof(struct beststr *))) == NULL) {fprintf(stderr,"cannot allocate bptr\n"); exit(1);} #else /* FAR_PTR */ void far *FCALLOC(); if ((best=(struct beststr huge *) FCALLOC((MTYPE)nbest,(MTYPE)sizeof(struct beststr)))== NULL) { fprintf(stderr,"cannot allocate best struct\n"); exit(1);} if ((bptr=(struct beststr huge * huge *) FCALLOC((MTYPE)nbest,(MTYPE)sizeof(struct beststr huge *)))==NULL) { fprintf(stderr,"cannot allocate bptr\n"); exit(1);} #endif /* FAR_PTR */ } freebest() { #ifndef BIGMEM #ifndef FAR_PTR free(bptr); free(best); #else /* FAR_PTR */ FFREE(bptr); FFREE(best); #endif /* FAR_PTR */ #endif /* BIGMEM */ } #ifndef LFASTA showbest() { int ib, istart, istop; char bline[200], fmt[40], pad[200]; int ntmp; int lcont, ccont, loff; char *aa1ptr; int olib; #ifdef TFASTA int n10; #endif if (nshow <= 0) return; olib = -1; #ifndef TFASTA sprintf(fmt,"%%-%ds",llen-10); #else sprintf(fmt,"%%-%ds",llen-14); #endif nshow = min(nshow,nbest); mshow = min(mshow,nbest); if (outtty) { printf(" How many scores would you like to see? [%d] ",nshow); fflush(stdout); if (fgets(rline,20,stdin)==NULL) exit(0); if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&nshow); if (nshow<=0) nshow = min(20,nbest); } else nshow=mshow; memset(pad,' ',llen-25); #ifndef TFASTA pad[llen-31]='\0'; #else pad[llen-31]='\0'; #endif if (zsflag) fprintf(outfd,"The best scores are:%sinitn init1 opt z-sc E(%ld)\n", pad,num_db_entries); else fprintf(outfd,"The best scores are:%sinitn init1 opt\n",pad); if (outfd != stdout) if (zsflag) printf("The best scores are:%sinitn init1 opt z-sc E(%ld)\n", pad,num_db_entries); else printf("The best scores are:%sinitn init1 opt\n",pad); istart = 0; l1: istop = min(nbest,nshow); for (ib=istart; ibescore > e_cut) { nshow = ib; goto done; } if (bbp->lib!=olib) { closelib(); if (openlib(lbnames[bbp->lib],"\0")<=0) exit(0); olib=bbp->lib; } RANLIB(bline,llen-5,bbp->lseek); #ifndef TFASTA bline[llen-11]='\0'; #else bline[llen-15]='\0'; #endif if (!optall) { #ifndef TFASTA aa1ptr=aa1; #else aa1ptr = aa10; #endif loff=0; lcont=0; for (ccont=0; ccont <= bbp->cont; ccont++) { #ifndef TFASTA n1=GETLIB(aa1ptr,maxn-loff,libstr,&lmark,&lcont); if (aa1ptr!=aa1) n1 += n0; #else maxt = maxn-loff-3; maxt -= maxt%3; maxt++; n10=GETLIB(aa1ptr,maxt,libstr,&lmark,&lcont); if (aa1ptr!=aa10) n10 += 3*n0; #endif if (lcont>bbp->cont) break; #ifndef TFASTA if (lcont) { loff = n0; memcpy(aa1,&aa1[n1-n0],n0); aa1ptr= &aa1[loff]; } else { loff = 0; aa1ptr=aa1; } #else if (lcont) { loff = 3*n0; memcpy(aa10,&aa10[n10-loff],loff); aa1ptr= &aa10[loff]; maxt = maxn-loff-3; maxt -= maxt%3; maxt++; } else { aa1ptr=aa10; loff = 0; maxt = maxn-loff-3; maxt -= maxt%3; maxt++; } #endif } #ifdef TFASTA n1=aatran(aa10,aa1,n10,bbp->frame); #endif bbp->gscore=dmatch(noff-bbp->dp,FALSE); } fprintf(outfd,fmt,bline); #ifndef TFASTA if (zsflag) fprintf(outfd,"%4d %3d %4d %4.1f %6.2g\n", bbp->score,bbp->score0,bbp->gscore, bbp->zscore,bbp->escore); else fprintf(outfd,"%4d %3d %4d\n", bbp->score,bbp->score0,bbp->gscore); #else if (zsflag) fprintf(outfd,"(%1d) %4d %3d %4d %4.1f %6.2g\n", bbp->frame+1,bbp->score,bbp->score0,bbp->gscore, bbp->zscore,bbp->escore); else fprintf(outfd,"(%1d) %4d %3d %4d\n", bbp->frame+1,bbp->score,bbp->score0,bbp->gscore); #endif if (outfd!=stdout) { fprintf(stdout,fmt,bline); #ifndef TFASTA if (zsflag) printf("%4d %3d %4d %4.1f %5.2g\n", bbp->score,bbp->score0,bbp->gscore, bbp->zscore,bbp->escore); else printf("%4d %3d %4d\n",bbp->score,bbp->score0,bbp->gscore); #else if (zsflag) printf("(%1d) %4d %3d %4d %4.1f %5.2g\n", bbp->frame+1,bbp->score,bbp->score0,bbp->gscore, bbp->zscore,bbp->escore); else printf("(%1d) %4d %3d %4d\n", bbp->frame+1,bbp->score,bbp->score0,bbp->gscore); #endif } } fflush(outfd); if (outfd!=stdout) fflush(stdout); if (outtty) { printf(" More scores? [0] "); fflush(stdout); if (fgets(rline,20,stdin)==NULL) exit(0); ntmp = 0; if (rline[0]!='\n' && rline[0]!=0) sscanf(rline,"%d",&ntmp); if (ntmp<=0) ntmp = 0; if (ntmp>0) { istart = istop; nshow += ntmp; mshow += ntmp; goto l1; } } else if (zsflag && bbp->escore < e_cut) { if (mshow_flg && istop >= mshow) goto done; istart=istop; nshow += 10; goto l1; } done: if (outfd!=stdout) fprintf(outfd,"\n"); } #endif /* LFASTA */ selectz(k,n) /* k is rank in array */ int k,n; { int t, i, j, l, r; float v; #ifndef FAR_PTR struct beststr *tmptr; #else struct beststr huge * tmptr; #endif l=0; r=n-1; while ( r > l ) { i = l-1; j = r; v = bptr[r]->zscore; do { while (bptr[++i]->zscore > v ) ; while (bptr[--j]->zscore < v ) ; tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr; } while (j > i); bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr; if (i>=k) r = i-1; if (i<=k) l = i+1; } } sortbest() { #ifndef FAR_PTR int cmps(), cmp1(), cmpa(); if (init1flg) ksort(bptr,nbest,cmp1); else if (optall) ksort(bptr,nbest,cmpa); else ksort(bptr,nbest,cmps); #else int fcmps(), fcmp1(), fcmpa(); if (init1flg) fksort(bptr,nbest,fcmp1); else if (optall) fksort(bptr,nbest,fcmpa); else fksort(bptr,nbest,fcmps); #endif } sortbeste() { #ifndef FAR_PTR int cmpe(); ksort(bptr,nbest,cmpe); #else int fcmpe(); fksort(bptr,nbest,fcmpe); #endif } sortbestz() { #ifndef FAR_PTR int cmpz(); ksort(bptr,nbest,cmpz); #else int fcmpz(); fksort(bptr,nbest,fcmpz); #endif } cmps(ptr1,ptr2) struct beststr *ptr1, *ptr2; { if (ptr1->score < ptr2->score) return (1); else if (ptr1->score > ptr2->score) return (-1); else return (0); } cmpa(ptr1,ptr2) struct beststr *ptr1, *ptr2; { if (ptr1->gscore < ptr2->gscore) return (1); else if (ptr1->gscore > ptr2->gscore) return (-1); else return (0); } cmp1(ptr1,ptr2) struct beststr *ptr1, *ptr2; { if (ptr1->score0 < ptr2->score0) return (1); else if (ptr1->score0 > ptr2->score0) return (-1); else return (0); } cmpz(ptr1,ptr2) struct beststr *ptr1, *ptr2; { if (ptr1->zscore < ptr2->zscore) return (1); else if (ptr1->zscore > ptr2->zscore) return (-1); else return (0); } cmpe(ptr1,ptr2) struct beststr *ptr1, *ptr2; { if (ptr1->escore < ptr2->escore) return (-1); else if (ptr1->escore > ptr2->escore) return (1); else return (0); } #ifdef FAR_PTR fcmps(ptr1,ptr2) struct beststr huge * ptr1, huge * ptr2; { if (ptr1->score < ptr2->score) return (1); else if (ptr1->score > ptr2->score) return (-1); else return (0); } fcmp1(ptr1,ptr2) struct beststr huge * ptr1, huge * ptr2; { if (ptr1->score0 < ptr2->score0) return (1); else if (ptr1->score0 > ptr2->score0) return (-1); else return (0); } fcmpa(ptr1,ptr2) struct beststr huge * ptr1, huge * ptr2; { if (ptr1->gscore < ptr2->gscore) return (1); else if (ptr1->gscore > ptr2->gscore) return (-1); else return (0); } fcmpz(ptr1,ptr2) struct beststr huge * ptr1, huge * ptr2; { if (ptr1->zscore < ptr2->zscore) return (1); else if (ptr1->zscore > ptr2->zscore) return (-1); else return (0); } fcmpe(ptr1,ptr2) struct beststr huge * ptr1, huge * ptr2; { if (ptr1->escore < ptr2->escore) return (-1); else if (ptr1->escore > ptr2->escore) return (1); else return (0); } #endif cmpp(ptr1,ptr2) struct beststr *ptr1, *ptr2; { if (ptr1->start < ptr2->start) return (-1); else if (ptr1->start > ptr2->start) return (1); else return (0); } kssort(v,n) struct beststr *v[]; int n; { int gap, i, j; struct beststr *tmp; for (gap=n/2; gap>0; gap/=2) for (i=gap; i=0; j -= gap) { if (v[j]->score >= v[j+gap]->score) break; tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp; } } kpsort(v,n) struct beststr *v[]; int n; { int gap, i, j; struct beststr *tmp; for (gap=n/2; gap>0; gap/=2) for (i=gap; i=0; j -= gap) { if (v[j]->start <= v[j+gap]->start) break; tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp; } } ksort(v,n,comp) void *v[]; int n, (*comp)(); { int gap, i, j; char *tmp; for (gap=n/2; gap>0; gap/=2) for (i=gap; i=0; j -= gap) { if ((*comp)(v[j],v[j+gap]) <=0) break; tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp; } } #ifdef FAR_PTR fksort(v,n,comp) void huge * huge *v; int n, (*comp)(); { int gap, i, j; void huge *tmp; for (gap=n/2; gap>0; gap/=2) for (i=gap; i=0; j -= gap) { if ((*comp)(v[j],v[j+gap]) <=0) break; tmp = v[j]; v[j]=v[j+gap]; v[j+gap]=tmp; } } #endif getlnames(tname) /* read in the library names */ char *tname; { int i; char *bp; char lline[120]; FILE *tptr; if (*tname != '@') {addfile(tname,"\0"); return 1;} else tname++; if ((bp=strchr(tname,' '))!=NULL) { *bp='\0'; #ifndef LFASTA sscanf(bp+1,"%d",&deftype); if (deftype<0 || deftype>BINARYGB) { fprintf(stderr," default type error %d\n",deftype); deftype=0; } #endif } if ((tptr=fopen(tname,"r"))==NULL) { fprintf(stderr," could not open file of names: %s\n",tname); return 0; } while (fgets(lline,sizeof(lline),tptr)!=NULL) { if (lline[0]==';') continue; if ((bp=strchr(lline,'\n'))!=NULL) *bp='\0'; if (lline[0]=='>') { strncpy(iname,&lline[1],sizeof(iname)); iname[sizeof(iname)-1]='\0'; #ifndef LFASTA if (deftype==0) deftype=BINARYGB; #endif } else if (lline[0]=='<') { if (ldname[0]!='\0' && strcmp(ldname,lline+1)!=0) fprintf(stderr, " changing default directory name from %s to %s\n", ldname,lline+1); strncpy(ldname,&lline[1],sizeof(ldname)); ldname[sizeof(iname)-1]='\0'; libenv=ldname; } else addfile(lline,libenv); } fclose(tptr); return 1; } /* modified Dec 13, 1989 requires different FASTLIBS */ #define MAXCHFIL 80 #define MAXCH 40 libchoice(lname,nl,aaenv) char *lname, *aaenv; int nl; { char tolower(); FILE *fch; char line[120], *bp; char *chstr[MAXCH],*chfile[MAXCH]; char *chtmp, *charr; int i,j,k,chlen; charr = NULL; if (strlen(flstr)>0) { chlen = MAXCH*MAXCHFIL; if ((chtmp=charr=calloc((size_t)chlen,sizeof(char)))==NULL) { fprintf(stderr,"cannot allocate choice file array\n"); goto l1; } chlen--; if ((fch=fopen(flstr,"r"))==NULL) { fprintf(stderr," cannot open choice file: %s\n",flstr); goto l1; } fprintf(stderr,"\n Choose sequence library:\n\n"); for (i=j=0; jchlen) break; strncpy(chstr[j]=chtmp,line,chlen); chtmp += k+1; chlen -= k+1; if ((k=strlen(bp))>chlen) break; strncpy(chfile[j]=chtmp,bp,chlen); chtmp += k+1; chlen -= k+1; fprintf(stderr," %c: %s\n",*chfile[j++],line); } l2: fprintf(stderr,"\n Enter library filename (e.g. %s), letter (e.g. P)\n", (ldnaseq==0)? "prot.lib" : "dna.lib"); fprintf(stderr," or a %% followed by a list of letters (e.g. %%PN): "); fflush(stderr); if (fgets(line,sizeof(line),stdin)==NULL) exit(0); if ((bp=strchr(line,'\n'))!=NULL) *bp='\0'; if (strlen(line)==0) goto l2; strncpy(lname,line,nl); } else { l1: fprintf(stderr," library file name: [%s]",aaenv); fflush(stderr); if (fgets(line,sizeof(line),stdin)==NULL) exit(0); if ((bp=strchr(line,'\n'))!=NULL) *bp='\0'; if (strlen(line)>0) strncpy(lname,line,nl); else strncpy(lname,aaenv,nl); } if (charr!=NULL) { fclose(fch); free(charr); } } libselect(lname) char *lname; { char tolower(); char line[120], *bp, *ulindex(); FILE *fch; int i; if (strlen(lname)>1 && *lname != '%') getlnames(lname); else { if (*lname=='%') if (*flstr=='\0') { fprintf(stderr," FASTLIBS undefined, cannot use %s\n",lname); exit(1); } else lname++; if (strlen(flstr)>0) { if ((fch=fopen(flstr,"r"))==NULL) { fprintf(stderr," cannot open choice file: %s\n",flstr); return; } } else { fprintf(stderr," FASTLIBS undefined\n"); addfile(lname,"\0"); return; } while (fgets(line,sizeof(line),fch)!=NULL) { if (line[0]==';') continue; if ((bp=strchr(line,'\n'))!=NULL) *bp='\0'; if ((bp=strchr(line,'$'))==NULL) continue; *bp++='\0'; if ((*bp++ -'0')!=ldnaseq) continue; if (ulindex(lname,*bp)!=NULL) { strncpy(ltitle,line,sizeof(ltitle)); getlnames(bp+1); } } fclose(fch); } } char *lbptr; int nnsize; addfile(fname,env) char *fname, *env; { char tname[120]; int len, lenv, i; /* allocate some space for file names */ if (lbnarr==NULL) { if ((lbnarr=calloc((size_t)MAXLF*MAXLN,sizeof(char)))==NULL) { fprintf(stderr," could not allocate name table\n"); exit(1); } nln = 0; nnsize = MAXLF*MAXLN; lbptr = lbnarr; } lenv = strlen(env)+1; len=strlen(fname)+1+lenv; if (nnsize > sizeof(tname)) { if (lenv > 1 && *fname != '#') { strncpy(tname,env,sizeof(tname)); #ifdef UNIX strcat(tname,"/"); #endif } else tname[0]='\0'; strncat(tname,fname,sizeof(tname)-strlen(tname)-1); len=strlen(tname)+1; strncpy(lbptr,tname,nnsize); } else fprintf(stderr,"no more space for filenames: %s ignored\n",fname); if (nln< MAXLF) lbnames[nln++]=lbptr; else fprintf(stderr," no more file name slots: %s ignored\n",lbptr); lbptr += len; nnsize -= len; } char tolower(a) char a; { if (a>='A' && a<='Z') return a + ('a'-'A'); else return a; } char *ulindex(str,chr) char *str, chr; { char c; c = tolower(chr); while (*str != '\0' && tolower(*str) !=c ) str++; if (*str=='\0') return NULL; else return str; }