/* PATTERN This program looks for a pattern of motifs defined by the user in the sequence(s) contained in the database file specified. The program can be invoked either by use of command line parameters or in an interactive mode when the user is prompted for replies. Help on the command line parameters can be obtained by typing: PATTERN HELP or PATTERN ?. The program is written in standard ANSI C. In this source file conditional compilation for the Borland Turbo C compiler (IBM PC) or the Waterloo compiler for an IBM 3090 mainframe can be selected by setting the appropriate define to TRUE and the others to FALSE. Most constants set by the conditional compilation are concerned with the underlying file naming conventions on the different systems and with the amount of memory available. The code is written to be able to run on either ASCII or EBCDIC encoded machines. Please acknowledge the original description of this program in any publications: Cockwell & Giles (1989) Comput Applic Biosci 5, 227-232 */ /* 9.11.89: Ported to VAX C by R. Fuchs (FUCHS@EMBL). Uses Curses, so make sure that LNK$LIBRARY is defined as SYS$LIBRARY:VAXCCURSE and LNK$LIBRARY_1 as SYS$LIBRARY:VAXCRTL. 
*/ #include #include #include
/* NOTE(review): the header names of the #include directives are missing from this copy of the source. */
/* Build configuration. One of waterloo, turboc and VAX is set to TRUE to select a target; the #if sections below then define the platform limits (MAXSEQ, SCORE, LIMIT), the default file names and the screen-clear alias cls. */
#define TRUE 1 #define FALSE 0 #define waterloo FALSE #define turboc FALSE #define VAX TRUE #define MAXQ 50 #define ARRSIZE 256 #define MAXMATRIX 30 #define MAXLINE 128
/* Settings compiled when turboc is TRUE (Borland Turbo C on the IBM PC, per the file header). */
#if (turboc) #define MAXSEQ 10000 #define SCORE 1000 #define LIMIT 4 #define ERRLOG "CON" #define RESFILE "PATTERN.OUT" #define SCANNAME "SCA" #define FILENAMELEN 80 #include #define cls clrscr #define index strchr #endif
/* Settings compiled when waterloo is TRUE (Waterloo compiler on the IBM 3090, per the file header). */
#if (waterloo) #define SCORE 1500 #define LIMIT 6 #define ERRLOG "TERMINAL" #define RESFILE "PATTERN OUTPUT A" #define SCANNAME "SCANFILE" #define FILENAMELEN 20 #include #include #define MAXSEQ 200000 #define cls clear #endif
/* Settings compiled when VAX is TRUE (the VAX C port). ERRLOG is the name screen, which openfiles maps to stdout. */
#if (VAX) #include #define SCORE 1500 #define LIMIT 6 #define RESFILE "PATTERN.OUT" #define ERRLOG "screen" #define SCANNAME "SCA" #define MAXSEQ 200000 #define FILENAMELEN 80 #define cls clear #define index strchr #endif
/* Program-wide state. matches[q][0] counts the hits recorded for motif q and the following elements hold 1-based positions; qseq holds the motif strings; min and max hold the gap limits between motifs; rname, lname and ename are the results, sequence and error-log file names with rfil, lfil and efil as their streams; dbname is the name of the sequence currently being searched. */
int dlength, matches[LIMIT][SCORE], qlength[LIMIT]; char setmatrix[LIMIT][ARRSIZE][MAXMATRIX]; int max[LIMIT-1], min[LIMIT-1]; int left, right; int total, totalseq, sum; int totmotif, minmotif, maxmotif; char qseq[LIMIT][MAXQ], *dseq, uptable[256]; char rname[FILENAMELEN], lname[FILENAMELEN], ename[FILENAMELEN]; char dbname[71]; FILE *rfil, *lfil, *efil;
/* Program entry. Initialises curses on VAX, calls settranstable, then reads the pattern from the command line (getnames) when arguments are given or prompts for it (interractive) otherwise; both return the number of motifs. After that databasecheck, openfiles and checkminmax are called in that order. */
main(argc,argv) int argc; char *argv[]; { int i, count, test; char *calloc(); #if (VAX) initscr(); #endif left = right = 0; settranstable(); /* get input parameters and find number of motifs in pattern definition */ if(argc >1) count = getnames(argc,argv); else count = interractive(); test = databasecheck(lname); openfiles(count); totmotif = count; checkminmax(count);
/* NOTE(review): the source text is damaged from the next statement on. The rest of main and the head of the command-line parser (presumably getnames, which main calls above) are missing; what remains parses a numeric minimum gap into min and copies a motif argument into qseq. */
for(i=0;i 3) { for(i=1;i4) { printf("ERROR: minimum gap too large.\n"); exit(0); } if(((int)arg[j] >= (int)'0') &&((int)arg[j] <= (int)'9')) temp[j] = arg[j]; else break; } sscanf(temp,"%d",&min[gapcount-1]); } strcpy(qseq[qcount++],argv[i++]);
/* Remaining arguments are options of the form KEY=value. Only the first character is examined, mapped through uptable (presumably upper-casing it): O = results file, E = error log, S = sequence file. NOTE(review): eq is used as eq+1 without a check that strchr found an = sign. */
while( i < argc) { strcpy(arg, argv[i]); eq = strchr(arg,'='); strncpy(&key,arg,1); key = uptable[key]; switch (key) { case 'O': /* get results file */ 
strcpy(rname, eq+1 );
/* Append up to two further arguments, separated by blanks, stopping at one that contains an = sign (presumably to allow multi-word file names). NOTE(review): argv[i+1] is passed to index before the i+1 >= argc test is made. */
for(k = 1;k <= 2; k++) { if(index(argv[i+1], '=')) break; if(i+1 >= argc) break; else { strcpy( arg, argv[i + 1]); i++; strcat(rname," "); strcat(rname, arg); } } break; case 'E': /* get error log file */ strcpy(ename, eq+1 );
/* NOTE(review): unlike the O and S cases this loop has no test for an = sign, so up to two following arguments are always appended to ename. */
for(k = 1;k <= 2; k++) { if(i+1 >= argc) break; else { strcpy( arg, argv[i + 1]); i++; strcat(ename," "); strcat(ename, arg); } } break; case 'S': /* get sequence file */ strcpy(lname, eq+1 ); for(k = 1;k <= 2; k++) { if(index(argv[i+1], '=')) break; if(i+1 >= argc) break; else { strcpy( arg, argv[i + 1]); i++; strcat(lname," "); strcat(lname, arg); } } break; default: printf(" WARNING: Unknown argument %s \n",argv[i]); } i++; } } /* set initial length of query sequence */ for(i=0;i min */ for(i=0;i1000 )||(max[i] >1000 )){ fprintf(efil,"ERROR: gap values must be less than 1000.\n"); exit(0); } } for(i=0;i= MAXQ) { fprintf(efil,"ERROR: motif %s too long.\n",qseq[i]); exit(0); } }
/* checkseq: NOTE(review) most of the body is missing from this copy. The visible tail writes a motif-too-long error to efil and exits when a comparison against MAXMATRIX succeeds. */
checkseq(qcount) int qcount; { int i; for(i=0;i= MAXMATRIX){ fprintf(efil,"ERROR: motif %s too long.\n",qseq[i]); exit(0); } }
/* openfiles: opens the error log (ename) and results file (rname) for writing and the sequence file (lname) for reading. The name screen selects stdout for the first two. A failed open is reported and the program exits with status 0. */
openfiles(count) int count; { int i; if(strcmp(ename,"screen")) { efil = fopen(ename,"w"); if (efil == 0) { printf("ERROR: cannot open error log file %s\n",ename); exit(0); } } else efil = stdout; if(strcmp(rname,"screen")) { rfil = fopen(rname,"w"); if (rfil == 0) { fprintf(efil,"ERROR: cannot open results file %s \n",rname); exit(0); } } else rfil = stdout; lfil = fopen(lname,"r"); if (lfil == 0) { fprintf(efil,"ERROR: cannot open sequence file %s.\n",lname); exit(0); }
/* NOTE(review): waterloo is always defined above (as TRUE or FALSE), so this #ifndef section is never compiled; #if !(waterloo) was probably intended. The text after the #endif is damaged in this copy: the end of openfiles and the head of the scanning routines are missing. */
#ifndef waterloo if (setvbuf(lfil, NULL, _IOFBF, 32767) != 0) { fprintf(efil,"WARNING: insufficient memory for large database buffer\n"); } #endif for(i=0;i') strncpy(dbname,string,70); else { fprintf(efil,"Warning: SCANFILE in unrecognised format\n"); test = 0; } if(test == 0) return(0); while ((dlength = getlib(string)) != 0) { if (dlength >= sum) compare(0); for(i=1;i= sum) compare(0); for(i=1;i') strncpy(dbname,string,70);
/* NOTE(review): dbname has 71 elements and strncpy copies at most 70 characters without adding a terminator, so element 70 is the only guaranteed NUL; the assignment below replaces it with a newline and can leave dbname unterminated. */
if(*dbname != 0) 
if(strlen(dbname) >= 70) dbname[70] = '\n';
/* Read sequence lines until a line starting with the > character or end of file. Once the running length n would reach MAXSEQ a warning is written to efil, the line is clipped and stop ends the loop on the next pass. */
while(fgets(string,MAXLINE,lfil) != 0) { if (string[0] == '>') { break; } if(stop == 1) break; len = strlen(string)-1; if(((len+n)>=MAXSEQ)&&(stop == 0)){ fprintf(efil,"WARNING: sequence over %d long; truncated.\n",MAXSEQ) ; len = MAXSEQ - n; stop = 1; }
/* NOTE(review): the text is damaged again after the next for( token: the end of this routine and the head of the next reader are missing. The code that follows recognises the entry format (staden, embl, genbank, codata or fastp), copies the entry name into dbname and reads on towards the sequence lines; reading stops at a line starting with two slashes. The strncmp lengths 5 and 10 are longer than the visible ID and LOCUS literals, so blanks in those literals may have been collapsed in this copy. The fgets calls in the skip loops are unchecked, so a truncated entry could loop forever. The dbname[70] assignment noted earlier occurs a second time. After that come fragments of the motif comparison and result printing code, which end mid-statement. */
for(i=0;i')) staden = TRUE; else if(strncmp(temp,"ID ",5) == 0) embl = TRUE; else if(strncmp(temp,"LOCUS ",10) == 0) genbank = TRUE; else if(strncmp(temp,"ENTRY",5) == 0) codata = TRUE; else if (temp[0] == '>') fastp = TRUE; if (embl){ strncpy(dbname,temp+5,70); while(temp[0] != ' ') fgets(temp,MAXLINE,lfil); } if (genbank){ while(strncmp(temp,"ORIGIN",6) !=0){ fgets(temp,MAXLINE,lfil); if(strncmp(temp,"DEFINITION",10) == 0) strncpy(dbname,temp+12,70); } fgets(temp,MAXLINE,lfil); } if(codata){ strncpy(dbname,temp+6,70); while(strncmp(temp,"SEQUENCE",8) != 0) fgets(temp,MAXLINE,lfil); fgets(temp,MAXLINE,lfil); } if (fastp){ strncpy(dbname,temp+1,70 ); fgets(temp,MAXLINE,lfil); } if(staden){ strncpy(dbname,temp+1,18); offset = 20; } if(*dbname != 0) if(strlen(dbname) >= 70) dbname[70] = '\n'; do{ if(strncmp(temp,"//",2) == 0) break; len = strlen(temp); for(i=offset;i= SCORE) { fprintf(efil,"WARNING matches overflow.\n"); return; } matches[q][0] = matches[q][0] + 1; matches[q][count] = i+1; /* offset by one to count from one instead of zero */ } } /* add one to total for each match, as soon as a match does not occur break out of the loop */ else if(q>0){ count = matches[0][0]; for(i=0;idlength) break; if(matches[q-1][i+1] <= 0) break; for(j=0,total=0;j0){ postemp++; for(n=0;n 0) { next++; /* print out matched sequence name */ if(next == 1) { fprintf(rfil,"\n%s",dbname); totalseq++; } total++; fprintf(rfil,"match at;"); for(i=0;i