/* A GLOBAL THREE-SEQUENCE ALIGNMENT PROGRAM (GTA): Copyright (c) 1992 Xiaoqiu Huang All Rights Reserved. Permission to use, copy, modify, and distribute this software and its documentation for educational, research and non-profit purposes, without fee, and without a written agreement is hereby granted, provided that the above copyright notice, this paragraph and the following three paragraphs appear in all copies. Permission to incorporate this software into commercial products may be obtained from the Intellectual Property Office, 1400 Townsend Drive, 301 Administration Building, Houghton, MI 49931, phone (906) 487-3429. IN NO EVENT SHALL THE AUTHOR OR MICHIGAN TECHNOLOGICAL UNIVERSITY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF MICHIGAN TECHNOLOGICAL UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. MICHIGAN TECHNOLOGICAL UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND MICHIGAN TECHNOLOGICAL UNIVERSITY HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. Proper attribution of the author as the source of the software would be appreciated: Alignment of Three Sequences in Quadratic Space, Applied Computing Review, 1(2): 7-11, 1993. Xiaoqiu Huang Department of Computer Science Michigan Technological University Houghton, MI 49931 E-mail: huang@cs.mtu.edu The GTA program computes an optimal global alignment of three sequences using dynamic programming and divide-conquer techniques. The program takes cubic time and quadratic space. Note that the space requirement of the GTA program is one order of magnitude less than that of previous programs based on rigorous dynamic programming algorithms. Thus, the GTA program can be used to align three protein sequences of a few thousand residues on workstations with tens of megabytes of memory. Users supply scoring parameters. In the simplest form, users just provide 3 integers: ms, q and r, where ms is the score of a mismatch and the score of an i-symbol indel is -(q + r * i). Each match automatically receives score 10. This simple scoring scheme may be used for DNA sequences. NOTE: all scores are integers. In general, users can define an alphabet of characters appearing in the sequences and a matrix that gives the substitution score for each pair of symbols in the alphabet. The 127 ASCII characters are eligible. The alphabet and matrix are given in a file, where the first line lists the characters in the alphabet and the lower triangle of the matrix comes next. An example file looks as follows: ARNDC 13 -15 19 -10 -22 11 -20 -10 -20 18 -10 -20 -10 -20 12 Here the -22 at position (3,2) is the score of replacing N by R. This general scoring scheme is useful for protein sequences where the set of protein characters and Dayhoff matrix are specified in the file. The score of an alignment of three sequences is the sum of the scores of the configurations in the alignment. Let s(a,b) be the score of aligning a and b. Sample Configuration Types Scores a b s(a,b)+s(b,c)+s(a,c) c a b s(a,b)-r - a - -r - In addtion, a sequence of the same type of configurations containing the null symbol '-' is given a score of -q. The GTA program is written in C and runs under Unix systems on Sun workstations. We think that the program is portable to many machines. Sequences to be analyzed are stored in separate files. An input file contains all characters of a sequence, separated by newline characters, in linear order. No other characters are allowed. Since upper case and lower case characters are different, use the same case consistently. A sample sequence file of 3 lines is shown below. VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGNPKV KAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHHFGK EFTPPVQAAYQKVVAGVANALAHKYH To find the best alignment of three sequences in files A, B and C, use a command of form gta A B C ms q r > result where gta is the name of the object code, ms is a negative integer specifying mismatch weight, q and r are non-negative integers specifying gap-open and gap-extend penalties, respectively. Output alignment is saved in the file "result". For using a scoring matrix defined in file S, use a command of form gta A B C S q r > result Note that ms is replaced by the file S. */ #include #define MAX 900 /* the maximum length of three sequences */ static int SS[MAX+1][MAX+1]; /* maximum-scoring matrix */ static int DD[MAX+1][MAX+1]; /* matrix for indels in sequence 3 */ static int EE[MAX+1][MAX+1]; /* matrix for indels in sequences 1 and 3 */ static int FF[MAX+1][MAX+1]; /* matrix for indels in sequences 2 and 3 */ static int GG[MAX+1]; /* array for indels in sequences 1 and 2 */ static int HH[MAX+1]; /* array for indels in sequence 1 */ static int SP[MAX+1]; /* array for the last row of SS */ static int EP[MAX+1]; /* array for the last row of EE */ static int PS[3*MAX+3];/* alignment script */ /* script code: a--,1;-b-,2;--c,3;ab-,4;a-c,5;-bc,6;abc,7; */ /* Below is for the reverse pass */ static int RR[MAX+1][MAX+1]; /* maximum-scoring matrix */ static int TT[MAX+1][MAX+1]; /* matrix for indels in sequence 3 */ static int UU[MAX+1][MAX+1]; /* matrix for indels in sequences 1 and 3 */ static int VV[MAX+1][MAX+1]; /* matrix for indels in sequences 2 and 3 */ /* scoring parameters */ static int q1; /* penalty for starting a gap in one sequence */ static int q2; /* penalty for starting a gap in two sequences */ static int r1; /* penalty for extending a gap in one sequence */ static int r2; /* penalty for extending a gap in one sequence */ static int qr1; /* q1 + r1 */ static int qr2; /* q2 + r2 */ static int v[128][128]; /* substitution scores */ static int match, mismh; /* max and min substitution weights */ static char *name1, *name2, *name3; /* names of sequence files */ int global(); static int zero = 0; /* int type zero */ static int *sapp; /* Current script append ptr */ #define ADD(k) \ { *sapp++ = (k); \ } main(argc, argv) int argc; char *argv[]; { int M, N, L; /* Sequence lengths and k */ char *A, *B, *C; /* Storing two sequences */ int symbol; /* The next character */ int ms; /* User-supplied weights */ FILE *Bp, *Ap, *Sp, *ckopen(); char *ckalloc(); /* space-allocating function */ register int i, j; char alph[129], *s; /* alphabet */ int size; /* size of alphabet */ int score; if ( argc != 7 ) fatalf("The proper form of command is: \n%s file1 file2 file3 mismatch(<0) gap-open(>=0) gap-extend(>0)", argv[0]); /* determine the sequence lengths */ Ap = ckopen(argv[1], "r"); for (M = 0; ( symbol = getc(Ap)) != EOF ; ) if ( symbol != '\n' ) ++M; (void) fclose(Ap); if ( M > MAX ) fatal("The length of first sequence exceeds the maximum limit: MAX"); name1 = argv[1]; /* allocate space for A */ A = ( char * ) ckalloc( (M + 1) * sizeof(char)); /* read the first sequence into A */ Ap = ckopen(argv[1], "r"); for (M = 0; ( symbol = getc(Ap)) != EOF ; ) if ( symbol != '\n' ) A[++M] = symbol; (void) fclose(Ap); /* read the second sequence into B */ Bp = ckopen(argv[2], "r"); for (N = 0; ( symbol = getc(Bp)) != EOF ; ) if ( symbol != '\n' ) ++N; (void) fclose(Bp); if ( N > MAX ) fatal("The length of second sequence exceeds the maximum limit: MAX"); name2 = argv[2]; B = ( char * ) ckalloc( (N + 1) * sizeof(char)); Bp = ckopen(argv[2], "r"); for (N = 0; ( symbol = getc(Bp)) != EOF ; ) if ( symbol != '\n' ) B[++N] = symbol; (void) fclose(Bp); /* read the third sequence into C */ Bp = ckopen(argv[3], "r"); for (L = 0; ( symbol = getc(Bp)) != EOF ; ) if ( symbol != '\n' ) ++L; (void) fclose(Bp); if ( L > MAX ) fatal("The length of third sequence exceeds the maximum limit: MAX"); name3 = argv[3]; C = ( char * ) ckalloc( (L + 1) * sizeof(char)); Bp = ckopen(argv[3], "r"); for (L = 0; ( symbol = getc(Bp)) != EOF ; ) if ( symbol != '\n' ) C[++L] = symbol; (void) sscanf(argv[argc-2],"%d", &q1); if ( q1 < 0 ) fatal("The gap-open penalty is a nonnegative integer"); (void) sscanf(argv[argc-1],"%d", &r1); if ( r1 < 0 ) fatal("The gap-extend penalty is a positive integer"); /* check if the argument represents a negative integer */ s = argv[argc-3]; if ( *s == '-' ) s++; for ( ; *s >= '0' && *s <= '9' ; s++ ); if ( *s == '\0' ) { (void) sscanf(argv[argc-3],"%d", &ms); if ( ms >= 0 ) fatal("The mismatch weight is a negative integer"); match = 10; mismh = ms; /* set match and mismatch weights */ for ( i = 0; i < 128 ; i++ ) for ( j = 0; j < 128 ; j++ ) if (i == j ) v[i][j] = 10; else v[i][j] = mismh; } else { /* read a file containing alphabet and substitution weights */ Sp = ckopen(argv[argc-3], "r"); (void) fscanf(Sp, "%s", alph); size = strlen(alph); match = mismh = 0; for ( i = 0; i < size ; i++ ) for ( j = 0; j <= i ; j++ ) { (void) fscanf(Sp, "%d", &ms); v[alph[i]][alph[j]] = v[alph[j]][alph[i]] = ms; if ( ms > match ) match = ms; if ( ms < mismh ) mismh = ms; } } q2 = q1; r2 = r1; qr1 = q1 + r1; qr2 = q2 + r2; sapp = PS; score = global(A,B,C,M,N,L,q2,q2,q1,q1,q1,q1); (void) printf("Max Match Min Mismatch Gap-Open Penalty Gap-Extension Penalty\n"); (void) printf(" %d %d %d %d\n\n", match, mismh, q2, r2); (void) printf(" Upper Sequence : %s\n", name1); (void) printf(" Length : %d\n", M); (void) printf(" Middle Sequence : %s\n", name2); (void) printf(" Length : %d\n", N); (void) printf(" Lower Sequence : %s\n", name3); (void) printf(" Length : %d\n\n", L); (void) printf(" Similarity Score : %d\n",score); display(A,B,C,M,N,L,PS,1,1,1); } /* global(A,B,C,M,N,L,cs,ce,acs,ace,bcs,bce) returns the similarity score of an optimal alignment of three sequences A[1..M], B[1..N] and C[1..L] that starts (ends) with indels if cs, acs, or bcs (ce, ace, bce) are zeros and appends the alignment to the current script. */ int global(A,B,C,M,N,L,cs,ce,acs,ace,bcs,bce) char *A, *B, *C; /* three sequecnes */ int M, N, L; /* three sequence lengths */ int cs, ce, acs, ace, bcs, bce; /* gap-open penalties in forward and reverse passes */ { int midi, midj, midk; /* Midpoint */ int type; /* Its type */ int midc; /* Its cost */ { register int i, j, k; register int c, e, f, g, ie, d; int h, spp, fp, gp, s; int t, *va, *wa; int x1, x2; /* Boundary cases: L <= 1 */ if ( M <= 0 ) { if ( N <= 0 ) { if ( L > 0 ) { for ( i = 1; i <= L; i++ ) ADD(3); if ( cs > ce ) cs = ce; return -(cs + L * r2); } return 0; } if ( L <= 0 ) { for ( i = 1; i <= N; i++ ) ADD(2); return -(q2 + N * r2); } SS[L][N] = 0; DD[L][N] = 0; GG[N] = - bce; t = - q2; for ( j = N - 1; j >= 0; j-- ) { SS[L][j] = t = t - r2; GG[j] = t - q1; HH[j] = t - q2; DD[L][j] = 2; } t = - ce; for ( i = L - 1; i >= 0; i-- ) { SS[i][N] = c = t = t - r2; gp = GG[N]; GG[N] = t - q1; DD[i][N] = 3; ie = t - q2; va = v[C[i+1]]; for ( j = N - 1; j >= 0; j-- ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; if ( j ) { if ((c = SS[i+1][j] - qr2) > (d = HH[j] - r2)) d = c; } else { if ((c = SS[i+1][j] - cs - r2) > (d = HH[j] - r2)) d = c; } s = va[B[j+1]]; if ((c = SS[i+1][j+1] + s - qr1) > (g = gp + s - r1)) g = c; gp = GG[j]; GG[j] = g; c = g; s = 6; if (c < ie) { c = ie; s = 2; } if (c < d) { c = d; s = 3; } DD[i][j] = s; SS[i][j] = c; HH[j] = d; } } c = SS[0][0]; if ( ! bcs ) { t = v[C[1]][B[1]] - r1; g = SS[1][1] + t; d = 1; for ( i = 2; i <= N && i <= L; i++ ) { t += v[C[i]][B[i]] - r1; if ( g < (f = SS[i][i] + t) ) { g = f; d = i; } } if ( c < g ) { c = g; for ( i = 0; i < d; i++ ) DD[i][i] = 6; } } midc = SS[0][0] = c; i = j = 0; while ( s = DD[i][j] ) { switch(s) { case 3: i += 1; break; case 2: j += 1; break; case 6: i += 1; j += 1; break; } ADD(s); } return midc; } if ( N <= 0 ) { if ( L <= 0 ) { for ( i = 1; i <= M; i++ ) ADD(1); return - (q2 + M * r2); } SS[L][M] = 0; DD[L][M] = 0; GG[M] = - ace; t = - q2; for ( j = M - 1; j >= 0; j-- ) { SS[L][j] = t = t - r2; GG[j] = t - q1; HH[j] = t - q2; DD[L][j] = 1; } t = - ce; for ( i = L - 1; i >= 0; i-- ) { SS[i][M] = c = t = t - r2; gp = GG[M]; GG[M] = t - q1; DD[i][M] = 3; ie = t - q2; va = v[C[i+1]]; for ( j = M - 1; j >= 0; j-- ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; if ( j ) { if ((c = SS[i+1][j] - qr2) > (d = HH[j] - r2)) d = c; } else { if ((c = SS[i+1][j] - cs - r2) > (d = HH[j] - r2)) d = c; } s = va[A[j+1]]; if ((c = SS[i+1][j+1] + s - qr1) > (g = gp + s - r1)) g = c; gp = GG[j]; GG[j] = g; c = g; s = 5; if (c < ie) { c = ie; s = 1; } if (c < d) { c = d; s = 3; } DD[i][j] = s; SS[i][j] = c; HH[j] = d; } } c = SS[0][0]; if ( ! acs ) { t = v[C[1]][A[1]] - r1; g = SS[1][1] + t; d = 1; for ( i = 2; i <= M && i <= L; i++ ) { t += v[C[i]][A[i]] - r1; if ( g < (f = SS[i][i] + t) ) { g = f; d = i; } } if ( c < g ) { c = g; for ( i = 0; i < d; i++ ) DD[i][i] = 5; } } midc = SS[0][0] = c; i = j = 0; while ( s = DD[i][j] ) { switch(s) { case 3: i += 1; break; case 1: j += 1; break; case 5: i += 1; j += 1; break; } ADD(s); } return midc; } if ( L <= 1 ) { SS[M][N] = c = 0; DD[M][N] = 0; GG[N] = - q1; t = - q2; for ( j = N - 1; j >= 0; j-- ) { SS[M][j] = c = t = t - r2; GG[j] = t - q1; HH[j] = t - q2; DD[M][j] = 2; } t = - q2; for ( i = M - 1; i >= 0; i-- ) { SS[i][N] = c = t = t - r2; gp = GG[N]; GG[N] = t - q1; DD[i][N] = 1; ie = t - q2; va = v[A[i+1]]; for ( j = N - 1; j >= 0; j-- ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; if ((c = SS[i+1][j] - qr2) > (d = HH[j] - r2)) d = c; s = va[B[j+1]]; if ((c = SS[i+1][j+1] + s - qr1) > (g = gp + s - r1)) g = c; gp = GG[j]; GG[j] = g; c = g; s = 4; if (c < d) { c = d; s = 1; } if (c < ie) { c = ie; s = 2; } DD[i][j] = s; SS[i][j] = c; HH[j] = d; } } midc = c; if ( L == 1 ) { EE[M][N] = c = SS[M][N] - ce - r2; FF[M][N] = 3; GG[N] = c - q1; ie = HH[N] = c - q2; va = v[C[1]]; for ( j = N - 1; j >= 0; j-- ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; t = SS[M][j] - qr2; f = SS[M][j+1] + va[B[j+1]] - r1; if ( j == N-1 ) f -= bce; else f -= q1; c = f; s = 6; if ( c < t ) { c = t; s = 3; } if ( c < ie ) { c = ie; s = 2; } EE[M][j] = c; FF[M][j] = s; HH[j] = c - q2; GG[j] = c - q1; } for ( i = M - 1; i >= 0; i-- ) { t = SS[i][N] - qr2; if ((c = EE[i+1][N] - qr2) > (d = HH[N] - r2)) d = c; HH[N] = d; e = SS[i+1][N] + va[A[i+1]] - r1; if ( i == M-1 ) e -= ace; else e -= q1; c = e; s = 5; if ( c < d ) { c = d; s = 1; } if ( c < t ) { c = t; s = 3; } EE[i][N] = c; FF[i][N] = s; gp = GG[N]; GG[N] = c - q1; ie = c - q2; wa = v[A[i+1]]; for ( j = N - 1; j >= 0; j-- ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; t = SS[i][j] - qr2; if ((c = EE[i+1][j] - qr2) > (d = HH[j] - r2)) d = c; HH[j] = d; s = wa[B[j+1]]; if ((c = EE[i+1][j+1] + s - qr1) > (g = gp + s - r1)) g = c; gp = GG[j]; GG[j] = g; f = SS[i][j+1] + (x1 = va[B[j+1]] ) - qr1; e = SS[i+1][j] + (x2 = va[A[i+1]] ) - qr1; c = SS[i+1][j+1] + s + x1 + x2; s = 7; if ( c < g ) { c = g; s = 4; } if ( c < e ) { c = e; s = 5; } if ( c < f ) { c = f; s = 6; } if ( c < d ) { c = d; s = 1; } if ( c < ie ) { c = ie; s = 2; } if ( c < t ) { c = t; s = 3; } EE[i][j] = c; FF[i][j] = s; } } c = EE[0][0]; s = FF[0][0]; t = SS[0][0] - cs - r2; if ( c < t ) { c = t; s = 3; } f = SS[0][1] + va[B[1]] - bcs - r1; if ( c < f ) { c = f; s = 6; } e = SS[1][0] + va[A[1]] - acs - r1; if ( c < e ) { c = e; s = 5; } midc = EE[0][0] = c; FF[0][0] = s; } i = j = 0; if ( L == 1 ) while ( 1 ) { s = FF[i][j]; switch(s) { case 1: case 5: i += 1; break; case 2: case 6: j += 1; break; case 4: case 7: i += 1; j += 1; break; case 3: break; } ADD(s); if ( s == 3 || s >= 5 ) break; } while ( s = DD[i][j] ) { switch(s) { case 1: i += 1; break; case 2: j += 1; break; case 4: i += 1; j += 1; break; } ADD(s); } return midc; } /* Divide: Find optimum midpoint (midi,midj,midk) of cost midc */ /* Forward pass */ midk = L/2; SS[0][0] = 0; DD[0][0] = -cs; EE[0][0] = -acs; FF[0][0] = -bcs; GG[0] = - q1; t = - q2; for ( j = 1; j <= N; j++ ) { SS[0][j] = t = t - r2; EE[0][j] = FF[0][j] = GG[j] = t - q1; DD[0][j] = HH[j] = t - q2; } t = - q2; for ( i = 1; i <= M; i++ ) { SS[i][0] = c = t = t - r2; DD[i][0] = ie = t - q2; gp = GG[0]; EE[i][0] = FF[i][0] = GG[0] = t - q1; va = v[A[i]]; for ( j = 1; j <= N; j++ ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; if ((c = SS[i-1][j] - qr2) > (d = HH[j] - r2)) d = c; HH[j] = d; s = va[B[j]]; if ((c = SS[i-1][j-1] + s - qr1) > (g = gp + s - r1)) g = c; gp = GG[j]; GG[j] = g; c = g; if (c < ie) c = ie; if (c < d) c = d; SS[i][j] = c; DD[i][j] = c - q2; EE[i][j] = FF[i][j] = c - q1; } } for ( k = 1; k <= midk; k++ ) { if ((d = SS[0][0] - qr2) > (c = DD[0][0] - r2)) c = d; SP[0] = SS[0][0]; SS[0][0] = DD[0][0] = c; fp = FF[0][0]; EP[0] = EE[0][0]; EE[0][0] = FF[0][0] = GG[0] = c - q1; ie = HH[0] = c - q2; va = v[C[k]]; for ( j = 1; j <= N; j++ ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; if ((c = SS[0][j] - qr2) > (d = DD[0][j] - r2)) d = c; DD[0][j] = d; s = va[B[j]]; if ((c = SP[j-1] + s - qr1) > (f = fp + s - r1)) f = c; fp = FF[0][j]; FF[0][j] = f; c = f; if ( c < d ) c = d; if ( c < ie ) c = ie; SP[j] = SS[0][j]; SS[0][j] = c; HH[j] = c - q2; EP[j] = EE[0][j]; EE[0][j] = GG[j] = c - q1; } for ( i = 1; i <= M; i++ ) { if ((c = SS[i][0] - qr2) > (d = DD[i][0] - r2)) d = c; DD[i][0] = d; if ((c = SS[i-1][0] - qr2) > (h = HH[0] - r2)) h = c; HH[0] = h; s = va[A[i]]; if ((c = SP[0] + s - qr1) > (e = EP[0] + s - r1)) e = c; EP[0] = EE[i][0]; EE[i][0] = e; c = e; if ( c < d ) c = d; if ( c < h ) c = h; spp = SP[0]; SP[0] = SS[i][0]; SS[i][0] = c; fp = FF[i][0]; gp = GG[0]; FF[i][0] = GG[0] = c - q1; ie = c - q2; wa = v[A[i]]; for ( j = 1; j <= N; j++ ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; if ((c = SS[i][j] - qr2) > (d = DD[i][j] - r2)) d = c; DD[i][j] = d; if ((c = SS[i-1][j] - qr2) > (h = HH[j] - r2)) h = c; HH[j] = h; s = wa[B[j]]; if ((c = SS[i-1][j-1] + s - qr1) > (g = gp + s - r1)) g = c; gp = GG[j]; GG[j] = g; x1 = va[B[j]]; if ((c = SP[j-1] + x1 - qr1) > (f = fp + x1 - r1)) f = c; fp = FF[i][j]; FF[i][j] = f; x2 = va[A[i]]; if ((c = SP[j] + x2 - qr1) > (e = EP[j] + x2 - r1)) e = c; EP[j] = EE[i][j]; EE[i][j] = e; c = spp + s + x1 + x2; if ( c < ie ) c = ie; if ( c < d ) c = d; if ( c < h ) c = h; if ( c < g ) c = g; if ( c < f ) c = f; if ( c < e ) c = e; spp = SP[j]; SP[j] = SS[i][j]; SS[i][j] = c; } } } /* Reverse pass */ RR[M][N] = 0; TT[M][N] = -ce; UU[M][N] = -ace; VV[M][N] = -bce; GG[N] = - q1; t = - q2; for ( j = N - 1; j >= 0; j-- ) { RR[M][j] = t = t - r2; UU[M][j] = VV[M][j] = GG[j] = t - q1; TT[M][j] = HH[j] = t - q2; } t = - q2; for ( i = M - 1; i >= 0; i-- ) { RR[i][N] = c = t = t - r2; TT[i][N] = ie = t - q2; gp = GG[N]; UU[i][N] = VV[i][N] = GG[N] = t - q1; va = v[A[i+1]]; for ( j = N - 1; j >= 0; j-- ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; if ((c = RR[i+1][j] - qr2) > (d = HH[j] - r2)) d = c; HH[j] = d; s = va[B[j+1]]; if ((c = RR[i+1][j+1] + s - qr1) > (g = gp + s - r1)) g = c; gp = GG[j]; GG[j] = g; c = g; if (c < ie) c = ie; if (c < d) c = d; RR[i][j] = c; TT[i][j] = c - q2; UU[i][j] = VV[i][j] = c - q1; } } for ( k = L - 1; k >= midk; k-- ) { if ((d = RR[M][N] - qr2) > (c = TT[M][N] - r2)) c = d; SP[N] = RR[M][N]; RR[M][N] = TT[M][N] = c; fp = VV[M][N]; EP[N] = UU[M][N]; UU[M][N] = VV[M][N] = GG[N] = c - q1; ie = HH[N] = c - q2; va = v[C[k+1]]; for ( j = N - 1; j >= 0; j-- ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; if ((c = RR[M][j] - qr2) > (d = TT[M][j] - r2)) d = c; TT[M][j] = d; s = va[B[j+1]]; if ((c = SP[j+1] + s - qr1) > (f = fp + s - r1)) f = c; fp = VV[M][j]; VV[M][j] = f; c = f; if ( c < d ) c = d; if ( c < ie ) c = ie; SP[j] = RR[M][j]; RR[M][j] = c; HH[j] = c - q2; EP[j] = UU[M][j]; UU[M][j] = GG[j] = c - q1; } for ( i = M - 1; i >= 0; i-- ) { if ((c = RR[i][N] - qr2) > (d = TT[i][N] - r2)) d = c; TT[i][N] = d; if ((c = RR[i+1][N] - qr2) > (h = HH[N] - r2)) h = c; HH[N] = h; s = va[A[i+1]]; if ((c = SP[N] + s - qr1) > (e = EP[N] + s - r1)) e = c; EP[N] = UU[i][N]; UU[i][N] = e; c = e; if ( c < d ) c = d; if ( c < h ) c = h; spp = SP[N]; SP[N] = RR[i][N]; RR[i][N] = c; fp = VV[i][N]; gp = GG[N]; VV[i][N] = GG[N] = c - q1; ie = c - q2; wa = v[A[i+1]]; for ( j = N - 1; j >= 0; j-- ) { if ((c = c - qr2) > (ie = ie - r2)) ie = c; if ((c = RR[i][j] - qr2) > (d = TT[i][j] - r2)) d = c; TT[i][j] = d; if ((c = RR[i+1][j] - qr2) > (h = HH[j] - r2)) h = c; HH[j] = h; s = wa[B[j+1]]; if ((c = RR[i+1][j+1] + s - qr1) > (g = gp + s - r1)) g = c; gp = GG[j]; GG[j] = g; x1 = va[B[j+1]]; if ((c = SP[j+1] + x1 - qr1) > (f = fp + x1 - r1)) f = c; fp = VV[i][j]; VV[i][j] = f; x2 = va[A[i+1]]; if ((c = SP[j] + x2 - qr1) > (e = EP[j] + x2 - r1)) e = c; EP[j] = UU[i][j]; UU[i][j] = e; c = spp + s + x1 + x2; if ( c < ie ) c = ie; if ( c < d ) c = d; if ( c < h ) c = h; if ( c < g ) c = g; if ( c < f ) c = f; if ( c < e ) c = e; spp = SP[j]; SP[j] = RR[i][j]; RR[i][j] = c; } } } /* Find optimal midpoint */ midc = SS[0][0]+RR[0][0]; midi = midj = 0; type = 1; for (i = 0; i <= M; i++) for (j = 0; j <= N; j++) if ((c = SS[i][j] + RR[i][j]) >= midc) if (c > midc || (SS[i][j] != EE[i][j] && RR[i][j] == UU[i][j]) || (SS[i][j] != FF[i][j] && RR[i][j] == VV[i][j]) || (SS[i][j] != DD[i][j] && RR[i][j] == TT[i][j])) { midc = c; midi = i; midj = j; } for (i = M-1; i >= 1; i-- ) for (j = N; j >= 0; j-- ) if ((c = EE[i][j] + UU[i][j] + q1) > midc) { midc = c; midi = i; midj = j; type = 3; } for (i = M; i >= 0; i-- ) for (j = N-1; j >= 1; j-- ) if ((c = FF[i][j] + VV[i][j] + q1) > midc) { midc = c; midi = i; midj = j; type = 4; } for (i = M; i >= 0; i-- ) for (j = N; j >= 0; j-- ) if ((c = DD[i][j] + TT[i][j] + q2) > midc) { midc = c; midi = i; midj = j; type = 2; } } /* Conquer: recursively around midpoint */ switch (type) { case 1: (void) global(A,B,C,midi,midj,midk,cs,q2,acs,q1,bcs,q1); (void) global(A+midi,B+midj,C+midk,M-midi,N-midj,L-midk,q2,ce,q1,ace,q1,bce); break; case 2: (void) global(A,B,C,midi,midj,midk-1,cs,zero,acs,q1,bcs,q1); ADD(3); /*delete c^midk c^midk+1*/ ADD(3); (void) global(A+midi,B+midj,C+midk+1,M-midi,N-midj,L-midk-1,zero,ce,q1,ace,q1,bce); break; case 3: (void) global(A,B,C,midi-1,midj,midk-1,cs,q2,acs,zero,bcs,q1); ADD(5); /*delete a^midi a^midi+1 - - c^midk c^midk+1*/ ADD(5); (void) global(A+midi+1,B+midj,C+midk+1,M-midi-1,N-midj,L-midk-1,q2,ce,zero,ace,q1,bce); break; case 4: (void) global(A,B,C,midi,midj-1,midk-1,cs,q2,acs,q1,bcs,zero); ADD(6); /*delete - - b^midj b^midj+1 c^midk c^midk+1*/ ADD(6); (void) global(A+midi,B+midj+1,C+midk+1,M-midi,N-midj-1,L-midk-1,q2,ce,q1,ace,zero,bce); break; } return midc; } /* Alignment display routine */ static char ALINE[51], BLINE[51], CLINE[51], DLINE[51], ELINE[51]; display(A,B,C,M,N,L,S,AP,BP,CP) char A[], B[], C[]; int M, N, L; int S[], AP, BP, CP; { register char *a, *b, *c, *d, *e; register int i, j, k, op; int lines, ap, bp, cp; i = j = k = lines = 0; ap = AP; bp = BP; cp = CP; a = ALINE; b = BLINE; c = CLINE; d = DLINE; e = ELINE; while ( i < M || j < N || k < L ) { op = *S++; switch (op) { case 1: *a++ = A[++i]; *b++ = ' '; *c++ = ' '; *d++ = '-'; *e++ = '-'; break; case 2: *a++ = ' '; *b++ = B[++j]; *c++ = ' '; *d++ = '-'; *e++ = '-'; break; case 3: *a++ = ' '; *b++ = ' '; *c++ = C[++k]; *d++ = '-'; *e++ = '-'; break; case 4: *a = A[++i]; *b = B[++j]; *c++ = ' '; *d++ = (*a++ == *b++) ? '|' : ' '; *e++ = '-'; break; case 5: *a = A[++i]; *b++ = ' '; *c = C[++k]; *d++ = (*a++ == *c++) ? '|' : ' '; *e++ = '-'; break; case 6: *a++ = ' '; *b = B[++j]; *c = C[++k]; *d++ = '-'; *e++ = (*b++ == *c++) ? '|' : ' '; break; case 7: *a = A[++i]; *b = B[++j]; *c = C[++k]; *d++ = (*a++ == *b) ? '|' : ' '; *e++ = (*b++ == *c++) ? '|' : ' '; break; } if (a >= ALINE+50 || i >= M && j >= N && k >= L) { *a = *b = *c = *d = *e = '\0'; (void) printf("\n%5d ",50*lines++); for (b = ALINE+10; b <= a; b += 10) (void) printf(" . :"); if (b <= a+5) (void) printf(" ."); (void) printf("\n%5d %s\n %s\n%5d %s\n %s\n%5d %s\n", ap,ALINE,DLINE,bp,BLINE,ELINE,cp,CLINE); ap = AP + i; bp = BP + j; cp = CP + k; a = ALINE; b = BLINE; c = CLINE; d = DLINE; e = ELINE; } } } /* lib.c - library of C procedures. */ /* fatal - print message and die */ fatal(msg) char *msg; { (void) fprintf(stderr, "%s\n", msg); exit(1); } /* fatalf - format message, print it, and die */ fatalf(msg, val) char *msg, *val; { (void) fprintf(stderr, msg, val); (void) putc('\n', stderr); exit(1); } /* ckopen - open file; check for success */ FILE *ckopen(name, mode) char *name, *mode; { FILE *fopen(), *fp; if ((fp = fopen(name, mode)) == NULL) fatalf("Cannot open %s.", name); return(fp); } /* ckalloc - allocate space; check for success */ char *ckalloc(amount) int amount; { char *malloc(), *p; if ((p = malloc( (unsigned) amount)) == NULL) fatal("Ran out of memory."); return(p); }