>From: ORANGE::EDU%"jkramer@MIAVAX.IR.MIAMI.EDU" "John Kramer" 22-AUG-1990 07:27 To: Don Gilbert Subj: Re: Molecular Weight from Sequence? Received: From PURCCVM(MAILER) by IUGOLD with Jnet id 1977 for GILBERTD@IUBACS; Wed, 22 Aug 90 07:33 EST Received: by PURCCVM (Mailer R2.07) id 7321; Wed, 22 Aug 90 07:22:24 EST Date: Wed, 22 Aug 90 08:18:11 EDT Reply-To: John Kramer Sender: "INFO-GCG: GCG Genetics Software Discussion" >From: John Kramer Subject: Re: Molecular Weight from Sequence? Comments: To: SYSJOHN%UTOROCI.BITNET@pucc.princeton.edu To: Don Gilbert The following is standard C. It has been used on IBM PC's (MSC compiler) and several UNIX boxes (cc compiler). The expected file input format is FASTA (Bill Pearson's) but this is the source so it can be changed to whatever is needed locally. This source is distributed FREE for anyone to use in any way for anything. Jack Kramer Director, Mol Bio Computing University of Miami School of Medicine -----------------------------cut here----------------------------- /* AASTATS computes some basic statistics of a protein based on the relative abundance of each amino acid. The primary purpose is to provide an estimate of the partial specific volume of the protein by an averaging method to aid in the calculation of band migration in the ultracentrifuge. Since other values which may also be useful were computed as intermediates such as mole and weight percents they are included in the output for each amino acid and as a total for the peptide as a check.*/ /* references */ /* Conformation TIBS (1987) vol 12 pp 37- */ /* Mass CRC Handbook of Chem and Phys (1961) */ /* Vol Prog. Biophys. Mol Biol (1972) vol 24 pp 107-123 */ /* Vbar Prog. Biophys. Mol Biol (1972) vol 24 pp 107-123 */ /* CHP JBC (1984) vol 259, no.5, pp 2910-2912 */ /* K&D Ann. Rev. Biophys. Biophys. Chem (1986) 15:321-353 */ /* E,S&G J. Mol. Biol.
(1982) 157:105-132 */ /* size BB Acta (1971) vol 259, pp 557-566 */ #include #include #define MAXAA 10000 #define MAXSTRING 255 #define MASS 6 #define VOL 7 #define VBAR 8 struct comment { char cmnt[81]; struct comment *nxtcmnt; }; struct statrec { int aa; char code; char name[3]; int cumnum; float cumwt,cumvol,cumvbar,numpcnt,wtpcnt,volpcnt,vbarpcnt; }; int i,j,k; int aapos,aaposmax,aaindex; struct comment *firstcomment, *lastcomment, *nextcomment; char anystring[MAXSTRING]; char aacode; char aaary[MAXAA]; FILE *infile; char infilename[40]; float meanvbar; char aacodeset[] = {'A','C','D','E','F','G','H','I','K','L', 'M','N','P','Q','R','S','T','V','W','Y','X','\0'}; float aadata[21][11] = /* chp, kd, esg,helix,sheet, turn, mass, vol, Vbar, size, test /* aa */ { { 0.00, 0.0, 0.0, 0.00, 0.00, 0.00, 0.00, 0.0, 0.000, 0.0, 0 },/* unk X */ { -0.48, 1.8, 1.6, 1.42, 0.83, 0.66, 71.08, 88.6, 0.748, 22.7, 1 },/* ala A */ { -0.32, 2.5, 2.0, 0.70, 1.19, 1.19, 103.14, 108.5, 0.631, 41.5, 2 },/* cys C */ { -0.75, -3.5, -9.2, 1.01, 0.54, 1.46, 115.09, 111.1, 0.579, 46.5, 3 },/* asp D */ { -0.71, -3.5, -8.2, 1.51, 0.37, 0.74, 129.12, 138.4, 0.643, 63.5, 4 },/* glu E */ { 1.03, 2.8, 3.7, 1.13, 1.38, 0.60, 147.18, 189.9, 0.774, 92.1, 5 },/* phe F */ { 0.00, -0.4, 1.0, 0.57, 0.75, 1.56, 57.06, 60.1, 0.632, 5.7, 6 },/* gly G */ { -0.51, -3.2, -3.0, 1.00, 0.87, 0.95, 137.15, 153.2, 0.670, 74.7, 7 },/* his H */ { 0.81, 4.5, 3.1, 1.08, 1.60, 0.47, 113.17, 166.7, 0.884, 73.7, 8 },/* ile I */ { -0.09, -3.9, -8.8, 1.16, 0.74, 1.01, 128.18, 168.6, 0.789, 79.5, 9 },/* lys K */ { 1.02, 3.8, 2.8, 1.21, 1.30, 0.59, 113.17, 166.7, 0.884, 73.7, 10 },/* leu L */ { 0.81, 1.9, 3.4, 1.45, 1.05, 0.60, 131.21, 162.9, 0.745, 74.6, 11 },/* met M */ { -0.87, -3.5, -4.8, 0.67, 0.89, 1.56, 114.11, 117.7, 0.619, 54.0, 12 },/* asn N */ { 2.03, -1.6, -0.2, 0.57, 0.55, 1.52, 97.12, 122.7, 0.758, 45.3, 13 },/* pro P */ { -0.32, -3.5, -4.1, 1.11, 1.10, 0.98, 128.14, 143.9, 0.674, 71.0, 14 },/* gln Q */ { -0.06, 
-4.5,-12.3, 0.98, 0.93, 0.95, 156.2, 173.4, 0.666, 100.4, 15 },/* arg R */ { 0.05, -0.8, 0.6, 0.77, 0.75, 1.43, 87.08, 89.0, 0.613, 30.4, 16 },/* ser S */ { -0.35, -0.7, 1.2, 0.83, 1.19, 0.96, 101.11, 116.1, 0.689, 47.4, 17 },/* thr T */ { 0.56, 4.2, 2.6, 1.06, 1.70, 0.50, 99.14, 140.0, 0.847, 56.7, 18 },/* val V */ { 0.66, -0.9, 1.9, 1.08, 1.37, 0.96, 186.21, 227.8, 0.734, 120.7, 19 },/* trp W */ { 1.24, -1.3, -0.3, 0.69, 1.47, 1.14, 163.18, 193.6, 0.712, 100.2, 20 } /* tyr Y */ }; struct statrec aastats[] = /*aaindex,code,name,number,weight,vol,vbar*/ { { 0,'X',"unk",0,0.0,0.0,0.0}, { 1,'A',"ala",0,0.0,0.0,0.0}, { 2,'C',"cys",0,0.0,0.0,0.0}, { 3,'D',"asp",0,0.0,0.0,0.0}, { 4,'E',"glu",0,0.0,0.0,0.0}, { 5,'F',"phe",0,0.0,0.0,0.0}, { 6,'G',"gly",0,0.0,0.0,0.0}, { 7,'H',"his",0,0.0,0.0,0.0}, { 8,'I',"ile",0,0.0,0.0,0.0}, { 9,'K',"lys",0,0.0,0.0,0.0}, {10,'L',"leu",0,0.0,0.0,0.0}, {11,'M',"met",0,0.0,0.0,0.0}, {12,'N',"asn",0,0.0,0.0,0.0}, {13,'P',"pro",0,0.0,0.0,0.0}, {14,'Q',"gln",0,0.0,0.0,0.0}, {15,'R',"arg",0,0.0,0.0,0.0}, {16,'S',"ser",0,0.0,0.0,0.0}, {17,'T',"thr",0,0.0,0.0,0.0}, {18,'V',"val",0,0.0,0.0,0.0}, {19,'W',"trp",0,0.0,0.0,0.0}, {20,'Y',"tyr",0,0.0,0.0,0.0}, {21,' ',"tot",0,0.0,0.0,0.0} }; main() { do { fprintf(stderr,"enter sequence filename: "); scanf("%s",infilename); } while ((infile = fopen(infilename,"r")) == NULL); aapos = 0; firstcomment = NULL; while (feof(infile) == 0) { aacode = fgetc(infile);putc(aacode,stderr); if (aacode == '>' || aacode == ';') { fgets(anystring,MAXSTRING,infile); fputs(anystring,stderr); if((nextcomment = (struct comment *)malloc(sizeof(struct comment))) == NULL) { printf("too many comments -- aborting"); fclose(infile); exit(1); } sprintf(nextcomment->cmnt,"%c%s",aacode,anystring); if (firstcomment ==NULL) { firstcomment = nextcomment; lastcomment = nextcomment; lastcomment->nxtcmnt =NULL; } else { lastcomment->nxtcmnt = nextcomment; lastcomment = nextcomment; lastcomment->nxtcmnt =NULL; }; } aacode = 
toupper(aacode); if (strchr(aacodeset,aacode) != NULL) { for(aaindex = 0;aastats[aaindex].code != aacode;aaindex++); aapos = aapos + 1; aaary[aapos]=aacode; aastats[aaindex].cumnum += 1; aastats[21].cumnum += 1; aastats[aaindex].cumwt += aadata[aaindex][MASS]; aastats[21].cumwt += aadata[aaindex][MASS]; aastats[aaindex].cumvol += aadata[aaindex][VOL]; aastats[21].cumvol += aadata[aaindex][VOL]; aastats[aaindex].cumvbar += aadata[aaindex][VBAR]; aastats[21].cumvbar += aadata[aaindex][VBAR]; } /*if*/ };/*while*/ aaposmax = aapos; fclose(infile); printf("\n\n\n\n\n Polypeptide Amino Acid Characteristic Statistics\n\n"); nextcomment = firstcomment; while (nextcomment != NULL) { printf(" %s",nextcomment->cmnt); nextcomment = nextcomment->nxtcmnt; }; i = 1; while (i <= aaposmax) { printf("%10d ",i); for(j=1;j<=6;++j) { for(k=1;k<=10;++k) { if(i <= aaposmax) putchar(aaary[i]); ++i; }; /*for*/ putchar(' '); }; /*for*/ putchar('\n'); }; /*while*/ printf("\n\n name # wt vol vbar "); printf("mol%% wt%% vol%% vbar%%\n\n"); for(aaindex=1;aaindex<=21;++aaindex) { aastats[aaindex].numpcnt = (float)aastats[aaindex].cumnum/aastats[21].cumnum*100.0; aastats[aaindex].wtpcnt = aastats[aaindex].cumwt/aastats[21].cumwt*100.0; aastats[aaindex].volpcnt = aastats[aaindex].cumvol/aastats[21].cumvol*100.0; aastats[aaindex].vbarpcnt = aastats[aaindex].cumvbar/aastats[21].cumvbar*100.0; printf("%5.3s%5d%10.2f%10.2f%10.2f%8.2f%8.2f%8.2f%8.2f\n", aastats[aaindex].name,aastats[aaindex].cumnum, aastats[aaindex].cumwt,aastats[aaindex].cumvol, aastats[aaindex].cumvbar,aastats[aaindex].numpcnt, aastats[aaindex].wtpcnt,aastats[aaindex].volpcnt, aastats[aaindex].vbarpcnt); };/*for*/ meanvbar = aastats[21].cumvbar/aastats[21].cumnum; printf("\n vbar of protein: %5.4f\n\014",meanvbar); }/* end program aastats */