/* Ne.c, Copyright, Ziheng Yang, September 1996. Estimation of Ne using the methods of Takahata (1995. TPB 48:198-221) and Yang (1997. Genet Res Cam 69: 111-116) cc -o Ne Ne.c -lm */ #include #include #include #include #define square(a) ((a)*(a)) #define FOR(i,n) for(i=0; i(b)?(a):(b)) double lfun_1s (double x[], int nx); double lfun_2s (double x[], int nx); void error (char * message); int matinv (double x[], int n, int m, double space[]); int Hessian (int nx, double x[], double f, double g[], double H[], double (*fun)(double x[], int n), double space[]); int ming2 (FILE *fout, double *f, double (*fun)(double x[], int n), int (*dfun)(double x[], double *f, double dx[], int n), double x[], double xb[][2], double space[], double e, int n); int DiscreteGamma (double freqK[], double rK[], double alfa, double beta, int K, int median); double PointChi2 (double prob, double v); #define PointGamma(prob,alpha,beta) PointChi2(prob,2.0*(alpha))/(2.0*(beta)) double PointNormal (double prob); double CDFNormal (double x); double LnGamma (double alpha); double IncompleteGamma (double x, double alpha, double ln_gamma_alpha); #define NCATG 16 static int *ni, *ki, nloci; double *ri, mutau; struct CommonInf { double alfa, freqK[NCATG], rK[NCATG]; } com; int main (int argc, char* argv[]) { char infile[64]="Ne.dat"; FILE *fin; int np=1, ns, i,j1,j2; double space[1000], *var=space+10, (*plfun)(double x[],int np)=lfun_1s; double lnL, x[2], xb[][2]={{1e-5,.05}, {1e-5,.05}}; if (argc>1) strcpy(infile, argv[1]); if ((fin=fopen(infile,"r"))==NULL) error("infile err"); fscanf (fin, "%d", &nloci); printf ("\n%d loci.\nNumber of species (1 or 2)? ", nloci); scanf ("%d", &ns); printf ("alpha for rate variation among loci?\n"); printf ("\t(0: constant; -1: rates in file; positive: gamma model)\n"); scanf ("%lf", &com.alfa); if (com.alfa<0 && com.alfa!=-1) error ("alpha<0"); ri=(double*)malloc(nloci*(sizeof(double)+2*sizeof(int))); if (ri==NULL) error("oom"); ni=(int*)(ri+nloci); ki=ni+nloci; for (i=0; i0) DiscreteGamma (com.freqK,com.rK,com.alfa,com.alfa,NCATG,0); for (j1=0; j1<5; j1++) { printf ("\n\nRun %d out of 5 (Ctrl-C to break):\n", j1+1); printf ("Initial%s for theta%s%s", (np==2?"s":""),(ns==2?"0":""),(np==2?" and gamma":"")); printf (" (between %.5f and %.5f)? ", xb[0][0],xb[0][1]); FOR (i,np) scanf("%lf", &x[i]); printf ("\nlnL0=%9.4f\n", -plfun(x,np)); ming2(F0, &lnL, plfun, NULL, x, xb, space, 1e-7, np); Hessian (np, x, lnL, space, var, plfun, var+np*np); matinv (var, np, np, var+np*np); var[0]=(var[0]>0?sqrt(var[0]):0); if (np==2) var[3]=(var[3]>0?sqrt(var[3]):0); printf ("\n\nlnL =%9.4f at\n", -lnL); printf ("theta%s: %9.5f +- %9.5f\n", (ns==2?"0":" "), x[0],var[0]); if (np==2) printf ("gamma : %9.5f +- %9.5f\n", x[1],var[3]); } free (ri); return (0); } double lfun_1s (double x[], int nx) { int i, ir; double lnL, theta=x[0], a, plocus; if (com.alfa==0) /* one rate for all loci */ for (i=0,lnL=0; i0) /* gamma random rates for loci */ for (i=0,lnL=0; i0) /* gamma random rates for loci */ for (i=0,lnL=0; ilimit) return (invers?0:1); if (x0, accurate to 10 decimal places. Stirling's formula is used for the central polynomial part of the procedure. Pike MC & Hill ID (1966) Algorithm 291: Logarithm of the gamma function. Communications of the Association for Computing Machinery, 9:684 */ double x=alpha, f=0, z; if (x<7) { f=1; z=x-1; while (++z<7) f*=z; x=z; f=-log(f); } z = 1/(x*x); return f + (x-0.5)*log(x) - x + .918938533204673 + (((-.000595238095238*z+.000793650793651)*z-.002777777777778)*z +.083333333333333)/x; } double IncompleteGamma (double x, double alpha, double ln_gamma_alpha) { /* returns the incomplete gamma ratio I(x,alpha) where x is the upper limit of the integration and alpha is the shape parameter. returns (-1) if in error ln_gamma_alpha = ln(Gamma(alpha)), is almost redundant. (1) series expansion if (alpha>x || x<=1) (2) continued fraction otherwise RATNEST FORTRAN by Bhattacharjee GP (1970) The incomplete gamma integral. Applied Statistics, 19: 285-287 (AS32) */ int i; double p=alpha, g=ln_gamma_alpha; double accurate=1e-8, overflow=1e30; double factor, gin=0, rn=0, a=0,b=0,an=0,dif=0, term=0, pn[6]; if (x==0) return (0); if (x<0 || p<=0) return (-1); factor=exp(p*log(x)-x-g); if (x>1 && x>=p) goto l30; /* (1) series expansion */ gin=1; term=1; rn=p; l20: rn++; term*=x/rn; gin+=term; if (term > accurate) goto l20; gin*=factor/p; goto l50; l30: /* (2) continued fraction */ a=1-p; b=a+x+1; term=0; pn[0]=1; pn[1]=x; pn[2]=x+1; pn[3]=x*b; gin=pn[2]/pn[3]; l32: a++; b+=2; term++; an=a*term; for (i=0; i<2; i++) pn[i+4]=b*pn[i+2]-an*pn[i]; if (pn[5] == 0) goto l35; rn=pn[4]/pn[5]; dif=fabs(gin-rn); if (dif>accurate) goto l34; if (dif<=accurate*rn) goto l42; l34: gin=rn; l35: for (i=0; i<4; i++) pn[i]=pn[i+2]; if (fabs(pn[4]) < overflow) goto l32; for (i=0; i<4; i++) pn[i]/=overflow; goto l32; l42: gin=1-factor*gin; l50: return (gin); } double PointChi2 (double prob, double v) { /* returns z so that Prob{x.999998 || v<=0) return (-1); g = LnGamma (v/2); xx=v/2; c=xx-1; if (v >= -1.24*log(p)) goto l1; ch=pow((p*xx*exp(g+xx*aa)), 1/xx); if (ch-e<0) return (ch); goto l4; l1: if (v>.32) goto l3; ch=0.4; a=log(1-p); l2: q=ch; p1=1+ch*(4.67+ch); p2=ch*(6.73+ch*(6.66+ch)); t=-0.5+(4.67+2*ch)/p1 - (6.73+ch*(13.32+3*ch))/p2; ch-=(1-exp(a+g+.5*ch+c*aa)*p2/p1)/t; if (fabs(q/ch-1)-.01 <= 0) goto l4; else goto l2; l3: x=PointNormal (p); p1=0.222222/v; ch=v*pow((x*sqrt(p1)+1-p1), 3.0); if (ch>2.2*v+6) ch=-2*(log(1-p)-c*log(.5*ch)+g); l4: q=ch; p1=.5*ch; if ((t=IncompleteGamma (p1, xx, g))<0) { printf ("\nerr IncompleteGamma"); return (-1); } p2=p-t; t=p2*exp(xx*aa+g+p1-c*log(ch)); b=t/ch; a=0.5*t-b*c; s1=(210+a*(140+a*(105+a*(84+a*(70+60*a))))) / 420; s2=(420+a*(735+a*(966+a*(1141+1278*a))))/2520; s3=(210+a*(462+a*(707+932*a)))/2520; s4=(252+a*(672+1182*a)+c*(294+a*(889+1740*a)))/5040; s5=(84+264*a+c*(175+606*a))/2520; s6=(120+c*(346+127*c))/5040; ch+=t*(1+0.5*t*s1-b*c*(s1-b*(s2-b*(s3-b*(s4-b*(s5-b*s6)))))); if (fabs(q/ch-1) > e) goto l4; return (ch); } int DiscreteGamma (double freqK[], double rK[], double alfa, double beta, int K, int median) { /* discretization of gamma distribution with equal proportions in each category */ int i; double gap05=1.0/(2.0*K), t, factor=alfa/beta*K, lnga1; if (median) { for (i=0; i=n */ register int i,j,k; int *irow=(int*) space; double ee=1.0e-30, t,t1,xmax; double det=1.0; FOR (i,n) { xmax = 0.; for (j=i; j=0; i--) { if (irow[i] == i) continue; FOR(j,n) { t = x[j*m+i]; x[j*m+i] = x[j*m + irow[i]]; x[j*m + irow[i]] = t; } } return (0); } /****************************************** Minimization *******************************************/ int H_end (double x0[], double x1[], double f0, double f1, double e1, double e2, int n) /* Himmelblau termination rule. return 1 for stop, 0 otherwise. */ { double r; if ( (r=norm(x0,n))= r) return (0); r=fabs(f0); if (r= r) return (0); return (1); } int gradient (int n, double x[], double f0, double g[], double (*fun)(double x[],int n), double space[], int Central) { /* f0=fun(x) is given for Central=0 */ int i,j; double *x0=space, *x1=space+n, eh0=1e-7, eh; if (Central) { FOR(i,n) { FOR(j, n) x0[j]=x1[j]=x[j]; eh=pow(eh0*(fabs(x[i])+1), 0.67); x0[i]-=eh; x1[i]+=eh; g[i] = ((*fun)(x1,n) - (*fun)(x0,n))/(eh*2.0); } } else { FOR(i,n) { FOR(j, n) x1[j]=x[j]; eh=eh0*(fabs(x[i])+1); x1[i]+=eh; g[i] = ((*fun)(x1,n)-f0)/eh; } } return(0); } int Hessian (int n, double x[], double f0, double g[], double H[], double (*fun)(double x[], int n), double space[]) { /* Hessian matrix H[n][n] by the central difference method # of function calls: (1+2*n*n) */ int i,j,k; double *x1=space, eh0=7e-6, *eh=x1+n, f11,f22,f12,f21; /* 1:+ 2:- */ /* eh0=1e-5 or 1e-6 */ FOR (k,n) eh[k] = eh0*(1+fabs(x[k])); FOR (i,n) { for (j=i; j=0 */ if (noisy>2) printf ("\n%4d h-lim-p%9.4f%9.4f%9.4f", Iround+1, h, limit, norm(p,n)); if (h<=0 || limit=limit) { if (noisy>2) printf ("\nh-lim-p:%20.8e%20.8e%20.8e%12.6f\n",h,limit,norm(p,n),*f); return (0); } a0=a1=0; f1=f0=*f; a2=a0+h; f2=fun_ls(a2, fun,x0,p,x,n); if (f2>f1) { for (; ;) { h/=factor; if (hlimit) h=limit; a3=a0+h; f3=fun_ls(a3, fun,x0,p,x,n); if (f3>=f2) break; a1=a2; f1=f2; a2=a3; f2=f3; if (h>=limit) { if (noisy>2) printf("%14.6f%3c%7.4f%6d", *f=f3, 'm', a3, NFunCall); *f=f3; return(a3); } } } a4=(a2+a3)/2; f4=fun_ls(a4, fun,x0,p,x,n); if (f4a2 || a3f1 || f2>f3) { puts ("\n? in ls2!"); break; } a4 = 2*((a2-a3)*f1+(a3-a1)*f2+(a1-a2)*f3); if (a4>small*.01) a4 = ((a2*a2-a3*a3)*f1+(a3*a3-a1*a1)*f2+(a1*a1-a2*a2)*f3)/a4; if (a4>a3 || a4=e2 && fabs(f2-f4)<=e1*fabs(f2)) || (fabs(f2).2*fabs(a1-a2)) { if (f1>=f4 && f4<=f2) { a3=a2; a2=a4; f3=f2; f2=f4; } else { a1=a4; f1=f4; } } else { if (f4>f2) { a5=(a2+a3)/2; f5=fun_ls(a5, fun,x0,p,x,n); if (f5>f2) { a1=a4; a3=a5; f1=f4; f3=f5; } else { a1=a2; a2=a5; f1=f2; f2=f5; } } else { a5=(a1+a4)/2; f5=fun_ls(a5, fun,x0,p,x,n); if (f5>=f4 && f4<=f2) { a3=a2; a2=a4; a1=a5; f3=f2; f2=f4; f1=f5; } else { a6=(a1+a5)/2; f6=fun_ls(a6, fun,x0,p,x,n); if (f6>f5) { a1=a6; a2=a5; a3=a4; f1=f6; f2=f5; f3=f4; } else { a2=a6; a3=a5; f2=f6; f3=f5; } } } } } else { /* fig 2.2.9 */ if (fabs(a2-a4)>.2*fabs(a2-a3)) { if (f2>=f4 && f4<=f3) { a1=a2; a2=a4; f1=f2; f2=f4; } else { a3=a4; f3=f4; } } else { if (f4>f2) { a5=(a1+a2)/2; f5=fun_ls(a5, fun,x0,p,x,n); if (f5>f2) { a1=a5; a3=a4; f1=f5; f3=f4; } else { a3=a2; a2=a5; f3=f2; f2=f5; } } else { a5=(a3+a4)/2; f5=fun_ls(a5, fun,x0,p,x,n); if (f2>=f4 && f4<=f5) { a1=a2; a2=a4; a3=a5; f1=f2; f2=f4; f3=f5; } else { a6=(a3+a5)/2; f6=fun_ls(a6, fun,x0,p,x,n); if (f6>f5) { a1=a4; a2=a5; a3=a6; f1=f4; f2=f5; f3=f6; } else { a1=a5; a2=a6; f1=f5; f2=f6; } } } } } } if (f2>f0 && f4>f0) a4=0; if (f2<=f4) { *f=f2; a4=a2; } else *f=f4; if (noisy>2) printf ("%14.6f%3d%7.4f%6d", *f, ii, a4, NFunCall); return (a4); } int gradient2 (int n, double x[], double f0, double g[], double (*fun)(double x[],int n), double space[], int xmark[]); extern int noisy, Iround, NFunCall; extern double SIZEp; int gradient2 (int n, double x[], double f0, double g[], double (*fun)(double x[],int n), double space[], int xmark[]) { /* f0=fun(x) is provided. xmark=0: central; 1: upper; -1: down */ int i,j; double *x0=space, *x1=space+n, eh0=1e-7, eh; FOR (i,n) { eh=eh0*(fabs(x[i])+1); if (xmark[i]==0 && SIZEp<1) { /* central */ FOR (j, n) x0[j]=x1[j]=x[j]; eh=pow(eh,.67); x0[i]-=eh; x1[i]+=eh; g[i]=((*fun)(x1,n)-(*fun)(x0,n))/(eh*2.0); } else { /* forward */ FOR (j, n) x1[j]=x[j]; if (xmark[i]) eh*=-xmark[i]; x1[i] += eh; g[i]=((*fun)(x1,n)-f0)/eh; } } return(0); } int ming2 (FILE *fout, double *f, double (*fun)(double x[], int n), int (*dfun)(double x[], double *f, double dx[], int n), double x[], double xb[][2], double space[], double e, int n) { /* n-D minimization with bounds using the BFGS algorithm g0[n] g[n] p[n] x[n] y[n] s[n] z[n] H[n*n] tv[2*n] using bound() */ int i,j, i1,i2,it, maxround=1000, fail=0, *xmark, *ix, nfr=n; double small=1.e-20; /* small value for checking |w|=0 */ double f0, *g0, *g, *p, *x0, *y, *s, *z, *H, *C, *tv; double w,v, alfa, am, h; f0 = *f = (*fun)(x,n); if (noisy>2) { printf ("\n\nIterating by ming2\nInitial: fx= %12.6f\nx=", f0); FOR (i,n) printf ("%8.4f", x[i]); FPN (F0); } g0=space; g=g0+n; p=g+n; x0=p+n; y=x0+n; s=y+n; z=s+n; H=z+n; C=H+n*n, tv=C+n*n; xmark=(int*)(tv+2*n); ix=xmark+n; FOR (i,n) { xmark[i]=0; ix[i]=i; } xtoy (x, x0, n); if (dfun) (*dfun) (x0, &f0, g0, n); else gradient2 (n, x0, f0, g0, fun, tv, xmark); SIZEp=0; identity (H,nfr); FOR (Iround, maxround) { for (i=0,zero(p,n); i0 && (xb[i][1]-x0[i])/p[i].1 && noisy>2) printf("\nSIZEp:%9.4f Iround:%5d", SIZEp, Iround+1); Iround=maxround; break; } else { if(noisy>2) printf("\a.. "); identity(H,nfr); fail=1; } } else { fail=0; FOR(i,n) x[i]=x0[i]+alfa*p[i]; if (fout) { fprintf (fout, "\n%3d %7.4f%14.6f x", Iround+1, SIZEp, *f); FOR (i,n) fprintf (fout, "%8.5f ", x[i]); fflush (fout); } if (SIZEp<0.0001 && H_end (x0,x,f0,*f,e,e,n)) { xtoy(x,x0,n); break; } } if (dfun) (*dfun) (x, f, g, n); else gradient2 (n, x, *f, g, fun, tv, xmark); /* modify the working set */ FOR (i, n) { /* add constraints, reduce H */ if (xmark[i]) continue; if (fabs(x[i]-xb[i][0])<1e-6 && -g[i]<0) xmark[i]=-1; else if (fabs(x[i]-xb[i][1])<1e-6 && -g[i]>0) xmark[i]=1; if (xmark[i]==0) continue; xtoy (H, C, nfr*nfr); FOR (it, nfr) if (ix[it]==i) break; for (i1=it; i1=it))*(nfr+1) + i2+(i2>=it)]; } for (i=0,it=0,w=0; iw) { it=i; w=-g[i]; } else if (xmark[i]==1 && -g[i]<-w) { it=i; w=g[i]; } } if (w>10*SIZEp/nfr) { /* *** */ xtoy (H, C, nfr*nfr); FOR (i1,nfr) FOR (i2,nfr) H[i1*(nfr+1)+i2]=C[i1*nfr+i2]; FOR (i1,nfr+1) H[i1*(nfr+1)+nfr]=H[nfr*(nfr+1)+i1]=0; H[(nfr+1)*(nfr+1)-1]=1; xmark[it]=0; ix[nfr++]=it; } if (noisy>2) { printf (" |%3d free /%d:", nfr, n); /* FOR (i,n) if (xmark[i]) printf ("%4d", i+1); */ } for (i=0,f0=*f; i