Tuesday, February 20, 2024

Fitting an ordered discrete character evolution (Mk) model where the rates of change between adjacent levels vary according to a functional form

Yesterday, I posted (1, 2) about fitting an ordered, discrete character evolution (Mk) model in which the rates of change between adjacent states were allowed to vary according to a functional form.

I now have an R function for this, that will probably eventually be added to phytools. I’m still working on it, but this is what I have so far – including print, plot, and logLik methods. This code will change before these functions go into the package!

## Fit an ordered discrete-character (Mk) model in which the forward &
## backward transition rates between adjacent states each follow a
## quadratic functional form of position along the ordered trait axis.
## Arguments:
##   tree   an object of class "phylo"
##   x      the trait data: a matrix of tip state probabilities, or a
##          numeric, factor, or character vector of tip states
##   model  functional form of the rates (currently only "quadratic")
##   ...    optional arguments: trace (passed to nlminb) and pi (root
##          prior: "equal", "fitzjohn", "mle", or a numeric vector)
## Returns an object of class c("fitFnMk","fitMk").
fitFnMk<-function(tree,x,model="quadratic",...){
	if(hasArg(trace)) trace<-list(...)$trace
	else trace<-0
	## coerce the input data to a matrix of tip state probabilities &
	## establish the ordered set of levels; numeric-like inputs get the
	## full integer range min:max so intermediate levels absent from the
	## data are still included
	if(is.matrix(x)){
		levs<-colnames(x)
	} else if(is.numeric(x)){
		levs<-min(x):max(x)
		x<-to.matrix(x,levs)
	} else if(is.factor(x)){
		if(suppressWarnings(all(!is.na(as.numeric(levels(x)))))){
			levs<-min(as.numeric(levels(x))):max(as.numeric(levels(x)))
			x<-to.matrix(x,levs)
		} else {
			levs<-sort(levels(x))
			x<-to.matrix(x,levs)
		}
	} else if(is.character(x)){
		if(suppressWarnings(all(!is.na(as.numeric(x))))){
			levs<-min(as.numeric(x)):max(as.numeric(x))
			x<-to.matrix(x,levs)
		} else {
			levs<-sort(unique(x))
			x<-to.matrix(x,levs)
		}
	}
	k<-ncol(x)
	## resolve the root prior
	if(hasArg(pi)) pi<-list(...)$pi
	else pi<-"equal"
	if(is.numeric(pi)) root.prior<-"given"
	if(pi[1]=="equal"){ 
		pi<-setNames(rep(1/k,k),levs)
		root.prior<-"flat"
	} else if(pi[1]=="fitzjohn"){ 
		root.prior<-"nuisance"
	} else if(pi[1]=="mle") root.prior<-"it's MLE"	
	## negative log-likelihood: par = c(a1,b1,c1,a2,b2,c2), the
	## coefficients of the two quadratics q1 (forward) & q2 (backward),
	## evaluated at the midpoints between adjacent levels
	lik<-function(par,pw,X,pi){
		k<-ncol(X)
		x<-1:(k-1)-0.5
		q1<-par[1]*x^2+par[2]*x+par[3]
		q2<-par[4]*x^2+par[5]*x+par[6]
		## reject a degenerate parameter set in which one direction has no
		## positive rate anywhere -- tested on the RAW polynomial values:
		## in the previous version this was checked AFTER the q[q<0]<-0
		## truncation, so all(q<0) could never be TRUE & the guard was dead
		if(all(q1<=0)||all(q2<=0)){ 
			return(Inf)
		} else {
			## negative rates are truncated to zero
			q1[q1<0]<-0
			q2[q2<0]<-0
			## tri-diagonal design matrix: forward rates on the
			## superdiagonal, backward rates on the subdiagonal
			MODEL<-matrix(0,k,k,dimnames=list(colnames(X),colnames(X)))
			MODEL[cbind(1:(k-1),2:k)]<-1:(k-1)
			MODEL[cbind(2:k,1:(k-1))]<-k:(2*k-2)
			return(-pruning(c(q1,q2),pw,X,model=MODEL,pi=pi))
		}
	}
	pw<-reorder(tree,"postorder")
	## random starting values: resample until both starting quadratics are
	## non-negative at every midpoint; the start rates use the SAME
	## polynomial as lik() (previously q1_start used -start[2]*xx, so the
	## check did not reflect the actual starting rates)
	xx<-0:(k-2)+0.5
	q1_start<-q2_start<--1
	while(any(q1_start<0)||any(q2_start<0)){
		start<-runif(n=6)
		q1_start<-start[1]*xx^2+start[2]*xx+start[3]
		q2_start<-start[4]*xx^2+start[5]*xx+start[6]
	}
	fit<-nlminb(start,lik,pw=pw,X=x,pi=pi,control=list(trace=trace))
	## fitted per-transition rates under the optimized quadratics,
	## truncated at zero exactly as in lik()
	q1_est<-fit$par[1]*xx^2+fit$par[2]*xx+fit$par[3]
	q2_est<-fit$par[4]*xx^2+fit$par[5]*xx+fit$par[6]
	q1_est[q1_est<0]<-0
	q2_est[q2_est<0]<-0
	index.matrix<-matrix(0,k,k,dimnames=list(colnames(x),colnames(x)))
	index.matrix[cbind(1:(k-1),2:k)]<-1:(k-1)
	index.matrix[cbind(2:k,1:(k-1))]<-k:(2*k-2)
	## closure over the data so the likelihood can be re-evaluated later
	lik.f<-function(par) lik(par,pw=pw,X=x,pi=pi)
	object<-list(
		logLik=-fit$objective,
		rates=c(q1_est,q2_est),
		index.matrix=index.matrix,
		states=levs,
		pi=pi,
		method="nlminb",
		root.prior=root.prior,
		opt_results=fit[c("convergence","iterations","evaluations","message")],
		par=fit$par,
		data=x,
		tree=tree,
		lik=lik.f)
	class(object)<-c("fitFnMk","fitMk")
	object
}

## S3 plot method for "fitFnMk": graphs the fitted forward (blue) &
## backward (red) transition rates between each pair of adjacent states.
plot.fitFnMk<-function(x,...){
	nstates<-length(x$states)
	## the rate vector stores the (nstates-1) forward rates first,
	## followed by the (nstates-1) backward rates
	forward<-x$rates[1:(nstates-1)]
	backward<-x$rates[nstates:(2*nstates-2)]
	## midpoints between adjacent levels on the trait axis
	midpts<-0:(nstates-2)+0.5
	plot(midpts,forward,type="b",col="blue",bty="n",las=1,
		axes=FALSE,xlab="",ylab="transition rate (q)",
		ylim=c(0,max(c(forward,backward))))
	lines(midpts,backward,type="b",col="red")
	## label each tick with the pair of adjacent states it lies between
	axis(1,at=seq(0.5,nstates-1.5,by=1),
		paste(x$states[1:(nstates-1)],"↔",x$states[2:nstates]),
		cex.axis=0.7,las=3)
	axis(2,las=1,cex.axis=0.8)
	grid()
	legend("bottomleft",c("forward","backward"),
		col=c("blue","red"),
		lty="solid",pch=1,cex=0.8)
}

## S3 logLik method for "fitFnMk": returns the log-likelihood with a
## "df" attribute giving the number of estimated parameters.
logLik.fitFnMk<-function(object,...){
	structure(object$logLik,df=length(object$par))
}

## S3 print method for objects of class "fitFnMk": reports the fitted
## rate matrix Q, the root prior pi, the log-likelihood, the optimizer
## used, & whether optimization appears to have converged.
print.fitFnMk<-function(x,digits=6,...){
	cat("Object of class \"fitFnMk\".\n\n")
	cat("Fitted (or set) value of Q:\n")
	## reconstruct Q from the rate vector via the index matrix (index 0
	## maps to rate 0), then fill the diagonal so each row sums to zero
	nn<-length(x$states)
	qmat<-matrix(NA,nn,nn)
	qmat[]<-c(0,x$rates)[x$index.matrix+1]
	diag(qmat)<-0
	diag(qmat)<--rowSums(qmat)
	dimnames(qmat)<-list(x$states,x$states)
	print(round(qmat,digits))
	cat("\nFitted (or set) value of pi:\n")
	print(round(x$pi,digits))
	cat(paste0("due to treating the root prior as (a) ",x$root.prior,".\n"))
	cat(paste("\nLog-likelihood:",round(x$logLik,digits),"\n"))
	cat(paste0("\nOptimization method used was \"",x$method,"\"\n\n"))
	if(!is.null(x$opt_results$convergence)){
		## nlminb convergence code 0 indicates successful convergence
		if(x$opt_results$convergence==0)
			cat("R thinks it has found the ML solution.\n\n")
		else cat("R thinks optimization may not have converged.\n\n")
	}
}

Let’s try it out.

Here, I’m going to simulate backward & forward rates that are both upward parabolic, but with different centers & different curvature.

## generating model for the simulation: both the forward (q1) & backward
## (q2) rates are upward-opening parabolas across the ordered levels,
## with different centers & different curvature
k<-13
## midpoints between the k=13 adjacent state pairs
x<-0:(k-2)+0.5
q1<-0.4*x^2-5.6*x+19.7
q2<-0.3*x^2-3*x+7.9
## visualize the two generating rate functions
par(mar=c(2.1,4.1,2.1,1.1))
plot(x,q1,type="b",col="blue",bty="n",las=1,
  axes=FALSE,xlab="",ylab="transition rate (q)",
  ylim=c(0,max(c(q1,q2))))
lines(x,q2,type="b",col="red")
axis(1,at=seq(0.5,k-1.5,by=1),paste(0:(k-2),"↔",1:(k-1)),
  cex.axis=0.7)
axis(2,las=1,cex.axis=0.8)
grid()
legend("bottomleft",c("forward","backward"),
  col=c("blue","red"),
  lty="solid",pch=1,cex=0.8)

plot of chunk unnamed-chunk-21

## build the generating transition matrix Q: tri-diagonal, with the
## forward rates q1 on the superdiagonal & the backward rates q2 on the
## subdiagonal; diagonal set so each row sums to zero
Q<-matrix(0,k,k,dimnames=list(0:(k-1),0:(k-1)))
Q[cbind(1:(k-1),2:k)]<-q1
Q[cbind(2:k,1:(k-1))]<-q2
diag(Q)<--rowSums(Q)
round(Q,2)
##         0      1      2     3     4     5     6     7     8     9     10     11     12
## 0  -17.00  17.00   0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00   0.00   0.00   0.00
## 1    6.48 -18.68  12.20  0.00  0.00  0.00  0.00  0.00  0.00  0.00   0.00   0.00   0.00
## 2    0.00   4.08 -12.27  8.20  0.00  0.00  0.00  0.00  0.00  0.00   0.00   0.00   0.00
## 3    0.00   0.00   2.28 -7.28  5.00  0.00  0.00  0.00  0.00  0.00   0.00   0.00   0.00
## 4    0.00   0.00   0.00  1.08 -3.67  2.60  0.00  0.00  0.00  0.00   0.00   0.00   0.00
## 5    0.00   0.00   0.00  0.00  0.48 -1.48  1.00  0.00  0.00  0.00   0.00   0.00   0.00
## 6    0.00   0.00   0.00  0.00  0.00  0.47 -0.68  0.20  0.00  0.00   0.00   0.00   0.00
## 7    0.00   0.00   0.00  0.00  0.00  0.00  1.07 -1.27  0.20  0.00   0.00   0.00   0.00
## 8    0.00   0.00   0.00  0.00  0.00  0.00  0.00  2.28 -3.28  1.00   0.00   0.00   0.00
## 9    0.00   0.00   0.00  0.00  0.00  0.00  0.00  0.00  4.08 -6.68   2.60   0.00   0.00
## 10   0.00   0.00   0.00  0.00  0.00  0.00  0.00  0.00  0.00  6.47 -11.48   5.00   0.00
## 11   0.00   0.00   0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00   9.47 -17.68   8.20
## 12   0.00   0.00   0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00   0.00  13.07 -13.07
## simulate a 2,000-taxon pure-birth tree & evolve the ordered discrete
## character on it under Q, starting from state "0" at the root
tree<-pbtree(n=2000)
y<-sim.Mk(tree,Q,anc=as.character(0))
head(y,20)
##  t435  t952 t1204 t1205 t1208 t1234 t1235  t581 t1576 t1800 t1801 t1590 t1591 t1002 t1865 t1866 t1873 
##     5     6     4     5     6     7     6     6     5     5     6     5     5     5     5     5     5 
## t1874 t1835 t1836 
##     5     6     6 
## Levels: 0 1 10 12 2 3 4 5 6 7 8 9

OK, now let’s see if we can fit the model to our data!

library(phytools)
## pruning is not exported from phytools, so pull it from the namespace
pruning<-phytools:::pruning
## fit the model; trace=1 prints nlminb's progress at each iteration
fit<-fitFnMk(tree,y,trace=1)
##   0:     5619.5046: 0.440322 0.248076 0.564876 0.767566 0.553588 0.397636
##   1:     4545.6963: 0.592545 0.286181 0.576451 0.655545 0.523996 0.387347
##   2:     3967.3999: 0.192107 0.622472 0.749289 0.442418 0.0612808 0.164274
##   3:     3812.1610: 0.115245 0.688030 0.782642 0.389890 -0.0310360 0.120634
##   4:     3745.3094: 0.167485 0.701877 0.786293 0.327331 -0.0472463 0.116171
##   5:     3518.9996: 0.0997763 0.724955 0.798367 0.325377 -0.0874861 0.100532
##   6:     3181.9647: 0.0188891 0.781517 0.825029 0.237321 -0.183003 0.0660158
##   7:     3091.6312: -0.00139599 0.791387 0.829933 0.221442 -0.200770 0.0596822
##   8:     2919.5065: -0.0451439 0.809005 0.838947 0.189719 -0.234215 0.0481494
##   9:     2871.5962: -0.0530649 0.811923 0.840456 0.181530 -0.240359 0.0460994
##  10:     2763.9412: -0.0666893 0.815887 0.842589 0.161348 -0.250685 0.0428893
##  11:     2741.5435: -0.0701045 0.815759 0.842657 0.157298 -0.251725 0.0426885
##  12:     2720.1886: -0.0768597 0.815502 0.842792 0.149093 -0.253654 0.0424275
##  13:     2679.6975: -0.0763914 0.816377 0.843218 0.128280 -0.259213 0.0410025
##  14:     2676.7504: -0.0768192 0.816323 0.843212 0.128304 -0.259231 0.0409948
##  15:     2676.2248: -0.0769047 0.816311 0.843211 0.128308 -0.259235 0.0409930
##  16:     2675.3110: -0.0770756 0.816288 0.843209 0.128316 -0.259245 0.0409890
##  17:     2675.1645: -0.0771097 0.816283 0.843208 0.128318 -0.259247 0.0409880
##  18:     2674.9385: -0.0771776 0.816273 0.843207 0.128322 -0.259253 0.0409857
##  19:     2674.8652: -0.0772344 0.816261 0.843206 0.128326 -0.259261 0.0409824
##  20:     2674.8547: -0.0772138 0.816271 0.843214 0.128330 -0.259310 0.0409629
##  21:     2674.8340: -0.0772391 0.816330 0.843241 0.128315 -0.259398 0.0409293
##  22:     2674.8069: -0.0772280 0.816439 0.843293 0.128293 -0.259584 0.0408575
##  23:     2674.7683: -0.0772769 0.816707 0.843407 0.128216 -0.259918 0.0407301
##  24:     2674.7386: -0.0772997 0.817255 0.843637 0.128059 -0.260582 0.0404765
##  25:     2674.7353: -0.0773654 0.817497 0.843739 0.127989 -0.260875 0.0403650
##  26:     2674.7112: -0.0773249 0.817366 0.843687 0.128035 -0.260741 0.0404157
##  27:     2674.7023: -0.0773165 0.817095 0.843580 0.128129 -0.260472 0.0405173
##  28:     2674.6824: -0.0773503 0.817591 0.843787 0.127983 -0.261059 0.0402942
##  29:     2674.5504: -0.0772715 0.816383 0.843352 0.128522 -0.260182 0.0406200
##  30:     2674.5126: -0.0772328 0.816382 0.843352 0.128525 -0.260186 0.0406180
##  31:     2674.5038: -0.0772462 0.816395 0.843360 0.128523 -0.260218 0.0406059
##  32:     2674.4914: -0.0772311 0.816423 0.843375 0.128520 -0.260283 0.0405805
##  33:     2674.4756: -0.0772575 0.816511 0.843413 0.128492 -0.260393 0.0405384
##  34:     2674.4586: -0.0772550 0.816692 0.843489 0.128439 -0.260617 0.0404533
##  35:     2674.4458: -0.0773168 0.817078 0.843645 0.128311 -0.261035 0.0402951
##  36:     2674.4022: -0.0772591 0.816619 0.843478 0.128448 -0.260696 0.0404287
##  37:     2674.3302: -0.0773320 0.817294 0.843774 0.128205 -0.261617 0.0400871
##  38:     2674.1435: -0.0771998 0.816271 0.843435 0.128570 -0.261141 0.0402768
##  39:     2674.1422: -0.0771964 0.815265 0.843096 0.128905 -0.260605 0.0404894
##  40:     2673.9742: -0.0770713 0.814769 0.842927 0.129064 -0.260347 0.0405936
##  41:     2673.8289: -0.0769384 0.813075 0.842382 0.129530 -0.259689 0.0408800
##  42:     2673.7608: -0.0769598 0.812977 0.842371 0.129482 -0.259842 0.0408222
##  43:     2651.9230: -0.0717324 0.754353 0.833554 0.121711 -0.321808 0.0258428
##  44:     2651.6084: -0.0718092 0.754338 0.833552 0.121750 -0.321806 0.0258421
##  45:     2650.3677: -0.0716484 0.754199 0.833524 0.122493 -0.321777 0.0258278
##  46:     2502.9946: -0.0249576 0.224502 0.723123 0.116577 -0.581382 -0.0341298
##  47:     2434.9627: -0.0318817 0.306658  1.02720 0.223652 -1.08006 0.0468792
##  48:     2353.0401: 0.0135939 -0.422639  3.09311 0.239315 -1.29180 0.202375
##  49:     2351.1890: 0.0106633 -0.375380  3.03597 0.255054 -1.35912 0.209446
##  50:     2349.8156: 0.0142770 -0.411182  3.07014 0.256840 -1.39806 0.289162
##  51:     2345.6122: 0.0138391 -0.410399  3.05685 0.282820 -1.56206 0.421821
##  52:     2322.4292: 0.0114309 -0.380089  3.01556 0.345312 -2.03472  1.12581
##  53:     2301.8112: 0.00223427 -0.262308  2.85564 0.585538 -3.87700  3.97492
##  54:     2295.4585: 0.0109939 -0.372558  3.20722 0.609907 -4.04704  4.20349
##  55:     2290.4361: 0.0245763 -0.556720  3.78341 0.668199 -4.47305  4.79992
##  56:     2288.5906: 0.0324223 -0.641078  4.02218 0.717567 -4.84156  5.34951
##  57:     2286.5509: 0.0444851 -0.758302  4.32179 0.800522 -5.47313  6.31436
##  58:     2274.4481: 0.0543189 -0.841158  4.50363 0.877403 -6.06211  7.23256
##  59:     2273.6906: 0.0928808 -1.18087  5.29929  1.18231 -8.40153  10.9012
##  60:     2271.5123: 0.0911586 -1.18513  5.31944  1.12089 -7.94024  10.1914
##  61:     2270.1599: 0.0819639 -1.12439  5.18159 0.971487 -6.81044  8.48299
##  62:     2269.8328: 0.0843078 -1.13762  5.20538  1.00062 -7.03717  8.85474
##  63:     2269.5876: 0.0841675 -1.13294  5.18286 0.986612 -6.94229  8.75673
##  64:     2269.0439: 0.0844437 -1.13133  5.14353 0.932938 -6.57355  8.34570
##  65:     2268.7399: 0.0866145 -1.15485  5.17441 0.884917 -6.24833  7.99969
##  66:     2268.4569: 0.0903963 -1.20045  5.28376 0.862249 -6.10504  7.88919
##  67:     2267.6268: 0.104375 -1.37488  5.75042 0.819641 -5.86595  7.84261
##  68:     2266.8479: 0.117383 -1.54302  6.24137 0.801588 -5.80306  8.03179
##  69:     2265.4679: 0.139849 -1.84336  7.16874 0.781484 -5.79437  8.58034
##  70:     2264.0105: 0.159218 -2.11595  8.05326 0.759234 -5.78774  9.20844
##  71:     2249.9811: 0.186079 -2.52296  9.43102 0.702827 -5.66844  10.2429
##  72:     2186.4800: 0.299134 -4.22858  15.1268 0.428593 -4.86207  14.0303
##  73:     2099.0156: 0.283520 -4.04895  14.4785 0.550645 -5.53801  14.2049
##  74:     2065.4341: 0.273250 -3.82245  13.7248 0.572593 -5.55000  13.6263
##  75:     2056.6584: 0.280244 -3.92684  14.0673 0.562538 -5.53726  13.9019
##  76:     2054.2095: 0.279049 -3.92703  14.0673 0.562815 -5.53722  13.9020
##  77:     2054.0517: 0.279044 -3.92685  14.0665 0.562376 -5.53713  13.9011
##  78:     2054.0166: 0.278872 -3.92642  14.0649 0.562369 -5.53675  13.8993
##  79:     2053.4202: 0.273099 -3.83822  13.7508 0.560982 -5.46698  13.5719
##  80:     2051.0832: 0.268404 -3.75161  13.3631 0.529922 -5.17325  12.8723
##  81:     2048.1828: 0.245418 -3.39228  11.9435 0.418450 -4.09217  10.2582
##  82:     2047.4418: 0.247373 -3.43298  12.1349 0.426701 -4.16427  10.4217
##  83:     2045.3762: 0.251017 -3.51520  12.5219 0.419925 -4.08563  10.2185
##  84:     2040.9328: 0.267980 -3.79566  13.6336 0.396413 -3.83720  9.60309
##  85:     2033.7634: 0.325473 -4.62925  16.6212 0.359750 -3.47043  8.75527
##  86:     2028.8334: 0.392702 -5.53713  19.6588 0.348532 -3.40128  8.73543
##  87:     2026.2280: 0.450516 -6.29270  22.0911 0.345545 -3.43338  8.98771
##  88:     2025.6653: 0.466136 -6.47872  22.6441 0.359403 -3.58329  9.37616
##  89:     2025.5206: 0.465926 -6.45954  22.5221 0.364920 -3.63963  9.50292
##  90:     2025.4854: 0.458713 -6.35512  22.1502 0.363369 -3.61459  9.40737
##  91:     2025.4833: 0.456885 -6.33121  22.0719 0.361979 -3.59853  9.36125
##  92:     2025.4832: 0.456503 -6.32676  22.0590 0.361533 -3.59381  9.34889
##  93:     2025.4832: 0.456445 -6.32613  22.0574 0.361408 -3.59265  9.34620
##  94:     2025.4832: 0.456438 -6.32606  22.0572 0.361400 -3.59261  9.34619
##  95:     2025.4832: 0.456438 -6.32606  22.0572 0.361400 -3.59261  9.34622
fit
## Object of class "fitFnMk".
## 
## Fitted (or set) value of Q:
##             0          1          2         3         4         5         6         7         8
## 0  -19.008277  19.008277   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## 1    7.640263 -21.235358  13.595095  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## 2    0.000000   4.770449 -13.865238  9.094790  0.000000  0.000000  0.000000  0.000000  0.000000
## 3    0.000000   0.000000   2.623435 -8.130795  5.507360  0.000000  0.000000  0.000000  0.000000
## 4    0.000000   0.000000   0.000000  1.199222 -4.032029  2.832806  0.000000  0.000000  0.000000
## 5    0.000000   0.000000   0.000000  0.000000  0.497811 -1.568939  1.071128  0.000000  0.000000
## 6    0.000000   0.000000   0.000000  0.000000  0.000000  0.519199 -0.741526  0.222326  0.000000
## 7    0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  1.263389 -1.549789  0.286400
## 8    0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000  2.730379 -3.993729
## 9    0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  4.920170
## 10   0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## 11   0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## 12   0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##            9         10         11         12
## 0   0.000000   0.000000   0.000000   0.000000
## 1   0.000000   0.000000   0.000000   0.000000
## 2   0.000000   0.000000   0.000000   0.000000
## 3   0.000000   0.000000   0.000000   0.000000
## 4   0.000000   0.000000   0.000000   0.000000
## 5   0.000000   0.000000   0.000000   0.000000
## 6   0.000000   0.000000   0.000000   0.000000
## 7   0.000000   0.000000   0.000000   0.000000
## 8   1.263349   0.000000   0.000000   0.000000
## 9  -8.073345   3.153175   0.000000   0.000000
## 10  7.832762 -13.788639   5.955876   0.000000
## 11  0.000000  11.468155 -21.139608   9.671454
## 12  0.000000   0.000000  15.826348 -15.826348
## 
## Fitted (or set) value of pi:
##        0        1        2        3        4        5        6        7        8        9       10 
## 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 
##       11       12 
## 0.076923 0.076923 
## due to treating the root prior as (a) flat.
## 
## Log-likelihood: -2025.48317 
## 
## Optimization method used was "nlminb"
## 
## R thinks it has found the ML solution.
plot(fit)

plot of chunk unnamed-chunk-25

To get a better picture, let’s overlay our generating model to see how close we were.

## overlay the generating rate functions (dotted) on the fitted rates;
## q1 & q2 are already ordered by adjacent-state pair, so plot them
## directly against the midpoints x -- the previous indexing,
## q1[fit$states+1] over x[1:length(fit$states)], read one element past
## the ends of both vectors (12 rates vs. 13 states), appending an NA
## point, & relied on the states being 0-based integers
par(mar=c(5.1,4.1,1.1,1.1))
plot(fit)
lines(x,q1,col="blue",lty="dotted")
lines(x,q2,col="red",lty="dotted")
legend("bottomright","generating",lty="dotted",cex=0.8)

plot of chunk unnamed-chunk-26

That’s not bad.

No comments:

Post a Comment

Note: due to the very large amount of spam, all comments are now automatically submitted for moderation.