Tuesday, February 20, 2024

Fitting an ordered discrete character evolution (Mk) model where the rates of change between adjacent levels vary according to a functional form

Yesterday, I posted (1, 2) about fitting an ordered, discrete character evolution (Mk) model in which the rates of change between adjacent states were allowed to vary according to a functional form.

I now have an R function for this, that will probably eventually be added to phytools. I’m still working on it, but this is what I have so far – including print, plot, and logLik methods. This code will change before these functions go into the package!

## Fit an ordered discrete-character (Mk) model in which the forward &
## backward transition rates between adjacent states each follow a
## quadratic functional form of position along the ordered trait axis.
## Arguments:
##   tree   an object of class "phylo"
##   x      the trait data: a matrix of tip state probabilities, or a
##          numeric, factor, or character vector of tip states
##   model  functional form of the rates (currently only "quadratic")
##   ...    optional arguments: trace (passed to nlminb) and pi (root
##          prior: "equal", "fitzjohn", "mle", or a numeric vector)
## Returns an object of class c("fitFnMk","fitMk").
fitFnMk<-function(tree,x,model="quadratic",...){
	if(hasArg(trace)) trace<-list(...)$trace
	else trace<-0
	## coerce the input data to a matrix of tip state probabilities &
	## establish the ordered set of levels; numeric-like inputs get the
	## full integer range min:max so intermediate levels absent from the
	## data are still included
	if(is.matrix(x)){
		levs<-colnames(x)
	} else if(is.numeric(x)){
		levs<-min(x):max(x)
		x<-to.matrix(x,levs)
	} else if(is.factor(x)){
		if(suppressWarnings(all(!is.na(as.numeric(levels(x)))))){
			levs<-min(as.numeric(levels(x))):max(as.numeric(levels(x)))
			x<-to.matrix(x,levs)
		} else {
			levs<-sort(levels(x))
			x<-to.matrix(x,levs)
		}
	} else if(is.character(x)){
		if(suppressWarnings(all(!is.na(as.numeric(x))))){
			levs<-min(as.numeric(x)):max(as.numeric(x))
			x<-to.matrix(x,levs)
		} else {
			levs<-sort(unique(x))
			x<-to.matrix(x,levs)
		}
	}
	k<-ncol(x)
	## resolve the root prior
	if(hasArg(pi)) pi<-list(...)$pi
	else pi<-"equal"
	if(is.numeric(pi)) root.prior<-"given"
	if(pi[1]=="equal"){ 
		pi<-setNames(rep(1/k,k),levs)
		root.prior<-"flat"
	} else if(pi[1]=="fitzjohn"){ 
		root.prior<-"nuisance"
	} else if(pi[1]=="mle") root.prior<-"it's MLE"	
	## negative log-likelihood: par = c(a1,b1,c1,a2,b2,c2), the
	## coefficients of the two quadratics q1 (forward) & q2 (backward),
	## evaluated at the midpoints between adjacent levels
	lik<-function(par,pw,X,pi){
		k<-ncol(X)
		x<-1:(k-1)-0.5
		q1<-par[1]*x^2+par[2]*x+par[3]
		q2<-par[4]*x^2+par[5]*x+par[6]
		## reject a degenerate parameter set in which one direction has no
		## positive rate anywhere -- tested on the RAW polynomial values:
		## in the previous version this was checked AFTER the q[q<0]<-0
		## truncation, so all(q<0) could never be TRUE & the guard was dead
		if(all(q1<=0)||all(q2<=0)){ 
			return(Inf)
		} else {
			## negative rates are truncated to zero
			q1[q1<0]<-0
			q2[q2<0]<-0
			## tri-diagonal design matrix: forward rates on the
			## superdiagonal, backward rates on the subdiagonal
			MODEL<-matrix(0,k,k,dimnames=list(colnames(X),colnames(X)))
			MODEL[cbind(1:(k-1),2:k)]<-1:(k-1)
			MODEL[cbind(2:k,1:(k-1))]<-k:(2*k-2)
			return(-pruning(c(q1,q2),pw,X,model=MODEL,pi=pi))
		}
	}
	pw<-reorder(tree,"postorder")
	## random starting values: resample until both starting quadratics are
	## non-negative at every midpoint; the start rates use the SAME
	## polynomial as lik() (previously q1_start used -start[2]*xx, so the
	## check did not reflect the actual starting rates)
	xx<-0:(k-2)+0.5
	q1_start<-q2_start<--1
	while(any(q1_start<0)||any(q2_start<0)){
		start<-runif(n=6)
		q1_start<-start[1]*xx^2+start[2]*xx+start[3]
		q2_start<-start[4]*xx^2+start[5]*xx+start[6]
	}
	fit<-nlminb(start,lik,pw=pw,X=x,pi=pi,control=list(trace=trace))
	## fitted per-transition rates under the optimized quadratics,
	## truncated at zero exactly as in lik()
	q1_est<-fit$par[1]*xx^2+fit$par[2]*xx+fit$par[3]
	q2_est<-fit$par[4]*xx^2+fit$par[5]*xx+fit$par[6]
	q1_est[q1_est<0]<-0
	q2_est[q2_est<0]<-0
	index.matrix<-matrix(0,k,k,dimnames=list(colnames(x),colnames(x)))
	index.matrix[cbind(1:(k-1),2:k)]<-1:(k-1)
	index.matrix[cbind(2:k,1:(k-1))]<-k:(2*k-2)
	## closure over the data so the likelihood can be re-evaluated later
	lik.f<-function(par) lik(par,pw=pw,X=x,pi=pi)
	object<-list(
		logLik=-fit$objective,
		rates=c(q1_est,q2_est),
		index.matrix=index.matrix,
		states=levs,
		pi=pi,
		method="nlminb",
		root.prior=root.prior,
		opt_results=fit[c("convergence","iterations","evaluations","message")],
		par=fit$par,
		data=x,
		tree=tree,
		lik=lik.f)
	class(object)<-c("fitFnMk","fitMk")
	object
}

## S3 plot method for "fitFnMk": graphs the fitted forward (blue) &
## backward (red) transition rates between each pair of adjacent states.
plot.fitFnMk<-function(x,...){
	nstates<-length(x$states)
	## the rate vector stores the (nstates-1) forward rates first,
	## followed by the (nstates-1) backward rates
	forward<-x$rates[1:(nstates-1)]
	backward<-x$rates[nstates:(2*nstates-2)]
	## midpoints between adjacent levels on the trait axis
	midpts<-0:(nstates-2)+0.5
	plot(midpts,forward,type="b",col="blue",bty="n",las=1,
		axes=FALSE,xlab="",ylab="transition rate (q)",
		ylim=c(0,max(c(forward,backward))))
	lines(midpts,backward,type="b",col="red")
	## label each tick with the pair of adjacent states it lies between
	axis(1,at=seq(0.5,nstates-1.5,by=1),
		paste(x$states[1:(nstates-1)],"↔",x$states[2:nstates]),
		cex.axis=0.7,las=3)
	axis(2,las=1,cex.axis=0.8)
	grid()
	legend("bottomleft",c("forward","backward"),
		col=c("blue","red"),
		lty="solid",pch=1,cex=0.8)
}

## S3 logLik method for "fitFnMk": returns the log-likelihood with a
## "df" attribute giving the number of estimated parameters.
logLik.fitFnMk<-function(object,...){
	structure(object$logLik,df=length(object$par))
}

## S3 print method for objects of class "fitFnMk": reports the fitted
## rate matrix Q, the root prior pi, the log-likelihood, the optimizer
## used, & whether optimization appears to have converged.
print.fitFnMk<-function(x,digits=6,...){
	cat("Object of class \"fitFnMk\".\n\n")
	cat("Fitted (or set) value of Q:\n")
	## reconstruct Q from the rate vector via the index matrix (index 0
	## maps to rate 0), then fill the diagonal so each row sums to zero
	nn<-length(x$states)
	qmat<-matrix(NA,nn,nn)
	qmat[]<-c(0,x$rates)[x$index.matrix+1]
	diag(qmat)<-0
	diag(qmat)<--rowSums(qmat)
	dimnames(qmat)<-list(x$states,x$states)
	print(round(qmat,digits))
	cat("\nFitted (or set) value of pi:\n")
	print(round(x$pi,digits))
	cat(paste0("due to treating the root prior as (a) ",x$root.prior,".\n"))
	cat(paste("\nLog-likelihood:",round(x$logLik,digits),"\n"))
	cat(paste0("\nOptimization method used was \"",x$method,"\"\n\n"))
	if(!is.null(x$opt_results$convergence)){
		## nlminb convergence code 0 indicates successful convergence
		if(x$opt_results$convergence==0)
			cat("R thinks it has found the ML solution.\n\n")
		else cat("R thinks optimization may not have converged.\n\n")
	}
}

Let’s try it out.

Here, I’m going to simulate backward & forward rates that are both upward parabolic, but with different centers & different curvature.

## generating model for the simulation: both the forward (q1) & backward
## (q2) rates are upward-opening parabolas across the ordered levels,
## with different centers & different curvature
k<-13
## midpoints between the k=13 adjacent state pairs
x<-0:(k-2)+0.5
q1<-0.4*x^2-5.6*x+19.7
q2<-0.3*x^2-3*x+7.9
## visualize the two generating rate functions
par(mar=c(2.1,4.1,2.1,1.1))
plot(x,q1,type="b",col="blue",bty="n",las=1,
  axes=FALSE,xlab="",ylab="transition rate (q)",
  ylim=c(0,max(c(q1,q2))))
lines(x,q2,type="b",col="red")
axis(1,at=seq(0.5,k-1.5,by=1),paste(0:(k-2),"↔",1:(k-1)),
  cex.axis=0.7)
axis(2,las=1,cex.axis=0.8)
grid()
legend("bottomleft",c("forward","backward"),
  col=c("blue","red"),
  lty="solid",pch=1,cex=0.8)

plot of chunk unnamed-chunk-21

## build the generating transition matrix Q: tri-diagonal, with the
## forward rates q1 on the superdiagonal & the backward rates q2 on the
## subdiagonal; diagonal set so each row sums to zero
Q<-matrix(0,k,k,dimnames=list(0:(k-1),0:(k-1)))
Q[cbind(1:(k-1),2:k)]<-q1
Q[cbind(2:k,1:(k-1))]<-q2
diag(Q)<--rowSums(Q)
round(Q,2)
##         0      1      2     3     4     5     6     7     8     9     10     11     12
## 0  -17.00  17.00   0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00   0.00   0.00   0.00
## 1    6.48 -18.68  12.20  0.00  0.00  0.00  0.00  0.00  0.00  0.00   0.00   0.00   0.00
## 2    0.00   4.08 -12.27  8.20  0.00  0.00  0.00  0.00  0.00  0.00   0.00   0.00   0.00
## 3    0.00   0.00   2.28 -7.28  5.00  0.00  0.00  0.00  0.00  0.00   0.00   0.00   0.00
## 4    0.00   0.00   0.00  1.08 -3.67  2.60  0.00  0.00  0.00  0.00   0.00   0.00   0.00
## 5    0.00   0.00   0.00  0.00  0.48 -1.48  1.00  0.00  0.00  0.00   0.00   0.00   0.00
## 6    0.00   0.00   0.00  0.00  0.00  0.47 -0.68  0.20  0.00  0.00   0.00   0.00   0.00
## 7    0.00   0.00   0.00  0.00  0.00  0.00  1.07 -1.27  0.20  0.00   0.00   0.00   0.00
## 8    0.00   0.00   0.00  0.00  0.00  0.00  0.00  2.28 -3.28  1.00   0.00   0.00   0.00
## 9    0.00   0.00   0.00  0.00  0.00  0.00  0.00  0.00  4.08 -6.68   2.60   0.00   0.00
## 10   0.00   0.00   0.00  0.00  0.00  0.00  0.00  0.00  0.00  6.47 -11.48   5.00   0.00
## 11   0.00   0.00   0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00   9.47 -17.68   8.20
## 12   0.00   0.00   0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00   0.00  13.07 -13.07
## simulate a 2,000-taxon pure-birth tree & evolve the ordered discrete
## character on it under Q, starting from state "0" at the root
tree<-pbtree(n=2000)
y<-sim.Mk(tree,Q,anc=as.character(0))
head(y,20)
##  t435  t952 t1204 t1205 t1208 t1234 t1235  t581 t1576 t1800 t1801 t1590 t1591 t1002 t1865 t1866 t1873 
##     5     6     4     5     6     7     6     6     5     5     6     5     5     5     5     5     5 
## t1874 t1835 t1836 
##     5     6     6 
## Levels: 0 1 10 12 2 3 4 5 6 7 8 9

OK, now let’s see if we can fit the model to our data!

library(phytools)
## pruning is not exported from phytools, so pull it from the namespace
pruning<-phytools:::pruning
## fit the model; trace=1 prints nlminb's progress at each iteration
fit<-fitFnMk(tree,y,trace=1)
##   0:     5619.5046: 0.440322 0.248076 0.564876 0.767566 0.553588 0.397636
##   1:     4545.6963: 0.592545 0.286181 0.576451 0.655545 0.523996 0.387347
##   2:     3967.3999: 0.192107 0.622472 0.749289 0.442418 0.0612808 0.164274
##   3:     3812.1610: 0.115245 0.688030 0.782642 0.389890 -0.0310360 0.120634
##   4:     3745.3094: 0.167485 0.701877 0.786293 0.327331 -0.0472463 0.116171
##   5:     3518.9996: 0.0997763 0.724955 0.798367 0.325377 -0.0874861 0.100532
##   6:     3181.9647: 0.0188891 0.781517 0.825029 0.237321 -0.183003 0.0660158
##   7:     3091.6312: -0.00139599 0.791387 0.829933 0.221442 -0.200770 0.0596822
##   8:     2919.5065: -0.0451439 0.809005 0.838947 0.189719 -0.234215 0.0481494
##   9:     2871.5962: -0.0530649 0.811923 0.840456 0.181530 -0.240359 0.0460994
##  10:     2763.9412: -0.0666893 0.815887 0.842589 0.161348 -0.250685 0.0428893
##  11:     2741.5435: -0.0701045 0.815759 0.842657 0.157298 -0.251725 0.0426885
##  12:     2720.1886: -0.0768597 0.815502 0.842792 0.149093 -0.253654 0.0424275
##  13:     2679.6975: -0.0763914 0.816377 0.843218 0.128280 -0.259213 0.0410025
##  14:     2676.7504: -0.0768192 0.816323 0.843212 0.128304 -0.259231 0.0409948
##  15:     2676.2248: -0.0769047 0.816311 0.843211 0.128308 -0.259235 0.0409930
##  16:     2675.3110: -0.0770756 0.816288 0.843209 0.128316 -0.259245 0.0409890
##  17:     2675.1645: -0.0771097 0.816283 0.843208 0.128318 -0.259247 0.0409880
##  18:     2674.9385: -0.0771776 0.816273 0.843207 0.128322 -0.259253 0.0409857
##  19:     2674.8652: -0.0772344 0.816261 0.843206 0.128326 -0.259261 0.0409824
##  20:     2674.8547: -0.0772138 0.816271 0.843214 0.128330 -0.259310 0.0409629
##  21:     2674.8340: -0.0772391 0.816330 0.843241 0.128315 -0.259398 0.0409293
##  22:     2674.8069: -0.0772280 0.816439 0.843293 0.128293 -0.259584 0.0408575
##  23:     2674.7683: -0.0772769 0.816707 0.843407 0.128216 -0.259918 0.0407301
##  24:     2674.7386: -0.0772997 0.817255 0.843637 0.128059 -0.260582 0.0404765
##  25:     2674.7353: -0.0773654 0.817497 0.843739 0.127989 -0.260875 0.0403650
##  26:     2674.7112: -0.0773249 0.817366 0.843687 0.128035 -0.260741 0.0404157
##  27:     2674.7023: -0.0773165 0.817095 0.843580 0.128129 -0.260472 0.0405173
##  28:     2674.6824: -0.0773503 0.817591 0.843787 0.127983 -0.261059 0.0402942
##  29:     2674.5504: -0.0772715 0.816383 0.843352 0.128522 -0.260182 0.0406200
##  30:     2674.5126: -0.0772328 0.816382 0.843352 0.128525 -0.260186 0.0406180
##  31:     2674.5038: -0.0772462 0.816395 0.843360 0.128523 -0.260218 0.0406059
##  32:     2674.4914: -0.0772311 0.816423 0.843375 0.128520 -0.260283 0.0405805
##  33:     2674.4756: -0.0772575 0.816511 0.843413 0.128492 -0.260393 0.0405384
##  34:     2674.4586: -0.0772550 0.816692 0.843489 0.128439 -0.260617 0.0404533
##  35:     2674.4458: -0.0773168 0.817078 0.843645 0.128311 -0.261035 0.0402951
##  36:     2674.4022: -0.0772591 0.816619 0.843478 0.128448 -0.260696 0.0404287
##  37:     2674.3302: -0.0773320 0.817294 0.843774 0.128205 -0.261617 0.0400871
##  38:     2674.1435: -0.0771998 0.816271 0.843435 0.128570 -0.261141 0.0402768
##  39:     2674.1422: -0.0771964 0.815265 0.843096 0.128905 -0.260605 0.0404894
##  40:     2673.9742: -0.0770713 0.814769 0.842927 0.129064 -0.260347 0.0405936
##  41:     2673.8289: -0.0769384 0.813075 0.842382 0.129530 -0.259689 0.0408800
##  42:     2673.7608: -0.0769598 0.812977 0.842371 0.129482 -0.259842 0.0408222
##  43:     2651.9230: -0.0717324 0.754353 0.833554 0.121711 -0.321808 0.0258428
##  44:     2651.6084: -0.0718092 0.754338 0.833552 0.121750 -0.321806 0.0258421
##  45:     2650.3677: -0.0716484 0.754199 0.833524 0.122493 -0.321777 0.0258278
##  46:     2502.9946: -0.0249576 0.224502 0.723123 0.116577 -0.581382 -0.0341298
##  47:     2434.9627: -0.0318817 0.306658  1.02720 0.223652 -1.08006 0.0468792
##  48:     2353.0401: 0.0135939 -0.422639  3.09311 0.239315 -1.29180 0.202375
##  49:     2351.1890: 0.0106633 -0.375380  3.03597 0.255054 -1.35912 0.209446
##  50:     2349.8156: 0.0142770 -0.411182  3.07014 0.256840 -1.39806 0.289162
##  51:     2345.6122: 0.0138391 -0.410399  3.05685 0.282820 -1.56206 0.421821
##  52:     2322.4292: 0.0114309 -0.380089  3.01556 0.345312 -2.03472  1.12581
##  53:     2301.8112: 0.00223427 -0.262308  2.85564 0.585538 -3.87700  3.97492
##  54:     2295.4585: 0.0109939 -0.372558  3.20722 0.609907 -4.04704  4.20349
##  55:     2290.4361: 0.0245763 -0.556720  3.78341 0.668199 -4.47305  4.79992
##  56:     2288.5906: 0.0324223 -0.641078  4.02218 0.717567 -4.84156  5.34951
##  57:     2286.5509: 0.0444851 -0.758302  4.32179 0.800522 -5.47313  6.31436
##  58:     2274.4481: 0.0543189 -0.841158  4.50363 0.877403 -6.06211  7.23256
##  59:     2273.6906: 0.0928808 -1.18087  5.29929  1.18231 -8.40153  10.9012
##  60:     2271.5123: 0.0911586 -1.18513  5.31944  1.12089 -7.94024  10.1914
##  61:     2270.1599: 0.0819639 -1.12439  5.18159 0.971487 -6.81044  8.48299
##  62:     2269.8328: 0.0843078 -1.13762  5.20538  1.00062 -7.03717  8.85474
##  63:     2269.5876: 0.0841675 -1.13294  5.18286 0.986612 -6.94229  8.75673
##  64:     2269.0439: 0.0844437 -1.13133  5.14353 0.932938 -6.57355  8.34570
##  65:     2268.7399: 0.0866145 -1.15485  5.17441 0.884917 -6.24833  7.99969
##  66:     2268.4569: 0.0903963 -1.20045  5.28376 0.862249 -6.10504  7.88919
##  67:     2267.6268: 0.104375 -1.37488  5.75042 0.819641 -5.86595  7.84261
##  68:     2266.8479: 0.117383 -1.54302  6.24137 0.801588 -5.80306  8.03179
##  69:     2265.4679: 0.139849 -1.84336  7.16874 0.781484 -5.79437  8.58034
##  70:     2264.0105: 0.159218 -2.11595  8.05326 0.759234 -5.78774  9.20844
##  71:     2249.9811: 0.186079 -2.52296  9.43102 0.702827 -5.66844  10.2429
##  72:     2186.4800: 0.299134 -4.22858  15.1268 0.428593 -4.86207  14.0303
##  73:     2099.0156: 0.283520 -4.04895  14.4785 0.550645 -5.53801  14.2049
##  74:     2065.4341: 0.273250 -3.82245  13.7248 0.572593 -5.55000  13.6263
##  75:     2056.6584: 0.280244 -3.92684  14.0673 0.562538 -5.53726  13.9019
##  76:     2054.2095: 0.279049 -3.92703  14.0673 0.562815 -5.53722  13.9020
##  77:     2054.0517: 0.279044 -3.92685  14.0665 0.562376 -5.53713  13.9011
##  78:     2054.0166: 0.278872 -3.92642  14.0649 0.562369 -5.53675  13.8993
##  79:     2053.4202: 0.273099 -3.83822  13.7508 0.560982 -5.46698  13.5719
##  80:     2051.0832: 0.268404 -3.75161  13.3631 0.529922 -5.17325  12.8723
##  81:     2048.1828: 0.245418 -3.39228  11.9435 0.418450 -4.09217  10.2582
##  82:     2047.4418: 0.247373 -3.43298  12.1349 0.426701 -4.16427  10.4217
##  83:     2045.3762: 0.251017 -3.51520  12.5219 0.419925 -4.08563  10.2185
##  84:     2040.9328: 0.267980 -3.79566  13.6336 0.396413 -3.83720  9.60309
##  85:     2033.7634: 0.325473 -4.62925  16.6212 0.359750 -3.47043  8.75527
##  86:     2028.8334: 0.392702 -5.53713  19.6588 0.348532 -3.40128  8.73543
##  87:     2026.2280: 0.450516 -6.29270  22.0911 0.345545 -3.43338  8.98771
##  88:     2025.6653: 0.466136 -6.47872  22.6441 0.359403 -3.58329  9.37616
##  89:     2025.5206: 0.465926 -6.45954  22.5221 0.364920 -3.63963  9.50292
##  90:     2025.4854: 0.458713 -6.35512  22.1502 0.363369 -3.61459  9.40737
##  91:     2025.4833: 0.456885 -6.33121  22.0719 0.361979 -3.59853  9.36125
##  92:     2025.4832: 0.456503 -6.32676  22.0590 0.361533 -3.59381  9.34889
##  93:     2025.4832: 0.456445 -6.32613  22.0574 0.361408 -3.59265  9.34620
##  94:     2025.4832: 0.456438 -6.32606  22.0572 0.361400 -3.59261  9.34619
##  95:     2025.4832: 0.456438 -6.32606  22.0572 0.361400 -3.59261  9.34622
fit
## Object of class "fitFnMk".
## 
## Fitted (or set) value of Q:
##             0          1          2         3         4         5         6         7         8
## 0  -19.008277  19.008277   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## 1    7.640263 -21.235358  13.595095  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## 2    0.000000   4.770449 -13.865238  9.094790  0.000000  0.000000  0.000000  0.000000  0.000000
## 3    0.000000   0.000000   2.623435 -8.130795  5.507360  0.000000  0.000000  0.000000  0.000000
## 4    0.000000   0.000000   0.000000  1.199222 -4.032029  2.832806  0.000000  0.000000  0.000000
## 5    0.000000   0.000000   0.000000  0.000000  0.497811 -1.568939  1.071128  0.000000  0.000000
## 6    0.000000   0.000000   0.000000  0.000000  0.000000  0.519199 -0.741526  0.222326  0.000000
## 7    0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  1.263389 -1.549789  0.286400
## 8    0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000  2.730379 -3.993729
## 9    0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  4.920170
## 10   0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## 11   0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
## 12   0.000000   0.000000   0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000
##            9         10         11         12
## 0   0.000000   0.000000   0.000000   0.000000
## 1   0.000000   0.000000   0.000000   0.000000
## 2   0.000000   0.000000   0.000000   0.000000
## 3   0.000000   0.000000   0.000000   0.000000
## 4   0.000000   0.000000   0.000000   0.000000
## 5   0.000000   0.000000   0.000000   0.000000
## 6   0.000000   0.000000   0.000000   0.000000
## 7   0.000000   0.000000   0.000000   0.000000
## 8   1.263349   0.000000   0.000000   0.000000
## 9  -8.073345   3.153175   0.000000   0.000000
## 10  7.832762 -13.788639   5.955876   0.000000
## 11  0.000000  11.468155 -21.139608   9.671454
## 12  0.000000   0.000000  15.826348 -15.826348
## 
## Fitted (or set) value of pi:
##        0        1        2        3        4        5        6        7        8        9       10 
## 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 
##       11       12 
## 0.076923 0.076923 
## due to treating the root prior as (a) flat.
## 
## Log-likelihood: -2025.48317 
## 
## Optimization method used was "nlminb"
## 
## R thinks it has found the ML solution.
plot(fit)

plot of chunk unnamed-chunk-25

To get a better picture, let’s overlay our generating model to see how close we were.

## overlay the generating rate functions (dotted) on the fitted rates;
## q1 & q2 are already ordered by adjacent-state pair, so plot them
## directly against the midpoints x -- the previous indexing,
## q1[fit$states+1] over x[1:length(fit$states)], read one element past
## the ends of both vectors (12 rates vs. 13 states), appending an NA
## point, & relied on the states being 0-based integers
par(mar=c(5.1,4.1,1.1,1.1))
plot(fit)
lines(x,q1,col="blue",lty="dotted")
lines(x,q2,col="red",lty="dotted")
legend("bottomright","generating",lty="dotted",cex=0.8)

plot of chunk unnamed-chunk-26

That’s not bad.

No comments:

Post a Comment

Note: due to the very large amount of spam, all comments are now automatically submitted for moderation.