Yesterday, I posted (1, 2) about fitting an ordered, discrete character evolution (Mk) model in which the rates of change between adjacent states vary according to a functional form.
I now have an R function for this, which will probably eventually be added to phytools. I’m still working on it, but this is what I have so far – including print, plot, and logLik methods. This code will change before these functions go into the package!
## Fit an ordered Mk model in which the forward & backward transition rates
## between adjacent states follow a quadratic functional form.
##
## Arguments:
##   tree  : object of class "phylo"
##   x     : tip data as a matrix of tip priors, a numeric vector, a factor,
##           or a character vector (named by the tip labels of the tree)
##   model : functional form of the rate gradient; only "quadratic" is
##           currently implemented
##   ...   : optional arguments, including trace (passed to nlminb) and pi
##           (root prior: "equal", "fitzjohn", "mle", or a numeric vector)
##
## Value: an object of class c("fitFnMk","fitMk").
## Depends on phytools (to.matrix, pruning) & ape (reorder on "phylo").
fitFnMk<-function(tree,x,model="quadratic",...){
  ## only the quadratic form is implemented so far: fail loudly rather than
  ## silently ignoring an unsupported model
  model<-match.arg(model,"quadratic")
  if(hasArg(trace)) trace<-list(...)$trace
  else trace<-0
  ## coerce the input data to a matrix of tip priors & identify the ordered
  ## state levels; numeric-like input gets the full integer range min:max so
  ## that unobserved intermediate states are retained
  if(is.matrix(x)){
    levs<-colnames(x)
  } else if(is.numeric(x)){
    levs<-min(x):max(x)
    x<-to.matrix(x,levs)
  } else if(is.factor(x)){
    if(suppressWarnings(all(!is.na(as.numeric(levels(x)))))){
      levs<-min(as.numeric(levels(x))):max(as.numeric(levels(x)))
      x<-to.matrix(x,levs)
    } else {
      levs<-sort(levels(x))
      x<-to.matrix(x,levs)
    }
  } else if(is.character(x)){
    if(suppressWarnings(all(!is.na(as.numeric(x))))){
      levs<-min(as.numeric(x)):max(as.numeric(x))
      x<-to.matrix(x,levs)
    } else {
      levs<-sort(unique(x))
      x<-to.matrix(x,levs)
    }
  }
  k<-ncol(x)
  ## resolve the prior distribution on the root state
  ## NOTE(review): a pi that is neither numeric nor one of the recognized
  ## strings leaves root.prior undefined & errors later -- confirm intended
  if(hasArg(pi)) pi<-list(...)$pi
  else pi<-"equal"
  if(is.numeric(pi)) root.prior<-"given"
  if(pi[1]=="equal"){
    pi<-setNames(rep(1/k,k),levs)
    root.prior<-"flat"
  } else if(pi[1]=="fitzjohn"){
    root.prior<-"nuisance"
  } else if(pi[1]=="mle") root.prior<-"it's MLE"
  ## negative log-likelihood: par = c(a1,b1,c1,a2,b2,c2), the coefficients of
  ## the two quadratics giving the forward (q1) & backward (q2) rates
  ## evaluated at the midpoints between adjacent states
  lik<-function(par,pw,X,pi){
    k<-ncol(X)
    x<-1:(k-1)-0.5
    q1<-par[1]*x^2+par[2]*x+par[3]
    q2<-par[4]*x^2+par[5]*x+par[6]
    ## reject parameter values under which every rate in either direction is
    ## negative (a degenerate Q); this must be tested *before* truncation --
    ## after q1[q1<0]<-0 the condition all(q1<0) could never be TRUE
    if(all(q1<0)||all(q2<0)) return(Inf)
    ## individual negative rates are truncated to zero
    q1[q1<0]<-0
    q2[q2<0]<-0
    ## design matrix for pruning(): 1:(k-1) index the forward rates on the
    ## superdiagonal, k:(2k-2) the backward rates on the subdiagonal
    MODEL<-matrix(0,k,k,dimnames=list(colnames(X),colnames(X)))
    MODEL[cbind(1:(k-1),2:k)]<-1:(k-1)
    MODEL[cbind(2:k,1:(k-1))]<-k:(2*k-2)
    -pruning(c(q1,q2),pw,X,model=MODEL,pi=pi)
  }
  pw<-reorder(tree,"postorder")
  ## draw random starting coefficients until the implied starting rates are
  ## all non-negative
  q1_start<-q2_start<--1
  while(any(q1_start<0)||any(q2_start<0)){
    xx<-0:(k-2)+0.5
    start<-runif(n=6)
    ## NOTE(review): the sign of start[2] is flipped here relative to lik(),
    ## so feasibility is not checked under exactly the same parameterization
    ## as the objective -- confirm whether this is intentional
    q1_start<-start[1]*xx^2-start[2]*xx+start[3]
    q2_start<-start[4]*xx^2+start[5]*xx+start[6]
  }
  fit<-nlminb(start,lik,pw=pw,X=x,pi=pi,control=list(trace=trace))
  ## recover the fitted rates from the optimized coefficients, truncated at
  ## zero exactly as in lik()
  q1_est<-fit$par[1]*xx^2+fit$par[2]*xx+fit$par[3]
  q2_est<-fit$par[4]*xx^2+fit$par[5]*xx+fit$par[6]
  q1_est[q1_est<0]<-0
  q2_est[q2_est<0]<-0
  ## index matrix mapping the rate vector into Q, as in "fitMk" objects
  index.matrix<-matrix(0,k,k,dimnames=list(colnames(x),colnames(x)))
  index.matrix[cbind(1:(k-1),2:k)]<-1:(k-1)
  index.matrix[cbind(2:k,1:(k-1))]<-k:(2*k-2)
  ## likelihood function with the data fixed, retained on the fitted object
  lik.f<-function(par) lik(par,pw=pw,X=x,pi=pi)
  object<-list(
    logLik=-fit$objective,
    rates=c(q1_est,q2_est),
    index.matrix=index.matrix,
    states=levs,
    pi=pi,
    method="nlminb",
    root.prior=root.prior,
    opt_results=fit[c("convergence","iterations","evaluations","message")],
    par=fit$par,
    data=x,
    tree=tree,
    lik=lik.f)
  class(object)<-c("fitFnMk","fitMk")
  object
}
## Plot method for "fitFnMk" objects: graphs the fitted forward (blue) &
## backward (red) transition rates against the midpoints between adjacent
## ordered states.
plot.fitFnMk<-function(x,...){
  nstates<-length(x$states)
  forward<-x$rates[1:(nstates-1)]
  backward<-x$rates[nstates:(2*nstates-2)]
  ## midpoints between adjacent states, used as plotting positions
  midpts<-0:(nstates-2)+0.5
  ## common vertical range so both rate curves fit on the same panel
  ymax<-max(c(forward,backward))
  plot(midpts,forward,type="b",col="blue",bty="n",las=1,
    axes=FALSE,xlab="",ylab="transition rate (q)",
    ylim=c(0,ymax))
  lines(midpts,backward,type="b",col="red")
  ## label each midpoint with the pair of states it sits between
  pair_labels<-paste(x$states[1:(nstates-1)],"↔",x$states[2:nstates])
  axis(1,at=seq(0.5,nstates-1.5,by=1),pair_labels,cex.axis=0.7,las=3)
  axis(2,las=1,cex.axis=0.8)
  grid()
  legend("bottomleft",c("forward","backward"),
    col=c("blue","red"),
    lty="solid",pch=1,cex=0.8)
}
## logLik method for "fitFnMk" objects: returns the fitted log-likelihood
## with a "df" attribute equal to the number of estimated parameters.
logLik.fitFnMk<-function(object,...){
  structure(object$logLik,df=length(object$par))
}
## print method for objects of class "fitFnMk": shows the fitted Q matrix,
## the root prior pi, the log-likelihood, & a convergence diagnostic
print.fitFnMk<-function(x,digits=6,...){
  cat("Object of class \"fitFnMk\".\n\n")
  cat("Fitted (or set) value of Q:\n")
  m<-length(x$states)
  ## rebuild Q from the rate vector via the index matrix: prepending a zero
  ## makes index 0 (no direct transition) map to a rate of zero
  Q<-matrix(c(0,x$rates)[x$index.matrix+1],m,m,
    dimnames=list(x$states,x$states))
  diag(Q)<-0
  ## rows of a transition rate matrix sum to zero
  diag(Q)<--rowSums(Q)
  print(round(Q,digits))
  cat("\nFitted (or set) value of pi:\n")
  print(round(x$pi,digits))
  cat(paste0("due to treating the root prior as (a) ",x$root.prior,".\n"))
  cat(paste("\nLog-likelihood:",round(x$logLik,digits),"\n"))
  cat(paste0("\nOptimization method used was \"",x$method,"\"\n\n"))
  if(!is.null(x$opt_results$convergence)){
    if(x$opt_results$convergence==0)
      cat("R thinks it has found the ML solution.\n\n")
    else cat("R thinks optimization may not have converged.\n\n")
  }
}
Let’s try it out.
Here, I’m going to simulate backward & forward rates that are both upward parabolic, but with different centers & different curvature.
## Simulate an ordered Mk model with k = 13 states in which the forward (q1)
## & backward (q2) rates between adjacent states are upward parabolas with
## different centers & curvatures.
k<-13
## midpoints between adjacent states, at which the rate functions are evaluated
x<-0:(k-2)+0.5
q1<-0.4*x^2-5.6*x+19.7
q2<-0.3*x^2-3*x+7.9
## graph the generating forward (blue) & backward (red) rate functions
par(mar=c(2.1,4.1,2.1,1.1))
plot(x,q1,type="b",col="blue",bty="n",las=1,
axes=FALSE,xlab="",ylab="transition rate (q)",
ylim=c(0,max(c(q1,q2))))
lines(x,q2,type="b",col="red")
axis(1,at=seq(0.5,k-1.5,by=1),paste(0:(k-2),"↔",1:(k-1)),
cex.axis=0.7)
axis(2,las=1,cex.axis=0.8)
grid()
legend("bottomleft",c("forward","backward"),
col=c("blue","red"),
lty="solid",pch=1,cex=0.8)
## assemble the generating transition matrix Q: q1 fills the superdiagonal,
## q2 the subdiagonal, & the diagonal is set so each row sums to zero
Q<-matrix(0,k,k,dimnames=list(0:(k-1),0:(k-1)))
Q[cbind(1:(k-1),2:k)]<-q1
Q[cbind(2:k,1:(k-1))]<-q2
diag(Q)<--rowSums(Q)
round(Q,2)
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 0 -17.00 17.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## 1 6.48 -18.68 12.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## 2 0.00 4.08 -12.27 8.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## 3 0.00 0.00 2.28 -7.28 5.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## 4 0.00 0.00 0.00 1.08 -3.67 2.60 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## 5 0.00 0.00 0.00 0.00 0.48 -1.48 1.00 0.00 0.00 0.00 0.00 0.00 0.00
## 6 0.00 0.00 0.00 0.00 0.00 0.47 -0.68 0.20 0.00 0.00 0.00 0.00 0.00
## 7 0.00 0.00 0.00 0.00 0.00 0.00 1.07 -1.27 0.20 0.00 0.00 0.00 0.00
## 8 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.28 -3.28 1.00 0.00 0.00 0.00
## 9 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4.08 -6.68 2.60 0.00 0.00
## 10 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6.47 -11.48 5.00 0.00
## 11 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 9.47 -17.68 8.20
## 12 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 13.07 -13.07
## simulate a 2000-taxon pure-birth tree & a discrete character evolving
## under Q, starting from ancestral state "0"
## NOTE(review): pbtree & sim.Mk are phytools functions, but library(phytools)
## is not called until further down -- it must already be loaded here
tree<-pbtree(n=2000)
y<-sim.Mk(tree,Q,anc=as.character(0))
head(y,20)
## t435 t952 t1204 t1205 t1208 t1234 t1235 t581 t1576 t1800 t1801 t1590 t1591 t1002 t1865 t1866 t1873
## 5 6 4 5 6 7 6 6 5 5 6 5 5 5 5 5 5
## t1874 t1835 t1836
## 5 6 6
## Levels: 0 1 10 12 2 3 4 5 6 7 8 9
OK, now let’s see if we can fit the model to our data!
## load phytools & pull in its internal pruning() likelihood function, which
## fitFnMk() calls; trace=1 makes nlminb print its optimization progress
library(phytools)
pruning<-phytools:::pruning
fit<-fitFnMk(tree,y,trace=1)
## 0: 5619.5046: 0.440322 0.248076 0.564876 0.767566 0.553588 0.397636
## 1: 4545.6963: 0.592545 0.286181 0.576451 0.655545 0.523996 0.387347
## 2: 3967.3999: 0.192107 0.622472 0.749289 0.442418 0.0612808 0.164274
## 3: 3812.1610: 0.115245 0.688030 0.782642 0.389890 -0.0310360 0.120634
## 4: 3745.3094: 0.167485 0.701877 0.786293 0.327331 -0.0472463 0.116171
## 5: 3518.9996: 0.0997763 0.724955 0.798367 0.325377 -0.0874861 0.100532
## 6: 3181.9647: 0.0188891 0.781517 0.825029 0.237321 -0.183003 0.0660158
## 7: 3091.6312: -0.00139599 0.791387 0.829933 0.221442 -0.200770 0.0596822
## 8: 2919.5065: -0.0451439 0.809005 0.838947 0.189719 -0.234215 0.0481494
## 9: 2871.5962: -0.0530649 0.811923 0.840456 0.181530 -0.240359 0.0460994
## 10: 2763.9412: -0.0666893 0.815887 0.842589 0.161348 -0.250685 0.0428893
## 11: 2741.5435: -0.0701045 0.815759 0.842657 0.157298 -0.251725 0.0426885
## 12: 2720.1886: -0.0768597 0.815502 0.842792 0.149093 -0.253654 0.0424275
## 13: 2679.6975: -0.0763914 0.816377 0.843218 0.128280 -0.259213 0.0410025
## 14: 2676.7504: -0.0768192 0.816323 0.843212 0.128304 -0.259231 0.0409948
## 15: 2676.2248: -0.0769047 0.816311 0.843211 0.128308 -0.259235 0.0409930
## 16: 2675.3110: -0.0770756 0.816288 0.843209 0.128316 -0.259245 0.0409890
## 17: 2675.1645: -0.0771097 0.816283 0.843208 0.128318 -0.259247 0.0409880
## 18: 2674.9385: -0.0771776 0.816273 0.843207 0.128322 -0.259253 0.0409857
## 19: 2674.8652: -0.0772344 0.816261 0.843206 0.128326 -0.259261 0.0409824
## 20: 2674.8547: -0.0772138 0.816271 0.843214 0.128330 -0.259310 0.0409629
## 21: 2674.8340: -0.0772391 0.816330 0.843241 0.128315 -0.259398 0.0409293
## 22: 2674.8069: -0.0772280 0.816439 0.843293 0.128293 -0.259584 0.0408575
## 23: 2674.7683: -0.0772769 0.816707 0.843407 0.128216 -0.259918 0.0407301
## 24: 2674.7386: -0.0772997 0.817255 0.843637 0.128059 -0.260582 0.0404765
## 25: 2674.7353: -0.0773654 0.817497 0.843739 0.127989 -0.260875 0.0403650
## 26: 2674.7112: -0.0773249 0.817366 0.843687 0.128035 -0.260741 0.0404157
## 27: 2674.7023: -0.0773165 0.817095 0.843580 0.128129 -0.260472 0.0405173
## 28: 2674.6824: -0.0773503 0.817591 0.843787 0.127983 -0.261059 0.0402942
## 29: 2674.5504: -0.0772715 0.816383 0.843352 0.128522 -0.260182 0.0406200
## 30: 2674.5126: -0.0772328 0.816382 0.843352 0.128525 -0.260186 0.0406180
## 31: 2674.5038: -0.0772462 0.816395 0.843360 0.128523 -0.260218 0.0406059
## 32: 2674.4914: -0.0772311 0.816423 0.843375 0.128520 -0.260283 0.0405805
## 33: 2674.4756: -0.0772575 0.816511 0.843413 0.128492 -0.260393 0.0405384
## 34: 2674.4586: -0.0772550 0.816692 0.843489 0.128439 -0.260617 0.0404533
## 35: 2674.4458: -0.0773168 0.817078 0.843645 0.128311 -0.261035 0.0402951
## 36: 2674.4022: -0.0772591 0.816619 0.843478 0.128448 -0.260696 0.0404287
## 37: 2674.3302: -0.0773320 0.817294 0.843774 0.128205 -0.261617 0.0400871
## 38: 2674.1435: -0.0771998 0.816271 0.843435 0.128570 -0.261141 0.0402768
## 39: 2674.1422: -0.0771964 0.815265 0.843096 0.128905 -0.260605 0.0404894
## 40: 2673.9742: -0.0770713 0.814769 0.842927 0.129064 -0.260347 0.0405936
## 41: 2673.8289: -0.0769384 0.813075 0.842382 0.129530 -0.259689 0.0408800
## 42: 2673.7608: -0.0769598 0.812977 0.842371 0.129482 -0.259842 0.0408222
## 43: 2651.9230: -0.0717324 0.754353 0.833554 0.121711 -0.321808 0.0258428
## 44: 2651.6084: -0.0718092 0.754338 0.833552 0.121750 -0.321806 0.0258421
## 45: 2650.3677: -0.0716484 0.754199 0.833524 0.122493 -0.321777 0.0258278
## 46: 2502.9946: -0.0249576 0.224502 0.723123 0.116577 -0.581382 -0.0341298
## 47: 2434.9627: -0.0318817 0.306658 1.02720 0.223652 -1.08006 0.0468792
## 48: 2353.0401: 0.0135939 -0.422639 3.09311 0.239315 -1.29180 0.202375
## 49: 2351.1890: 0.0106633 -0.375380 3.03597 0.255054 -1.35912 0.209446
## 50: 2349.8156: 0.0142770 -0.411182 3.07014 0.256840 -1.39806 0.289162
## 51: 2345.6122: 0.0138391 -0.410399 3.05685 0.282820 -1.56206 0.421821
## 52: 2322.4292: 0.0114309 -0.380089 3.01556 0.345312 -2.03472 1.12581
## 53: 2301.8112: 0.00223427 -0.262308 2.85564 0.585538 -3.87700 3.97492
## 54: 2295.4585: 0.0109939 -0.372558 3.20722 0.609907 -4.04704 4.20349
## 55: 2290.4361: 0.0245763 -0.556720 3.78341 0.668199 -4.47305 4.79992
## 56: 2288.5906: 0.0324223 -0.641078 4.02218 0.717567 -4.84156 5.34951
## 57: 2286.5509: 0.0444851 -0.758302 4.32179 0.800522 -5.47313 6.31436
## 58: 2274.4481: 0.0543189 -0.841158 4.50363 0.877403 -6.06211 7.23256
## 59: 2273.6906: 0.0928808 -1.18087 5.29929 1.18231 -8.40153 10.9012
## 60: 2271.5123: 0.0911586 -1.18513 5.31944 1.12089 -7.94024 10.1914
## 61: 2270.1599: 0.0819639 -1.12439 5.18159 0.971487 -6.81044 8.48299
## 62: 2269.8328: 0.0843078 -1.13762 5.20538 1.00062 -7.03717 8.85474
## 63: 2269.5876: 0.0841675 -1.13294 5.18286 0.986612 -6.94229 8.75673
## 64: 2269.0439: 0.0844437 -1.13133 5.14353 0.932938 -6.57355 8.34570
## 65: 2268.7399: 0.0866145 -1.15485 5.17441 0.884917 -6.24833 7.99969
## 66: 2268.4569: 0.0903963 -1.20045 5.28376 0.862249 -6.10504 7.88919
## 67: 2267.6268: 0.104375 -1.37488 5.75042 0.819641 -5.86595 7.84261
## 68: 2266.8479: 0.117383 -1.54302 6.24137 0.801588 -5.80306 8.03179
## 69: 2265.4679: 0.139849 -1.84336 7.16874 0.781484 -5.79437 8.58034
## 70: 2264.0105: 0.159218 -2.11595 8.05326 0.759234 -5.78774 9.20844
## 71: 2249.9811: 0.186079 -2.52296 9.43102 0.702827 -5.66844 10.2429
## 72: 2186.4800: 0.299134 -4.22858 15.1268 0.428593 -4.86207 14.0303
## 73: 2099.0156: 0.283520 -4.04895 14.4785 0.550645 -5.53801 14.2049
## 74: 2065.4341: 0.273250 -3.82245 13.7248 0.572593 -5.55000 13.6263
## 75: 2056.6584: 0.280244 -3.92684 14.0673 0.562538 -5.53726 13.9019
## 76: 2054.2095: 0.279049 -3.92703 14.0673 0.562815 -5.53722 13.9020
## 77: 2054.0517: 0.279044 -3.92685 14.0665 0.562376 -5.53713 13.9011
## 78: 2054.0166: 0.278872 -3.92642 14.0649 0.562369 -5.53675 13.8993
## 79: 2053.4202: 0.273099 -3.83822 13.7508 0.560982 -5.46698 13.5719
## 80: 2051.0832: 0.268404 -3.75161 13.3631 0.529922 -5.17325 12.8723
## 81: 2048.1828: 0.245418 -3.39228 11.9435 0.418450 -4.09217 10.2582
## 82: 2047.4418: 0.247373 -3.43298 12.1349 0.426701 -4.16427 10.4217
## 83: 2045.3762: 0.251017 -3.51520 12.5219 0.419925 -4.08563 10.2185
## 84: 2040.9328: 0.267980 -3.79566 13.6336 0.396413 -3.83720 9.60309
## 85: 2033.7634: 0.325473 -4.62925 16.6212 0.359750 -3.47043 8.75527
## 86: 2028.8334: 0.392702 -5.53713 19.6588 0.348532 -3.40128 8.73543
## 87: 2026.2280: 0.450516 -6.29270 22.0911 0.345545 -3.43338 8.98771
## 88: 2025.6653: 0.466136 -6.47872 22.6441 0.359403 -3.58329 9.37616
## 89: 2025.5206: 0.465926 -6.45954 22.5221 0.364920 -3.63963 9.50292
## 90: 2025.4854: 0.458713 -6.35512 22.1502 0.363369 -3.61459 9.40737
## 91: 2025.4833: 0.456885 -6.33121 22.0719 0.361979 -3.59853 9.36125
## 92: 2025.4832: 0.456503 -6.32676 22.0590 0.361533 -3.59381 9.34889
## 93: 2025.4832: 0.456445 -6.32613 22.0574 0.361408 -3.59265 9.34620
## 94: 2025.4832: 0.456438 -6.32606 22.0572 0.361400 -3.59261 9.34619
## 95: 2025.4832: 0.456438 -6.32606 22.0572 0.361400 -3.59261 9.34622
## print the fitted model (dispatches to print.fitFnMk)
fit
## Object of class "fitFnMk".
##
## Fitted (or set) value of Q:
## 0 1 2 3 4 5 6 7 8
## 0 -19.008277 19.008277 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
## 1 7.640263 -21.235358 13.595095 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
## 2 0.000000 4.770449 -13.865238 9.094790 0.000000 0.000000 0.000000 0.000000 0.000000
## 3 0.000000 0.000000 2.623435 -8.130795 5.507360 0.000000 0.000000 0.000000 0.000000
## 4 0.000000 0.000000 0.000000 1.199222 -4.032029 2.832806 0.000000 0.000000 0.000000
## 5 0.000000 0.000000 0.000000 0.000000 0.497811 -1.568939 1.071128 0.000000 0.000000
## 6 0.000000 0.000000 0.000000 0.000000 0.000000 0.519199 -0.741526 0.222326 0.000000
## 7 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.263389 -1.549789 0.286400
## 8 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.730379 -3.993729
## 9 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 4.920170
## 10 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
## 11 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
## 12 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
## 9 10 11 12
## 0 0.000000 0.000000 0.000000 0.000000
## 1 0.000000 0.000000 0.000000 0.000000
## 2 0.000000 0.000000 0.000000 0.000000
## 3 0.000000 0.000000 0.000000 0.000000
## 4 0.000000 0.000000 0.000000 0.000000
## 5 0.000000 0.000000 0.000000 0.000000
## 6 0.000000 0.000000 0.000000 0.000000
## 7 0.000000 0.000000 0.000000 0.000000
## 8 1.263349 0.000000 0.000000 0.000000
## 9 -8.073345 3.153175 0.000000 0.000000
## 10 7.832762 -13.788639 5.955876 0.000000
## 11 0.000000 11.468155 -21.139608 9.671454
## 12 0.000000 0.000000 15.826348 -15.826348
##
## Fitted (or set) value of pi:
## 0 1 2 3 4 5 6 7 8 9 10
## 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923 0.076923
## 11 12
## 0.076923 0.076923
## due to treating the root prior as (a) flat.
##
## Log-likelihood: -2025.48317
##
## Optimization method used was "nlminb"
##
## R thinks it has found the ML solution.
## plot the fitted rate functions (dispatches to plot.fitFnMk)
plot(fit)
To get a better picture, let’s overlay our generating model to see how close we were.
## re-plot the fitted rates & overlay the generating rate functions (dotted)
par(mar=c(5.1,4.1,1.1,1.1))
plot(fit)
## fit$states holds the numeric state levels (0:12), so fit$states+1 gives
## 1-based indices into the generating q1 & q2 vectors; the final index runs
## past length(q1) = k-1 & yields a trailing NA, which lines() silently
## drops -- TODO confirm this off-the-end indexing is intended
lines(x[1:length(fit$states)],q1[fit$states+1],col="blue",lty="dotted")
lines(x[1:length(fit$states)],q2[fit$states+1],col="red",lty="dotted")
legend("bottomright","generating",lty="dotted",cex=0.8)
That’s not bad.
No comments:
Post a Comment
Note: due to the very large amount of spam, all comments are now automatically submitted for moderation.