During the review of our manuscript describing a method (implemented in
the phytools function ratebytree
) the editor correctly
suggested that we could use the same general principle (of summing the
log-likelihoods) to compare the rate of evolution of a discrete character,
which we added to the revision, and even the rates of diversification, which
we did not.
I have now coded this after first posting functions to fit birth-death & Yule diversification models for incomplete sampling fractions using likelihood (1, 2).
The modified ratebytree
function, and the internal optimization
function that it uses, are below. I will push to GitHub in a bit so that
this update can be installed easily by the user. Note that I have also
slightly
modified
fit.bd
(used internally) so that the optimization routine is
nlminb
instead of optim
. The advantage of this is
really just that it blows through -Inf
log-likelihood values
during box-constrained optimization which can sometimes occur when the
optimizer tries very low values of the birth-rate.
Here is the code:
ratebytree<-function(trees,x,...){
if(hasArg(type)) type<-list(...)$type
else if(!missing(x)&&!is.null(x)) {
if(is.factor(unlist(x))||is.character(unlist(x)))
type<-"discrete"
else type<-"continuous"
} else type<-"diversification"
if(type=="continuous") obj<-rbt.cont(trees,x,...)
else if(type=="discrete") obj<-rbt.disc(trees,x,...)
else if(type=="diversification") obj<-rbt.div(trees,...)
else {
cat(paste("type =",type,"not recognized.\n"))
obj<-NULL
}
obj
}
## diversification ratebytree
rbt.div<-function(trees,...){
if(hasArg(trace)) trace<-list(...)$trace
else trace<-FALSE
if(hasArg(digits)) digits<-list(...)$digits
else digits<-4
if(hasArg(test)) test<-list(...)$test
else test<-"chisq"
if(hasArg(quiet)) quiet<-list(...)$quiet
else quiet<-FALSE
if(hasArg(model)) model<-list(...)$model
else model<-"birth-death"
if(hasArg(rho)) rho<-list(...)$rho
else rho<-rep(1,length(trees))
if(hasArg(tol)) tol<-list(...)$tol
else tol<-1e-12
if(!inherits(trees,"multiPhylo"))
stop("trees should be object of class \"multiPhylo\".")
foo<-if(model=="birth-death") fit.bd else
if(model=="Yule"||model=="yule") fit.yule
fit.multi<-mapply(foo,tree=trees,rho=rho,SIMPLIFY=FALSE)
logL.multi<-sum(sapply(fit.multi,logLik))
t<-lapply(trees,function(phy) sort(branching.times(phy),
decreasing=TRUE))
lik.onerate<-function(theta,t,rho,model,trace=FALSE){
logL<-sum(-mapply(lik.bd,t=t,rho=rho,
MoreArgs=list(theta=theta)))
if(trace) cat(paste(theta[1],"\t",theta[2],"\t",logL,"\n"))
-logL
}
init.b<-mean(sapply(fit.multi,function(x) x$b))
init.d<-mean(sapply(fit.multi,function(x) x$d))
fit.onerate<-nlminb(c(init.b,init.d),lik.onerate,t=t,rho=rho,
model=model,trace=trace,lower=c(0,0),upper=rep(Inf,2))
rates.multi<-cbind(sapply(fit.multi,function(x) x$b),
sapply(fit.multi,function(x) x$d))
if(!is.null(names(trees))) rownames(trees)<-names(trees)
else rownames(rates.multi)<-paste("tree",1:length(trees),sep="")
colnames(rates.multi)<-c("b","d")
LR<-2*(logL.multi+fit.onerate$objective)
km<-2*length(trees)
k1<-2
P.chisq<-pchisq(LR,df=km-k1,lower.tail=FALSE)
obj<-list(
multi.rate.model=list(
logL=logL.multi,
rates=rates.multi,
method="nlminb"),
common.rate.model=list(
logL=-fit.onerate$objective,
rates=setNames(fit.onerate$par,c("b","d")),
method="nlminb"),
model=model,N=length(trees),
n=sapply(trees,Ntip),
likelihood.ratio=LR,P.chisq=P.chisq,
type="diversification")
class(obj)<-"ratebytree"
obj
}
I also had to update the S3 print
method of the object class
so that we can sensibly review our results:
print.ratebytree<-function(x,...){
if(hasArg(digits)) digits<-list(...)$digits
else digits<-4
if(x$type=="continuous") prbt.cont(x,digits=digits)
else if(x$type=="discrete") prbt.disc(x,digits=digits)
else if(x$type=="diversification") prbt.div(x,digits=digits)
else print(x)
}
prbt.div<-function(x,digits=digits){
cat("ML common diversification-rate model:")
cat("\n\tb\td\tk\tlog(L)")
cat(paste("\nvalue",round(x$common.rate.model$rates[1],digits),
round(x$common.rate.model$rates[2],digits),
2,round(x$common.rate.model$logL,digits),sep="\t"))
cat("\n\nML multi diversification-rate model:")
cat(paste("\n",paste(paste("b[",1:x$N,"]",sep=""),collapse="\t"),
paste(paste("d[",1:x$N,"]",sep=""),collapse="\t"),"k\tlog(L)",
sep="\t"))
cat(paste("\nvalue",paste(round(x$multi.rate.model$rates[,"b"],digits),
collapse="\t"),paste(round(x$multi.rate.model$rates[,"d"],digits),
collapse="\t"),2*x$N,round(x$multi.rate.model$logL,digits),
sep="\t"))
cat(paste("\n\nModel fitting method was \"",x$multi.rate.model$method,
"\".\n",sep=""))
cat(paste("\nLikelihood ratio:",round(x$likelihood.ratio,digits),"\n"))
cat(paste("P-value (based on X^2):",round(x$P.chisq,digits),"\n\n"))
}
Here's a simple example in which the rates are equal:
library(phytools)
t1<-pbtree(b=0.039,t=100,nsim=3)
fit<-ratebytree(t1,type="diversification")
## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation
## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation
## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation
fit
## ML common diversification-rate model:
## b d k log(L)
## value 0.0405 0.0038 2 -111.1671
##
## ML multi diversification-rate model:
## b[1] b[2] b[3] d[1] d[2] d[3] k log(L)
## value 0.0404 0.0397 0.0452 0.0091 0 0.0102 6 -110.2116
##
## Model fitting method was "nlminb".
##
## Likelihood ratio: 1.911
## P-value (based on X^2): 0.7521
and here's another one in which the speciation & extinction rates have been simulated to be different (but with an identical net diversification rate):
t2<-c(pbtree(b=0.039,t=100),pbtree(b=0.069,d=0.02,t=100,extant.only=T))
obj<-ltt(t2[[1]],log.lineages=FALSE,xlim=c(0,110),log="y",
ylim=c(1,max(sapply(t2,Ntip))),col="blue",lwd=2)
text(max(obj$times),max(obj$ltt),"tree #1",pos=4)
obj<-ltt(t2[[2]],log.lineages=FALSE,xlim=c(0,110),add=T,
col="orange",lwd=2)
text(max(obj$times),max(obj$ltt),"tree #2",pos=4)
title(main="Can you see the pull of the pull of the present?")
fit<-ratebytree(t2,type="diversification")
## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation
fit
## ML common diversification-rate model:
## b d k log(L)
## value 0.0659 0.0208 2 795.3924
##
## ML multi diversification-rate model:
## b[1] b[2] d[1] d[2] k log(L)
## value 0.0449 0.075 0.0052 0.0223 4 807.0186
##
## Model fitting method was "nlminb".
##
## Likelihood ratio: 23.2525
## P-value (based on X^2): 0
Neat.
No comments:
Post a Comment
Note: due to the very large amount of spam, all comments are now automatically submitted for moderation.