## Thursday, October 26, 2017

### Comparing the rates of diversification (speciation & extinction) between two or more trees

During the review of our manuscript describing a method (implemented in the phytools function `ratebytree`) the editor correctly suggested that we could use the same general principle (of summing the log-likelihoods) to compare the rate of evolution of a discrete character, which we added to the revision, and even the rates of diversification, which we did not.

I have now coded this after first posting functions to fit birth-death & Yule diversification models for incomplete sampling fractions using likelihood (1, 2).

The modified `ratebytree` function, and the internal optimization function that it uses, are below. I will push to GitHub in a bit so that this update can be installed easily by the user. Note that I have also slightly modified `fit.bd` (used internally) so that the optimization routine is `nlminb` instead of `optim`. The advantage of this is really just that it blows through `-Inf` log-likelihood values during box-constrained optimization which can sometimes occur when the optimizer tries very low values of the birth-rate.

Here is the code:

``````ratebytree<-function(trees,x,...){
if(hasArg(type)) type<-list(...)\$type
else if(!missing(x)&&!is.null(x)) {
if(is.factor(unlist(x))||is.character(unlist(x)))
type<-"discrete"
else type<-"continuous"
} else type<-"diversification"
if(type=="continuous") obj<-rbt.cont(trees,x,...)
else if(type=="discrete") obj<-rbt.disc(trees,x,...)
else if(type=="diversification") obj<-rbt.div(trees,...)
else {
cat(paste("type =",type,"not recognized.\n"))
obj<-NULL
}
obj
}

## diversification ratebytree
rbt.div<-function(trees,...){
if(hasArg(trace)) trace<-list(...)\$trace
else trace<-FALSE
if(hasArg(digits)) digits<-list(...)\$digits
else digits<-4
if(hasArg(test)) test<-list(...)\$test
else test<-"chisq"
if(hasArg(quiet)) quiet<-list(...)\$quiet
else quiet<-FALSE
if(hasArg(model)) model<-list(...)\$model
else model<-"birth-death"
if(hasArg(rho)) rho<-list(...)\$rho
else rho<-rep(1,length(trees))
if(hasArg(tol)) tol<-list(...)\$tol
else tol<-1e-12
if(!inherits(trees,"multiPhylo"))
stop("trees should be object of class \"multiPhylo\".")
foo<-if(model=="birth-death") fit.bd else
if(model=="Yule"||model=="yule") fit.yule
fit.multi<-mapply(foo,tree=trees,rho=rho,SIMPLIFY=FALSE)
logL.multi<-sum(sapply(fit.multi,logLik))
t<-lapply(trees,function(phy) sort(branching.times(phy),
decreasing=TRUE))
lik.onerate<-function(theta,t,rho,model,trace=FALSE){
logL<-sum(-mapply(lik.bd,t=t,rho=rho,
MoreArgs=list(theta=theta)))
if(trace) cat(paste(theta[1],"\t",theta[2],"\t",logL,"\n"))
-logL
}
init.b<-mean(sapply(fit.multi,function(x) x\$b))
init.d<-mean(sapply(fit.multi,function(x) x\$d))
fit.onerate<-nlminb(c(init.b,init.d),lik.onerate,t=t,rho=rho,
model=model,trace=trace,lower=c(0,0),upper=rep(Inf,2))
rates.multi<-cbind(sapply(fit.multi,function(x) x\$b),
sapply(fit.multi,function(x) x\$d))
if(!is.null(names(trees))) rownames(trees)<-names(trees)
else rownames(rates.multi)<-paste("tree",1:length(trees),sep="")
colnames(rates.multi)<-c("b","d")
LR<-2*(logL.multi+fit.onerate\$objective)
km<-2*length(trees)
k1<-2
P.chisq<-pchisq(LR,df=km-k1,lower.tail=FALSE)
obj<-list(
multi.rate.model=list(
logL=logL.multi,
rates=rates.multi,
method="nlminb"),
common.rate.model=list(
logL=-fit.onerate\$objective,
rates=setNames(fit.onerate\$par,c("b","d")),
method="nlminb"),
model=model,N=length(trees),
n=sapply(trees,Ntip),
likelihood.ratio=LR,P.chisq=P.chisq,
type="diversification")
class(obj)<-"ratebytree"
obj
}
``````

I also had to update the S3 `print` method of the object class so that we can sensibly review our results:

``````print.ratebytree<-function(x,...){
if(hasArg(digits)) digits<-list(...)\$digits
else digits<-4
if(x\$type=="continuous") prbt.cont(x,digits=digits)
else if(x\$type=="discrete") prbt.disc(x,digits=digits)
else if(x\$type=="diversification") prbt.div(x,digits=digits)
else print(x)
}

prbt.div<-function(x,digits=digits){
cat("ML common diversification-rate model:")
cat("\n\tb\td\tk\tlog(L)")
cat(paste("\nvalue",round(x\$common.rate.model\$rates[1],digits),
round(x\$common.rate.model\$rates[2],digits),
2,round(x\$common.rate.model\$logL,digits),sep="\t"))
cat("\n\nML multi diversification-rate model:")
cat(paste("\n",paste(paste("b[",1:x\$N,"]",sep=""),collapse="\t"),
paste(paste("d[",1:x\$N,"]",sep=""),collapse="\t"),"k\tlog(L)",
sep="\t"))
cat(paste("\nvalue",paste(round(x\$multi.rate.model\$rates[,"b"],digits),
collapse="\t"),paste(round(x\$multi.rate.model\$rates[,"d"],digits),
collapse="\t"),2*x\$N,round(x\$multi.rate.model\$logL,digits),
sep="\t"))
cat(paste("\n\nModel fitting method was \"",x\$multi.rate.model\$method,
"\".\n",sep=""))
cat(paste("\nLikelihood ratio:",round(x\$likelihood.ratio,digits),"\n"))
cat(paste("P-value (based on X^2):",round(x\$P.chisq,digits),"\n\n"))
}
``````

Here's a simple example in which the rates are equal:

``````library(phytools)
t1<-pbtree(b=0.039,t=100,nsim=3)
fit<-ratebytree(t1,type="diversification")
``````
``````## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation

## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation

## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation
``````
``````fit
``````
``````## ML common diversification-rate model:
##          b       d       k   log(L)
## value    0.0405  0.0038  2   -111.1671
##
## ML multi diversification-rate model:
##          b[1]    b[2]    b[3]    d[1]    d[2]    d[3]    k   log(L)
## value    0.0404  0.0397  0.0452  0.0091  0       0.0102  6   -110.2116
##
## Model fitting method was "nlminb".
##
## Likelihood ratio: 1.911
## P-value (based on X^2): 0.7521
``````

and here's another one in which the speciation & extinction rates have been simulated to be different (but with an identical net diversification rate):

``````t2<-c(pbtree(b=0.039,t=100),pbtree(b=0.069,d=0.02,t=100,extant.only=T))
obj<-ltt(t2[[1]],log.lineages=FALSE,xlim=c(0,110),log="y",
ylim=c(1,max(sapply(t2,Ntip))),col="blue",lwd=2)
text(max(obj\$times),max(obj\$ltt),"tree #1",pos=4)
col="orange",lwd=2)
text(max(obj\$times),max(obj\$ltt),"tree #2",pos=4)
title(main="Can you see the pull of the pull of the present?")
``````

``````fit<-ratebytree(t2,type="diversification")
``````
``````## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation
``````
``````fit
``````
``````## ML common diversification-rate model:
##          b       d       k   log(L)
## value    0.0659  0.0208  2   795.3924
##
## ML multi diversification-rate model:
##          b[1]    b[2]    d[1]    d[2]    k   log(L)
## value    0.0449  0.075   0.0052  0.0223  4   807.0186
##
## Model fitting method was "nlminb".
##
## Likelihood ratio: 23.2525
## P-value (based on X^2): 0
``````

Neat.