Thursday, October 26, 2017

Comparing the rates of diversification (speciation & extinction) between two or more trees

During the review of our manuscript describing a method (implemented in the phytools function ratebytree) the editor correctly suggested that we could use the same general principle (of summing the log-likelihoods) to compare the rate of evolution of a discrete character, which we added to the revision, and even the rates of diversification, which we did not.

I have now coded this after first posting functions to fit birth-death & Yule diversification models for incomplete sampling fractions using likelihood (1, 2).

The modified ratebytree function, and the internal optimization function that it uses, are below. I will push to GitHub in a bit so that this update can be installed easily by the user. Note that I have also slightly modified fit.bd (used internally) so that the optimization routine is nlminb instead of optim. The advantage of this is really just that it blows through -Inf log-likelihood values during box-constrained optimization which can sometimes occur when the optimizer tries very low values of the birth-rate.

Here is the code:

ratebytree<-function(trees,x,...){
    if(hasArg(type)) type<-list(...)$type
    else if(!missing(x)&&!is.null(x)) {
        if(is.factor(unlist(x))||is.character(unlist(x))) 
            type<-"discrete"
        else type<-"continuous"
    } else type<-"diversification"
    if(type=="continuous") obj<-rbt.cont(trees,x,...)
    else if(type=="discrete") obj<-rbt.disc(trees,x,...)
    else if(type=="diversification") obj<-rbt.div(trees,...)
    else {
        cat(paste("type =",type,"not recognized.\n"))
        obj<-NULL
    }
    obj
}

## diversification ratebytree
rbt.div<-function(trees,...){
    if(hasArg(trace)) trace<-list(...)$trace
    else trace<-FALSE
    if(hasArg(digits)) digits<-list(...)$digits
    else digits<-4
    if(hasArg(test)) test<-list(...)$test
    else test<-"chisq"
    if(hasArg(quiet)) quiet<-list(...)$quiet
    else quiet<-FALSE
    if(hasArg(model)) model<-list(...)$model
    else model<-"birth-death"
    if(hasArg(rho)) rho<-list(...)$rho
    else rho<-rep(1,length(trees))
    if(hasArg(tol)) tol<-list(...)$tol
    else tol<-1e-12
    if(!inherits(trees,"multiPhylo")) 
        stop("trees should be object of class \"multiPhylo\".")
    foo<-if(model=="birth-death") fit.bd else 
        if(model=="Yule"||model=="yule") fit.yule
    fit.multi<-mapply(foo,tree=trees,rho=rho,SIMPLIFY=FALSE)
    logL.multi<-sum(sapply(fit.multi,logLik))
    t<-lapply(trees,function(phy) sort(branching.times(phy),
        decreasing=TRUE))
    lik.onerate<-function(theta,t,rho,model,trace=FALSE){
        logL<-sum(-mapply(lik.bd,t=t,rho=rho,
            MoreArgs=list(theta=theta)))
        if(trace) cat(paste(theta[1],"\t",theta[2],"\t",logL,"\n"))
        -logL
    }
    init.b<-mean(sapply(fit.multi,function(x) x$b))
    init.d<-mean(sapply(fit.multi,function(x) x$d))
    fit.onerate<-nlminb(c(init.b,init.d),lik.onerate,t=t,rho=rho,
        model=model,trace=trace,lower=c(0,0),upper=rep(Inf,2))
    rates.multi<-cbind(sapply(fit.multi,function(x) x$b),
            sapply(fit.multi,function(x) x$d))
    if(!is.null(names(trees))) rownames(trees)<-names(trees)
    else rownames(rates.multi)<-paste("tree",1:length(trees),sep="")
    colnames(rates.multi)<-c("b","d")
    LR<-2*(logL.multi+fit.onerate$objective)
    km<-2*length(trees)
    k1<-2
    P.chisq<-pchisq(LR,df=km-k1,lower.tail=FALSE)
    obj<-list(
        multi.rate.model=list(
            logL=logL.multi,
            rates=rates.multi,
            method="nlminb"),
        common.rate.model=list(
            logL=-fit.onerate$objective,
            rates=setNames(fit.onerate$par,c("b","d")),
            method="nlminb"),
        model=model,N=length(trees),
        n=sapply(trees,Ntip),
        likelihood.ratio=LR,P.chisq=P.chisq,
        type="diversification")
    class(obj)<-"ratebytree"
    obj
}

I also had to update the S3 print method of the object class so that we can sensibly review our results:

print.ratebytree<-function(x,...){
    if(hasArg(digits)) digits<-list(...)$digits
    else digits<-4
    if(x$type=="continuous") prbt.cont(x,digits=digits)
    else if(x$type=="discrete") prbt.disc(x,digits=digits)
    else if(x$type=="diversification") prbt.div(x,digits=digits)
    else print(x)
}

prbt.div<-function(x,digits=digits){
    cat("ML common diversification-rate model:")
    cat("\n\tb\td\tk\tlog(L)")
    cat(paste("\nvalue",round(x$common.rate.model$rates[1],digits),
        round(x$common.rate.model$rates[2],digits),
        2,round(x$common.rate.model$logL,digits),sep="\t"))
    cat("\n\nML multi diversification-rate model:")
    cat(paste("\n",paste(paste("b[",1:x$N,"]",sep=""),collapse="\t"),
        paste(paste("d[",1:x$N,"]",sep=""),collapse="\t"),"k\tlog(L)",
        sep="\t"))
    cat(paste("\nvalue",paste(round(x$multi.rate.model$rates[,"b"],digits),
        collapse="\t"),paste(round(x$multi.rate.model$rates[,"d"],digits),
        collapse="\t"),2*x$N,round(x$multi.rate.model$logL,digits),
        sep="\t"))
    cat(paste("\n\nModel fitting method was \"",x$multi.rate.model$method,
        "\".\n",sep=""))
    cat(paste("\nLikelihood ratio:",round(x$likelihood.ratio,digits),"\n"))
    cat(paste("P-value (based on X^2):",round(x$P.chisq,digits),"\n\n"))
}

Here's a simple example in which the rates are equal:

library(phytools)
t1<-pbtree(b=0.039,t=100,nsim=3)
fit<-ratebytree(t1,type="diversification")
## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation

## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation

## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation
fit
## ML common diversification-rate model:
##          b       d       k   log(L)
## value    0.0405  0.0038  2   -111.1671
## 
## ML multi diversification-rate model:
##          b[1]    b[2]    b[3]    d[1]    d[2]    d[3]    k   log(L)
## value    0.0404  0.0397  0.0452  0.0091  0       0.0102  6   -110.2116
## 
## Model fitting method was "nlminb".
## 
## Likelihood ratio: 1.911 
## P-value (based on X^2): 0.7521

and here's another one in which the speciation & extinction rates have been simulated to be different (but with an identical net diversification rate):

t2<-c(pbtree(b=0.039,t=100),pbtree(b=0.069,d=0.02,t=100,extant.only=T))
obj<-ltt(t2[[1]],log.lineages=FALSE,xlim=c(0,110),log="y",
    ylim=c(1,max(sapply(t2,Ntip))),col="blue",lwd=2)
text(max(obj$times),max(obj$ltt),"tree #1",pos=4)
obj<-ltt(t2[[2]],log.lineages=FALSE,xlim=c(0,110),add=T,
    col="orange",lwd=2)
text(max(obj$times),max(obj$ltt),"tree #2",pos=4)
title(main="Can you see the pull of the pull of the present?")

plot of chunk unnamed-chunk-4

fit<-ratebytree(t2,type="diversification")
## Warning in nlminb(c(init.b, init.d), lik.bd, t = T, rho = rho, lower =
## rep(0, : NA/NaN function evaluation
fit
## ML common diversification-rate model:
##          b       d       k   log(L)
## value    0.0659  0.0208  2   795.3924
## 
## ML multi diversification-rate model:
##          b[1]    b[2]    d[1]    d[2]    k   log(L)
## value    0.0449  0.075   0.0052  0.0223  4   807.0186
## 
## Model fitting method was "nlminb".
## 
## Likelihood ratio: 23.2525 
## P-value (based on X^2): 0

Neat.

No comments:

Post a Comment