Saturday, October 29, 2016

Testing hypotheses about the number of changes of different types on the tree using stochastic mapping

A phytools user recently asked me the following:

“I have been using the function countSimmap to count the transitions between short snouts and long snouts. However, I cannot figure out how to statistically test if there have been significantly more transitions to one state than the other. Is there a function for this in phytools?”

Well, stochastic character mapping is a Bayesian method - so we can't exactly test for 'significance', per se. However, we could, for example, sample transitions matrices (Q) and character histories from their posterior distribution, and then ask whether HPD intervals from the two overlap. We can even do some other cool stuff, like visualize the posterior probability distribution of changes across the two trees.

Here's a quick example with some simulated data:

library(phytools)
tree
## 
## Phylogenetic tree with 200 tips and 199 internal nodes.
## 
## Tip labels:
##  t185, t186, t168, t146, t187, t188, ...
## 
## Rooted; includes branch lengths.
x
## t185 t186 t168 t146 t187 t188 t111  t59  t60  t36  t27  t19  t20 t117 t118 
##    b    b    a    a    b    b    b    a    b    b    b    a    a    b    a 
## t103  t82 t123 t124  t28 t167 t195 t196 t126 t177 t178 t153 t154  t49 t147 
##    a    a    b    b    a    a    a    a    a    a    a    a    a    a    b 
## t148 t136 t137 t108 t109 t144 t145  t77  t78   t8   t9 t142 t143 t104 t105 
##    b    b    b    a    a    b    b    b    b    b    a    b    b    b    b 
## t175 t176  t45  t64 t115 t116 t157 t158   t5  t57  t58  t53  t47  t65 t149 
##    b    b    b    b    b    b    b    b    b    b    b    b    b    a    b 
## t150 t113  t91  t51  t52 t119 t120  t10  t11  t66  t67  t46  t15  t23  t95 
##    a    a    b    a    b    a    a    a    a    b    a    b    a    b    b 
##  t96  t99 t100  t43  t16  t62  t63  t86  t87 t181 t182  t84 t101 t102  t68 
##    b    a    a    a    b    a    b    b    b    b    b    b    b    b    b 
## t112 t155 t156  t70  t21  t50 t169 t170  t61 t110 t189 t190  t13  t85  t89 
##    b    b    b    b    b    a    a    a    a    a    a    a    a    a    a 
##  t90  t40 t165 t166  t17  t18  t34  t97  t98  t72  t73 t173 t174  t48  t30 
##    a    b    b    b    a    a    b    b    b    a    a    a    a    b    a 
##  t69 t127 t128  t81 t179 t180 t159 t160  t29  t37  t38 t199 t200 t163 t164 
##    b    b    b    b    a    a    a    a    a    b    b    a    a    a    a 
##   t7  t22  t44  t92 t131 t132  t74 t191 t192 t114  t12  t31  t32  t41  t42 
##    a    b    a    b    b    b    b    b    b    b    b    b    b    b    b 
##   t4  t14  t33  t55 t193 t194  t26  t54  t79  t80 t106 t107 t138 t139 t133 
##    b    b    b    b    b    b    b    b    b    b    b    b    a    a    a 
##  t39  t75  t76   t6  t83 t140 t141  t56  t24  t25  t71 t171 t172 t134 t135 
##    a    a    a    b    a    a    a    a    b    b    b    b    b    b    b 
## t125 t183 t184  t93  t94 t121 t122  t35   t2 t197 t198  t88 t129 t130 t161 
##    b    b    b    b    b    b    b    b    b    b    b    b    b    b    b 
## t162   t3 t151 t152   t1 
##    b    b    b    b    a 
## Levels: a b
trees<-make.simmap(tree,x,model="ARD",Q="mcmc",nsim=200)
## Running MCMC burn-in. Please wait....
## Running 20000 generations of MCMC, sampling every 100 generations.
## Please wait....
## 
## make.simmap is simulating with a sample of Q from
## the posterior distribution
## 
## Mean Q from the posterior is
## Q =
##            a          b
## a -1.4479062  1.4479062
## b  0.5834969 -0.5834969
## and (mean) root node prior probabilities
## pi =
## [1] 0.5 0.5
## Done.
obj<-summary(trees)
ab<-obj$count[,2]
ba<-obj$count[,3]
class(ab)<-class(ba)<-"mcmc"
library(coda)
hpd.ab<-HPDinterval(ab)
hpd.ba<-HPDinterval(ba)
hpd.ab
##      lower upper
## var1    17    40
## attr(,"Probability")
## [1] 0.95
hpd.ba
##      lower upper
## var1     4    27
## attr(,"Probability")
## [1] 0.95

We can see that in this case at least the posterior distributions broadly overlap.

Let's go a step further, and visualize these distributions:

p.ab<-hist(ab,breaks=seq(min(c(ab,ba))-1.5,
    max(c(ab,ba))+1.5,1),plot=FALSE)
p.ba<-hist(ba,breaks=seq(min(c(ab,ba))-1.5,
    max(c(ab,ba))+1.5,1),plot=FALSE)
plot(p.ab$mids,p.ab$density,xlim=c(min(c(ab,ba))-1,
    max(c(ab,ba))+1),ylim=c(0,1.2*max(c(p.ab$density,
    p.ba$density))),
    type="n",xlab="number of changes",
    ylab="relative frequency across stochastic maps")
y2<-rep(p.ab$density,each=2)
y2<-y2[-length(y2)]
x2<-rep(p.ab$mids,each=2)[-1]
x3<-c(min(x2),x2,max(x2))
y3<-c(0,y2,0)
polygon(x3,y3,col=make.transparent("blue",0.3),border=FALSE)
lines(p.ab$mids,p.ab$density,type="s")
y2<-rep(p.ba$density,each=2)
y2<-y2[-length(y2)]
x2<-rep(p.ba$mids,each=2)[-1]
x3<-c(min(x2),x2,max(x2))
y3<-c(0,y2,0)
polygon(x3,y3,col=make.transparent("red",0.3),border=FALSE)
lines(p.ba$mids,p.ba$density,type="s")
add.simmap.legend(colors=setNames(c(make.transparent("blue",0.3),
    make.transparent("red",0.3)),c("a?b","b?a")),
    prompt=FALSE,x=min(c(ab,ba)),y=0.95*par()$usr[4])
lines(hpd.ab,rep(1.01*max(p.ab$density),2))
lines(rep(hpd.ab[1],2),c(1.01*max(p.ab$density),
    1.01*max(p.ab$density)-0.005))
lines(rep(hpd.ab[2],2),c(1.01*max(p.ab$density),
    1.01*max(p.ab$density)-0.005))
text(mean(hpd.ab),1.01*max(p.ab$density),"HPD(a?b)",
    pos=3)
lines(hpd.ba,rep(1.01*max(p.ba$density),2))
lines(rep(hpd.ba[1],2),c(1.01*max(p.ba$density),
    1.01*max(p.ba$density)-0.005))
lines(rep(hpd.ba[2],2),c(1.01*max(p.ba$density),
    1.01*max(p.ba$density)-0.005))
text(mean(hpd.ba),1.01*max(p.ba$density),"HPD(b?a)",
    pos=3)

plot of chunk unnamed-chunk-1

Pretty cool right?

Note that in a likelihood framework we can look at the problem a different way - say by testing the null hypothesis that backward & forward rates are equal, or that one or the other rate is zero.

Finally, the data for this exercise were simulated as follows:

Q<-matrix(c(-1.2,1.2,0.5,-0.5),2,2,byrow=TRUE)
rownames(Q)<-colnames(Q)<-c("a","b")
tree<-pbtree(n=200,scale=1)
x<-sim.history(tree,Q,anc="a")$states
x<-as.factor(x)

Thursday, October 27, 2016

Plotting multiple pie charts at nodes

An R-sig-phylo question asked:

“I have made an ancestral state reconstruction for three discrete traits using the R package "phytools”,and I don't know how to draw the three asr pies on the node in a single plot.“

Although this can get messy, it is possible to do this using the argument adj. For instance:

library(phytools)
plotTree(tree)
nodelabels(pie=p1,adj=c(0.5,0.9),cex=0.5)
nodelabels(pie=p2,adj=c(0.5,0.5),cex=0.5)
nodelabels(pie=p3,adj=c(0.5,0.1),cex=0.5)

plot of chunk unnamed-chunk-1

to arrange vertically. To arrange horizontally, do:

plotTree(tree)
nodelabels(pie=p1,adj=c(0.445,0.5),cex=0.5)
nodelabels(pie=p2,adj=c(0.5,0.5),cex=0.5)
nodelabels(pie=p3,adj=c(0.555,0.5),cex=0.5)

plot of chunk unnamed-chunk-2

The specific values for adj will depend on the scale of the tree & the number of tips.

Tuesday, October 25, 2016

Mapping sampled changes on the tree from stochastic character mapping

A cool (in my opinion) phytools plotting function that I've yet to see applied in a published study is markChanges, which can be used to visualize the posterior density of changes (as opposed to trait values) from a sample of stochastically mapped trees.

Here are a couple of different examples - one with lots of uncertainty about the location of trait changes on the tree (feeding mode in elopomorph eel evolution; from Collar et al. 2014); and another in which there should be much less (piscivory vs. non-piscivory in Centrarchidae).

First, the elopomorph dataset:

library(phytools)
eel.tree
## 
## Phylogenetic tree with 61 tips and 60 internal nodes.
## 
## Tip labels:
##  Moringua_edwardsi, Kaupichthys_nuchalis, Gorgasia_taiwanensis, Heteroconger_hassi, Venefica_proboscidea, Anguilla_rostrata, ...
## 
## Rooted; includes branch lengths.
feed.mode
##                 Albula_vulpes             Anguilla_anguilla 
##                       suction                       suction 
##              Anguilla_bicolor             Anguilla_japonica 
##                       suction                       suction 
##             Anguilla_rostrata                Ariosoma_anago 
##                       suction                       suction 
##           Ariosoma_balearicum           Ariosoma_shiroanago 
##                       suction                       suction 
##        Bathyuroconger_vicinus   Brachysomophis_crocodilinus 
##                          bite                          bite 
##              Conger_japonicus              Conger_myriaster 
##                       suction                       suction 
##               Conger_verreaxi                Conger_wilsoni 
##                       suction                       suction 
##        Congresox_talabonoides            Cynoponticus_ferox 
##                       suction                          bite 
##            Dysomma_anguillare                  Elops_saurus 
##                          bite                       suction 
##         Facciolella_gilbertii          Gavialiceps_taeniola 
##                          bite                          bite 
##         Gnathophis_longicauda          Gorgasia_taiwanensis 
##                       suction                       suction 
##         Gymnothorax_castaneus   Gymnothorax_flavimarginatus 
##                          bite                          bite 
##            Gymnothorax_kidako           Gymnothorax_moringa 
##                          bite                          bite 
## Gymnothorax_pseudothyrsoideus       Gymnothorax_reticularis 
##                          bite                          bite 
##            Heteroconger_hassi          Ichthyapus_ophioneus 
##                       suction                          bite 
##      Kaupichthys_hyoproroides          Kaupichthys_nuchalis 
##                          bite                          bite 
##          Megalops_cyprinoides             Moringua_edwardsi 
##                       suction                          bite 
##             Moringua_javanica              Muraenesox_bagio 
##                          bite                          bite 
##           Muraenesox_cinereus          Myrichthys_breviceps 
##                          bite                       suction 
##          Myrichthys_maculosus         Myrichthys_magnificus 
##                       suction                       suction 
##                Myrophis_vafer        Nemichthys_scolopaceus 
##                          bite                          bite 
##          Nettastoma_melanurum        Ophichthus_serpentinus 
##                          bite                       suction 
##          Ophichthus_zophochir        Oxyconger_leptognathus 
##                       suction                          bite 
## Parabathymyrus_macrophthalmus           Paraconger_notialis 
##                       suction                       suction 
##      Pisodonophis_cancrivorus          Poeciloconger_kapala 
##                          bite                       suction 
##         Rhinomuraena_quaesita          Rhynchoconger_flavus 
##                          bite                       suction 
##        Saurenchelys_fierasfer      Scolecenchelys_breviceps 
##                          bite                       suction 
##            Scuticaria_tigrina             Serrivomer_beanii 
##                          bite                          bite 
##             Serrivomer_sector        Simenchelys_parasitica 
##                          bite                       suction 
##            Uroconger_lepturus      Uropterygius_micropterus 
##                       suction                          bite 
##          Venefica_proboscidea 
##                          bite 
## Levels: bite suction
mtrees<-make.simmap(eel.tree,feed.mode,nsim=100)
## make.simmap is sampling character histories conditioned on the transition matrix
## 
## Q =
##                bite     suction
## bite    -0.01582783  0.01582783
## suction  0.01582783 -0.01582783
## (estimated using likelihood);
## and (mean) root node prior probabilities
## pi =
##    bite suction 
##     0.5     0.5
## Done.
cols<-setNames(c("red","blue"),c("bite","suction"))
dotTree(eel.tree,feed.mode,colors=cols,fsize=0.7,ftype="i",
    legend=FALSE)
add.simmap.legend(x=0,y=-4,colors=cols,prompt=FALSE,
    vertical=FALSE,shape="circle")
nulo<-sapply(mtrees,markChanges,sapply(cols,
    make.transparent,0.1))
add.simmap.legend(colors=sapply(setNames(cols[2:1],
    c("bite->suction","suction->bite")),
    make.transparent,0.1),prompt=FALSE,x=50,y=-4,
    vertical=FALSE)

plot of chunk unnamed-chunk-1

Just to clarify the interpretation of this plot, the hash marks show the entire posterior sample of changes on the tree - across 100 samples from the posterior distribution. Any individual tree will have fewer (but still a lot of changes):

pd<-summary(mtrees)
pd
## 100 trees with a mapped discrete character with states:
##  bite, suction 
## 
## trees have 30.95 changes between states on average
## 
## changes are of the following types:
##      bite,suction suction,bite
## x->y        17.58        13.37
## 
## mean total time spent in each state is:
##              bite     suction    total
## raw  1155.3150660 828.7861757 1984.101
## prop    0.5822863   0.4177137    1.000

Now here is the second dataset. This one piscivory vs. non-piscovory in Centrarchidae:

cent.tree
## 
## Phylogenetic tree with 28 tips and 27 internal nodes.
## 
## Tip labels:
##  Acantharchus_pomotis, Lepomis_gibbosus, Lepomis_microlophus, Lepomis_punctatus, Lepomis_miniatus, Lepomis_auritus, ...
## 
## Rooted; includes branch lengths.
piscivory
##    Acantharchus_pomotis        Lepomis_gibbosus     Lepomis_microlophus 
##                    pisc                     non                     non 
##       Lepomis_punctatus        Lepomis_miniatus         Lepomis_auritus 
##                     non                     non                     non 
##      Lepomis_marginatus       Lepomis_megalotis         Lepomis_humilis 
##                     non                     non                     non 
##     Lepomis_macrochirus         Lepomis_gulosus     Lepomis_symmetricus 
##                     non                    pisc                     non 
##       Lepomis_cyanellus      Micropterus_coosae      Micropterus_notius 
##                    pisc                    pisc                    pisc 
##     Micropterus_treculi   Micropterus_salmoides  Micropterus_floridanus 
##                    pisc                    pisc                    pisc 
## Micropterus_punctulatus    Micropterus_dolomieu Centrarchus_macropterus 
##                    pisc                    pisc                     non 
##      Enneacantus_obesus       Pomoxis_annularis  Pomoxis_nigromaculatus 
##                     non                    pisc                    pisc 
##  Archolites_interruptus    Ambloplites_ariommus   Ambloplites_rupestris 
##                    pisc                    pisc                    pisc 
##   Ambloplites_cavifrons 
##                    pisc 
## Levels: non pisc
mtrees<-make.simmap(cent.tree,piscivory,nsim=100)
## make.simmap is sampling character histories conditioned on the transition matrix
## 
## Q =
##            non      pisc
## non  -4.228856  4.228856
## pisc  4.228856 -4.228856
## (estimated using likelihood);
## and (mean) root node prior probabilities
## pi =
##  non pisc 
##  0.5  0.5
## Done.
cols<-setNames(c("red","blue"),c("pisc","non"))
dotTree(cent.tree,piscivory,colors=cols,ftype="i")
nulo<-sapply(mtrees,markChanges,sapply(cols,
    make.transparent,0.2))
add.simmap.legend(colors=sapply(setNames(cols[2:1],
    c("non->pisc","pisc->non")),
    make.transparent,0.2),prompt=FALSE,
    x=par()$usr[1]+0.1*max(nodeHeights(cent.tree)),
    y=-1.2,vertical=FALSE)

plot of chunk unnamed-chunk-3

For comparison to our previous analysis:

summary(mtrees)
## 100 trees with a mapped discrete character with states:
##  non, pisc 
## 
## trees have 6.95 changes between states on average
## 
## changes are of the following types:
##      non,pisc pisc,non
## x->y     3.27     3.68
## 
## mean total time spent in each state is:
##            non     pisc    total
## raw  0.6532209 1.041858 1.695079
## prop 0.3853630 0.614637 1.000000

That's it.