Saturday, February 10, 2018

Annotating a plotted phylogeny with multiple discrete characters at the tips of the tree

Today a phytools user contacted me to ask me about the possibility of plotting a grid of discrete character states, for various characters, to the right of the tip labels of a plotted tree. There are some functions that can already do related things in phytools, but it is also possible to do this 'manually' (that is, by scripting), as I'll show below.

First, let's imagine up some data consisting of (say) a phylogeny with 80 tips & 4 discrete characters containing 2-4 states as follows:

library(phytools)
tree<-rtree(n=80)
plotTree(tree,fsize=0.8)

plot of chunk unnamed-chunk-1

Q1<-matrix(c(-1,1,1,-1),2,2,dimnames=list(0:1,0:1))
x1<-sim.Mk(tree,Q1)
Q2<-matrix(c(-1,1,0,1,-2,1,0,1,-1),3,3,dimnames=list(letters[1:3],
    letters[1:3]))
x2<-sim.Mk(tree,Q2)
Q3<-matrix(c(-0.5,0.5,0.5,-0.5),2,2,dimnames=list(c("rough","smooth"),
    c("rough","smooth")))
x3<-sim.Mk(tree,Q3)
Q4<-matrix(c(-3,1,1,1,1,-3,1,1,1,1,-3,1,1,1,1,-3),4,4,
    dimnames=list(LETTERS[1:4],LETTERS[1:4]))
x4<-sim.Mk(tree,Q4)
X<-data.frame(x1,x2,x3,x4)
## here's our data:
X
##     x1 x2     x3 x4
## t17  0  b  rough  A
## t58  0  c  rough  C
## t48  0  b  rough  A
## t10  1  c  rough  C
## t1   1  a  rough  D
## t38  0  c smooth  D
## t69  1  b  rough  B
## t40  1  b  rough  D
## t4   1  a smooth  C
## t52  1  b  rough  B
## t13  0  a smooth  C
## t44  0  c smooth  D
## t70  0  c smooth  B
## t77  1  c smooth  D
## t22  0  c  rough  D
## t9   0  c  rough  D
## t20  0  b smooth  D
## t49  0  b smooth  A
## t15  1  b smooth  A
## t33  0  c  rough  B
## t62  0  b smooth  D
## t55  1  c  rough  B
## t79  1  c  rough  B
## t47  0  b smooth  A
## t23  0  c smooth  D
## t14  1  b  rough  B
## t6   1  a smooth  D
## t54  0  b smooth  A
## t80  1  c  rough  B
## t43  1  b  rough  B
## t36  1  c  rough  B
## t78  1  b  rough  D
## t53  1  c smooth  D
## t68  0  a  rough  B
## t74  1  c  rough  A
## t50  0  a  rough  C
## t51  1  c  rough  B
## t16  0  a smooth  C
## t57  1  a  rough  A
## t3   0  b  rough  C
## t21  0  b smooth  C
## t30  0  a smooth  D
## t35  0  b  rough  D
## t7   0  b smooth  A
## t39  0  a smooth  D
## t24  0  a smooth  D
## t56  0  c  rough  B
## t25  1  c  rough  D
## t65  0  a  rough  B
## t2   0  b  rough  C
## t64  1  b smooth  D
## t63  1  c  rough  C
## t26  0  a smooth  A
## t8   1  b smooth  D
## t66  1  a  rough  A
## t71  0  c smooth  C
## t73  0  b  rough  C
## t34  1  c  rough  C
## t18  1  b  rough  C
## t5   0  a  rough  B
## t75  0  a  rough  D
## t11  0  b  rough  C
## t41  0  a  rough  A
## t59  0  b  rough  C
## t46  1  a smooth  C
## t76  0  a  rough  A
## t67  0  c  rough  D
## t72  0  c  rough  C
## t45  1  b smooth  D
## t61  1  a  rough  A
## t27  1  b smooth  C
## t60  1  c smooth  C
## t31  1  c smooth  D
## t19  0  c smooth  D
## t32  0  a  rough  C
## t29  0  a  rough  B
## t28  0  b  rough  D
## t12  1  c  rough  A
## t37  0  b  rough  C
## t42  0  b  rough  D

Now let's try to plot it:

library(RColorBrewer)
tree<-reorder(tree,"cladewise")
X<-X[tree$tip.label,]
plotTree(tree,plot=FALSE)
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
plotTree(tree,lwd=1,ylim=c(0,obj$y.lim[2]*1.05),xlim=c(0,obj$x.lim[2]*1.3),
    ftype="off")
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
h<-max(obj$xx)
fsize<-0.6
for(i in 1:Ntip(tree)){
    lines(c(obj$xx[i],h),rep(obj$yy[i],2),lty="dotted")
    text(h,obj$yy[i],tree$tip.label[i],cex=fsize,pos=4,font=3,offset=0.1)
}
s<-max(fsize*strwidth(tree$tip.label))
start.x<-1.05*h+s
palettes<-c("OrRd","PuOr","RdYlBu","Spectral")
cols<-list()
for(i in 1:ncol(X)){
    text(start.x,max(obj$yy)+1,paste("trait",colnames(X)[i]),pos=4,srt=60,
        cex=0.8,offset=0)
    cols[[i]]<-setNames(sample(brewer.pal(max(3,length(levels(X[[i]]))),
        palettes[i]),length(levels(X[[i]]))),levels(X[[i]]))
    for(j in 1:nrow(X)){
        xy<-c(start.x,obj$yy[j])
        y<-c(xy[2]-0.5,xy[2]+0.5,xy[2]+0.5,xy[2]-0.5)
        asp<-(par()$usr[2]-par()$usr[1])/(par()$usr[4]-par()$usr[3])*
            par()$pin[2]/par()$pin[1]
        x<-c(xy[1]-0.5*asp,xy[1]-0.5*asp,xy[1]+0.5*asp,xy[1]+0.5*asp)
        polygon(x,y,col=cols[[i]][as.character(X[[i]][j])])
    }
    start.x<-start.x+2*asp
}
start.y<-max(obj$yy)
for(i in 1:ncol(X)){
    text(start.x,start.y,paste("trait",colnames(X)[i]),pos=4,cex=0.9,
        offset=0)
    add.simmap.legend(colors=cols[[i]],shape="square",prompt=FALSE,
        x=start.x,y=start.y-2*strheight("W")*0.9,fsize=0.9)
    start.y<-start.y-1.5*0.9*strheight("W")*(length(cols[[i]])-1)-6
}

plot of chunk unnamed-chunk-2

Obviously, we can modify any of the plotting parameters as we see fit to achieve different colors, etc.

In the example that was set to me the boxes were spaced by a small amount. Obviously, we can do this if we like (although with so many tips I don't know if it's a good idea.)

Let's try it anyway:

library(RColorBrewer)
tree<-reorder(tree,"cladewise")
X<-X[tree$tip.label,]
plotTree(tree,plot=FALSE)
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
plotTree(tree,lwd=1,ylim=c(0,obj$y.lim[2]*1.05),xlim=c(0,obj$x.lim[2]*1.3),
    ftype="off")
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
h<-max(obj$xx)
fsize<-0.6
for(i in 1:Ntip(tree)){
    lines(c(obj$xx[i],h),rep(obj$yy[i],2),lty="dotted")
    text(h,obj$yy[i],tree$tip.label[i],cex=fsize,pos=4,font=3,offset=0.1)
}
s<-max(fsize*strwidth(tree$tip.label))
start.x<-1.05*h+s
palettes<-c("OrRd","PuOr","RdYlBu","Spectral")
cols<-list()
for(i in 1:ncol(X)){
    text(start.x,max(obj$yy)+1,paste("trait",colnames(X)[i]),pos=4,srt=60,
        cex=0.8,offset=0)
    cols[[i]]<-setNames(sample(brewer.pal(max(3,length(levels(X[[i]]))),
        palettes[i]),length(levels(X[[i]]))),levels(X[[i]]))
    for(j in 1:nrow(X)){
        xy<-c(start.x,obj$yy[j])
        y<-c(xy[2]-0.42,xy[2]+0.42,xy[2]+0.42,xy[2]-0.42)
        asp<-(par()$usr[2]-par()$usr[1])/(par()$usr[4]-par()$usr[3])*
            par()$pin[2]/par()$pin[1]
        x<-c(xy[1]-0.42*asp,xy[1]-0.42*asp,xy[1]+0.42*asp,xy[1]+0.42*asp)
        polygon(x,y,col=cols[[i]][as.character(X[[i]][j])])
    }
    start.x<-start.x+2*asp
}
start.y<-max(obj$yy)
for(i in 1:ncol(X)){
    text(start.x,start.y,paste("trait",colnames(X)[i]),pos=4,cex=0.9,
        offset=0)
    add.simmap.legend(colors=cols[[i]],shape="square",prompt=FALSE,
        x=start.x,y=start.y-2*strheight("W")*0.9,fsize=0.9)
    start.y<-start.y-1.5*0.9*strheight("W")*(length(cols[[i]])-1)-6
}

plot of chunk unnamed-chunk-3

That's it.

2 comments:

  1. Do you think similar would be achievable for a continuous character signified by a colour gradient?

    ReplyDelete
  2. HI Liam, I am your fan thanks for this awesome blog. I am new in the world of phylogenetic methods but I am using your blog as guide.
    Can you maybe elucidate how to construct the figure presented in this paper??
    https://onlinelibrary.wiley.com/doi/pdf/10.1111/evo.13559

    Thanks once again,
    cheers Rick

    ReplyDelete

Note: due to the very large amount of spam, all comments are now automatically submitted for moderation.