Saturday, February 10, 2018

Annotating a plotted phylogeny with multiple discrete characters at the tips of the tree

Today a phytools user contacted me to ask me about the possibility of plotting a grid of discrete character states, for various characters, to the right of the tip labels of a plotted tree. There are some functions that can already do related things in phytools, but it is also possible to do this 'manually' (that is, by scripting), as I'll show below.

First, let's imagine up some data consisting of (say) a phylogeny with 80 tips & 4 discrete characters containing 2-4 states as follows:

library(phytools)
tree<-rtree(n=80)
plotTree(tree,fsize=0.8)

plot of chunk unnamed-chunk-1

Q1<-matrix(c(-1,1,1,-1),2,2,dimnames=list(0:1,0:1))
x1<-sim.Mk(tree,Q1)
Q2<-matrix(c(-1,1,0,1,-2,1,0,1,-1),3,3,dimnames=list(letters[1:3],
    letters[1:3]))
x2<-sim.Mk(tree,Q2)
Q3<-matrix(c(-0.5,0.5,0.5,-0.5),2,2,dimnames=list(c("rough","smooth"),
    c("rough","smooth")))
x3<-sim.Mk(tree,Q3)
Q4<-matrix(c(-3,1,1,1,1,-3,1,1,1,1,-3,1,1,1,1,-3),4,4,
    dimnames=list(LETTERS[1:4],LETTERS[1:4]))
x4<-sim.Mk(tree,Q4)
X<-data.frame(x1,x2,x3,x4)
## here's our data:
X
##     x1 x2     x3 x4
## t17  0  b  rough  A
## t58  0  c  rough  C
## t48  0  b  rough  A
## t10  1  c  rough  C
## t1   1  a  rough  D
## t38  0  c smooth  D
## t69  1  b  rough  B
## t40  1  b  rough  D
## t4   1  a smooth  C
## t52  1  b  rough  B
## t13  0  a smooth  C
## t44  0  c smooth  D
## t70  0  c smooth  B
## t77  1  c smooth  D
## t22  0  c  rough  D
## t9   0  c  rough  D
## t20  0  b smooth  D
## t49  0  b smooth  A
## t15  1  b smooth  A
## t33  0  c  rough  B
## t62  0  b smooth  D
## t55  1  c  rough  B
## t79  1  c  rough  B
## t47  0  b smooth  A
## t23  0  c smooth  D
## t14  1  b  rough  B
## t6   1  a smooth  D
## t54  0  b smooth  A
## t80  1  c  rough  B
## t43  1  b  rough  B
## t36  1  c  rough  B
## t78  1  b  rough  D
## t53  1  c smooth  D
## t68  0  a  rough  B
## t74  1  c  rough  A
## t50  0  a  rough  C
## t51  1  c  rough  B
## t16  0  a smooth  C
## t57  1  a  rough  A
## t3   0  b  rough  C
## t21  0  b smooth  C
## t30  0  a smooth  D
## t35  0  b  rough  D
## t7   0  b smooth  A
## t39  0  a smooth  D
## t24  0  a smooth  D
## t56  0  c  rough  B
## t25  1  c  rough  D
## t65  0  a  rough  B
## t2   0  b  rough  C
## t64  1  b smooth  D
## t63  1  c  rough  C
## t26  0  a smooth  A
## t8   1  b smooth  D
## t66  1  a  rough  A
## t71  0  c smooth  C
## t73  0  b  rough  C
## t34  1  c  rough  C
## t18  1  b  rough  C
## t5   0  a  rough  B
## t75  0  a  rough  D
## t11  0  b  rough  C
## t41  0  a  rough  A
## t59  0  b  rough  C
## t46  1  a smooth  C
## t76  0  a  rough  A
## t67  0  c  rough  D
## t72  0  c  rough  C
## t45  1  b smooth  D
## t61  1  a  rough  A
## t27  1  b smooth  C
## t60  1  c smooth  C
## t31  1  c smooth  D
## t19  0  c smooth  D
## t32  0  a  rough  C
## t29  0  a  rough  B
## t28  0  b  rough  D
## t12  1  c  rough  A
## t37  0  b  rough  C
## t42  0  b  rough  D

Now let's try to plot it:

library(RColorBrewer)
tree<-reorder(tree,"cladewise")
X<-X[tree$tip.label,]
plotTree(tree,plot=FALSE)
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
plotTree(tree,lwd=1,ylim=c(0,obj$y.lim[2]*1.05),xlim=c(0,obj$x.lim[2]*1.3),
    ftype="off")
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
h<-max(obj$xx)
fsize<-0.6
for(i in 1:Ntip(tree)){
    lines(c(obj$xx[i],h),rep(obj$yy[i],2),lty="dotted")
    text(h,obj$yy[i],tree$tip.label[i],cex=fsize,pos=4,font=3,offset=0.1)
}
s<-max(fsize*strwidth(tree$tip.label))
start.x<-1.05*h+s
palettes<-c("OrRd","PuOr","RdYlBu","Spectral")
cols<-list()
for(i in 1:ncol(X)){
    text(start.x,max(obj$yy)+1,paste("trait",colnames(X)[i]),pos=4,srt=60,
        cex=0.8,offset=0)
    cols[[i]]<-setNames(sample(brewer.pal(max(3,length(levels(X[[i]]))),
        palettes[i]),length(levels(X[[i]]))),levels(X[[i]]))
    for(j in 1:nrow(X)){
        xy<-c(start.x,obj$yy[j])
        y<-c(xy[2]-0.5,xy[2]+0.5,xy[2]+0.5,xy[2]-0.5)
        asp<-(par()$usr[2]-par()$usr[1])/(par()$usr[4]-par()$usr[3])*
            par()$pin[2]/par()$pin[1]
        x<-c(xy[1]-0.5*asp,xy[1]-0.5*asp,xy[1]+0.5*asp,xy[1]+0.5*asp)
        polygon(x,y,col=cols[[i]][as.character(X[[i]][j])])
    }
    start.x<-start.x+2*asp
}
start.y<-max(obj$yy)
for(i in 1:ncol(X)){
    text(start.x,start.y,paste("trait",colnames(X)[i]),pos=4,cex=0.9,
        offset=0)
    add.simmap.legend(colors=cols[[i]],shape="square",prompt=FALSE,
        x=start.x,y=start.y-2*strheight("W")*0.9,fsize=0.9)
    start.y<-start.y-1.5*0.9*strheight("W")*(length(cols[[i]])-1)-6
}

plot of chunk unnamed-chunk-2

Obviously, we can modify any of the plotting parameters as we see fit to achieve different colors, etc.

In the example that was set to me the boxes were spaced by a small amount. Obviously, we can do this if we like (although with so many tips I don't know if it's a good idea.)

Let's try it anyway:

library(RColorBrewer)
tree<-reorder(tree,"cladewise")
X<-X[tree$tip.label,]
plotTree(tree,plot=FALSE)
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
plotTree(tree,lwd=1,ylim=c(0,obj$y.lim[2]*1.05),xlim=c(0,obj$x.lim[2]*1.3),
    ftype="off")
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
h<-max(obj$xx)
fsize<-0.6
for(i in 1:Ntip(tree)){
    lines(c(obj$xx[i],h),rep(obj$yy[i],2),lty="dotted")
    text(h,obj$yy[i],tree$tip.label[i],cex=fsize,pos=4,font=3,offset=0.1)
}
s<-max(fsize*strwidth(tree$tip.label))
start.x<-1.05*h+s
palettes<-c("OrRd","PuOr","RdYlBu","Spectral")
cols<-list()
for(i in 1:ncol(X)){
    text(start.x,max(obj$yy)+1,paste("trait",colnames(X)[i]),pos=4,srt=60,
        cex=0.8,offset=0)
    cols[[i]]<-setNames(sample(brewer.pal(max(3,length(levels(X[[i]]))),
        palettes[i]),length(levels(X[[i]]))),levels(X[[i]]))
    for(j in 1:nrow(X)){
        xy<-c(start.x,obj$yy[j])
        y<-c(xy[2]-0.42,xy[2]+0.42,xy[2]+0.42,xy[2]-0.42)
        asp<-(par()$usr[2]-par()$usr[1])/(par()$usr[4]-par()$usr[3])*
            par()$pin[2]/par()$pin[1]
        x<-c(xy[1]-0.42*asp,xy[1]-0.42*asp,xy[1]+0.42*asp,xy[1]+0.42*asp)
        polygon(x,y,col=cols[[i]][as.character(X[[i]][j])])
    }
    start.x<-start.x+2*asp
}
start.y<-max(obj$yy)
for(i in 1:ncol(X)){
    text(start.x,start.y,paste("trait",colnames(X)[i]),pos=4,cex=0.9,
        offset=0)
    add.simmap.legend(colors=cols[[i]],shape="square",prompt=FALSE,
        x=start.x,y=start.y-2*strheight("W")*0.9,fsize=0.9)
    start.y<-start.y-1.5*0.9*strheight("W")*(length(cols[[i]])-1)-6
}

plot of chunk unnamed-chunk-3

That's it.

No comments:

Post a Comment