Saturday, February 10, 2018

Annotating a plotted phylogeny with multiple discrete characters at the tips of the tree

Today a phytools user contacted me to ask me about the possibility of plotting a grid of discrete character states, for various characters, to the right of the tip labels of a plotted tree. There are some functions that can already do related things in phytools, but it is also possible to do this 'manually' (that is, by scripting), as I'll show below.

First, let's imagine up some data consisting of (say) a phylogeny with 80 tips & 4 discrete characters containing 2-4 states as follows:

library(phytools)
tree<-rtree(n=80)
plotTree(tree,fsize=0.8)

plot of chunk unnamed-chunk-1

Q1<-matrix(c(-1,1,1,-1),2,2,dimnames=list(0:1,0:1))
x1<-sim.Mk(tree,Q1)
Q2<-matrix(c(-1,1,0,1,-2,1,0,1,-1),3,3,dimnames=list(letters[1:3],
    letters[1:3]))
x2<-sim.Mk(tree,Q2)
Q3<-matrix(c(-0.5,0.5,0.5,-0.5),2,2,dimnames=list(c("rough","smooth"),
    c("rough","smooth")))
x3<-sim.Mk(tree,Q3)
Q4<-matrix(c(-3,1,1,1,1,-3,1,1,1,1,-3,1,1,1,1,-3),4,4,
    dimnames=list(LETTERS[1:4],LETTERS[1:4]))
x4<-sim.Mk(tree,Q4)
X<-data.frame(x1,x2,x3,x4)
## here's our data:
X
##     x1 x2     x3 x4
## t17  0  b  rough  A
## t58  0  c  rough  C
## t48  0  b  rough  A
## t10  1  c  rough  C
## t1   1  a  rough  D
## t38  0  c smooth  D
## t69  1  b  rough  B
## t40  1  b  rough  D
## t4   1  a smooth  C
## t52  1  b  rough  B
## t13  0  a smooth  C
## t44  0  c smooth  D
## t70  0  c smooth  B
## t77  1  c smooth  D
## t22  0  c  rough  D
## t9   0  c  rough  D
## t20  0  b smooth  D
## t49  0  b smooth  A
## t15  1  b smooth  A
## t33  0  c  rough  B
## t62  0  b smooth  D
## t55  1  c  rough  B
## t79  1  c  rough  B
## t47  0  b smooth  A
## t23  0  c smooth  D
## t14  1  b  rough  B
## t6   1  a smooth  D
## t54  0  b smooth  A
## t80  1  c  rough  B
## t43  1  b  rough  B
## t36  1  c  rough  B
## t78  1  b  rough  D
## t53  1  c smooth  D
## t68  0  a  rough  B
## t74  1  c  rough  A
## t50  0  a  rough  C
## t51  1  c  rough  B
## t16  0  a smooth  C
## t57  1  a  rough  A
## t3   0  b  rough  C
## t21  0  b smooth  C
## t30  0  a smooth  D
## t35  0  b  rough  D
## t7   0  b smooth  A
## t39  0  a smooth  D
## t24  0  a smooth  D
## t56  0  c  rough  B
## t25  1  c  rough  D
## t65  0  a  rough  B
## t2   0  b  rough  C
## t64  1  b smooth  D
## t63  1  c  rough  C
## t26  0  a smooth  A
## t8   1  b smooth  D
## t66  1  a  rough  A
## t71  0  c smooth  C
## t73  0  b  rough  C
## t34  1  c  rough  C
## t18  1  b  rough  C
## t5   0  a  rough  B
## t75  0  a  rough  D
## t11  0  b  rough  C
## t41  0  a  rough  A
## t59  0  b  rough  C
## t46  1  a smooth  C
## t76  0  a  rough  A
## t67  0  c  rough  D
## t72  0  c  rough  C
## t45  1  b smooth  D
## t61  1  a  rough  A
## t27  1  b smooth  C
## t60  1  c smooth  C
## t31  1  c smooth  D
## t19  0  c smooth  D
## t32  0  a  rough  C
## t29  0  a  rough  B
## t28  0  b  rough  D
## t12  1  c  rough  A
## t37  0  b  rough  C
## t42  0  b  rough  D

Now let's try to plot it:

library(RColorBrewer)
tree<-reorder(tree,"cladewise")
X<-X[tree$tip.label,]
plotTree(tree,plot=FALSE)
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
plotTree(tree,lwd=1,ylim=c(0,obj$y.lim[2]*1.05),xlim=c(0,obj$x.lim[2]*1.3),
    ftype="off")
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
h<-max(obj$xx)
fsize<-0.6
for(i in 1:Ntip(tree)){
    lines(c(obj$xx[i],h),rep(obj$yy[i],2),lty="dotted")
    text(h,obj$yy[i],tree$tip.label[i],cex=fsize,pos=4,font=3,offset=0.1)
}
s<-max(fsize*strwidth(tree$tip.label))
start.x<-1.05*h+s
palettes<-c("OrRd","PuOr","RdYlBu","Spectral")
cols<-list()
for(i in 1:ncol(X)){
    text(start.x,max(obj$yy)+1,paste("trait",colnames(X)[i]),pos=4,srt=60,
        cex=0.8,offset=0)
    cols[[i]]<-setNames(sample(brewer.pal(max(3,length(levels(X[[i]]))),
        palettes[i]),length(levels(X[[i]]))),levels(X[[i]]))
    for(j in 1:nrow(X)){
        xy<-c(start.x,obj$yy[j])
        y<-c(xy[2]-0.5,xy[2]+0.5,xy[2]+0.5,xy[2]-0.5)
        asp<-(par()$usr[2]-par()$usr[1])/(par()$usr[4]-par()$usr[3])*
            par()$pin[2]/par()$pin[1]
        x<-c(xy[1]-0.5*asp,xy[1]-0.5*asp,xy[1]+0.5*asp,xy[1]+0.5*asp)
        polygon(x,y,col=cols[[i]][as.character(X[[i]][j])])
    }
    start.x<-start.x+2*asp
}
start.y<-max(obj$yy)
for(i in 1:ncol(X)){
    text(start.x,start.y,paste("trait",colnames(X)[i]),pos=4,cex=0.9,
        offset=0)
    add.simmap.legend(colors=cols[[i]],shape="square",prompt=FALSE,
        x=start.x,y=start.y-2*strheight("W")*0.9,fsize=0.9)
    start.y<-start.y-1.5*0.9*strheight("W")*(length(cols[[i]])-1)-6
}

plot of chunk unnamed-chunk-2

Obviously, we can modify any of the plotting parameters as we see fit to achieve different colors, etc.

In the example that was set to me the boxes were spaced by a small amount. Obviously, we can do this if we like (although with so many tips I don't know if it's a good idea.)

Let's try it anyway:

library(RColorBrewer)
tree<-reorder(tree,"cladewise")
X<-X[tree$tip.label,]
plotTree(tree,plot=FALSE)
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
plotTree(tree,lwd=1,ylim=c(0,obj$y.lim[2]*1.05),xlim=c(0,obj$x.lim[2]*1.3),
    ftype="off")
obj<-get("last_plot.phylo",envir=.PlotPhyloEnv)
h<-max(obj$xx)
fsize<-0.6
for(i in 1:Ntip(tree)){
    lines(c(obj$xx[i],h),rep(obj$yy[i],2),lty="dotted")
    text(h,obj$yy[i],tree$tip.label[i],cex=fsize,pos=4,font=3,offset=0.1)
}
s<-max(fsize*strwidth(tree$tip.label))
start.x<-1.05*h+s
palettes<-c("OrRd","PuOr","RdYlBu","Spectral")
cols<-list()
for(i in 1:ncol(X)){
    text(start.x,max(obj$yy)+1,paste("trait",colnames(X)[i]),pos=4,srt=60,
        cex=0.8,offset=0)
    cols[[i]]<-setNames(sample(brewer.pal(max(3,length(levels(X[[i]]))),
        palettes[i]),length(levels(X[[i]]))),levels(X[[i]]))
    for(j in 1:nrow(X)){
        xy<-c(start.x,obj$yy[j])
        y<-c(xy[2]-0.42,xy[2]+0.42,xy[2]+0.42,xy[2]-0.42)
        asp<-(par()$usr[2]-par()$usr[1])/(par()$usr[4]-par()$usr[3])*
            par()$pin[2]/par()$pin[1]
        x<-c(xy[1]-0.42*asp,xy[1]-0.42*asp,xy[1]+0.42*asp,xy[1]+0.42*asp)
        polygon(x,y,col=cols[[i]][as.character(X[[i]][j])])
    }
    start.x<-start.x+2*asp
}
start.y<-max(obj$yy)
for(i in 1:ncol(X)){
    text(start.x,start.y,paste("trait",colnames(X)[i]),pos=4,cex=0.9,
        offset=0)
    add.simmap.legend(colors=cols[[i]],shape="square",prompt=FALSE,
        x=start.x,y=start.y-2*strheight("W")*0.9,fsize=0.9)
    start.y<-start.y-1.5*0.9*strheight("W")*(length(cols[[i]])-1)-6
}

plot of chunk unnamed-chunk-3

That's it.

2 comments:

  1. The BestRay-Ban UK. Here you can find almost swiss brand replica watches.Replica watches,one of the most famous brands,Best active-sunglasses/ ,Specialities watch for sale,Fast delivery and free shipping!

    ReplyDelete
  2. Do you think similar would be achievable for a continuous character signified by a colour gradient?

    ReplyDelete