Wednesday, January 22, 2025

A general clade box function for phytools

A few days ago, I posted some code illustrating the geometry of drawing a “box” around a clade on an arc- or fan-style plotted tree in R.

Here’s my tweet about the post which includes a nice illustration of its application:

I asked a colleague if he thought this might be worth adding to phytools and his response was “yes, definitely useful.” The working function for that is below.

cladebox<-function(node,col="#0000FF40",...){
  pp<-get("last_plot.phylo",envir=.PlotPhyloEnv)
  phy<-list(edge=pp$edge,Nnode=pp$Nnode,
    tip.label=1:pp$Ntip)
  class(phy)<-"phylo"
  dd<-getDescendants(phy,node)
  dd<-dd[which(dd<=Ntip(phy))]
  if(hasArg(tip_buffer)) tip_buffer<-list(...)$tip_buffer
  else tip_buffer<-NULL
  if(pp$type%in%c("fan","arc")){
    if(node==(Ntip(phy)+1)) stem<-Inf
    else {
      parent<-phy$edge[which(phy$edge[,2]==node),1]
      stem<-sqrt(pp$xx[node]^2+pp$yy[node]^2)-
        sqrt(pp$xx[parent]^2+pp$yy[parent]^2)
    }
    if(hasArg(npoints)) npoints<-list(...)$npoints
    else npoints<-30
    if(hasArg(part)) part<-list(...)$part
    else {
      if(all(pp$xx>=0)&&all(pp$yy>=0)) part<-0.25
      else if(all(pp$yy>=0)) part<-0.5
      else part<-1
    }
    theta<-atan(pp$yy[dd]/pp$xx[dd])
    theta[pp$xx[dd]<0]<-pi+theta[pp$xx[dd]<0]
    theta[pp$xx[dd]>0&pp$yy[dd]<0]<-
      2*pi+theta[pp$xx[dd]>0&pp$yy[dd]<0]
    max_theta<-max(theta)+0.4*2*pi/Ntip(phy)*
      max(c(part,0.5))
    min_theta<-min(theta)-0.4*2*pi/Ntip(phy)*
      max(c(part,0.5))
    max_h<-max(sqrt(pp$xx^2+pp$yy^2))
    ff<-if(part>0.5) 1.6 else 
      if(0.25<part&&part<=0.5) 1.2 else 0.8
    if(is.null(tip_buffer)&&pp$show.tip.label){
      tip_buffer<-ff*(pp$x.lim[2]-max_h)
    } else tip_buffer<-0.02*max_h
    h1<-sqrt((pp$xx[node]^2+pp$yy[node]^2))-
      min(c(0.5*stem,0.02*max_h))
    h2<-max(sqrt(pp$xx[dd]^2+pp$yy[dd]^2))+tip_buffer
    tt<-seq(min_theta,max_theta,length.out=npoints)
    out_arc.x<-h2*cos(tt)
    out_arc.y<-h2*sin(tt)
    in_arc.x<-h1*cos(tt)
    in_arc.y<-h1*sin(tt)
    xx<-c(out_arc.x,in_arc.x[npoints:1])
    yy<-c(out_arc.y,in_arc.y[npoints:1])
    polygon(xx,yy,lwd=5,col=col,border=FALSE)
  } else if(pp$type%in%c("phylogram","cladogram")){
    if(pp$direction=="rightwards"){
      if(node==(Ntip(phy)+1)) stem<-0
      else {
        parent<-phy$edge[which(phy$edge[,2]==node),1]
        stem<-pp$xx[node]-pp$xx[parent]
      }
      max_h<-max(pp$xx)
      tip_buffer<-
        if(is.null(tip_buffer)&&pp$show.tip.label) 
          pp$x.lim[2]-max_h
        else 0.02*max_h
      h1<-pp$xx[node]-min(c(0.5*stem,0.02*max_h))
      h2<-max(pp$xx[dd])+tip_buffer
      xx<-c(h1,h2,h2,h1)
      yy<-c(rep(min(pp$yy[dd])-0.4,2),
        rep(max(pp$yy[dd])+0.4,2))
      polygon(xx,yy,border=FALSE,col=col)
    } else if(pp$direction=="leftwards"){
      if(node==(Ntip(phy)+1)) stem<-0
      else {
        parent<-phy$edge[which(phy$edge[,2]==node),1]
        stem<-pp$xx[parent]-pp$xx[node]
      }
      max_h<-max(pp$xx)
      tip_buffer<-
        if(is.null(tip_buffer)&&pp$show.tip.label) 
          pp$x.lim[1]
      else -0.02*max_h
      h1<-pp$xx[node]+min(c(0.02*max_h,0.5*stem))
      h2<-max(pp$xx[dd])+tip_buffer
      xx<-c(h1,h2,h2,h1)
      yy<-c(rep(min(pp$yy[dd])-0.4,2),
        rep(max(pp$yy[dd])+0.4,2))
      polygon(xx,yy,border=FALSE,col=col)
    } else if(pp$direction=="upwards"){
      if(node==(Ntip(phy)+1)) stem<-0
      else {
        parent<-phy$edge[which(phy$edge[,2]==node),1]
        stem<-pp$yy[node]-pp$yy[parent]
      }
      max_h<-max(pp$yy)
      tip_buffer<-
        if(is.null(tip_buffer)&&pp$show.tip.label) 
          pp$y.lim[2]-max_h
      else 0.02*max_h
      xx<-c(rep(min(pp$xx[dd])-0.4,2),
        rep(max(pp$xx[dd])+0.4,2))
      h1<-pp$yy[node]-min(c(0.02*max_h,0.5*stem))
      h2<-max(pp$yy[dd])+tip_buffer
      yy<-c(h1,h2,h2,h1)
      polygon(xx,yy,border=FALSE,col=col)
    } else if(pp$direction=="downwards"){
      if(node==(Ntip(phy)+1)) stem<-0
      else {
        parent<-phy$edge[which(phy$edge[,2]==node),1]
        stem<-pp$yy[parent]-pp$yy[node]
      }
      max_h<-max(pp$yy)
      tip_buffer<-
        if(is.null(tip_buffer)&&pp$show.tip.label) 
          pp$y.lim[1]
      else -0.02*max_h
      xx<-c(rep(min(pp$xx[dd])-0.4,2),
        rep(max(pp$xx[dd])+0.4,2))
      h1<-pp$yy[node]+min(c(0.02*max_h,0.5*stem))
      h2<-max(pp$yy[dd])+tip_buffer
      yy<-c(h1,h2,h2,h1)
      polygon(xx,yy,border=FALSE,col=col)
    }
  }
  invisible(list(x=xx,y=yy))
}

I’m not going to peel through all the layers of the onion, but some of the major ideas include that the method will plot an appropriate clade box depending on our plot style, and leave a buffer around the box whose width depends on the amount of space our plotting label left in our device for the tip labels. It even adjusts the “rootward” buffer depending on how closely spaced the MRCA of our focal is to its parent.

Let’s test it out.

library(phytools)
data("bonyfish.tree")
bonyfish.tree
## 
## Phylogenetic tree with 90 tips and 89 internal nodes.
## 
## Tip labels:
##   Xenomystus_nigri, Chirocentrus_dorab, Talismania_bifurcata, Alepocephalus_tenebrosus, Misgurnus_bipartitus, Opsariichthys_bidens, ...
## 
## Rooted; includes branch length(s).
plotTree(bonyfish.tree,direction="upwards",lwd=1,
  fsize=0.6,ftype="i")
nodelabels(cex=0.5,bg="white")

plot of chunk unnamed-chunk-4

Unfortunately, I don’t have any interesting clades to label in this tree, so what I’m going to do is draw boxes around the clades descended from the nodes numbered 96, 112, 126, 139, and 168 of this plot as follows.

plotTree(bonyfish.tree,direction="upwards",lwd=1,
  fsize=0.6,ftype="i")
cladebox(node=96)
cladebox(node=112)
cladebox(node=139)
cladebox(node=168)

plot of chunk unnamed-chunk-5

We can also draw our clade boxes first (as I illustrated last time), and then overlay our tree. Here’s what that might look like for the same tree & set of clade boxes.

plotTree(bonyfish.tree,direction="upwards",lwd=1,
  fsize=0.6,ftype="i",plot=FALSE)
cols<-RColorBrewer::brewer.pal(n=4,"Pastel1")
cladebox(node=96,col=cols[1])
cladebox(node=112,col=cols[2])
cladebox(node=139,col=cols[3])
cladebox(node=168,col=cols[4])
plotTree(bonyfish.tree,direction="upwards",lwd=1,
  fsize=0.6,ftype="i",add=TRUE)

plot of chunk unnamed-chunk-6

Of course, the point of my prior post on this topic was how to solve the much more complicated task of overlaying clade boxes on top of a circular fan or arc-style tree.

Naturally, our function does that too.

par(fg="transparent")
plotTree(bonyfish.tree,direction="upwards",lwd=1,
  fsize=0.7,ftype="i",type="arc",arc_height=0.25,
  color="transparent")
par(fg="black")
cols<-RColorBrewer::brewer.pal(n=4,"Pastel1")
cladebox(node=96,col=cols[1])
cladebox(node=112,col=cols[2])
cladebox(node=139,col=cols[3])
cladebox(node=168,col=cols[4])
plotTree(bonyfish.tree,direction="upwards",lwd=1,
  fsize=0.7,ftype="i",type="arc",arc_height=0.25,
  add=TRUE)

plot of chunk unnamed-chunk-7

The function also invisibly returns the coordinates of the polygons, so we can even overlay them in a custom way if we want.

par(fg="transparent")
plotTree(bonyfish.tree,direction="upwards",
  fsize=0.7,ftype="i",type="arc",arc_height=0.25,
  lwd=3,color="grey")
par(fg="black")
cols<-RColorBrewer::brewer.pal(n=4,"Pastel1")
cb_96<-cladebox(node=96,col="transparent")
polygon(cb_96,col=make.transparent(cols[1],0.4),
  density=20,border=cols[1],lwd=2)
cb_112<-cladebox(node=112,col="transparent")
polygon(cb_112,col=make.transparent(cols[2],0.4),
  density=20,border=cols[2],lwd=2)
cb_139<-cladebox(node=139,col="transparent")
polygon(cb_139,col=make.transparent(cols[3],0.4),
  density=20,border=cols[3],lwd=2)
cb_168<-cladebox(node=168,col="transparent")
polygon(cb_168,col=make.transparent(cols[4],0.4),
  density=20,border=cols[4],lwd=2)
plotTree(bonyfish.tree,direction="upwards",lwd=1,
  fsize=0.7,ftype="i",type="arc",arc_height=0.25,
  add=TRUE)

plot of chunk unnamed-chunk-8

Overall, I think this is kind of cool.

No comments:

Post a Comment

Note: due to the very large amount of spam, all comments are now automatically submitted for moderation.