Friday, November 6, 2015

Ancestral state estimation for continuous characters with some nodes fixed

I just added the method to fix some ancestral states during ML ancestral state estimation for continuous characters that I used in contMap and previously described in the pages of this blog to the phytools function fastAnc. Interested readers can look at the update here, and it can also be obtained by updating phytools from GitHub.

Here's a quick example for simulated data in which the internal states are known:

library(phytools)
## simulate a tree
tree<-pbtree(n=26,tip.label=LETTERS)
plotTree(tree)

plot of chunk unnamed-chunk-1

## simulate data for tips & nodes
xa<-fastBM(tree,internal=TRUE)
a<-xa[as.character(1:tree$Nnode+Ntip(tree))]
a ## simulated ancestral states
##          27          28          29          30          31          32 
##  0.00000000 -0.56601239 -0.14105673 -1.19661359 -1.66581973 -0.93686310 
##          33          34          35          36          37          38 
## -0.97710948 -2.14034585 -0.53578250  0.09301371 -0.46628240 -0.94499655 
##          39          40          41          42          43          44 
## -1.56438497  0.24762819 -2.29351327 -1.04066230 -1.40266486 -2.00346322 
##          45          46          47          48          49          50 
##  0.08526085  0.28998116  1.42932620  1.77921782  1.58183599  1.31419370 
##          51 
## -1.16905491
x<-xa[tree$tip.label] ## simulated tip data
x
##          A          B          C          D          E          F 
##  0.5683050 -1.5702611 -0.6006650 -0.8855694 -2.3290004 -2.2543046 
##          G          H          I          J          K          L 
##  0.2865251 -0.7192771 -1.2468277 -1.5512149  0.1103867  0.5530111 
##          M          N          O          P          Q          R 
## -2.4353686 -2.5456411 -1.3246120 -0.7733470 -1.4208267 -0.1892435 
##          S          T          U          V          W          X 
##  0.6573859  1.3248485  1.7718648  1.9577381  1.3256986  0.9330483 
##          Y          Z 
## -1.5227223 -0.7031367
## visualize
obj<-contMap(tree,x,anc.states=a,plot=FALSE)
plot(obj)

plot of chunk unnamed-chunk-1

Now let's proceed to estimate ancestral states - first with no known states, then with some, then (in the trivial case) with all:

## first with no internal state data
anc1<-fastAnc(tree,x,CI=TRUE)
anc1
## Ancestral character estimates using fastAnc:
##        27        28        29        30        31        32        33 
## -0.458331 -0.555336 -0.531588 -0.613868 -0.944556 -1.006013 -1.299123 
##        34        35        36        37        38        39        40 
## -2.246533 -0.306729 -0.248593 -0.687350 -0.724508 -1.116524 -1.087959 
##        41        42        43        44        45        46        47 
## -2.407758 -0.965980 -1.127005 -1.085506  0.054002  0.370668  1.315666 
##        48        49        50        51 
##  1.616995  1.585510  1.231706 -0.886161 
## 
## Lower & upper 95% CIs:
##        lower     upper
## 27 -1.732581  0.815918
## 28 -1.618844  0.508172
## 29 -1.568306  0.505131
## 30 -1.680216  0.452481
## 31 -1.940270  0.051157
## 32 -1.984038 -0.027989
## 33 -2.253199 -0.345047
## 34 -2.587840 -1.905225
## 35 -1.420263  0.806805
## 36 -1.348681  0.851494
## 37 -1.331019 -0.043681
## 38 -1.280478 -0.168539
## 39 -1.587909 -0.645139
## 40 -2.170496 -0.005422
## 41 -2.909779 -1.905736
## 42 -1.978196  0.046235
## 43 -1.928715 -0.325295
## 44 -1.771882 -0.399130
## 45 -0.966783  1.074788
## 46 -0.593459  1.334794
## 47  0.864155  1.767176
## 48  1.310525  1.923465
## 49  1.317635  1.853386
## 50  0.829624  1.633787
## 51 -1.607091 -0.165230
## now with 8 (of 25) internal nodes known
anc.states<-sample(a,8)
anc.states
##         37         44         49         28         30         39 
## -0.4662824 -2.0034632  1.5818360 -0.5660124 -1.1966136 -1.5643850 
##         47         34 
##  1.4293262 -2.1403459
anc2<-fastAnc(tree,x,CI=TRUE,anc.states=anc.states)
anc2
## Ancestral character estimates using fastAnc:
##        27        28        29        30        31        32        33 
## -0.462569 -0.566012 -0.626395 -1.196614 -1.243811 -1.227162 -1.383056 
##        34        35        36        37        38        39        40 
## -2.140346 -0.324441 -0.199393 -0.466282 -0.779036 -1.564385 -1.262843 
##        41        42        43        44        45        46        47 
## -2.418075 -1.243738 -1.710369 -2.003463  0.083770  0.411412  1.429326 
##        48        49        50        51 
##  1.631835  1.581836  1.294140 -0.880376 
## 
## Lower & upper 95% CIs:
##        lower     upper
## 27 -1.419954  0.494815
## 28 -0.566012 -0.566012
## 29 -1.133676 -0.119115
## 30 -1.196614 -1.196614
## 31 -2.136108 -0.351515
## 32 -2.181969 -0.272355
## 33 -2.388912 -0.377201
## 34 -2.140346 -2.140346
## 35 -1.385475  0.736594
## 36 -1.326371  0.927585
## 37 -0.466282 -0.466282
## 38 -1.195829 -0.362243
## 39 -1.564385 -1.564385
## 40 -2.354270 -0.171416
## 41 -2.965873 -1.870277
## 42 -2.295524 -0.191951
## 43 -2.441551 -0.979187
## 44 -2.003463 -2.003463
## 45 -1.002757  1.170297
## 46 -0.615960  1.438785
## 47  1.429326  1.429326
## 48  1.379314  1.884356
## 49  1.581836  1.581836
## 50  0.948393  1.639887
## 51 -1.666475 -0.094277

It's probably worth noting that for any node with a fixed state the 95% CI has a width of zero - just as we'd expect.

Finally:

anc3<-fastAnc(tree,x,CI=TRUE,anc.states=a)
anc3
## Ancestral character estimates using fastAnc:
##        27        28        29        30        31        32        33 
##  0.000000 -0.566012 -0.141057 -1.196614 -1.665820 -0.936863 -0.977109 
##        34        35        36        37        38        39        40 
## -2.140346 -0.535783  0.093014 -0.466282 -0.944997 -1.564385  0.247628 
##        41        42        43        44        45        46        47 
## -2.293513 -1.040662 -1.402665 -2.003463  0.085261  0.289981  1.429326 
##        48        49        50        51 
##  1.779218  1.581836  1.314194 -1.169055 
## 
## Lower & upper 95% CIs:
##        lower     upper
## 27  0.000000  0.000000
## 28 -0.566012 -0.566012
## 29 -0.141057 -0.141057
## 30 -1.196614 -1.196614
## 31 -1.665820 -1.665820
## 32 -0.936863 -0.936863
## 33 -0.977109 -0.977109
## 34 -2.140346 -2.140346
## 35 -0.535783 -0.535783
## 36  0.093014  0.093014
## 37 -0.466282 -0.466282
## 38 -0.944997 -0.944997
## 39 -1.564385 -1.564385
## 40  0.247628  0.247628
## 41 -2.293513 -2.293513
## 42 -1.040662 -1.040662
## 43 -1.402665 -1.402665
## 44 -2.003463 -2.003463
## 45  0.085261  0.085261
## 46  0.289981  0.289981
## 47  1.429326  1.429326
## 48  1.779218  1.779218
## 49  1.581836  1.581836
## 50  1.314194  1.314194
## 51 -1.169055 -1.169055
## now
plot(a,anc1$ace,xlab="true states",ylab="estimated states",
    main="No internal nodes fixed") ## no internal node information
lines(c(min(par()$usr[c(1,3)]),max(par()$usr[c(2,4)])),
    c(min(par()$usr[c(1,3)]),max(par()$usr[c(2,4)])),
    lty="dashed") ## 1:1 line

plot of chunk unnamed-chunk-3

plot(a,anc2$ace,xlab="true states",ylab="estimated states",
    main="Some internal nodes fixed") ## some internal node information
lines(c(min(par()$usr[c(1,3)]),max(par()$usr[c(2,4)])),
    c(min(par()$usr[c(1,3)]),max(par()$usr[c(2,4)])),
    lty="dashed") ## 1:1 line

plot of chunk unnamed-chunk-3

plot(a,anc3$ace,xlab="true states",ylab="estimated states",
    main="All internal nodes fixed") ## all internal node information
lines(c(min(par()$usr[c(1,3)]),max(par()$usr[c(2,4)])),
    c(min(par()$usr[c(1,3)]),max(par()$usr[c(2,4)])),
    lty="dashed") ## 1:1 line

plot of chunk unnamed-chunk-3

That's it.

No comments:

Post a Comment

Note: due to the very large amount of spam, all comments are now automatically submitted for moderation.