Nearly two years ago I posted about fitting a PGLS model taking into account sampling variance (uncertainty) in y.
Inspired by a student dataset & problem here at los Andes, the following is a slightly differently implemented solution that should lead to the same result:
pgls.SEy<-function(model,data,corClass=corBrownian,tree=tree,
se=NULL,method=c("REML","ML"),interval=c(0,1000),...){
corfunc<-corClass
## preliminaries
data<-data[tree$tip.label,]
if(is.null(se)) se<-setNames(rep(0,Ntip(tree),
tree$tip.label))
## likelihood function
lk<-function(sig2e,data,tree,model,ve){
tree$edge.length<-tree$edge.length*sig2e
ii<-sapply(1:Ntip(tree),function(x,e) which(e==x),
e=tree$edge[,2])
tree$edge.length[ii]<-tree$edge.length[ii]+
ve[tree$tip.label]
v<-diag(vcv(tree))
vf<-varFixed(~v)
COR<-corfunc(1,tree,...)
fit<-gls(model,data=data,correlation=COR,weights=vf)
-logLik(fit)
}
## estimate sig2[e]
fit<-optimize(lk,interval=interval,
data=data,tree=tree,model=model,ve=se^2)
tree$edge.length<-tree$edge.length*fit$minimum
ii<-sapply(1:Ntip(tree),function(x,e) which(e==x),
e=tree$edge[,2])
tree$edge.length[ii]<-tree$edge.length[ii]+
se[tree$tip.label]^2
v<-diag(vcv(tree))
vf<-varFixed(~v)
## fit & return model
gls(model,data,correlation=corfunc(1,tree),weights=vf,
method=method)
}
Now let's try to apply it:
library(phytools)
library(nlme)
## here's our data
tree
##
## Phylogenetic tree with 100 tips and 99 internal nodes.
##
## Tip labels:
## t65, t66, t29, t30, t13, t78, ...
##
## Rooted; includes branch lengths.
X
## ye x1 x2
## t65 -0.77900192 -0.933177258 -0.50289849
## t66 0.84646891 -0.360779073 -0.67343194
## t29 1.95846040 -0.004324650 -0.86464732
## t30 0.24373967 -0.283674892 -0.05950573
## t13 -5.54444003 -0.260222231 1.69823520
## t78 1.88453970 -0.030577008 -0.12238217
## t79 -0.59317590 -0.242023433 0.32537283
## t5 1.17182511 -0.351473922 -0.86081240
## t26 3.36099814 -0.417929491 -0.87860040
## t67 -0.21357307 -0.494481915 -1.04212209
## t68 0.73039688 -0.588127345 -0.71373910
## t86 2.81142943 -0.045597241 -1.28701218
## t87 4.89158144 -0.284624727 -1.19177925
## t25 2.69692032 -0.099326903 -1.26492190
## t16 1.27290355 -0.329391279 -0.69189543
## t47 -0.57529243 -0.998675854 -0.60213264
## t48 -4.91449264 -1.135659972 -0.90384083
## t10 2.88459685 -0.170786099 -0.84404497
## t20 1.11432889 -0.116176393 -1.43439903
## t63 0.67056966 -0.271788410 -0.48803237
## t64 2.25646332 -0.122474607 -0.87832126
## t53 2.52828710 -0.188851979 -0.48504631
## t21 1.78891253 -0.666761834 -1.27216994
## t23 1.45124819 -0.263860837 -1.16042313
## t49 1.03438725 -0.143231501 -1.19002853
## t50 0.61071781 0.258295208 -0.60080140
## t80 1.00845570 -0.326915114 0.10754248
## t81 -1.01437643 -0.507894799 0.26522058
## t19 -1.71820534 -0.811681993 -0.26318414
## t84 0.71358338 -0.737986864 -1.02763382
## t85 1.56004661 -0.712068874 -0.77728970
## t40 -0.38524799 -0.933452671 -0.83696879
## t41 0.49296392 -0.229120475 -1.49848945
## t18 2.76362994 -0.204012732 -1.64088635
## t76 -0.38900308 -0.117917709 -0.77909630
## t77 0.16193894 0.287088622 -0.72342580
## t27 2.64224382 -0.358620911 -1.71171019
## t92 -0.08353593 -0.337589308 -1.45006236
## t93 0.31220124 -0.312388755 -1.60509872
## t8 -1.04234066 -0.676186538 -0.41457179
## t2 -0.24958117 -1.382336906 -0.60476399
## t95 0.30844124 0.071824226 -0.04136211
## t96 0.17344643 -0.000369423 0.04677993
## t6 -4.30672086 -1.228130839 1.24440860
## t44 -0.39925053 -0.778840190 -0.10432506
## t51 -0.06845018 -1.529691824 -0.63523463
## t52 -0.33920397 -0.671563524 0.25110920
## t72 -0.22233595 -0.208175485 0.21888791
## t73 -0.74246626 -0.280504466 -0.05425123
## t35 0.58660363 -0.432133697 -0.60337432
## t38 -0.02995503 -1.599070451 -1.12646600
## t39 -0.99818963 -1.019397388 -0.14879313
## t99 2.02180211 0.819938089 0.10457809
## t100 3.03396613 0.797765277 0.09518138
## t24 -2.28364797 0.132350015 1.25951182
## t11 -1.03843807 -0.338010923 0.63895729
## t36 3.08690868 -0.109222940 -0.84517257
## t37 1.66001718 -0.783798640 0.25056370
## t28 -0.42294326 -0.624356825 -0.18617947
## t7 1.63649995 -0.995914763 -0.82088749
## t12 0.30240267 -0.430696448 -0.37673980
## t33 -0.90310597 -0.459191936 0.14792529
## t34 -0.46084029 -1.369205857 0.15734648
## t15 -0.95017011 -0.668597155 -0.19397849
## t42 -0.40629518 -1.006046065 -0.59102696
## t43 -1.45997398 -0.657881494 -0.76035426
## t22 -0.06754584 -0.549687736 -0.49300322
## t1 -0.52189221 -0.735847227 -0.10188643
## t3 1.42350778 -1.255254006 -1.36243767
## t4 -1.66965072 0.036571658 0.12260330
## t74 3.03252085 1.178218512 -0.76133067
## t75 2.48876980 1.064090251 -0.95706527
## t31 -0.97972153 -0.435236571 0.54331428
## t59 -3.89650669 -0.567740108 0.91285849
## t60 -1.92222453 -0.516435916 0.64042685
## t9 3.53788280 1.375046920 -1.24425619
## t90 6.09565095 0.878034624 -2.01772447
## t91 6.88955565 0.935005458 -2.09172115
## t56 -0.35774725 -0.517446554 0.30596671
## t57 1.23478325 -0.116159889 0.05991413
## t14 -1.27430128 -0.906819465 0.46607917
## t61 -4.01746299 -1.312009985 1.32878564
## t62 -2.40971394 -1.612880369 1.08221968
## t69 -2.04466742 -0.242584724 0.76102366
## t70 -4.84522907 -0.396631201 1.07417172
## t88 -0.17336430 0.237257899 0.41246936
## t89 -2.05031514 0.434875538 0.30737619
## t54 -0.80195450 -0.569749931 1.07419352
## t55 -1.48926859 -0.152194258 1.08008287
## t32 -0.87454629 0.602680886 0.75156851
## t45 -3.25099532 -1.438964529 2.01395563
## t46 -2.70322247 -1.404376125 1.86329858
## t17 0.38924791 -0.217652443 0.32900077
## t58 -3.40005156 -1.498640558 1.17233451
## t82 -1.89445105 -1.348265012 1.08151593
## t83 -2.05781804 -1.381394718 1.22167672
## t71 -1.44719125 -1.170708866 1.02083131
## t94 -2.16041746 -1.200085947 1.03456095
## t97 -1.44175507 -1.213089703 1.18276260
## t98 -3.88229471 -1.402582088 1.24727944
SE
## t65 t66 t29 t30 t13 t78 t79
## 1.5378419 0.6315825 0.8879493 0.5928441 1.0271921 1.0799337 0.2015944
## t5 t26 t67 t68 t86 t87 t25
## 1.4215427 1.6140998 1.1763704 0.3116548 0.3712812 2.1127712 1.7325671
## t16 t47 t48 t10 t20 t63 t64
## 0.6036392 0.2305401 2.1632606 0.5891664 1.6319491 0.1739720 0.2086460
## t53 t21 t23 t49 t50 t80 t81
## 1.4821123 0.4555341 0.3351531 1.2322628 0.3969949 0.8365659 0.2839710
## t19 t84 t85 t40 t41 t18 t76
## 0.3487848 1.1665896 1.5049262 0.5291481 0.5503178 0.7437542 0.9244183
## t77 t27 t92 t93 t8 t2 t95
## 0.4384969 0.6420107 0.6988273 0.7246152 0.8660270 1.5637886 1.8151181
## t96 t6 t44 t51 t52 t72 t73
## 1.3655987 0.9416576 0.7521436 0.4052548 0.7403313 0.3570909 1.2056779
## t35 t38 t39 t99 t100 t24 t11
## 0.8792995 0.8277867 1.3258279 0.1751977 0.8815140 0.4549706 0.1941065
## t36 t37 t28 t7 t12 t33 t34
## 1.4607861 1.1021203 0.6038450 0.9676804 0.2473128 0.1840373 1.6307541
## t15 t42 t43 t22 t1 t3 t4
## 0.7664964 1.1059112 1.1145843 0.9188596 1.5650796 0.4641816 0.6615129
## t74 t75 t31 t59 t60 t9 t90
## 0.9735615 1.0394457 1.5125566 1.8766065 0.5621988 0.9335404 0.2087170
## t91 t56 t57 t14 t61 t62 t69
## 0.7721703 1.8066518 0.4828995 0.8829816 0.7459172 2.1854232 0.7337223
## t70 t88 t89 t54 t55 t32 t45
## 1.4301866 0.2787410 1.0083834 0.5330130 1.1466571 1.7584170 0.3040079
## t46 t17 t58 t82 t83 t71 t94
## 0.8741375 0.8283463 0.5284757 1.1277393 0.3540923 0.3703903 1.0910862
## t97 t98
## 0.9403271 1.3459290
fit<-pgls.SEy(ye~x1+x2,data=X,se=SE,tree=tree,method="ML")
fit
## Generalized least squares fit by maximum likelihood
## Model: model
## Data: data
## Log-likelihood: -166.1253
##
## Coefficients:
## (Intercept) x1 x2
## 0.6837662 1.5101552 -1.6872891
##
## Correlation Structure: corBrownian
## Formula: ~1
## Parameter estimate(s):
## numeric(0)
## Variance function:
## Structure: fixed weights
## Formula: ~v
## Degrees of freedom: 100 total; 97 residual
## Residual standard error: 1.774718
summary(fit)
## Generalized least squares fit by maximum likelihood
## Model: model
## Data: data
## AIC BIC logLik
## 340.2505 350.6712 -166.1253
##
## Correlation Structure: corBrownian
## Formula: ~1
## Parameter estimate(s):
## numeric(0)
## Variance function:
## Structure: fixed weights
## Formula: ~v
##
## Coefficients:
## Value Std.Error t-value p-value
## (Intercept) 0.6837662 0.1650265 4.143373 1e-04
## x1 1.5101552 0.1365145 11.062231 0e+00
## x2 -1.6872891 0.1153723 -14.624738 0e+00
##
## Correlation:
## (Intr) x1
## x1 0.122
## x2 0.078 0.376
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.7961712 -0.8297315 -0.2312799 0.4027741 3.0330635
##
## Residual standard error: 1.774718
## Degrees of freedom: 100 total; 97 residual
In a separate post, I'm going to look at power & type I error when SEs are ignored vs. taken into account.
Note that also taking uncertainty in the xs is account is considerably more complicated. For that we would need to employ an approach similar to that of Ives et al. (2007).
The data & tree for this exercise were simulated as follows:
## simulate tree & data
set.seed(999)
tree<-pbtree(n=100,scale=1)
X<-fastBM(tree,nsim=2)
colnames(X)<-c("x1","x2")
beta<-c(1,1,-2)
y<-cbind(rep(1,Ntip(tree)),X)%*%beta+fastBM(tree)
v<-setNames(rexp(n=Ntip(tree)),tree$tip.label)
ye<-setNames(sampleFrom(xbar=y,xvar=v,n=rep(1,length(y))),
rownames(y))
X<-as.data.frame(cbind(ye,X))
SE<-sqrt(v)
This function is now on GitHub. I recommend installing it from there & not using the above version. - Liam
ReplyDelete