## % Unreplicated correlated evolution ### Set options ##+ echo=FALSE,results=FALSE opts_chunk$set(tidy=FALSE, fig.height=5) options(show.signif.stars=FALSE) ### Bibliography ##+ echo=FALSE,results=FALSE library(knitcitations) library(methods) refs <- read.bibtex("../refs.bib") ## Most of the likelihood methods in diversitree (and other programs) ## are prone to be mislead by unreplicated associations; for example ## * If a tree has a highly diverse clade with one of the states ## overrepresented BiSSE may conclude that trait is significantly ## associated with increased rates of speciation. ## * Given a pair of binary traits, $A$ and $B$, if one clade has ## mostly species with states $A=1$, $B=1$, while the rest of the ## tree is mostly $A=0$, $B=0$, discrete ## `r citep(refs[["Pagel-1994-37"]])` or diversitree's `mkn.multitrait` ## may conclude there is significant assoociation between these ## traits (that is, the rate of evolution of one trait depends on ## the state of the other). ## ML doesn't count the number of instances of transitions, just ## strength of evidence for the alternate models. These are the sort ## of unreplicated comparisons that the comparative method is meant to ## avoid, and which are used to motivate ## `r citet(refs[["Felsenstein-1985-1"]])`. This issue was discussed by ## `r citet(refs[["Read-1995-99"]])` but it is not widely appreciated. ## Here, I show a couple of toy examples as to how the problem can ## arise under multistate Markov model and BiSSE, and how ## phylogenetically independent contrasts is not immune to this ## problem (but that it may be easily detectable). ## # Mkn multitrait / `r citet(refs[["Pagel-1994-37"]])` discrete ## First, load diversitree and simulate a pure birth tree (with rate ## .1). library(diversitree) set.seed(1) n.spp <- 30 phy <- ladderize(tree.yule(.1, max.taxa=n.spp)) ## This will be tree used for all examples ##+ fig.cap="Simulated pure birth tree" plot(phy, show.node.label=TRUE, no.margin=TRUE, cex=.8) ## nd14 will be our target node; below this point, the clade will ## behave "differently" to the rest of the clade. target <- "nd14" ## Convert this name to Ape's node index: idx <- match(target, phy$node.label) + n.spp ## Identify descendant species of this node desc <- sort(get.descendants(target, phy, tips.only=TRUE)) ## Here is our focal group, highlighted in red ##+ fig.cap="Focal group at node 'nd14'" plot(phy, tip.color=ifelse(1:n.spp %in% desc, "#ef2929", "black"), no.margin=TRUE) nodelabels(node=idx, pch=19, cex=2, col="#ef2929") ## Trait 1 is shared across this group, and with nobody else. t1 <- as.integer(1:n.spp %in% desc) names(t1) <- phy$tip.label t1 ## Determine rate coefficients for the evolution of this trait, ## assuming a Mk2 model: lik1 <- make.mk2(phy, t1) fit1 <- find.mle(lik1, rep(.1, 2)) zapsmall(coef(fit1)) ## Now, suppose we have two perfectly codistribted traits "a" and "b"; traits <- data.frame(a=t1, b=t1) ## Make a 2-trait likelihood function, assuming that the two traits ## are independent: lik2 <- make.mkn.multitrait(phy, traits, depth=0) argnames(lik2) ## And fit this model with ML: p2 <- rep(coef(fit1), 2) names(p2) <- NULL fit2 <- find.mle(lik2, p2) ## We get the same basic coefficients when treating the traits ## separately: zapsmall(coef(fit1)) zapsmall(coef(fit2)) all.equal(coef(fit2)[1:2], coef(fit1), check.attr=FALSE) ## Now, allow correlated evolution by allowing the $q_{01}$ and ## $q_{10}$ parameters to vary according to the state of the other ## trait: lik3 <- make.mkn.multitrait(phy, traits, depth=1) p3 <- rep(0, 8) names(p3) <- argnames(lik3) p3[argnames(lik2)] <- coef(fit2) ## Fit the model ##+ fit.lik3,cache=TRUE fit3 <- find.mle(lik3, rep(.1, 8)) ## Basically once trait 'a' has shifted from $0\to 1$, we instantaneously ## shift in state 'b'. zapsmall(coef(fit3)) ## This model is a statistical improvement over the uncorrelated ## model. anova(fit3, uncorrelated=fit2) ## Likelihood improvement: dll <- fit3$lnLik - fit2$lnLik dll ## Simulate 100 traits on the tree with an equal forward/backward ## transition rate of `r` r <- .02 t2 <- replicate(100, sim.character(phy, c(r, r), x=0, model="mk2"), simplify=FALSE) ## And fit these the same way: f <- function(t2) { lik0 <- make.mkn.multitrait(phy, data.frame(a=t1, b=t2), depth=0) p0 <- rep(r, 4) fit0 <- find.mle(lik0, p0) lik1 <- make.mkn.multitrait(phy, data.frame(a=t1, b=t2)) p1 <- rep(0, 8) names(p1) <- argnames(lik1) p1[argnames(lik0)] <- coef(fit0) fit1 <- find.mle(lik1, p1) fit1$lnLik - fit0$lnLik } ## This takes a minute or so, so run it in parallel: library(multicore) ##+ fit.random,cache=TRUE ans <- unlist(mclapply(t2, f)) ## Histogram of likelihood improvements, with previous case in red, ## and 5% cut-off in blue: ##+ fig.cap="" par(mar=c(4.1, 4.1, .5, .5)) hist(ans, xlim=range(ans, dll), main="", las=1, xlab="Likelihood impovement") abline(v=dll, col="red") abline(v=qchisq(1/20, 4, lower=FALSE)/2, col="blue") ## Most of the time, a random character would not be significantly ## associated with our special trait. But two perfectly co-occurring ## traits have extremely high support for a model that treats their ## evolution as non-independent, even though this is unreplicated. ## # BiSSE ## With the tree above, fit a BiSSE model ##+ bisse.ssd,cache=TRUE lik <- make.bisse(phy, t1, control=list(method="CVODES")) p <- starting.point.bisse(phy) fit.ssd <- find.mle(lik, p) ## Fit the state independent model (constrain both speciation and ## extinction rates to not vary with the trait). ##+ bisse.sid,cache=TRUE lik.sid <- constrain(lik, lambda1 ~ lambda0, mu1 ~ mu0) fit.sid <- find.mle(lik.sid, p[argnames(lik.sid)]) ## There is no significant association between the trait and ## diversification here: anova(fit.ssd, sid=fit.sid) ## Next, tweak the tree so that the focal clade is apparently ## different. This is a bit of a pain to do. First, identify all ## edges descended from that node i <- phy$edge[,2] %in% sort(get.descendants(target, phy)) ##+ fig.cap="Edges descended from focal node" plot(phy, no.margin=TRUE, edge.color=ifelse(i, "#ef2929", "black"), tip.color=ifelse(1:n.spp %in% desc, "#ef2929", "black")) ## Compress these red edges towards the present, and lengthen the stem ## lineage accordingly. Scale branches by a factor 'p' (must be less ## than 1) p <- .3 j <- match(idx, phy$edge[,2]) phy2 <- phy phy2$edge.length[i] <- phy2$edge.length[i] * p phy2$edge.length[j] <- phy$edge.length[j] + branching.times(phy)[target] * (1 - p) ## The modified tree is still ultrametric is.ultrametric(phy2) ## Here is the compressed tree: ##+ fig.cap="" plot(phy2, no.margin=TRUE, edge.color=ifelse(i, "#ef2929", "black"), tip.color=ifelse(1:n.spp %in% desc, "#ef2929", "black")) ## Fit BiSSE models to the tree, first with state-dependent ## diversification ##+ bisse2.sdd,cache=TRUE lik2 <- make.bisse(phy2, t1, control=list(method="CVODES")) p <- starting.point.bisse(phy2) fit2.ssd <- find.mle(lik2, p) ## And without ##+ bisse2.sid,cache=TRUE lik2.sid <- constrain(lik2, lambda1 ~ lambda0, mu1 ~ mu0) fit2.sid <- find.mle(lik2.sid, p[argnames(lik2.sid)]) ## There is now significant association between the trait and ## diversification, even though this is not replicated! anova(fit2.ssd, sid=fit2.sid) ## Generalise this a bit to look how the likelihood improvement varies ## with the scaling factor. g <- function(p) { phy2 <- phy phy2$edge.length[i] <- phy2$edge.length[i] * p phy2$edge.length[j] <- phy$edge.length[j] + branching.times(phy)[target] * (1 - p) lik2 <- make.bisse(phy2, t1, control=list(method="CVODES")) p <- starting.point.bisse(phy2) fit2.ssd <- find.mle(lik2, p) lik2.sid <- constrain(lik2, lambda1 ~ lambda0, mu1 ~ mu0) fit2.sid <- find.mle(lik2.sid, p[argnames(lik2.sid)]) fit2.ssd$lnLik - fit2.sid$lnLik } ## Run this with scaling parameters varying from .1 (extremely ## compressed towards the future) and 1 (unchanged from the original ## tree). ##+ bisse.scale,cache=TRUE p <- seq(1, .1, length=16) ans.b <- unlist(mclapply(p, g)) ## The improvement in the likelihood increases rapidly as the tree ## deviates more and more from the BD model: ##+ fig.cap="Likelihood improvement vs scaling factor, with critical value at the 5% level indicated with a blue line." par(mar=c(4.1, 4.1, .5, .5)) plot(ans.b ~ p, las=1, xlab="Scaling factor (1 is unchanged)", ylab="Likelihood improvement", type="o", pch=19) abline(h=qchisq(1/20, 2, lower=FALSE)/2, col="blue") ## However, we can partition the tree and allow rates to vary here. ## First check that this model will fit better without accounting for ## the state-dependent diversification: lik.bd <- make.bd(phy2) fit.bd <- find.mle(lik.bd, starting.point.bd(phy2)) ## High speciation and extinction rates, compared with the values ## used to simulate the tree (.1): coef(fit.bd) lik.bd2 <- make.bd.split(phy2, target) fit.bd2 <- find.mle(lik.bd2, c(.1, 0, .1, 0), method="subplex") ## The base partition (black branches in the figures) have rates ## similar to the simulated values. The focal clade (red in the ## figures) has strongly elevated rates. coef(fit.bd2) ## This improvement is significant; anova(fit.bd2, unsplit=fit.bd) ## Split BiSSE likelihood function lik.bs <- make.bisse.split(phy2, t1, target) ## Constrain trait evolution over the whole tree: lik.bs.sdd <- constrain(lik.bs, q01.2 ~ q01.1, q10.2 ~ q10.1, mu1.1 ~ mu0.1, mu1.2 ~ mu0.2) ## (I also disallowed state-dependent extinction as that causes ## problems trying to explain the pattern of trait evolution) ## And make a version with no state-dependent diversification: lik.bs.sid <- constrain(lik.bs.sdd, lambda1.1 ~ lambda0.1, lambda1.2 ~ lambda0.2) ## The state-independent version has 6 parameters; two speciation ## rates (one per partition), two extinction rates, and two character ## transition rates. argnames(lik.bs.sid) ## Starting point, based on the birth-death fit: p.sid <- rep(0, 6) names(p.sid) <- argnames(lik.bs.sid) p.sid[sub("\\.", "0.", names(coef(fit.bd2)))] <- coef(fit.bd2) p.sid[c("q01.1", "q10.1")] <- .1 ##+ bisse.split.sid, cache=TRUE fit.bs.sid <- find.mle(lik.bs.sid, p.sid) ## The coefficients here look like the birth-death case, with low ## rates of $0\to 1$ transition, and no reverse ($1\to 0$) ## transitions. zapsmall(coef(fit.bs.sid)) ## Expand this point to allow for state-dependent diversification: p.sdd <- coef(fit.bs.sid, TRUE)[argnames(lik.bs.sdd)] ##+ bisse.split.sdd, cache=TRUE fit.bs.sdd <- find.mle(lik.bs.sdd, p.sdd) ## This is now not significant (though the improvement in likelihood ## is greater than I would have thought). anova(fit.bs.sdd, sid=fit.bs.sid) ## I think what is going on here is that currently the stem leading to ## the focal group is included in that group. However, the (fake) ## diversification shift happens rather closer to the node. So that ## aspect of the tree still needs explaining. ## # Continuous traits and phylogenetically independent contrasts ## This is the sort of thing that ## `r citet(refs[["Felsenstein-1985-1"]])` warns against, ## but still causes a problem with continous traits, if Brownian ## motion is not a suitable model. set.seed(1) x <- sim.character(phy, .1) y <- sim.character(phy, .1) ##+ fig.cap="Random (Brownian) traits, with focal clade in red" plot(x, y, col=ifelse(1:n.spp %in% desc, "#ef2929", "black"), pch=19, las=1) ## Compute phylogenetically independent contrast with ape's `pic` ## function.: x.pic <- pic(x, phy) y.pic <- pic(y, phy) ## and run a correlation test (regression through the origin gives ## essentially the same result). cor.test(x.pic, y.pic) ## Now, replace the values of the focal clade with new trait values, ## offset by some amount, but evolved under BM with the same rate. ## First, clip out the subtree corresponding to the focal clade: desc <- sort(get.descendants(target, phy, tips.only=TRUE)) phy.sub <- drop.tip(phy, phy$tip.label[-desc]) ## These were the state values at the target node from the original ## simulation: x0 <- attr(x, "node.state")[target] y0 <- attr(y, "node.state")[target] ## Offset these values by 3 and continue the simulation, replacing the ## values in the `x` and `y` vectors x[desc] <- sim.character(phy.sub, .1, x0=x0+3) y[desc] <- sim.character(phy.sub, .1, x0=y0+3) ##+ fig.cap="Random (Brownian) traits, after perturbing focal clade" plot(x, y, col=ifelse(1:n.spp %in% desc, "#ef2929", "black"), pch=19, las=1) ## Recompute the independent contrasts: x.pic <- pic(x, phy) y.pic <- pic(y, phy) ## Do a correlation test (again, regression through the origin gives ## essentially the same answer). cor.test(x.pic, y.pic) ## This is actually quite significant; not much less so than the ## correlation of the raw data. cor.test(x, y) ## Plotting the contrasts, there is a fairly clear outlier. ##+ fig.cap="Independent contrasts in the distorted tree" par(mar=c(4.1, 4.1, .5, .5)) plot(x.pic, y.pic, las=1, pch=19) ## This is the node that is the parent of the focal clade and another ## smaller group. ## While similarly mislead, at least independent contrasts provide a ## simple way of *identifying* the misleading nodes. ## # Bibliography ##+ echo=FALSE,results="asis" bibliography() ## ## Document details ## Document compiled on `r as.character(Sys.time(), usetz=TRUE)` ## With R version `r R.version$string` ## and diversitree version `r packageVersion("diversitree")`. ## Original source: [unreplicated.R](unreplicated.R) ## [examples](..), [diversitree home](../..)