Getting the data

The two datasets are here - download:

Reconstructing evolution of a single discrete character

Two methods - ancestral state reconstruction and stochastic character mapping.

require(phytools)
## Loading required package: phytools
## Loading required package: ape
## Loading required package: maps
require(ape)
# read in the data
elop_data <- read.csv("elopomorph.csv", row.names = 1)
feedingMode <- setNames(elop_data[,1], rownames(elop_data))
table(feedingMode)
## feedingMode
##    bite suction 
##      31      30
# read in the tree
elop_tree <- read.tree("elopomorph.tre")

We can visualize our data by plotting in phytools

plotTree(elop_tree, type="fan", fsize=0.7, ftype="i", lwd=1)
# this line assigns colors to the character states
# bite = red, suction = blue
cols<-setNames(c("red", "blue"), levels(feedingMode))
tiplabels(pie=to.matrix(feedingMode[elop_tree$tip.label], levels(feedingMode)), piecol=cols, cex=0.3)
# to understand this better look at the output of a piece
# to.matrix(feedingMode[elop_tree$tip.label], levels(feedingMode))
add.simmap.legend(colors=cols, prompt=FALSE, x = 0.9*par()$usr[1], y=0.8*par()$usr[3], fsize=0.8)

estimate ancestral states under an Equal-rates (“ER”) Mk model

fitER <- ace(feedingMode, elop_tree, model="ER", type="discrete")
# summary of model fit
fitER
## 
##     Ancestral Character Estimation
## 
## Call: ace(x = feedingMode, phy = elop_tree, type = "discrete", model = "ER")
## 
##     Log-likelihood: -36.33992 
## 
## Rate index matrix:
##         bite suction
## bite       .       1
## suction    1       .
## 
## Parameter estimates:
##  rate index estimate std-err
##           1   0.0158  0.0045
## 
## Scaled likelihoods at the root (type '...$lik.anc' to get them for all nodes):
##      bite   suction 
## 0.5480037 0.4519963
# all ancestral states
fitER$lik.anc
##               bite      suction
##  [1,] 0.5480037210 0.4519962790
##  [2,] 0.6242342545 0.3757657455
##  [3,] 0.6560898907 0.3439101093
##  [4,] 0.6904137188 0.3095862812
##  [5,] 0.7066485827 0.2933514173
##  [6,] 0.7158097958 0.2841902042
##  [7,] 0.7151789283 0.2848210717
##  [8,] 0.0005280939 0.9994719061
##  [9,] 0.7709017973 0.2290982027
## [10,] 0.7691863614 0.2308136386
## [11,] 0.6440479559 0.3559520441
## [12,] 0.0442832132 0.9557167868
## [13,] 0.0037653340 0.9962346660
## [14,] 0.0172004443 0.9827995557
## [15,] 0.8217944284 0.1782055716
## [16,] 0.8569460714 0.1430539286
## [17,] 0.7393039669 0.2606960331
## [18,] 0.7174480970 0.2825519030
## [19,] 0.6974032138 0.3025967862
## [20,] 0.7051615816 0.2948384184
## [21,] 0.7765900337 0.2234099663
## [22,] 0.9679563345 0.0320436655
## [23,] 0.7763459919 0.2236540081
## [24,] 0.7355606936 0.2644393064
## [25,] 0.5593038528 0.4406961472
## [26,] 0.5493603890 0.4506396110
## [27,] 0.6441467852 0.3558532148
## [28,] 0.5530135261 0.4469864739
## [29,] 0.2983895523 0.7016104477
## [30,] 0.2715130074 0.7284869926
## [31,] 0.0184977763 0.9815022237
## [32,] 0.0425564134 0.9574435866
## [33,] 0.0093509902 0.9906490098
## [34,] 0.5714981738 0.4285018262
## [35,] 0.4984340090 0.5015659910
## [36,] 0.3238194112 0.6761805888
## [37,] 0.0317279739 0.9682720261
## [38,] 0.0115367770 0.9884632230
## [39,] 0.3773398135 0.6226601865
## [40,] 0.5502264549 0.4497735451
## [41,] 0.7532755531 0.2467244469
## [42,] 0.7565228047 0.2434771953
## [43,] 0.8568317500 0.1431682500
## [44,] 0.9665651556 0.0334348444
## [45,] 0.9844171015 0.0155828985
## [46,] 0.9800267702 0.0199732298
## [47,] 0.9939408074 0.0060591926
## [48,] 0.9993040631 0.0006959369
## [49,] 0.9994145342 0.0005854658
## [50,] 0.9990235521 0.0009764479
## [51,] 0.9803292025 0.0196707975
## [52,] 0.8689467273 0.1310532727
## [53,] 0.8126122235 0.1873877765
## [54,] 0.9847413814 0.0152586186
## [55,] 0.2057221815 0.7942778185
## [56,] 0.0438289116 0.9561710884
## [57,] 0.0454895580 0.9545104420
## [58,] 0.0032374280 0.9967625720
## [59,] 0.3028016114 0.6971983886
## [60,] 0.1537905871 0.8462094129
sort(branching.times(elop_tree))
##        69        92       119        74        91       115       111 
##  1.327567  3.516556  5.605033  6.027710  6.204794  6.295512  6.977033 
##        94        99       100       110        75       109        83 
##  9.527512  9.818728 10.403526 11.662988 11.835230 12.189883 12.774577 
##        98       114       117       112        73        87        93 
## 12.892233 13.872944 14.591340 14.790716 16.063755 16.743802 16.930879 
##       118       108       101        97        86        78       107 
## 17.152939 17.514333 21.314327 21.841974 22.541374 22.918975 23.415946 
##       113        96        77       106        85       121        72 
## 25.892148 27.458719 28.585036 29.020746 29.915529 30.498768 31.209752 
##       116        76       105        90        84        95        71 
## 32.100502 32.275225 33.497395 34.138742 34.959968 35.702324 38.936161 
##        82       104        70       103        89        81       102 
## 41.437711 43.264285 44.654361 47.927845 48.200128 49.172343 52.709468 
##        88       120        80        68        67        79        66 
## 54.619270 56.409996 58.204973 58.826976 59.088220 60.777184 66.730306 
##        65        64        63        62 
## 68.728077 78.780713 85.626186 99.000000

Now let’s plot these ancestral state marginal probabilities on the tree

plotTree(elop_tree, type="fan", fsize=0.7, ftype="i", lwd=1)
nodelabels(pie=fitER$lik.anc, piecol=cols, cex=0.4)
tiplabels(pie=to.matrix(feedingMode[elop_tree$tip.label], levels(feedingMode)), piecol =cols, cex=0.3)
add.simmap.legend(colors=cols, prompt=FALSE, x = 0.9*par()$usr[1], y=0.8*par()$usr[3], fsize=0.8)

Let’s try a model where forwards and backwards rates are different

We can use the “All Rates Different” (ARD) model to do this

fitARD <- ace(feedingMode, elop_tree, model="ARD", type="discrete")
fitARD
## 
##     Ancestral Character Estimation
## 
## Call: ace(x = feedingMode, phy = elop_tree, type = "discrete", model = "ARD")
## 
##     Log-likelihood: -36.3105 
## 
## Rate index matrix:
##         bite suction
## bite       .       2
## suction    1       .
## 
## Parameter estimates:
##  rate index estimate std-err
##           1   0.0175  0.0064
##           2   0.0156  0.0045
## 
## Scaled likelihoods at the root (type '...$lik.anc' to get them for all nodes):
##      bite   suction 
## 0.5128531 0.4871469
fitARD$lik.anc
##               bite      suction
##  [1,] 0.5128531470 0.4871468530
##  [2,] 0.5733355632 0.4266644368
##  [3,] 0.5944505875 0.4055494125
##  [4,] 0.6114008937 0.3885991063
##  [5,] 0.6228121844 0.3771878156
##  [6,] 0.6393068928 0.3606931072
##  [7,] 0.6388472597 0.3611527403
##  [8,] 0.0005291536 0.9994708464
##  [9,] 0.7063971096 0.2936028904
## [10,] 0.7051501899 0.2948498101
## [11,] 0.5868748673 0.4131251327
## [12,] 0.0398637553 0.9601362447
## [13,] 0.0034675193 0.9965324807
## [14,] 0.0150635115 0.9849364885
## [15,] 0.7669399749 0.2330600251
## [16,] 0.8098761695 0.1901238305
## [17,] 0.6911268707 0.3088731293
## [18,] 0.6276060901 0.3723939099
## [19,] 0.6065205446 0.3934794554
## [20,] 0.6265919527 0.3734080473
## [21,] 0.7085102287 0.2914897713
## [22,] 0.9588188326 0.0411811674
## [23,] 0.7124120033 0.2875879967
## [24,] 0.6732869064 0.3267130936
## [25,] 0.5044910053 0.4955089947
## [26,] 0.4979517327 0.5020482673
## [27,] 0.5569160674 0.4430839326
## [28,] 0.4731280335 0.5268719665
## [29,] 0.2569412886 0.7430587114
## [30,] 0.2488441105 0.7511558895
## [31,] 0.0155916308 0.9844083692
## [32,] 0.0373381060 0.9626618940
## [33,] 0.0082942371 0.9917057629
## [34,] 0.5029130319 0.4970869681
## [35,] 0.4344194663 0.5655805337
## [36,] 0.2722698888 0.7277301112
## [37,] 0.0255397209 0.9744602791
## [38,] 0.0090402495 0.9909597505
## [39,] 0.3355699063 0.6644300937
## [40,] 0.5149233061 0.4850766939
## [41,] 0.6755804498 0.3244195502
## [42,] 0.6835200870 0.3164799130
## [43,] 0.8015608211 0.1984391789
## [44,] 0.9479614699 0.0520385301
## [45,] 0.9740573341 0.0259426659
## [46,] 0.9716065836 0.0283934164
## [47,] 0.9915293041 0.0084706959
## [48,] 0.9989614588 0.0010385412
## [49,] 0.9991175147 0.0008824853
## [50,] 0.9987969620 0.0012030380
## [51,] 0.9745835766 0.0254164234
## [52,] 0.8326704196 0.1673295804
## [53,] 0.7795210027 0.2204789973
## [54,] 0.9793006382 0.0206993618
## [55,] 0.1886626789 0.8113373211
## [56,] 0.0411733964 0.9588266036
## [57,] 0.0420256656 0.9579743344
## [58,] 0.0030730170 0.9969269830
## [59,] 0.3123802200 0.6876197800
## [60,] 0.1618081107 0.8381918893
# model comparison using likelihood ratio test
delta <- 2*(fitARD$loglik - fitER$loglik)
1-pchisq(delta, 1)
## [1] 0.8083329

We can make another plot

plotTree(elop_tree, type="fan", fsize=0.7, ftype="i", lwd=1)
nodelabels(pie=fitARD$lik.anc, piecol=cols, cex=0.4)
tiplabels(pie=to.matrix(feedingMode[elop_tree$tip.label], levels(feedingMode)), piecol =cols, cex=0.3)
add.simmap.legend(colors=cols, prompt=FALSE, x = 0.9*par()$usr[1], y=0.8*par()$usr[3], fsize=0.8)

Sampling character histories using stochastic character mapping

We can sample particular character histories - that is, sequences of character change on the tree - that are compatible with the data. If we sample them in proportion to their likelihood given the model, then we get a set of plausible reconstructions of character change on the tree. We can summarize this set in a bunch of interesting ways.

# this function generates a tree object with a mapping on that tree
mapped_tree <- make.simmap(elop_tree, feedingMode, model="ER")
## make.simmap is sampling character histories conditioned on the transition matrix
## 
## Q =
##                bite     suction
## bite    -0.01582783  0.01582783
## suction  0.01582783 -0.01582783
## (estimated using likelihood);
## and (mean) root node prior probabilities
## pi =
##    bite suction 
##     0.5     0.5
## Done.
plot(mapped_tree, cols, type="fan", fsize=0.7, ftype="i")

# One map on its own is not great because you don't see any uncertainty
# make many maps

mapped_set <- make.simmap(elop_tree, feedingMode, model="ER", nsim=100)
## make.simmap is sampling character histories conditioned on the transition matrix
## 
## Q =
##                bite     suction
## bite    -0.01582783  0.01582783
## suction  0.01582783 -0.01582783
## (estimated using likelihood);
## and (mean) root node prior probabilities
## pi =
##    bite suction 
##     0.5     0.5
## Done.
par(mfrow=c(10,10))
sapply(mapped_set, plotSimmap, colors=cols, lwd=1, ftype="off")

##   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [36] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [71] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
simmap_summary<-summary(mapped_set)
plot(simmap_summary, fsize=0.6, ftype="i", colors=cols, ylim=c(-2, Ntip(elop_tree)))

superFancy <- densityMap(mapped_set, states=levels(feedingMode)[2:1],plot=FALSE)
## sorry - this might take a while; please be patient
plot(superFancy, fsize=c(0.6, 1))

dotTree(elop_tree, feedingMode, colors=cols, fsize=0.7, ftype="i", legend=FALSE)
extraCrap<-sapply(mapped_set, markChanges, sapply(cols, make.transparent, 0.1))