# R analyses--Chronogram estimation: A Penalized Likelihood Approach
#1) We are going to use ‘ape’ package to ultrametrize our tree. Load the following package
library(ape)
#2) Make a working directory and select it as your working. Suggestion: you can also use setwd().
# R top menus: Misc>Change Working Directory (select the directory that has our file)
setwd("/Users/jcsantos/Desktop/R_class_winter_2015_home/6_estimating_chronograms/data/ape_ultrametric")
#3) read our RAG1-phylogeny
rag1_rooted_garli <-read.tree(file = "Rag1_best_garli_rooted.newick")
#4) let's take a look to our phylogeny
plot(rag1_rooted_garli)
add.scale.bar(x=0, y=0)
#5) get names of variables in phylogeny
names(rag1_rooted_garli)
#6) get tip labels
rag1_rooted_garli$tip.label
#7) Suggestion: for comparative analyses is better to drop outgroups so we limit the effect
# of long branches on the penalized likelihood function that renders our tree ultrametric
#I will drop one tip, but for the starting BEAST tree we will use the full tree
rag1_rooted_garli_no_out <- drop.tip(rag1_rooted_garli, tip ="Polychrus_marmoratus_RAG1_FJ356748")
#8) Let's take a look to our tree with and without that tip
par(mfrow=c(1,2))
plot(rag1_rooted_garli)
add.scale.bar(x=0, y=0)
plot(rag1_rooted_garli_no_out)
add.scale.bar(x=0, y=0)
#9) We can force the tree to be ultrametric using chronos() in the APE package.
#If we don't have a calibration point, we can set the age of the whole tree to
#"1". First, we need to define the internal function 'makeChronosCalib(rag1_rooted_garli)' of the function 'chronos()'
# for relative age chronogram let's fix root at 1
cal_relative <- makeChronosCalib(rag1_rooted_garli, node = "root", age.min = 1, age.max = 1, interactive = FALSE, soft.bounds = FALSE)
cal_relative
#10) For absolute age chronogram, we need to provide a range of possible ages of the reference nodes
# Notice the change in the parameter: interactive = FALSE to interactive = TRUE
cal_absolute <- makeChronosCalib(rag1_rooted_garli, node = "root", age.min = 66, age.max = 66, interactive = TRUE, soft.bounds = TRUE)# in macs use rigth-click (control + click) on the plot to exit
# click on the nodes with references ages (in MYA)
# Red ring (node number: 49): younger age is 15 and older age 35
# Purple ring (node number: 59): younger age is 40 and older age 62
# Green dot -- root (node number: 34): younger age is 55 and older age 76
#11) Both cal_relative and cal_absolute are data frames, so you can modify this data without the interactive mode
cal_relative
cal_absolute
#12) Let's estimate our chronogram using Penalized Likelihood and Maximum Likelihood
# we need to select a method, I prefer a relaxed clock model to account for heterogeneity among branches
# we also need to select a smoothing parameter lambda (see ref: Sanderson, 2002)
# If lambda = 0, then the parametric component dominates and rates vary as much as possible among branches,
# whereas for increasing values of lambda ( towards 1), the variation in the rates are smoother
# and tend to a clock-like model (same rate for all branches)
# I prefer BEAST for my chronogram estimation give that is more parametrized and used the actual data
# to estimate the chronogram so I will pick an arbitrary lambda based on test of different lambdas
#Relative age chronogram with three smoothing parameters
rag1_chrono_rel_garli_lambda_0 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 0, calibration = cal_relative)
rag1_chrono_rel_garli_lambda_0_1 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 0.1, calibration = cal_relative)
rag1_chrono_rel_garli_lambda_1_0 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 1.0, calibration = cal_relative)
rag1_chrono_abs_garli_lambda_0 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 0, calibration = cal_absolute)
rag1_chrono_abs_garli_lambda_0_1 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 0.1, calibration = cal_absolute)
rag1_chrono_abs_garli_lambda_1_0 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 1.0, calibration = cal_absolute)
#check the 'Penalised log-lik' values, the you want the larger likelihood;
# i.e., largest likelihood, or, more commonly, smallest negative log-likelihood.
# negative numbers are OK. A log-likelihood of -2 is better than -4.
# the best in this case corresponds to the lambda = 0 for the cal_relative (i.e., Penalised log-lik = -3.885571)
#11) After the calculations are done, we can write the trees in newick format for future use.
write.tree (rag1_chrono_rel_garli_lambda_0, file ="rag1_chrono_rel_garli_lambda_0.newick")
write.tree (rag1_chrono_rel_garli_lambda_0_1, file ="rag1_chrono_rel_garli_lambda_0_1.newick")
write.tree (rag1_chrono_rel_garli_lambda_1_0, file ="rag1_chrono_rel_garli_lambda_1_0.newick")
write.tree (rag1_chrono_abs_garli_lambda_0, file ="rag1_chrono_abs_garli_lambda_0.newick")
write.tree (rag1_chrono_abs_garli_lambda_0_1, file ="rag1_chrono_abs_garli_lambda_0_1.newick")
write.tree (rag1_chrono_abs_garli_lambda_1_0, file ="rag1_chrono_abs_garli_lambda_1_0.newick")
#12) Let's check that our trees are ultrametric
is.ultrametric(rag1_rooted_garli)
is.ultrametric(rag1_chrono_rel_garli_lambda_0)
is.ultrametric(rag1_chrono_rel_garli_lambda_0_1)
is.ultrametric(rag1_chrono_rel_garli_lambda_1_0)
is.ultrametric(rag1_chrono_abs_garli_lambda_0)
is.ultrametric(rag1_chrono_abs_garli_lambda_0_1)
is.ultrametric(rag1_chrono_abs_garli_lambda_1_0)
#13) Finally, plot both trees: non-ultrametric and ultrametric.
par(mfrow=c(1,2))
plot(rag1_rooted_garli)
add.scale.bar(x=0, y=0)
plot(rag1_chrono_abs_garli_lambda_0)
add.scale.bar(x=0, y=0)
#14) Check if we can read our trees
rag1_chrono_abs_garli_lambda_1_0_check <- read.tree ("rag1_chrono_abs_garli_lambda_1_0.newick")
#######################