# R analyses--Chronogram estimation: A Penalized Likelihood Approach #1) We are going to use ‘ape’ package to ultrametrize our tree. Load the following package library(ape) #2) Make a working directory and select it as your working. Suggestion: you can also use setwd(). # R top menus: Misc>Change Working Directory (select the directory that has our file) setwd("/Users/jcsantos/Desktop/R_class_winter_2015_home/6_estimating_chronograms/data/ape_ultrametric") #3) read our RAG1-phylogeny rag1_rooted_garli <-read.tree(file = "Rag1_best_garli_rooted.newick") #4) let's take a look to our phylogeny plot(rag1_rooted_garli) add.scale.bar(x=0, y=0) #5) get names of variables in phylogeny names(rag1_rooted_garli) #6) get tip labels rag1_rooted_garli$tip.label #7) Suggestion: for comparative analyses is better to drop outgroups so we limit the effect # of long branches on the penalized likelihood function that renders our tree ultrametric #I will drop one tip, but for the starting BEAST tree we will use the full tree rag1_rooted_garli_no_out <- drop.tip(rag1_rooted_garli, tip ="Polychrus_marmoratus_RAG1_FJ356748") #8) Let's take a look to our tree with and without that tip par(mfrow=c(1,2)) plot(rag1_rooted_garli) add.scale.bar(x=0, y=0) plot(rag1_rooted_garli_no_out) add.scale.bar(x=0, y=0) #9) We can force the tree to be ultrametric using chronos() in the APE package. #If we don't have a calibration point, we can set the age of the whole tree to #"1". First, we need to define the internal function 'makeChronosCalib(rag1_rooted_garli)' of the function 'chronos()' # for relative age chronogram let's fix root at 1 cal_relative <- makeChronosCalib(rag1_rooted_garli, node = "root", age.min = 1, age.max = 1, interactive = FALSE, soft.bounds = FALSE) cal_relative #10) For absolute age chronogram, we need to provide a range of possible ages of the reference nodes # Notice the change in the parameter: interactive = FALSE to interactive = TRUE cal_absolute <- makeChronosCalib(rag1_rooted_garli, node = "root", age.min = 66, age.max = 66, interactive = TRUE, soft.bounds = TRUE)# in macs use rigth-click (control + click) on the plot to exit # click on the nodes with references ages (in MYA) # Red ring (node number: 49): younger age is 15 and older age 35 # Purple ring (node number: 59): younger age is 40 and older age 62 # Green dot -- root (node number: 34): younger age is 55 and older age 76 #11) Both cal_relative and cal_absolute are data frames, so you can modify this data without the interactive mode cal_relative cal_absolute #12) Let's estimate our chronogram using Penalized Likelihood and Maximum Likelihood # we need to select a method, I prefer a relaxed clock model to account for heterogeneity among branches # we also need to select a smoothing parameter lambda (see ref: Sanderson, 2002) # If lambda = 0, then the parametric component dominates and rates vary as much as possible among branches, # whereas for increasing values of lambda ( towards 1), the variation in the rates are smoother # and tend to a clock-like model (same rate for all branches) # I prefer BEAST for my chronogram estimation give that is more parametrized and used the actual data # to estimate the chronogram so I will pick an arbitrary lambda based on test of different lambdas #Relative age chronogram with three smoothing parameters rag1_chrono_rel_garli_lambda_0 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 0, calibration = cal_relative) rag1_chrono_rel_garli_lambda_0_1 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 0.1, calibration = cal_relative) rag1_chrono_rel_garli_lambda_1_0 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 1.0, calibration = cal_relative) rag1_chrono_abs_garli_lambda_0 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 0, calibration = cal_absolute) rag1_chrono_abs_garli_lambda_0_1 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 0.1, calibration = cal_absolute) rag1_chrono_abs_garli_lambda_1_0 <- chronos(rag1_rooted_garli, model = "relaxed", lambda = 1.0, calibration = cal_absolute) #check the 'Penalised log-lik' values, the you want the larger likelihood; # i.e., largest likelihood, or, more commonly, smallest negative log-likelihood. # negative numbers are OK. A log-likelihood of -2 is better than -4. # the best in this case corresponds to the lambda = 0 for the cal_relative (i.e., Penalised log-lik = -3.885571) #11) After the calculations are done, we can write the trees in newick format for future use. write.tree (rag1_chrono_rel_garli_lambda_0, file ="rag1_chrono_rel_garli_lambda_0.newick") write.tree (rag1_chrono_rel_garli_lambda_0_1, file ="rag1_chrono_rel_garli_lambda_0_1.newick") write.tree (rag1_chrono_rel_garli_lambda_1_0, file ="rag1_chrono_rel_garli_lambda_1_0.newick") write.tree (rag1_chrono_abs_garli_lambda_0, file ="rag1_chrono_abs_garli_lambda_0.newick") write.tree (rag1_chrono_abs_garli_lambda_0_1, file ="rag1_chrono_abs_garli_lambda_0_1.newick") write.tree (rag1_chrono_abs_garli_lambda_1_0, file ="rag1_chrono_abs_garli_lambda_1_0.newick") #12) Let's check that our trees are ultrametric is.ultrametric(rag1_rooted_garli) is.ultrametric(rag1_chrono_rel_garli_lambda_0) is.ultrametric(rag1_chrono_rel_garli_lambda_0_1) is.ultrametric(rag1_chrono_rel_garli_lambda_1_0) is.ultrametric(rag1_chrono_abs_garli_lambda_0) is.ultrametric(rag1_chrono_abs_garli_lambda_0_1) is.ultrametric(rag1_chrono_abs_garli_lambda_1_0) #13) Finally, plot both trees: non-ultrametric and ultrametric. par(mfrow=c(1,2)) plot(rag1_rooted_garli) add.scale.bar(x=0, y=0) plot(rag1_chrono_abs_garli_lambda_0) add.scale.bar(x=0, y=0) #14) Check if we can read our trees rag1_chrono_abs_garli_lambda_1_0_check <- read.tree ("rag1_chrono_abs_garli_lambda_1_0.newick") #######################