library(GenSA) # GenSA is better than optimx (although somewhat slower) library(FD) # for FD::maxent() (make sure this is up-to-date) library(snow) # (if you want to use multicore functionality; some systems/R versions prefer library(parallel), try either) library(parallel) library(rexpokit) library(cladoRcpp) library(BioGeoBEARS) library(phytools) library(paleotree) library(strap) library(geiger) ################################################################################################### ################################################################################################### run_BioGeoBEARS <- function(trees, model_names, model_names_J, ages, geogfn, dispersal_multiplier, ntrees, strict_or_majority, max_range_size, runslow, num_cores) { tipranges = getranges_from_LagrangePHYLIP(lgdata_fn=geogfn) areas = getareas_from_tipranges_object(tipranges) numareas = length(areas) total_num_areas <- numstates_from_numareas(numareas=numareas, maxareas=max_range_size, include_null_range=TRUE) #change numareas include_null_ranges_TF <- TRUE # set to FALSE for e.g. DEC* model, DEC*+J, etc. ####################################################### # Basic Model ####################################################### BioGeoBEARS_run_object = define_BioGeoBEARS_run() # Intitialize a default model (DEC model) BioGeoBEARS_run_object$geogfn = geogfn # Give BioGeoBEARS the location of the geography text file BioGeoBEARS_run_object$max_range_size = max_range_size # Input the maximum range size BioGeoBEARS_run_object$min_branchlength = 0.000001 # Min to treat tip as a direct ancestor (no speciation event) BioGeoBEARS_run_object$include_null_range = include_null_ranges_TF # set to FALSE for e.g. DEC* model, DEC*+J, etc. #BioGeoBEARS_run_object$timesfn = "timeperiods.txt" BioGeoBEARS_run_object$dispersal_multipliers_fn = paste0(dispersal_multiplier, ".txt") # Speed options and multicore processing if desired BioGeoBEARS_run_object$on_NaN_error = -1e50 # returns very low lnL if parameters produce NaN error (underflow check) BioGeoBEARS_run_object$speedup = TRUE # shorcuts to speed ML search; use FALSE if worried (e.g. >3 params) BioGeoBEARS_run_object$num_cores_to_use = num_cores BioGeoBEARS_run_object$use_optimx = "GenSA" # if FALSE, use optim() instead of optimx() BioGeoBEARS_run_object$force_sparse = FALSE # force_sparse=TRUE causes pathology & isn't much faster at this scale # This function loads the dispersal multiplier matrix etc. from the text files into the model object. Required for these to work! # (It also runs some checks on these inputs for certain errors.) BioGeoBEARS_run_object = readfiles_BioGeoBEARS_run(BioGeoBEARS_run_object) # Divide the tree up by timeperiods/strata (uncomment this for stratified analysis) #BioGeoBEARS_run_object = section_the_tree(inputs=BioGeoBEARS_run_object, make_master_table=TRUE, plot_pieces=FALSE) # The stratified tree is described in this table: #BioGeoBEARS_run_object$master_table # Good default settings to get ancestral states BioGeoBEARS_run_object$return_condlikes_table = TRUE BioGeoBEARS_run_object$calc_TTL_loglike_from_condlikes_table = TRUE BioGeoBEARS_run_object$calc_ancprobs = TRUE # get ancestral states from optim run ####################################################### # Set up models (non +J models) ####################################################### # Set up DEC model ====================================================== BioGeoBEARS_run_object_DEC <- BioGeoBEARS_run_object # Set up DIVALIKE model ================================================= BioGeoBEARS_run_object_DIVALIKE <- BioGeoBEARS_run_object # Remove subset-sympatry BioGeoBEARS_run_object_DIVALIKE$BioGeoBEARS_model_object@params_table["s","type"] = "fixed" BioGeoBEARS_run_object_DIVALIKE$BioGeoBEARS_model_object@params_table["s","init"] = 0.0 BioGeoBEARS_run_object_DIVALIKE$BioGeoBEARS_model_object@params_table["s","est"] = 0.0 BioGeoBEARS_run_object_DIVALIKE$BioGeoBEARS_model_object@params_table["ysv","type"] = "2-j" BioGeoBEARS_run_object_DIVALIKE$BioGeoBEARS_model_object@params_table["ys","type"] = "ysv*1/2" BioGeoBEARS_run_object_DIVALIKE$BioGeoBEARS_model_object@params_table["y","type"] = "ysv*1/2" BioGeoBEARS_run_object_DIVALIKE$BioGeoBEARS_model_object@params_table["v","type"] = "ysv*1/2" # Allow classic, widespread vicariance; all events equiprobable BioGeoBEARS_run_object_DIVALIKE$BioGeoBEARS_model_object@params_table["mx01v","type"] = "fixed" BioGeoBEARS_run_object_DIVALIKE$BioGeoBEARS_model_object@params_table["mx01v","init"] = 0.5 BioGeoBEARS_run_object_DIVALIKE$BioGeoBEARS_model_object@params_table["mx01v","est"] = 0.5 # Set up BAYAREALIKE model ============================================== BioGeoBEARS_run_object_BAYAREALIKE <- BioGeoBEARS_run_object # No subset sympatry BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["s","type"] = "fixed" BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["s","init"] = 0.0 BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["s","est"] = 0.0 # No vicariance BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["v","type"] = "fixed" BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["v","init"] = 0.0 BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["v","est"] = 0.0 # Adjust linkage between parameters BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["ysv","type"] = "1-j" BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["ys","type"] = "ysv*1/1" BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["y","type"] = "1-j" # Only sympatric/range-copying (y) events allowed, and with # exact copying (both descendants always the same size as the ancestor) BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["mx01y","type"] = "fixed" BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["mx01y","init"] = 0.9999 BioGeoBEARS_run_object_BAYAREALIKE$BioGeoBEARS_model_object@params_table["mx01y","est"] = 0.9999 ####################################################### # Setup trees ####################################################### # obj <- name.check(trees[[1]], ages) # ages <- ages[!rownames(ages)%in%c(obj$data_not_tree),] # obj <- name.check(trees[[1]], ages) # obj con_tree <- consensus(trees, p = if (strict_or_majority == "Strict"){1} else {0.5}) # If the newick files already exists, directly uses the files. # This avoids using different trees when analyzing different disparsal multipliers if (!file.exists(paste0("con_tree_", ntrees, "_", strict_or_majority, ".newick"))){ con_trees <- timePaleoPhy(con_tree, ages, type = "equal", vartime = 1, ntrees = ntrees, randres = F, dateTreatment = if (ntrees == 1){"firstLast"} else{"minMax"}) tree_edit <- function(con_tree){ con_tree <- multi2di(con_tree, random = F) #con_tree<-ladderize(con_tree) ## set zero-length branches to be 1/1000000 total tree length con_tree$edge.length[con_tree$edge.length==0]<-max(node.height(con_tree))*1e-6 return(con_tree) } con_trees <- lapply(con_trees, tree_edit) # class(con_trees) <- "multiPhylo" # write.tree(con_trees, "test_contree.tre") # for (i in 1:ntrees){ # plotTree(con_trees[[i]]) # nodelabels(cex = 0.5) # } for (i in 1:ntrees){ write.tree(con_trees[[i]], file = paste0("con_tree_", i, "_", strict_or_majority, ".newick")) } class(con_trees) <- "multiPhylo" write.tree(con_trees, file = "con_trees.nex") } else{ con_trees <- read.tree("con_trees.nex") } #input the list of trees trfn <- BioGeoBEARS_run_objects_DEC <- BioGeoBEARS_run_objects_BAYAREALIKE <- BioGeoBEARS_run_objects_DIVALIKE <- list() for (i in 1: ntrees){ trfn[[i]] <- np(paste0("con_tree_", i, "_", strict_or_majority, ".newick")) BioGeoBEARS_run_objects_DEC[[i]] <- BioGeoBEARS_run_object_DEC BioGeoBEARS_run_objects_DIVALIKE[[i]] <- BioGeoBEARS_run_object_DIVALIKE BioGeoBEARS_run_objects_BAYAREALIKE[[i]] <- BioGeoBEARS_run_object_BAYAREALIKE BioGeoBEARS_run_objects_DEC[[i]]$trfn <- BioGeoBEARS_run_objects_DIVALIKE[[i]]$trfn <- BioGeoBEARS_run_objects_BAYAREALIKE[[i]]$trfn <- trfn[[i]] } # Check the inputs ==================================================== # Run this to check inputs. Read the error messages if you get them! for (model_name in model_names) { model <- eval(str2expression(paste0("BioGeoBEARS_run_objects_", model_name))) print(lapply(model, check_BioGeoBEARS_run)) } ####################################################### # Run analyses (non +J models) ####################################################### # Make .Rdata to load for slow analyses for(model_name in model_names){ assign(paste0("resfn_", model_name), paste0(strict_or_majority, model_name, "_result_", dispersal_multiplier, ".Rdata")) print(eval(str2lang(paste0("resfn_", model_name)))) } for (model_name in model_names) { if (runslow) #if already have results, change runslow to False above { res = lapply(eval(str2expression(paste0("BioGeoBEARS_run_objects_", model_name))), bears_optim_run) save(res, file = eval(str2lang(paste0("resfn_", model_name)))) } else { # Loads to "res" load(eval(str2lang(paste0("resfn_", model_name)))) } assign(paste0("res", model_name, "s"), res) } #paste0("res, model_name) produces these, if multiple models are tested # resDECs # resDIVALIKEs # resBAYAREALIKEs ####################################################### # Set up models (+J models) ####################################################### # Set up DEC+J model ==================================================== BioGeoBEARS_run_objects_DEC_J <- BioGeoBEARS_run_objects_DEC # Get the ML parameter values from the 2-parameter nested model # (this will ensure that the 3-parameter model always does at least as good) DEC_J_to_list <- function(BioGeoBEARS_run_object_DEC_J, resDEC){ dstart <- resDEC$outputs@params_table["d","est"] estart <- resDEC$outputs@params_table["e","est"] jstart = 0.0001 # Input starting values for d, e BioGeoBEARS_run_object_DEC_J$BioGeoBEARS_model_object@params_table["d","init"] = dstart BioGeoBEARS_run_object_DEC_J$BioGeoBEARS_model_object@params_table["d","est"] = dstart BioGeoBEARS_run_object_DEC_J$BioGeoBEARS_model_object@params_table["e","init"] = estart BioGeoBEARS_run_object_DEC_J$BioGeoBEARS_model_object@params_table["e","est"] = estart # Add j as a free parameter BioGeoBEARS_run_object_DEC_J$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object_DEC_J$BioGeoBEARS_model_object@params_table["j","init"] = jstart BioGeoBEARS_run_object_DEC_J$BioGeoBEARS_model_object@params_table["j","est"] = jstart return(BioGeoBEARS_run_object_DEC_J) } BioGeoBEARS_run_objects_DEC_J <- mapply(DEC_J_to_list, BioGeoBEARS_run_objects_DEC_J, resDECs, SIMPLIFY = F) # Set up DIVALIKE+J model =============================================== if (!is.null(model_names_J)){ BioGeoBEARS_run_objects_DIVALIKE_J <- BioGeoBEARS_run_objects_DIVALIKE # Get the ML parameter values from the 2-parameter nested model # (this will ensure that the 3-parameter model always does at least as good) DIVALIKE_J_to_list <- function(BioGeoBEARS_run_object_DIVALIKE_J, resDIVALIKE){ dstart = resDIVALIKE$outputs@params_table["d","est"] estart = resDIVALIKE$outputs@params_table["e","est"] jstart = 0.0001 # Input starting values for d, e BioGeoBEARS_run_object_DIVALIKE_J$BioGeoBEARS_model_object@params_table["d","init"] = dstart BioGeoBEARS_run_object_DIVALIKE_J$BioGeoBEARS_model_object@params_table["d","est"] = dstart BioGeoBEARS_run_object_DIVALIKE_J$BioGeoBEARS_model_object@params_table["e","init"] = estart BioGeoBEARS_run_object_DIVALIKE_J$BioGeoBEARS_model_object@params_table["e","est"] = estart # Add jump dispersal/founder-event speciation BioGeoBEARS_run_object_DIVALIKE_J$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object_DIVALIKE_J$BioGeoBEARS_model_object@params_table["j","init"] = jstart BioGeoBEARS_run_object_DIVALIKE_J$BioGeoBEARS_model_object@params_table["j","est"] = jstart # Under DIVALIKE+J, the max of "j" should be 2, not 3 (as is default in DEC+J) BioGeoBEARS_run_object_DIVALIKE_J$BioGeoBEARS_model_object@params_table["j","min"] = 0.00001 BioGeoBEARS_run_object_DIVALIKE_J$BioGeoBEARS_model_object@params_table["j","max"] = 1.99999 return(BioGeoBEARS_run_object_DIVALIKE_J) } BioGeoBEARS_run_objects_DIVALIKE_J <- mapply(DIVALIKE_J_to_list, BioGeoBEARS_run_objects_DIVALIKE_J, resDIVALIKEs, SIMPLIFY = F) # Set up BAYAREALIKE+J model ============================================ BioGeoBEARS_run_objects_BAYAREALIKE_J <- BioGeoBEARS_run_objects_BAYAREALIKE # Get the ML parameter values from the 2-parameter nested model # (this will ensure that the 3-parameter model always does at least as good) BAYAREALIKE_J_to_list <- function(BioGeoBEARS_run_object_BAYAREALIKE_J, resBAYAREALIKE){ dstart = resBAYAREALIKE$outputs@params_table["d","est"] estart = resBAYAREALIKE$outputs@params_table["e","est"] jstart = 0.0001 # Input starting values for d, e BioGeoBEARS_run_object_BAYAREALIKE_J$BioGeoBEARS_model_object@params_table["d","init"] = dstart BioGeoBEARS_run_object_BAYAREALIKE_J$BioGeoBEARS_model_object@params_table["d","est"] = dstart BioGeoBEARS_run_object_BAYAREALIKE_J$BioGeoBEARS_model_object@params_table["e","init"] = estart BioGeoBEARS_run_object_BAYAREALIKE_J$BioGeoBEARS_model_object@params_table["e","est"] = estart # *DO* allow jump dispersal/founder-event speciation (set the starting value close to 0) BioGeoBEARS_run_object_BAYAREALIKE_J$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object_BAYAREALIKE_J$BioGeoBEARS_model_object@params_table["j","init"] = jstart BioGeoBEARS_run_object_BAYAREALIKE_J$BioGeoBEARS_model_object@params_table["j","est"] = jstart # Under BAYAREALIKE+J, the max of "j" should be 1, not 3 (as is default in DEC+J) or 2 (as in DIVALIKE+J) BioGeoBEARS_run_object_BAYAREALIKE_J$BioGeoBEARS_model_object@params_table["j","max"] = 0.99999 return(BioGeoBEARS_run_object_BAYAREALIKE_J) } BioGeoBEARS_run_objects_BAYAREALIKE_J <- mapply(BAYAREALIKE_J_to_list, BioGeoBEARS_run_objects_BAYAREALIKE_J, resBAYAREALIKEs, SIMPLIFY = F) # Check the inputs ==================================================== # Run this to check inputs. Read the error messages if you get them! for (model_name in model_names_J) { model <- eval(str2expression(paste0("BioGeoBEARS_run_objects_", model_name))) print(lapply(model, check_BioGeoBEARS_run)) } ####################################################### # Run analyses (+J models) ####################################################### # Make .Rdata to load for slow analyses for(model_name in model_names_J){ assign(paste0("resfn_", model_name), paste0(model_name, "_result_", dispersal_multiplier, ".Rdata")) print(eval(str2lang(paste0("resfn_", model_name)))) } for (model_name in model_names_J) { if (runslow) #if already have results, change runslow to False above { res = lapply(eval(str2expression(paste0("BioGeoBEARS_run_objects_", model_name))), bears_optim_run) save(res, file = eval(str2lang(paste0("resfn_", model_name)))) } else { # Loads to "res" load(eval(str2lang(paste0("resfn_", model_name)))) } assign(paste0("res", model_name, "s"), res) } #paste0("res", model_name) produces these by defaults # resDEC_Js # resDIVALIKE_Js # resBAYAREALIKE_Js } ######################################################################### # # CALCULATE SUMMARY STATISTICS TO COMPARE # DEC, DEC+J, DIVALIKE, DIVALIKE+J, BAYAREALIKE, BAYAREALIKE+J # ######################################################################### restables = NULL teststables = NULL numparams1 <- 3 numparams2 <- 2 if (length(model_names)!=1){ statistic_summary <- function(result2, result1, x){ LnL_2 = get_LnL_from_BioGeoBEARS_results_object(result2) LnL_1 = get_LnL_from_BioGeoBEARS_results_object(result1) stats = AICstats_2models(LnL_1, LnL_2, numparams1, numparams2) res2 = extract_params_from_BioGeoBEARS_results_object(results_object=result2, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) res1 = extract_params_from_BioGeoBEARS_results_object(results_object=result1, returnwhat="table", addl_params=c("j"), paramsstr_digits=4) stats = AICstats_2models(LnL_1, LnL_2, numparams1, numparams2) tmp_tests = conditional_format_table(stats) temp_restable = rbind(res2, res1) temp_teststable = rbind(tmp_tests) return(list(temp_restable, temp_teststable)[[x]]) } for(model_name in model_names){ result_list2 <- eval(str2lang(paste0("res", model_name, "s"))) result_list1 <- eval(str2lang(paste0("res", model_name, "_Js"))) restable_temp <- mapply(statistic_summary, result_list2, result_list1, 1, SIMPLIFY = F) restables <- if (is.null(restables)){restable_temp} else{ mapply(function(x, y){rbind(x, y)}, restables, restable_temp, SIMPLIFY = F) } teststable_temp <- mapply(statistic_summary, result_list2, result_list1, 2, SIMPLIFY = F) teststables <- if (is.null(teststables)){teststable_temp} else{ mapply(function(x, y){rbind(x, y)}, teststables, teststable_temp, SIMPLIFY = F) } } teststables <- lapply(teststables, function(teststable){ teststable$alt = c("DEC+J", "DIVALIKE+J", "BAYAREALIKE+J") teststable$null = c("DEC", "DIVALIKE", "BAYAREALIKE") return(teststable) }) restables <- lapply(restables, function(restable){ row.names(restable) = c("DEC", "DEC+J", "DIVALIKE", "DIVALIKE+J", "BAYAREALIKE", "BAYAREALIKE+J") restable = put_jcol_after_ecol(restable) return(restable) }) # With AICcs -- factors in sample size restables2 <- restables samplesize = length(con_tree$tip.label) restable_AICc_rellike <- mapply(function(restable2, restable){ AICtable = calc_AICc_column(LnL_vals=restable$LnL, nparam_vals=restable$numparams, samplesize=samplesize) restable2 = cbind(restable2, AICtable) restable_AICc_rellike = AkaikeWeights_on_summary_table(restable=restable2, colname_to_use="AICc") restable_AICc_rellike = put_jcol_after_ecol(restable_AICc_rellike) return(restable_AICc_rellike) }, restables2, restables, SIMPLIFY = F) restable_AICc_rellike AICc_array <- array(unlist(restable_AICc_rellike), dim = c(nrow(restable_AICc_rellike[[1]]), ncol(restable_AICc_rellike[[1]]), length(restable_AICc_rellike))) stats_mean <- apply(AICc_array, c(1, 2), mean) stats_sd <- apply(AICc_array , c(1, 2), sd) stats_median <- apply(AICc_array, c(1, 2), median) rownames(stats_mean) <- rownames(stats_sd) <- rownames(stats_median) <- rownames(restable_AICc_rellike[[1]]) colnames(stats_mean) <- colnames(stats_sd) <- colnames(stats_median) <- colnames(restable_AICc_rellike[[1]]) for(i in c("stats_mean", "stats_sd", "stats_median")){ write.csv(eval(str2expression(i)), paste0(i, dispersal_multiplier, ".csv")) } }else{ stats_all <- NULL result_list <- eval(str2lang(paste0("res", model_names, "s"))) for(i in c(1:ntrees)){ LnL = get_LnL_from_BioGeoBEARS_results_object(result_list[[i]]) res = extract_params_from_BioGeoBEARS_results_object(results_object=result_list[[i]], returnwhat="table", addl_params=c("j"), paramsstr_digits=4) AIC = 2 * res$numparams - 2 * LnL AICc <- AIC + ((2 * numparams2 * (numparams2 + 1))/(length(trees[[1]]$tip.label) - numparams2 - 1)) res <- cbind(res, AIC, AICc) stats_all <- rbind(stats_all, res) } stats_mean <- apply(stats_all, 2, mean) stats_sd <- apply(stats_all, 2, sd) stats_median <- apply(stats_all, 2, median) stats_all <- rbind(mean = stats_mean, sd = stats_sd, median = stats_median) write.csv(stats_all, paste0("stats_", dispersal_multiplier, "_", strict_or_majority, ".csv")) } ####################################################### # Plot results # Since the analyses are done for ntrees times, the # averages of the results are plotted on the consensus # tree. # Note that the consensus tree is not equal to the # any of the trees used in the analyses. ####################################################### #Setting up the tree to plot and the tip labels ========================================== states_list_0based_index = rcpp_areas_list_to_states_list(areas, maxareas = max_range_size, include_null_range = include_null_ranges_TF) colors_matrix = get_colors_for_numareas(length(areas)) colors_list_for_states = mix_colors_for_states(colors_matrix, states_list_0based_index, plot_null_range = include_null_ranges_TF) con_tree_plot <- timePaleoPhy(con_tree, ages, type = "equal", vartime = 1, ntrees = 1, randres = F, dateTreatment = "firstLast") con_tree_plot <- multi2di(con_tree_plot, random = F) #con_tree_plot <- ladderize(con_tree_plot) reord_ages <- ages[con_tree_plot$tip.label, ] contree_root_age <- con_tree_plot$root.time taxon.ranges <- contree_root_age - reord_ages[, c("FAD", "LAD")] tip_range <- c(1:length(con_tree_plot$tip.label)) node_range <- c((length(con_tree_plot$tip.label)+1): (length(con_tree_plot$tip.label)+con_tree_plot$Nnode)) tip_ranges_matrix <- data.matrix(tipranges@df)-1 tip_ranges_df <- data.frame(matrix(tip_ranges_matrix, length(con_tree_plot$tip.label), numareas), row.names = attributes(tip_ranges_matrix)$dimnames[[1]]) colnames(tip_ranges_df) <- attributes(tip_ranges_matrix)$dimnames[[2]] tip_ranges_df[tip_ranges_df == 0] <- NA for (j in c(1:nrow(tip_ranges_df))){ for (i in colnames(tip_ranges_df)){ tip_ranges_df[j, i][!is.na(tip_ranges_df[j, i])] <- i } } tip_ranges_vector <- apply(tip_ranges_df, 1, median, na.rm = T) tip_ranges_vector <- tip_ranges_vector[con_tree_plot$tip.label] for(model_name in c(model_names, model_names_J)){ #Setting up the node labels============================================ temp_func <- function(result){ return(result$ML_marginal_prob_each_state_at_branch_top_AT_node) } marg_prob_list <- lapply(eval(str2lang(paste0("res", model_name, "s"))), temp_func) marg_prob_average <- apply(simplify2array(marg_prob_list), 1:2, mean) #Setting up the legends================================================ statenames = areas_list_to_states_list_new(areas, maxareas = max_range_size, include_null_range = include_null_ranges_TF, split_ABC = FALSE) MLstates = get_ML_states_from_relprobs(marg_prob_average, statenames, returnwhat = "states", if_ties = "takefirst") possible_ranges_list_txt = areas_list_to_states_list_new(areas, maxareas = max_range_size, split_ABC = FALSE, include_null_range = include_null_ranges_TF) cols_byNode = rangestxt_to_colors(possible_ranges_list_txt, colors_list_for_states, MLstates) names(cols_byNode) <- MLstates unique_ML_states <- unique(MLstates)[order(nchar(unique(MLstates)), unique(MLstates))] x_pos_tex <- c(rep(1, times = ceiling(length(unique_ML_states)/2)), rep(6, times = floor(length(unique_ML_states)/2))) y_pos_tex <- rev(c(rep(c(1:ceiling(length(unique_ML_states)/2))*4, times = 2))) if (length(x_pos_tex)!=length(y_pos_tex)){ y_pos_tex <- head(y_pos_tex, -1) } #Plot============================================================== pdf(paste0(strict_or_majority, "_", model_name, "_result_", dispersal_multiplier, ".pdf") , width = 3.5, height = 5) geoscalePhylo(con_tree_plot, ages, erotate = 0, cex.tip = 0.4) lastPP_global <- get("last_plot.phylo", envir = .PlotPhyloEnv) par(lend = 1) tip_ranges_vector <- factor(tip_ranges_vector, levels = c("A", "L", "P", "S", "E", "F")) segments(taxon.ranges[, "FAD"], lastPP_global$yy[c(1:length(con_tree_plot$tip.label))], taxon.ranges[, "LAD"], lastPP_global$yy[c(1:length(con_tree_plot$tip.label))], col = colors_list_for_states[c(2:(numareas+1))][as.numeric(factor(tip_ranges_vector))], lwd = 2) nodelabels(pie = marg_prob_average[node_range, ], cex = 0.5, piecol = colors_list_for_states) sw <- strwidth(unique_ML_states) sh <- strheight(unique_ML_states) frsz <- 0.3 text_size <- 0.5 rect( x_pos_tex - text_size*(sw/2) - frsz, y_pos_tex - text_size*(sh/2) - frsz, x_pos_tex + text_size*(sw/2) + frsz, y_pos_tex + text_size*(sh/2) + frsz, col = cols_byNode[unique_ML_states] ) text(x_pos_tex, y_pos_tex, unique_ML_states, cex = text_size) dev.off() } } ####################################################### # Basic variables ####################################################### setwd("D:/Dropbox/Research_Projects/Awaji Hadrosaur/Manuscripts/Supplements") model_names <- "DEC" #Performs only DEC following Ree and Sanmartin (2018) model_names_J <- NULL # model_names <- c("DEC", "DIVALIKE", "BAYAREALIKE") # model_names_J <- c("DEC_J", "DIVALIKE_J", "BAYAREALIKE_J") trees <- read.nexus("Data S1.nex") ages <- read.csv("Data S2.csv", row.names = 1) geogfn = np("Data S8.data") #Rename Supplementary Data S5, S6, S7 into "harsh.txt", "starting.txt", and "relaxed.txt", respectively. files <- list.files() if (files[grep("\\.txt$", files)][1] != "harsh.txt"){ file.rename(files[grep("\\.txt$", files)][1], "harsh.txt") file.rename(files[grep("\\.txt$", files)][2], "starting.txt") file.rename(files[grep("\\.txt$", files)][3], "relaxed.txt") } dispersal_multipliers <- c("harsh", "starting", "relaxed") #"harsh", "starting", or "relaxed" ntrees <- 100 #Number of trees to replicate strict_or_majority <- "Maj50" #set either "Strict or "Maj50" max_range_size = 3 runslow <- T #IF TRUE, RUNS ALL ANALYSES. IF FALSE, LOADS PREVIOUS RESULTS num_cores = 4 #Number of CPU cores to use ############################################################################### ####################################################### # RUN analyses ####################################################### for (dispersal_multiplier in dispersal_multipliers){ run_BioGeoBEARS(trees = trees, model_names = model_names, model_names_J = model_names_J, ages = ages, geogfn = geogfn, dispersal_multiplier = dispersal_multiplier, ntrees = ntrees, strict_or_majority = strict_or_majority, max_range_size = max_range_size, runslow = runslow, num_cores = num_cores) } if(model_names != "DEC"){ for (i in c("mean", "sd", "median")){ write.csv(rbind(read.csv("stats_meanstarting.csv", row.names = 1), read.csv("stats_meanharsh.csv", row.names = 1), read.csv("stats_meanrelaxed.csv", row.names = 1)), paste0("stats_all_", i, ".csv"))} }else{ for (i in c("mean", "sd", "median")){ stats_all <- rbind(read.csv("stats_relaxed_Maj50.csv", row.names = 1)[i, ], read.csv("stats_starting_Maj50.csv", row.names = 1)[i, ], read.csv("stats_harsh_Maj50.csv", row.names = 1)[i, ]) row.names(stats_all) <- c("relaxed", "starting", "harsh") write.csv(stats_all, paste0("stats_all_", i, ".csv")) } } ###############################################################################