################################################################################ # R-Script # Global patterns of geo-ecological controls on the response of soil respiration to warming # # Author information will be added once review process is completed # # # Details and constraints of the parameters of the model, statistics and caveats # can be found in the online-only methods section of this manuscript and its sup- # plement. All files needed to re-construct our workflow are available in the # supplement. Abbreviations of variables are explained in the extended data # "SRRW_database.xls" file ################################################################################ # Load Q10 data (Best and Generalized tables; Generalized always with _gl) df <- read.csv("Q10_local_test.csv",header=TRUE,sep=";") colnames(df)[c(16,18:32)] <- c("Tmean","pH","C","N","CN","CEC","Bsat", "MAT","MAP","Sand","Clay","Silt","P","NDVI", "PET","Humidity") df_gl <- read.csv("Q10_global_test.csv",header=TRUE,sep=";") colnames(df_gl)[c(16,18:32)] <- c("Tmean","pH","C","N","CN","CEC","Bsat","MAT", "MAP","Sand","Clay","Silt","P","NDVI","PET", "Humidity") # Replace pH < 3 with 3 df$pH <- ifelse(df$pH > 3, df$pH, 3) df_gl$pH <- ifelse(df_gl$pH > 3, df_gl$pH, 3) # The following script shows the procedure with the best data approach. It can # be reproduced interchangeable for the generalized data approach by replacing # input file with the according "_gl" equivalents. Note that in the following # script all regions that are classified as subpolar and covered by our data # are grouped together with boreal data and named "boreal". # # For reproduction set.seed(138) # Data map (Figure 1S) # Needed Packages and Functions: library(maps);library(maptools);library(ggsn);library(ggplot2) # Functions for changing the x and y-axis labels of the map: scale_x_longitude <- function(xmin=-180, xmax=180, step=1, ...) { xbreaks <- seq(xmin,xmax,step) xlabels <- unlist(lapply(xbreaks, function(x) ifelse(x < 0, parse(text=paste0(x,"^o", " *W")), ifelse(x > 0, parse(text=paste0(x,"^o", " *E")),x)))) return(scale_x_continuous("Longitude", breaks = xbreaks, labels = xlabels, expand = c(0, 0), ...)) } scale_y_latitude <- function(ymin=-90, ymax=90, step=0.5, ...) { ybreaks <- seq(ymin,ymax,step) ylabels <- unlist(lapply(ybreaks, function(x) ifelse(x < 0, parse(text=paste0(x,"^o", " *S")), ifelse(x > 0, parse(text=paste0(x,"^o", " *N")),x)))) return(scale_y_continuous("Latitude", breaks = ybreaks, labels = ylabels, expand = c(0, 0), ...)) } # Get global map: mapWorld <- borders("world") mp <- ggplot() + mapWorld # Plot data points on global map and change axis labels: map <- mp + geom_point(df,mapping=aes(x=WGS.84.E, y=WGS.84.N, shape=Type,color=Type))+ coord_cartesian(xlim=c(-180, 190), ylim=c(-75,90))+ geom_jitter(width = 1, height = 1)+theme_bw(base_size = 14)+ scale_x_longitude(xmin=-180, xmax=180, step=40) + scale_y_latitude(ymin=-75, ymax=80, step=50) + labs(shape="Study Type",color="Study Type", caption="Coordinate system: WGS84")+ labs(x="", y="")+guides(shape=guide_legend(override.aes=list(size=4)))+ theme(legend.background = element_rect(fill=NULL,size=0.5,linetype="solid", colour ="black"), legend.justification=c(0,0), legend.position=c(0.05,0.05)) # Save the map in pdf and tif format ggsave("2019_05_07254A_SRRW/map_all.tiff",map, dpi=600, width = 25, height=20, units = "cm") ggsave("2019_05_07254A_SRRW/map_all.pdf",map, dpi=600, width = 25, height=20, units = "cm") # ANOVA/Kruskal-Wallis-Test for Climatezones and Land-use # Needed packages: library(Rmisc); library(ggplot2); library(gridExtra) library(pgirmess); library(multcompView);library(rstatix) # Climatezones # Descriptive statitstics sumdata_clim <- summarySE(df, measurevar="Q10", groupvars="Climatezone") # Test for Normality and Homogeinity of Variance shapiro.test(df$Q10[df$Climatezone=="Boreal"]) shapiro.test(df$Q10[df$Climatezone=="Temperate"]) shapiro.test(df$Q10[df$Climatezone=="Subtropical"]) shapiro.test(df$Q10[df$Climatezone=="Tropical"]) fligtest_clim <- fligner.test(Q10 ~ Climatezone, data=df) # All test with p < 0.05 ==> Kruskal-Willis-Test: kruskal.test(Q10~Climatezone,data=df) # Effect size of Kruskal-Wallis Test kw_effsize_veg <- kruskal_effsize(Q10~Climatezone,data=df) # Post-Hoc Test for Kruskal-Wallis-test kmc_clim <- kruskalmc(Q10~Climatezone,data=df) test_clim <- kmc_clim$dif.com$difference names(test_clim) <- row.names(kmc_clim$dif.com) # Define Letters for figure let_clim <- multcompLetters(test_clim, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_clim <- data.frame(let_clim$Letters) letters_clim$Zones <- row.names(letters_clim) # Define text for figure clim_order <- c("Boreal","Temperate","Subtropical","Tropical") sumdata_clim$ext <- c("15 %","5 %","10 %","4 %") # External calculation # of excluded data points # Plot the data (Figure 2S a) clim_plot_fig <- ggplot(data=sumdata_clim, aes(y=Q10,x=Climatezone))+ geom_point(size=4)+ geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), position = position_dodge(0.9),width = 0.25)+ theme_bw(base_size = 14)+labs(x="Climate Zones", y = "Mean Q10")+ scale_fill_manual(values=c("#FFCC00","#FF9900","#339300","#3399FF"))+ geom_text(aes(label=paste("n =",N),x=Climatezone,y=3.6), position = position_dodge(0.9)) + geom_text(aes(x=Zones,y=3.8, label=let_clim.Letters),data=letters_clim)+ geom_text(aes(x=Climatezone,y=1.2,label=ext),data=sumdata_clim)+ scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+ scale_x_discrete(expand=c(0,0),limits=clim_order)+ annotate("text",x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5,label="a)") # Land-Use (same procedure as for climate zones) sumdata_veg <- summarySE(df, measurevar="Q10", groupvars="Vegetation") shapiro.test(df$Q10[df$Vegetation=="agriculture"]) shapiro.test(df$Q10[df$Vegetation=="forest"]) shapiro.test(df$Q10[df$Vegetation=="grassland"]) shapiro.test(df$Q10[df$Vegetation=="wetland"]) fligtest_veg <- fligner.test(Q10 ~ Vegetation, data=df) # All test with p < 0.05 ==> Kruskal-Willis-Test: kruskal.test(Q10~Vegetation,data=df) # Test for effect size of Kruskal-Wallis Test kw_effsize_veg <- kruskal_effsize(Q10~Vegetation,data=df) # Post-Hoc Test for Kruskal-Wallis-test kmc_veg <- kruskalmc(Q10~Vegetation,data=df) test_veg <- kmc_veg$dif.com$difference names(test_veg) <- row.names(kmc_veg$dif.com) # Define Letters let_veg <- multcompLetters(test_veg, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_veg <- data.frame(let_veg$Letters) # Define text for figure letters_veg$Zones <- row.names(letters_veg) sumdata_veg$ext <- c("7 %","9 %","7 %","17 %") # External calculation # of excluded data points. # Plot the data (Figure 2S b) veg_plot_fig <- ggplot(data=sumdata_veg, aes(y=Q10,x=Vegetation))+ geom_point(size=4)+ geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), position = position_dodge(0.9),width = 0.25)+ theme_bw(base_size = 14)+labs(x="Land-Use", y = "Mean Q10")+ scale_fill_manual(values=c("#FFCC00","#FF9900","#339300","#3399FF"))+ geom_text(aes(label=paste("n =",N),x=Vegetation,y=3.6), position = position_dodge(0.9)) + geom_text(aes(x=Zones,y=3.8, label=let_veg.Letters),data=letters_veg)+ geom_text(aes(x=Vegetation,y=1.2,label=ext),data=sumdata_veg)+ scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+ annotate("text", x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5,label="b)") # Combine the climate zone and land-use Plots climveg_fig <- arrangeGrob(clim_plot_fig, veg_plot_fig) # Save plots for onlz onlz climate yones and vegetation ggsave("climveg_fig_new.pdf", climveg_fig,dpi=600) ggsave("climveg_fig_new.tiff", climveg_fig,dpi=600) ################################################################################ # Analyse experiment techniques library(Rmisc); library(ggplot2); library(gridExtra) library(pgirmess); library(multcompView) # Load extra data set df_treat <- read.csv("Q10_treatment_db.csv",header=TRUE,sep=";") df_treat <- df_treat[,-(c(15:30))] # Remove autotrophic studies (n = 13) df_treat <- df_treat[-which(df_treat$Autotrophic.Heterotrophic=="Autotrophic"),] # Sequential Warming: Consecutively vs Paralell df_warm_sum <- summarySE(df_treat, measurevar="Q10", groupvars="Sequential.warming") df_treat$Sequential.warming <- as.character(df_treat$Sequential.warming) shapiro.test(df_treat$Q10[df_treat$Sequential.warming=="parallel independent samples "]) # p.value < 2.2e-16 shapiro.test(df_treat$Q10[df_treat$Sequential.warming=="warmed consecutively "]) # p.value < 2.2e-16 fligtest_warm <- fligner.test(Q10 ~ Sequential.warming, data=df_treat) # p-value = 0.5175 kruskal.test(Q10~Sequential.warming,data=df_treat) # Test for effect size of Kruskal-Wallis Test kruskal_effsize(Q10~Sequential.warming,data=df_treat) # Post-Hoc Test for Kruskal-Wallis-test kmc_warm <- kruskalmc(Q10~Sequential.warming,data=df_treat) test_warm <- kmc_warm$dif.com$difference names(test_warm) <- row.names(kmc_warm$dif.com) # Define Letters let_warm <- multcompLetters(test_warm, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_warm <- data.frame(let_warm$Letters) # Define text for figure letters_warm$Zones <- row.names(letters_warm) # Plot the data (Figure 2S d) warm_plot_fig <- ggplot(data=df_warm_sum, aes(y=Q10,x=Sequential.warming))+ geom_point(size=4)+ geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), position = position_dodge(0.9),width = 0.25)+ theme_bw(base_size=14)+labs(x="Sequential/parallel warming", y = "Q10")+ scale_fill_manual(values=c("#FFCC00","#FF9900","#339300","#3399FF"))+ geom_text(aes(label=paste("n =",N),x=Sequential.warming,y=3.6), position = position_dodge(0.9)) + geom_text(aes(x=Zones,y=3.8, label=let_warm.Letters),data=letters_warm)+ scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+ scale_x_discrete(label=c("Parallel","Sequential"))+ theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank())+ annotate("text",label = 'd)', x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5) # Treatment vs. None Treatment df_treat$Treated.Added <- as.character(df_treat$Treated.Added) df_treat$treat <- ifelse(df_treat$Treated.Added=="None","None","Treated") df_treat_sum <- summarySE(df_treat, measurevar="Q10", groupvars="treat") shapiro.test(df_treat$Q10[df_treat$treat=="None"]) # p.value < 2.2e-16 shapiro.test(df_treat$Q10[df_treat$treat=="Treated"]) # p.value < 2.2e-16 fligtest_treat <- fligner.test(Q10 ~ treat, data=df_treat) # p-value = 0.4466 kruskal.test(Q10~treat,data=df_treat) # Test for effect size of Kruskal-Wallis Test kruskal_effsize(Q10~treat,data=df_treat) # Post-Hoc Test for Kruskal-Wallis-test kmc_treat <- kruskalmc(Q10~treat,data=df_treat) test_treat <- kmc_treat$dif.com$difference names(test_treat) <- row.names(kmc_treat$dif.com) # Define Letters let_treat <- multcompLetters(test_treat, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_treat <- data.frame(let_treat$Letters) # Define text for figure letters_treat$Zones <- row.names(letters_treat) # Plot the data (Figure 2S e) treat_plot_fig <- ggplot(data=df_treat_sum, aes(y=Q10,x=treat))+ geom_point(size=4)+ geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), position = position_dodge(0.9),width = 0.25)+ theme_bw(base_size = 14)+labs(x="Pre-Treatment", y = "Mean Q10")+ scale_fill_manual(values=c("#FFCC00","#FF9900","#339300","#3399FF"))+ geom_text(aes(label=paste("n =",N),x=treat,y=3.6), position = position_dodge(0.9)) + geom_text(aes(x=Zones,y=3.8, label=let_treat.Letters),data=letters_treat)+ scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+ scale_x_discrete(label=c("None","Treatment"))+ annotate("text",label = 'e)', x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5) # Hetero vs Auto df_treat$Autotrophic.Heterotrophic <- as.character(df_treat$Autotrophic.Heterotrophic) df_auhe_sum <- summarySE(df_treat, measurevar="Q10", groupvars="Autotrophic.Heterotrophic") shapiro.test(df_treat$Q10[df_treat$Autotrophic.Heterotrophic=="Both"]) # p.value < 2.2e-16 shapiro.test(df_treat$Q10[df_treat$Autotrophic.Heterotrophic=="Heterotrophic"]) # p.value = 2.206e-06 shapiro.test(df_treat$Q10[df_treat$Autotrophic.Heterotrophic=="Autotrophic"]) # p.value p-value = 0.1332 fligtest_treat <- fligner.test(Q10 ~ Autotrophic.Heterotrophic, data=df_treat) # p-value = 1.069e-07 kruskal.test(Q10~Autotrophic.Heterotrophic,data=df_treat) # Test for effect size of Kruskal-Wallis Test kruskal_effsize(Q10~Autotrophic.Heterotrophic,data=df_treat) # Post-Hoc Test for Kruskal-Wallis-test kmc_auhe <- kruskalmc(Q10~Autotrophic.Heterotrophic,data=df_treat) test_auhe <- kmc_auhe$dif.com$difference names(test_auhe) <- row.names(kmc_auhe$dif.com) # Define Letters let_auhe <- multcompLetters(test_auhe, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_auhe <- data.frame(let_auhe$Letters) # Define text for figure letters_auhe$Zones <- row.names(letters_auhe) # Plot the data (Figure 2S f) auhe_plot_fig <- ggplot(data=df_auhe_sum, aes(y=Q10,x=Autotrophic.Heterotrophic))+ geom_point(size=4)+ geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), position = position_dodge(0.9),width = 0.25)+ theme_bw(base_size = 14)+labs(x="Mixed vs. Heterotrophic", y = "Mean Q10")+ scale_fill_manual(values=c("#FFCC00","#FF9900","#339300","#3399FF"))+ geom_text(aes(label=paste("n =",N),x=Autotrophic.Heterotrophic,y=3.6), position = position_dodge(0.9)) + geom_text(aes(x=Zones,y=3.8, label=let_auhe.Letters),data=letters_auhe)+ scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+ scale_x_discrete(label=c("Mixed","Heteorotophic"))+ theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank())+ annotate("text",label = 'f)', x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5) # Lab versus Field df_treat$ohneauto <- ifelse(df_treat$Autotrophic.Heterotrophic=="Autotrophic",NA,df_treat$Sequential.warming) df_treat_bh <- na.omit(df_treat) df_lf_sum <- summarySE(df_treat_bh, measurevar="Q10", groupvars="Type") kruskal.test(Q10~Type,data=df_treat_bh) # Test for effect size of Kruskal-Wallis Test kruskal_effsize(Q10~Type,data=df_treat_bh) kmc_lf <- kruskalmc(Q10~Type,data=df_treat_bh) test_lf <- kmc_lf$dif.com$difference names(test_lf) <- row.names(kmc_lf$dif.com) # Define Letters let_lf <- multcompLetters(test_lf, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_lf <- data.frame(let_lf$Letters) # Define text for figure letters_lf$Zones <- row.names(letters_lf) # Plot the data (Figure 2Sb) lf_plot_fig <- ggplot(data=df_lf_sum, aes(y=Q10,x=Type))+ geom_point(size=4)+ geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), position = position_dodge(0.9),width = 0.25)+ theme_bw(base_size = 14)+labs(x="Lab vs. Field", y = "Mean Q10")+ geom_text(aes(label=paste("n =",N),x=Type,y=3.6), position = position_dodge(0.9)) + geom_text(aes(x=Zones,y=3.8, label=let_lf.Letters),data=letters_lf)+ scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+ scale_x_discrete(expand = c(0,0))+ annotate("text",label = 'c)', x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5) # Combine the experiment plots library(gridExtra) com_plot <- grid.arrange(lf_plot_fig,warm_plot_fig,treat_plot_fig,auhe_plot_fig,all_plot_fig, layout_matrix = rbind(c(1, 2, 3,4), c(5, 5, 5,5))) com_plot <- grid.arrange(climveg_fig,lf_plot_fig,warm_plot_fig,treat_plot_fig,auhe_plot_fig, layout_matrix = rbind(c(1, 1),c(1, 1),c(2, 3), c(4,5))) com_plot <- arrangeGrob(climveg_fig,lf_plot_fig,warm_plot_fig,treat_plot_fig,auhe_plot_fig, layout_matrix = rbind(c(1, 1),c(1, 1),c(2, 3), c(4,5))) ggsave("Kruskall_wallis.pdf", com_plot,width=20,height=30, unit="cm",dpi=600) ggsave("Kruskall_wallis.tiff", com_plot,width=20,height=30, unit="cm",dpi=600) ############################################################################# # Q10 prediction modelling (shown for local studies (df); same approach with # with generalized table (df_gl), replace df with df_gl) # # Individual Variables (no changes) # Define Formula form_all2 <- as.formula(Q10~pH+C+N+CN+CEC+Bsat+MAT+MAP+Sand+Silt+Clay+NDVI+P+ Tmean+PET+Humidity) library(caret) # Create Resample (80 % of input data and 20 Resamples) dp_all <- createDataPartition(df$Q10, p=.80, times=20) # Define Monte-Carlo cross-validation and model selection # LGOCV = Leave-one-group-out-cross-validation tr_all <- trainControl(method = "LGOCV",p=0.8, number=100,index=dp_all, preProcOptions=list(center=T,scale=T), selectionFunction = "oneSE") # Define predictors impVars <- df[,c("pH","C","N","CN","CEC","Bsat","MAT","MAP","Sand", "Silt","Clay","Tmean","P","NDVI","PET","Humidity")] # Variable Inflation Factor library(car) all_vifs_test <- try(vif(lm(form_all2, data=df)), silent=TRUE) if (class(all_vifs_test) == "try-error"){ lm_alias <- alias(lm(form_all2, data=df)) broken <- data.frame(lm_alias$Complete) broken_var <- row.names(broken) nam_var <- names(impVars) %in% broken_var impVars <- impVars[!nam_var] form_all_new <- as.formula(paste("Q10 ~ ", paste(names(impVars), collapse=" + "), sep="")) all_vifs <- vif(lm(form_all_new, data=df)) } else { all_vifs <- all_vifs_test } if(any(all_vifs>5)){ all_vifs <- as.data.frame(all_vifs) while((nrow(all_vifs) > 2)& (max(all_vifs[, 1]) > 5) & (class(all_vifs) != "try-error")) { remove_var <- rownames(all_vifs)[which(all_vifs[, 1] == max(all_vifs[, 1]))] impVars <- impVars[!names(impVars) %in% remove_var] fullForm <- paste ("Q10 ~ ", paste (names(impVars), collapse=" + "), sep="") fullMod <- lm(as.formula(fullForm), data=df) all_vifs <- try(as.data.frame(vif(fullMod)), silent=TRUE) } vif_filtered_variables <- names(fullMod$model)[!names(fullMod$model) %in% "Q10"] } else { all_vifs <- as.data.frame(all_vifs) vif_filtered_variables <- rownames(all_vifs)[!names(all_vifs) %in% "Q10"] } # Filtered Formula form <- as.formula(paste("Q10~",paste(vif_filtered_variables, collapse='+'))) ################################################################################ # Rotated Principal Components Analysis as Preprocessing # Estimation of defined Interactions # pred.wI <- df_gl[,c(16,18:32)] (for generalized data set) # Best data approach with Experimental data pred.wI <- df[,c(16,18:32)] pred.wI$'CN:P' <- pred.wI$CN/pred.wI$P pred.wI$'Clay:MAT' <- pred.wI$MAT/pred.wI$Clay pred.wI$'PET:NDVI' <- pred.wI$PET/pred.wI$NDVI pred.wI$'MAP:PET' <- pred.wI$MAP/pred.wI$PET pred.wI$PET <- NULL pred.wI$'Bsat:CEC' <- pred.wI$Bsat/pred.wI$CEC pred.wI$'Bsat:pH' <- pred.wI$Bsat/pred.wI$pH pred.wI$'CEC:Clay' <- pred.wI$CEC/pred.wI$Clay pred.wI$'MAT:MAP' <- pred.wI$MAT/pred.wI$MAP # Remove Tmean for analyses without experiment modifier #pred.wI$Tmean <- NULL # With the experiment parameters df_treat_all <- read.csv("Q10_treatment_db.csv",header=TRUE,sep=";") df_treat_all <- df_treat_all[,c(1:14)] df_all_data <- cbind.data.frame(df,df_treat_all[,c(12:14)]) write.csv(df_all_data,file="SRRW_Rscript/Database_best_wExp.csv",row.names = FALSE) df_merged <- cbind.data.frame(df[c(16,18:32)],df_treat_all[,c(11:14)]) df_merged$Sequential.warming <- ifelse(df_merged$Sequential.warming=="warmed consecutively ",1,0) df_merged$Treated.Added <- ifelse(df_merged$Treated.Added=="None",1,0) df_merged <- df_merged[-which(df_merged$Autotrophic.Heterotrophic=="Autotrophic"),] df_merged$Autotrophic.Heterotrophic <- ifelse(df_merged$Autotrophic.Heterotrophic=="Both",1,0) df_merged$Type <- ifelse(df_merged$Type=="Field",1,0) pred.wI <- df_merged #pred.wI$'N:P' <- pred.wI$N/pred.wI$P #pred.wI$MAT <- NULL # Without experimental data pred.wI <- pred.wI[,c(2:15,20:27)] # Calculation of unrotated PCA and selection of PCs amount sdx <- as.matrix(pred.wI) nc <- ncol(sdx) nr <- nrow(sdx) rmin <- 1 rmax <- nc sdx <- scale(sdx) cx <- cov(sdx) # Covariance Matrix ea <- svd(cx, nv=0) nopc <- ncol(ea$u) sdevalues <- matrix(rep(sqrt(ea$d), nopc), nrow = nopc, ncol=nopc,byrow = TRUE) loadings <- ea$u*sdevalues scores <- sdx %*% ea$u scmx <- matrix(rep(colMeans(scores), nr), nrow = nr, ncol = nc, byrow = TRUE) scsx <- matrix(rep(sd(scores[,]), nr), nrow = nr, ncol = nc, byrow = TRUE) scsdx <- (scores-scmx)/scsx scsdx <- scsdx[1:nr,1:nc] varex <- ea$d / sum(ea$d) * 100 pc_or <- as.data.frame(matrix(NA, ncol=4, nrow=rmax)) colnames(pc_or) <- c("PC","EV","Variance","cumvar") for(i in 1:rmax){ pc_or[i,1] <- i pc_or[i,2] <- round(ea$d[i],3) pc_or[i,3] <- round(varex[i],3) pc_or[i,4] <- round(sum(varex[seq(1,i)]),3) } pc_or$EV <- as.numeric(pc_or$EV) # Rotation of PCA with VARIMAX vm <- varimax(loadings[,1:8],normalize=TRUE) vm$rotmat <- vm$rotmat[,order(colSums(vm$loadings^2), decreasing = TRUE)] rloadings <- loadings[,1:8] %*% vm$rotmat library(corpcor) rcoef <- pseudoinverse(as.matrix(cx)) %*% rloadings rscores <- as.matrix(sdx) %*% rcoef rscmx <- matrix(rep(colMeans(rscores), nrow(rscores)), nrow = nrow(rscores), ncol = ncol(rscores), byrow = TRUE) rscsx <- matrix(rep(sd(rscores[,]), nrow(rscores)), nrow = nrow(rscores), ncol = ncol(rscores), byrow = TRUE) rscsdx <- (rscores-rscmx)/rscsx rscores <- rscsdx[1:nrow(rscores),1:8] rvalues <- colSums(rloadings^2) vmt <- varimax(loadings, normalize=TRUE) vmtloadings <- loadings %*% vmt$rotmat allrvalues <- colSums(vmtloadings^2) rvarex <- rvalues / sum(allrvalues) * 100 pc_wr <- as.data.frame(matrix(NA, ncol=4, nrow=8)) colnames(pc_wr) <- c("PC", "Eigenvalue", "Variance", "Cumulative") for(i in 1:8){ pc_wr[i,1] <- i pc_wr[i,2] <- round(rvalues[i],3) pc_wr[i,3] <- round(rvarex[i], 3) pc_wr[i,4] <- round(sum(rvarex[seq(1,i)]),3) } library(lessR) colnames(rscores) <- to("PC", ncol(rscores),same.size=FALSE) colnames(rloadings) <- to("PC", ncol(rloadings),same.size=FALSE) row.names(rloadings) <- colnames(pred.wI) pca_wr.new <- data.frame(t(pc_wr[,-1])) colnames(pca_wr.new) <- to("PC", ncol(pca_wr.new),same.size=FALSE) pca_wr.new <- do.call(cbind,lapply(pca_wr.new,function(x)as.numeric(as.character(x)))) pca.allbind <- rbind.data.frame(pca_wr.new, round(rloadings,3)) row.names(pca.allbind)[1:3] <- c("Eigenvalue", "Variability (%)", "Cumulative (%)") # Modelling approach with rPCs # Packages for prediction modelling library(lars); library(leaps); library(Cubist);library(gbm) library(elasticnet);library(ipred); library(e1071); library(randomForest);library(rpart) # Modelling processes and variable importance estimation (varImp()) # Procdure for all models similiar: # 1. Train the model with tuning (sometimes before defined) and the before # defined cross-validation and model selection parameters # 2. Get metrics of selected model # 3. Calculate Variable Importance # 4. Save the metrics and variable importance # Combine Q10 values without autotrophic respiration and rotated scores Q10 <- df_treat$Q10 all_data_mod <- cbind.data.frame(Q10,rscores) n <- to("PC", ncol(rscores),same.size=FALSE) # Define formula form2 <- as.formula(paste("Q10~",paste(n, collapse='+'))) # Define Resampling method dp <- createDataPartition(all_data_mod$Q10, p=.80, times=20) # Define cross-validation method tr <- trainControl(method = "LGOCV",p=0.8, number=100,index=dp, preProcOptions=list(center=T,scale=T), selectionFunction = "oneSE",savePredictions = TRUE) #Linear Regression (LM) linreg <- train(form2, data=all_data_mod, method="lm",metric="RMSE", trControl=tr) res_lm <- linreg$results[,-1] coef_lm <- as.data.frame(t(varImp(linreg$finalModel))) endtab_lm <- merge(res_lm, coef_lm) colnames(endtab_lm) <- paste("LM", colnames(endtab_lm), sep="_") # LEAPS leaps <- train(form2, data=all_data_mod ,method = "leapSeq",metric="RMSE", trControl=tr) ss <- summary(leaps$finalModel) leaps_reduced <- train(update(form2,as.formula(paste0("~ . - ",paste( colnames(ss$which)[!ss$which[leaps$finalModel$tuneValue[[1]],]], collapse="-")))),data=all_data_mod,method = "lm",trControl=tr) arg_leaps <- leaps_reduced$results[,-1] varleaps <- varImp(leaps_reduced$finalModel) coef_leaps <- as.data.frame(t(varleaps)) endtab_leaps <- merge(arg_leaps, coef_leaps) colnames(endtab_leaps) <- paste("LEAPS", colnames(endtab_leaps), sep = "_") # Penalizing Models: # LARS lasso <- train(form2, data=all_data_mod, method = "lars",metric="RMSE", trControl=tr, tuneGrid = data.frame(fraction = seq(0.01,1,0.01))) arg_lasso <- lasso$results[is.element(lasso$results$fraction,lasso$bestTune),] coef_lasso <- as.matrix(coef(lasso$finalModel, mode = "fraction", s= lasso$bestTune$fraction)*lasso$finalModel$normx) coef_lasso <- as.data.frame(t(coef_lasso)) endtab_lasso <- merge(arg_lasso, coef_lasso) colnames(endtab_lasso) <- paste("LARS", colnames(endtab_lasso), sep = "_") # Elastic Net (ENET) enetgrid <- expand.grid(.lambda= c(0, 0.01, .1), .fraction=seq(.05,1,length=20)) enetmod <- train(form2, data=all_data_mod, method = "enet",metric="RMSE", trControl=tr,tuneGrid=enetgrid) arg_enet <- enetmod$results[is.element(rownames(enetmod$results), rownames(enetmod$bestTune)),] coef_enet <- predict(enetmod$finalModel, type="coef",mode="fraction", s=c(enetmod$bestTune$fraction,enetmod$bestTune$lampda)) varimp_enet <- as.data.frame(t(coef_enet$coefficients*enetmod$finalModel$normx)) endtab_enet <- merge(arg_enet, varimp_enet) colnames(endtab_enet) <- paste("ENET", colnames(endtab_enet), sep="_") # Tree models: # Random Forest (RF) rfmod <- train(form2, data=all_data_mod, method="rf",metric="RMSE", trControl=tr,ntrees=1000, importance=TRUE) # rfmod2 <- train(form2, data=all_data_mod, method="rf",metric="RMSE", # trControl=tr,ntrees=1000, # importance=TRUE) arg_rf <- rfmod$results[is.element(rownames(rfmod$results), rownames(rfmod$bestTune)),] var_rfmod <- varImp(rfmod, scale=FALSE) coef_rf <- as.data.frame(t(var_rfmod$importance)) endtab_rf <- merge(arg_rf,coef_rf) colnames(endtab_rf) <- paste("RF", colnames(endtab_rf),sep="_") # xyplot(Q10~predict(rfmod),xlab="Predicted Q10",ylab="Observed Q10",main="RF") # xyplot(resid(rfmod)~predict(rfmod)) # Bagged CART (BAGGED) treebagmod <- train(form2, data=all_data_mod, method="treebag", metric="RMSE",trControl=tr) arg_treebag <- treebagmod$results var_treebag <- varImp(treebagmod, scale=FALSE) coef_treebag <- as.data.frame(t(var_treebag$importance)) endtab_treebag <- merge(arg_treebag,coef_treebag) colnames(endtab_treebag) <- paste("BAGTREE", colnames(endtab_treebag), sep="_") # Boosted CART (BOOSTED) boostedmod <- train(form2, all_data_mod, metric="RMSE", method="gbm", verbose=FALSE, tuneGrid=expand.grid(.interaction.depth = seq(1,7,by=2), .n.trees = seq(10,100,by=10), .shrinkage = c(0.01, 0.1), .n.minobsinnode=5), trControl=tr) arg_boost <- boostedmod$results[is.element(rownames(boostedmod$results), rownames(boostedmod$bestTune)),] var_boost <- varImp(boostedmod, scale=FALSE) coef_boost <- as.data.frame(t(var_boost$importance)) endtab_boost <- merge(arg_boost,coef_boost) colnames(endtab_boost) <- paste("BOOST", colnames(endtab_boost), sep="_") # Cubist (CUBIST) cubistmod <- train(form2,data=all_data_mod,method = "cubist", metric="RMSE", trControl = tr, tuneGrid = expand.grid(.committees=c(1,5,10,50,75,100), .neighbors=c(1,3,5,7,9))) arg_cubist <- cubistmod$results[is.element(rownames(cubistmod$results), rownames(cubistmod$bestTune)),] var_cubist <- varImp(cubistmod, scale=FALSE) coef_cubist <- as.data.frame(t(var_cubist$importance)) endtab_cubist <- merge(arg_cubist,coef_cubist) colnames(endtab_cubist) <- paste("CUBIST", colnames(endtab_cubist), sep="_") # Save results results <- resamples(list("LM"=linreg, "LEAPS"=leaps, "LARS"=lasso, "ENET"=enetmod, "RF"=rfmod, "BAGGED"=treebagmod, "BOOSTED" = boostedmod, "CUBIST"=cubistmod)) endtab <- cbind(endtab_lm, endtab_treebag, endtab_rf, endtab_lasso, endtab_leaps,endtab_boost, endtab_cubist, endtab_enet) compare <- subset(endtab, select=c(LARS_RMSE, LARS_Rsquared, LEAPS_RMSE, LEAPS_Rsquared, CUBIST_RMSE, CUBIST_Rsquared, LM_RMSE, LM_Rsquared,ENET_RMSE, ENET_Rsquared, BAGTREE_RMSE,BAGTREE_Rsquared,BOOST_RMSE, BOOST_Rsquared,RF_RMSE,RF_Rsquared)) ################################################################################ # Result Plots (Figure 1) # Predicted vs observed (Figure 1a) pred_rf <- predict(rfmod, all_data_mod) # Predict Q10 # Combine predicted and observed Q10 data pred_rf <- data.frame(pred = pred_rf, Q10 = all_data_mod$Q10) # Plot the results rfplot_PCA <- ggplot(data = pred_rf, aes(y=pred, x= Q10))+ geom_point()+ geom_abline(intercept=0,slope=1,size=1.5,color="red")+ # 1:1-Line geom_smooth(method='lm',formula=y~x,level=0.99)+ # Regression line annotate("text",label = 'a)', x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5)+ xlim(1,4.5)+ylim(1,4.5)+theme_bw()+ labs(x="Q10 observed",y="Q10 predicted")+ annotate("text",label=paste("RMSE = ",round(arg_rf$RMSE,2),"\nrRMSE = ", round(arg_rf$RMSE/mean(Q10)*100,0),"%","\nR² = ", round(arg_rf$Rsquared,2)), x=1,y=4.3,hjust=0,size=5)+ theme(axis.text=element_text(size=12), axis.title=element_text(size=14)) # Variable Importance Plot (only random forest) importance_coef <- rbind(coef_rf, coef_boost, coef_cubist) row.names(importance_coef) <- c("RF","Boosted","Cubist") importance_coef <- data.frame(t(importance_coef)) importance_coef$pRF <- importance_coef$RF / sum(importance_coef$RF) * 100 importance_coef$rfround <- round(importance_coef$pRF,2) # Best data # Names of rPcs with experiment modifier data importance_coef$names <- c("Plant growth conditions", "Soil organic matter & fertility", "Experiment specific modifiers","Clay & weathering", "Soil solution chemistry","Soil CNP stoichiometry", "Textural coarseness","Precipitation & Evapotranspiration") # Names of rPcs without experiment modifier data importance_coef$names <- c("Plant growth conditions", "Texture & weathering","Temperature & weathering", "Soil solution chemistry", "Soil nutrient stoichiometry", "Soil organic matter", "Coarseness","Water use efficiency") importance_coef$rfround <- round(importance_coef$rfround,1) varI_plot_pca <- ggplot(importance_coef,aes(x=reorder(names, pRF),y=pRF))+ geom_bar(stat="identity",width=0.8,fill="grey")+ geom_text(aes(label=paste(rfround,"%",sep=" ")),hjust=1,size=5)+ xlab("")+ylab("Relative Importance [%]")+ annotate("text",label = 'b)', x = -Inf, y = Inf, hjust = 1.25, vjust = -0.25,size=5)+ coord_flip(ylim=c(5,max(importance_coef$pRF) + 0.2))+theme_bw()+ scale_y_continuous(breaks = seq(5, 25, by = 5),expand=c(0,0))+ theme(axis.text=element_text(size=12), axis.title=element_text(size=14)) # Partial Dependence Plots # Best data approach library(pdp) dpd_1_rf <- partial(rfmod, pred.var="PC1") dpd_2_rf <- partial(rfmod, pred.var="PC2") dpd_3_rf <- partial(rfmod, pred.var="PC3") dpd_4_rf <- partial(rfmod, pred.var="PC4") dpd_5_rf <- partial(rfmod, pred.var="PC5") dpd_6_rf <- partial(rfmod, pred.var="PC6") dpd_7_rf <- partial(rfmod, pred.var="PC7") dpd_8_rf <- partial(rfmod, pred.var="PC8") dpd_table <- as.data.frame(mapply(c, dpd_1_rf[,c(1,2)],dpd_2_rf[,c(1,2)], dpd_3_rf[,c(1,2)], dpd_4_rf[,c(1,2)], dpd_5_rf[,c(1,2)],dpd_6_rf[,c(1,2)], dpd_7_rf[,c(1,2)],dpd_8_rf[,c(1,2)])) library(reshape2) # Naming and inversion of rPCs with experiment data dpd_table$x <- rep(seq(1,51,1),8) dpd_table$x[c(1:51,307:357)] <- seq(51,1,-1) pca_names <- c("Plant growth conditions", "Soil organic matter & fertility", "Experiment specific modifiers","Clay & weathering", "Soil solution chemistry","Soil CNP stoichiometry", "Textural coarseness","Precipitation &\nEvapotranspiration") # Naming and inversion of rPCs without experiment data dpd_table$x <- rep(seq(1,51,1),8) dpd_table$x[c(103:153,256:306,358:408)] <- seq(51,1,-1) pca_names <- c("Plant growth conditions", "Texture & weathering","Temperature & weathering", "Soil solution chemistry", "Soil nutrient stoichiometry", "Soil organic matter", "Coarseness","Water use efficiency") # Defining colours for plotting cols <- c("#E31A1C","#FFD92F","#8DA0CB","#E76AC3","#FB9A99","#FF7F00","#1F78B4", "#33A02C") # Add rPC names and prepare for plotting dpd_table$names <- rep(pca_names,each=51) dpd_table$names <- factor(dpd_table$names, levels = pca_names) # Plot Best data results (Figure 1c) dpd_pca_plot <- ggplot(dpd_table,aes(x=x,y=yhat,color=names))+ geom_line(size=1.25)+theme_bw(base_size=14)+ylab("Predicted Q10")+ xlab("rPC relative range")+ guides(color=guide_legend(ncol=2))+scale_color_manual(values = cols)+ annotate("text",label = 'c)', x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5)+ scale_y_continuous(limits=c(1.7,3),expand=c(0,0))+ # geom_segment(aes(x = 22, y = 2, xend = 22, yend = 2.8),linetype="dashed", # size=1,colour="black")+ # annotate("text",x=22.5,y=2.75,label="10°C \nIncubation temperature", # hjust=0,size=5)+ scale_x_continuous(breaks=c(min(dpd_table$x),max(dpd_table$x)), labels=c("Low","High"),expand=c(0.03,0.03))+ theme(legend.title=element_blank(),legend.justification=c(0,0), legend.position=c(0.02,0.05), legend.background = element_rect(fill=NULL,size=0.5,linetype="solid", colour ="black"), axis.title.x=element_text(size=14)) # Generalized PCA results (Figure 3Sc) dpd_pca_plot <- ggplot(dpd_table,aes(x=x,y=yhat,color=names))+ geom_line(size=1.25)+theme_bw(base_size=14)+ylab("Predicted Q10")+ xlab("rPC relative range")+ guides(color=guide_legend(ncol=2))+scale_color_manual(values = cols)+ annotate("text",label = 'c)', x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5)+ scale_y_continuous(limits=c(1.9,2.8),expand=c(0,0))+ geom_segment(aes(x = 33, y = 2, xend = 33, yend = 2.8),linetype="dashed", size=1,colour="black")+ annotate("text",x=33.5,y=2.75,label="10°C \nIncubation temperature", hjust=0,size=4)+ scale_x_continuous(breaks=c(min(dpd_table$x),max(dpd_table$x)), labels=c("Low","High"),expand=c(0.03,0.03))+ theme(legend.title=element_blank(),legend.justification=c(0,0), legend.position=c(0.02,0.05), legend.background = element_rect(fill=NULL,size=0.5,linetype="solid", colour ="black"), axis.title.x=element_text(size=14)) # Arrange plots library(gridExtra) res_plot_fig <- arrangeGrob(rfplot_PCA,varI_plot_pca ,dpd_pca_plot,nrow=1) # Save plots # with experiment data ggsave("res_experimentdata.pdf",res_plot_fig, width=42,height=17, unit="cm",dpi=600 ) ggsave("res_experimentdata.tiff",res_plot_fig, width=42,height=17, unit="cm",dpi=600 ) #without experiment data # with experiment data ggsave("res_without_experimentdata.pdf",res_plot_fig, width=42,height=17, unit="cm",dpi=600 ) ggsave("res_without_experimentdata.tiff",res_plot_fig, width=42,height=17, unit="cm",dpi=600 ) # Generalized data ggsave("res_new_gl.pdf",res_plot_fig, width=43,height=17, unit="cm",dpi=600 ) ggsave("res_new_gl.tiff",res_plot_fig, width=43,height=17, unit="cm",dpi=600 ) # Test for Spatial Autocorrelation # Get spatial data spdat <- SpatialPointsDataFrame(cbind(df$WGS.84.E, df$WGS.84.N), df) # Weights matrix with the 10 nearest neighbors, in weights list form library(spdep) lstw <- nb2listw(knn2nb(knearneigh(spdat, k = 10))) # Moran's test using the residuals of the model # only shown for linear models (non-linear models not affected by spatial # autocorrelation) moran.test(residuals(linreg_all), lstw) moran.test(residuals(lasso_all),lstw) moran.test(residuals(enetmod_all),lstw) moran.test(residuals(leaps_all),lstw) moran.test(residuals(lasso),lstw) moran.test(residuals(enetmod),lstw) moran.test(residuals(leaps),lstw) moran.test(residuals(linreg), lstw) ################################################################################ # Global Predictions with raster data # Required packages library(raster); library(rgdal) # Global approach depending on individual model results # Load of the individual raster files (see Tab. 1S for more sources) # and adjustments for further processing (projecting to 0.5° resolution). Third # party source files for global datasets: See methods. C <- raster("1km/C.tif") C <- projectRaster(C,N,method = "ngb") CEC <- raster("1km/CEC.tif") CEC <- projectRaster(CEC,N,method = "ngb") Silt <- raster("1km/Silt.tif") Silt <- projectRaster(Silt,N,method = "ngb") Sand <- raster("1km/Sand.tif") Sand <- projectRaster(Sand,N,method = "ngb") Clay <- raster("1km/Clay.tif") Clay <- projectRaster(Clay,N,method = "ngb") pH <- raster("1km/pH.tif") pH <- projectRaster(pH,N,method = "ngb") N <- raster("1km/N.tif") N <- raster("N.tif") CN <- raster("1km/CN.tif") CN <- raster("CN.tif") Bsat <- raster("1km/Bsat.tif") Bsat <- raster("Bsat.tif") NDVI <- raster("1km/NDVI.tif") NDVI <- projectRaster(NDVI,N,method = "ngb") MAT <- raster("1km/MAT.tif") MAT <- projectRaster(MAT,N,method = "ngb") MAP <- raster("1km/MAP.tif") MAP <- projectRaster(MAP,N,method = "ngb") PET <- raster("1km/PET.tif") PET <- projectRaster(PET,N,method = "ngb") P <- raster("1km/P.tif") P <- raster("E:/Masterarbeit/GIS/P.tif") Humidity <- raster("1km/Humidity.tif") Humidity <- projectRaster(Humidity,N,method = "ngb") # RF-Model saved and loaded (if needed) # rfmod_exp <- rfmod #with experimental data # save(rfmod,file="rfmod.Rdata") # load("2019_05_07254A_SRRW/rfmod.RData") # # Stack the loaded raster files (without Tmean) preds <- stack(C,N,CN,Bsat,CEC,Silt,Sand,Clay,pH,NDVI, MAT,MAP,PET,P,Humidity) # Remove files to save memory space rm(C,N,CN,Bsat,CEC,Silt,Sand,Clay,pH,NDVI, MAT,MAP,PET,P,Humidity) # Save raster as tif-file mean_rf_1km <- raster("2019_05_07254A_SRRW/mean_rf_GIS.tif") #Mapping PCA as input pca_tab <- data.frame(pca.allbind[-c(1:3),]) CN_P <- calc(stack(CN,P),fun=function(x){x[1]/x[2]}) Clay_MAT <- calc(stack(Clay,MAT),fun=function(x){x[1]/x[2]}) PET_NDVI <- calc(stack(PET,NDVI),fun=function(x){x[1]/x[2]}) MAP_PET <- calc(stack(MAP,PET),fun=function(x){x[1]/x[2]}) Bsat_CEC <- calc(stack(Bsat,CEC),fun=function(x){x[1]/x[2]}) Bsat_pH <- calc(stack(Bsat,pH),fun=function(x){x[1]/x[2]}) CEC_Clay <- calc(stack(CEC,Clay),fun=function(x){x[1]/x[2]}) MAT_MAP <- calc(stack(MAT,MAP),fun=function(x){x[1]/x[2]}) # 1km rasters made in ArcMAP (much faster) CN_P <- raster("1km/Ratios/CN_P.tif") Clay_MAT <- raster("1km/Ratios/Clay_MAT.tif") PET_NDVI <- raster("1km/Ratios/PET_NDVI.tif") PET_NDVI <- crop(PET_NDVI,N) MAP_PET <- raster("1km/Ratios/MAP_PET.tif") MAP_PET <- crop(MAP_PET,N) Bsat_CEC <- raster("1km/Ratios/Bsat_CEC.tif") Bsat_pH <- raster("1km/Ratios/Bsat_pH.tif") MAT_MAP <- raster("1km/Ratios/MAT_MAP.tif") MAT_MAP <- crop(MAT_MAP,N) CEC_Clay <- raster("1km/Ratios/CEC_Clay.tif") e <- extent(-180, 180, -56.00083, 83.99917) MAT <- setExtent(MAT, e, keepres=TRUE) MAP <- setExtent(MAP, e, keepres=TRUE) PET_NDVI <- setExtent(PET_NDVI, e, keepres=TRUE) MAP_PET <- setExtent(MAP_PET, e, keepres=TRUE) MAT_MAP <- setExtent(MAT_MAP,e,keepres = TRUE) CEC_Clay <- setExtent(CEC_Clay,e,keepres=TRUE) preds_pca <- stack(pH,C,N,CN,CEC,Bsat,MAT,MAP,Sand,Clay,Silt,P, NDVI,Humidity,CN_P,Clay_MAT,PET_NDVI,MAP_PET, Bsat_CEC,Bsat_pH,CEC_Clay,MAT_MAP) pca_tab$names <- row.names(pca_tab) pca_tab$names[15:22]<- c("CN_P","Clay_MAT","PET_NDVI","MAP_PET", "Bsat_CEC","Bsat_pH","CEC_Clay","MAT_MAP") PC1 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,1]+ x[2]*pca_tab[2,1]+x[3]*pca_tab[3,1]+x[4]*pca_tab[4,1]+x[1]*pca_tab[4,1]+ +x[5]*pca_tab[5,1]+x[6]*pca_tab[6,1]+x[7]*pca_tab[7,1]+x[8]*pca_tab[8,1] +x[9]*pca_tab[9,1]+x[10]*pca_tab[10,1]+x[11]*pca_tab[11,1]+x[12]*pca_tab[12,1]+x[13]*pca_tab[13,1] +x[14]*pca_tab[14,1]+x[15]*pca_tab[15,1]+x[16]*pca_tab[16,1]+x[17]*pca_tab[17,1] +x[18]*pca_tab[18,1]+x[19]*pca_tab[19,1]+x[20]*pca_tab[20,1]+x[21]*pca_tab[21,1]+x[22]*pca_tab[22,1]}) PC2 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,2]+ x[2]*pca_tab[2,2]+x[3]*pca_tab[3,2]+x[4]*pca_tab[4,2]+x[1]*pca_tab[4,2]+ +x[5]*pca_tab[5,2]+x[6]*pca_tab[6,2]+x[7]*pca_tab[7,2]+x[8]*pca_tab[8,2] +x[9]*pca_tab[9,2]+x[10]*pca_tab[10,2]+x[11]*pca_tab[11,2]+x[12]*pca_tab[12,2]+x[13]*pca_tab[13,2] +x[14]*pca_tab[14,2]+x[15]*pca_tab[15,2]+x[16]*pca_tab[16,2]+x[17]*pca_tab[17,2] +x[18]*pca_tab[18,2]+x[19]*pca_tab[19,2]+x[20]*pca_tab[20,2]+x[21]*pca_tab[21,2]+x[22]*pca_tab[22,2]}) PC3 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,3]+ x[2]*pca_tab[2,3]+x[3]*pca_tab[3,3]+x[4]*pca_tab[4,3]+x[1]*pca_tab[4,3]+ +x[5]*pca_tab[5,3]+x[6]*pca_tab[6,3]+x[7]*pca_tab[7,3]+x[8]*pca_tab[8,3] +x[9]*pca_tab[9,3]+x[10]*pca_tab[10,3]+x[11]*pca_tab[11,3]+x[12]*pca_tab[12,3]+x[13]*pca_tab[13,3] +x[14]*pca_tab[14,3]+x[15]*pca_tab[15,3]+x[16]*pca_tab[16,3]+x[17]*pca_tab[17,3] +x[18]*pca_tab[18,3]+x[19]*pca_tab[19,3]+x[20]*pca_tab[20,3]+x[21]*pca_tab[21,3]+x[22]*pca_tab[22,3]}) PC4 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,4]+ x[2]*pca_tab[2,4]+x[3]*pca_tab[3,4]+x[4]*pca_tab[4,4]+x[1]*pca_tab[4,4]+ +x[5]*pca_tab[5,4]+x[6]*pca_tab[6,4]+x[7]*pca_tab[7,4]+x[8]*pca_tab[8,4] +x[9]*pca_tab[9,4]+x[10]*pca_tab[10,4]+x[11]*pca_tab[11,4]+x[12]*pca_tab[12,4]+x[13]*pca_tab[13,4] +x[14]*pca_tab[14,4]+x[15]*pca_tab[15,4]+x[16]*pca_tab[16,4]+x[17]*pca_tab[17,4] +x[18]*pca_tab[18,4]+x[19]*pca_tab[19,4]+x[20]*pca_tab[20,4]+x[21]*pca_tab[21,4]+x[22]*pca_tab[22,1]}) PC5 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,5]+ x[2]*pca_tab[2,5]+x[3]*pca_tab[3,5]+x[4]*pca_tab[4,5]+x[1]*pca_tab[4,5]+ +x[5]*pca_tab[5,5]+x[6]*pca_tab[6,5]+x[7]*pca_tab[7,5]+x[8]*pca_tab[8,5] +x[9]*pca_tab[9,5]+x[10]*pca_tab[10,5]+x[11]*pca_tab[11,5]+x[12]*pca_tab[12,5]+x[13]*pca_tab[13,5] +x[14]*pca_tab[14,5]+x[15]*pca_tab[15,5]+x[16]*pca_tab[16,5]+x[17]*pca_tab[17,5] +x[18]*pca_tab[18,5]+x[19]*pca_tab[19,5]+x[20]*pca_tab[20,5]+x[21]*pca_tab[21,5]+x[22]*pca_tab[22,5]}) PC6 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,6]+ x[2]*pca_tab[2,6]+x[3]*pca_tab[3,6]+x[4]*pca_tab[4,6]+x[1]*pca_tab[4,6]+ +x[5]*pca_tab[5,6]+x[6]*pca_tab[6,6]+x[7]*pca_tab[7,6]+x[8]*pca_tab[8,6] +x[9]*pca_tab[9,6]+x[10]*pca_tab[10,6]+x[11]*pca_tab[11,6]+x[12]*pca_tab[12,6]+x[13]*pca_tab[13,6] +x[14]*pca_tab[14,6]+x[15]*pca_tab[15,6]+x[16]*pca_tab[16,6]+x[17]*pca_tab[17,6] +x[18]*pca_tab[18,6]+x[19]*pca_tab[19,6]+x[20]*pca_tab[20,6]+x[21]*pca_tab[21,6]+x[22]*pca_tab[22,6]}) PC7 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,7]+ x[2]*pca_tab[2,7]+x[3]*pca_tab[3,7]+x[4]*pca_tab[4,7]+x[1]*pca_tab[4,7]+ +x[5]*pca_tab[5,7]+x[6]*pca_tab[6,7]+x[7]*pca_tab[7,7]+x[8]*pca_tab[8,7] +x[9]*pca_tab[9,7]+x[10]*pca_tab[10,7]+x[11]*pca_tab[11,7]+x[12]*pca_tab[12,7]+x[13]*pca_tab[13,7] +x[14]*pca_tab[14,7]+x[15]*pca_tab[15,7]+x[16]*pca_tab[16,7]+x[17]*pca_tab[17,7] +x[18]*pca_tab[18,7]+x[19]*pca_tab[19,7]+x[20]*pca_tab[20,7]+x[21]*pca_tab[21,7]+x[22]*pca_tab[22,7]}) PC8 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,8]+ x[2]*pca_tab[2,8]+x[3]*pca_tab[3,8]+x[4]*pca_tab[4,8]+x[1]*pca_tab[4,8]+ +x[5]*pca_tab[5,8]+x[6]*pca_tab[6,8]+x[7]*pca_tab[7,8]+x[8]*pca_tab[8,8] +x[9]*pca_tab[9,8]+x[10]*pca_tab[10,8]+x[11]*pca_tab[11,8]+x[12]*pca_tab[12,8]+x[13]*pca_tab[13,8] +x[14]*pca_tab[14,8]+x[15]*pca_tab[15,8]+x[16]*pca_tab[16,8]+x[17]*pca_tab[17,8] +x[18]*pca_tab[18,8]+x[19]*pca_tab[19,8]+x[20]*pca_tab[20,8]+x[21]*pca_tab[21,8]+x[22]*pca_tab[22,8]}) PC1[is.infinite(PC1)] <- NA PC2[is.infinite(PC2)] <- NA PC3[is.infinite(PC3)] <- NA PC4[is.infinite(PC4)] <- NA PC5[is.infinite(PC5)] <- NA PC6[is.infinite(PC6)] <- NA PC7[is.infinite(PC7)] <- NA PC8[is.infinite(PC8)] <- NA #Load PCA raster 1km made in ArcMap PC1 <- raster("1km/PC_1km/PC1.tif") PC2 <- raster("1km/PC_1km/PC2.tif") PC3 <- raster("1km/PC_1km/PC3.tif") PC4 <- raster("1km/PC_1km/PC4.tif") PC5 <- raster("1km/PC_1km/PC5.tif") PC6 <- raster("1km/PC_1km/PC6.tif") PC7 <- raster("1km/PC_1km/PC7.tif") PC8 <- raster("1km/PC_1km/PC8.tif") pca_preds <- stack(PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8) names(pca_preds) <- c("PC1","PC2","PC3","PC4","PC5","PC6","PC7","PC8") rf_pred_pca <- raster::predict(object=pca_preds,model=rfmod) #writeRaster(rf_pred_pca,"rf_pred_map.tif") #rf_pred_pca <- raster("rf_pred_map.tif") # Further processing again only shown for local values # Global Q10 map and Latitude plot WorldData <- ggplot2::map_data("world") library(dplyr) WorldData %>% filter(region != "Antarctica") -> WorldData # Global shapefile WorldData <- fortify(WorldData) ewbrks <- c(seq(-180,180,40),0) nsbrks <- seq(-60,80,20) ewlbls <- unlist(lapply(ewbrks, function(x) ifelse(x < 0, gsub("[-]","",paste(x, "°W")), ifelse(x > 0, paste(x, "°E"),x)))) nslbls <- unlist(lapply(nsbrks, function(x) ifelse(x < 0, gsub("[-]","",paste(x, "°S")), ifelse(x > 0, paste(x, "°N"),x)))) # Climatezones Map derivaded from FAO Agrizones (prepared in ArcMAP 10.4) # Remove polar and desert regions clim_rast <- raster("E:/Masterarbeit/Statistik/Raster/1km/Clim_raster.tif") clim_mat <- matrix(c(0,1,2,3,4,NA,1,1,1,1),ncol=2) clim_rast <- reclassify(clim_rast,clim_mat) clim_rast <- setExtent(clim_rast,rf_pred_pca) clim_rast<- projectRaster(clim_rast,rf_pred_pca,method="ngb") p_class <- P p_class[!is.na(p_class)] <- 1 mean_rf_oP <- calc(stack(rf_pred_pca,mean_rf_oP,p_class),fun=function(x){x[1]*x[2]*x[3]}) #mean_rf_oP <- calc(stack(rf_pred_pca,mean_rf_oP),fun=function(x){x[1]*x[2]}) # Load the 1km file made with ArcMAP mean_rf_1km <- raster("E:/Masterarbeit/Statistik/Raster/1km/PC_1km/Pred_RF_1km_class.tif") # Q10 raster as data frame for plotting rf_df <- as(mean_rf_oP, "SpatialPixelsDataFrame") rf_df <- as.data.frame(rf_df) colnames(rf_df) <- c("value", "x", "y") # Plot the Q10 with global map map_rf <- ggplot()+geom_map(data=WorldData, map=WorldData, aes(x=long, y=lat, group=group, map_id=region), fill="grey", colour="grey", size=0.5)+ geom_tile(data=rf_df, aes(x=x, y=y, fill=value), alpha=0.8)+ annotate("text",label = 'a)', x = Inf, y = Inf, hjust = 1.25, vjust = 1.25,size=5)+ labs(fill="Q10")+xlab("")+ylab("")+ scale_fill_gradientn(colours = rev(rainbow(5)),breaks=c(1.75,2,2.25,2.5,2.75,3), values = c(0,0.1,0.2,0.25,0.3,0.35,0.4,0.5,0.75,1), guide = guide_colourbar(direction = "horizontal", title.position = "top", title.hjust=0.5))+ theme_bw()+ scale_x_continuous(breaks = ewbrks, labels = ewlbls, expand = c(0, 0)) + scale_y_continuous(breaks = nsbrks, labels = nslbls)+ theme(legend.position="bottom", legend.key.width=unit(2.25,"cm"), legend.margin=ggplot2::margin(-10,-10,0,0)) ################################################################################ # Calculate differences between 1km map (extra made) and 0.5° map for assessment # of potential bias due to data aggregation at coarser resolution. mean_rf_1km <- projectRaster(mean_rf_1km,mean_rf_oP,method = "ngb") Q10_dif <- calc(stack(mean_rf_oP,mean_rf_1km), fun=function(x){x[1]-x[2]}) Q10_mean <- cellStats(Q10_dif,mean) Q10_sd <- cellStats(Q10_dif,sd) Q10_per <- calc(stack(mean_rf_oP,mean_rf_1km), fun=function(x){(x[1]-x[2])/x[1]}) Q10_per_1 <- calc(Q10_per, fun = function(x){x[x < 0] <- x*(-1); return(x)}) Q10_m_per <- cellStats(Q10_per_1,mean) Q10_sd_per <- cellStats(Q10_per_1,sd) # Calculate differences between Hashimoto et al. 2015 Q10 map and the newly # predicted Q10 map Q10_hash <- raster("E:/Masterarbeit/Statistik/Raster/Respiration_Hashimoto/Q10_H15.tif") Q10_hash <- projectRaster(Q10_hash,mean_rf_oP,method="ngb") Q10_dif_H <- calc(stack(mean_rf_oP,Q10_hash),fun=function(x){(x[1]-x[2])/x[1]}) Q10_mean_H <- cellStats(Q10_dif_H,mean) Q10_sd_H <- cellStats(Q10_dif_H,sd) ################################################################################ # Preparation for latitudial plot # Load Global Land-Use map lu <- raster("E:/Masterarbeit/Statistik/Raster/1km/Land_Use_small.tif") lu <- projectRaster(lu,mean_rf_oP,method = "ngb") #lu <- setExtent(lu,bb,keepres = TRUE) # Separate the data in Agriculture, Forest and Grassland mat_agri <- matrix(c(1,2,3,4,1,NA,NA,NA),ncol=2) lu_agri <- reclassify(lu,mat_agri) # e <- extent(-180, 180, -55.63135, 83.86865) # lu_agri <- setExtent(lu_agri, e, keepres=TRUE) agri_tab <- calc(stack(lu_agri,mean_rf_oP),fun=function(x){x[1]*x[2]}) #agri_tab <- raster("E:/Masterarbeit/Statistik/Raster/1km/PC_1km/pred_agri_1km.tif") agri_tab <- as.data.frame(as(agri_tab,"SpatialPixelsDataFrame")) agri_tab <- aggregate(layer~y,data=agri_tab,mean) agri_tab$name <- "Agriculture" mat_for <- matrix(c(1,2,3,4,NA,1,NA,NA),ncol=2) lu_for <- reclassify(lu,mat_for) # e <- extent(-180, 180, -55.63135, 83.86865) # lu_for <- setExtent(lu_for, e, keepres=TRUE) for_tab <- calc(stack(lu_for,mean_rf_oP),fun=function(x){x[1]*x[2]}) for_tab <- as.data.frame(as(for_tab,"SpatialPixelsDataFrame")) for_tab <- aggregate(layer~y,data=for_tab,mean) for_tab$name <- "Forest" mat_gras <- matrix(c(1,2,3,4,NA,NA,1,NA),ncol=2) lu_gras <- reclassify(lu,mat_gras) # e <- extent(-180, 180, -55.63135, 83.86865) # lu_gras <- setExtent(lu_gras, e, keepres=TRUE) gras_tab <- calc(stack(lu_gras,mean_rf_oP),fun=function(x){x[1]*x[2]}) gras_tab <- as.data.frame(as(gras_tab,"SpatialPixelsDataFrame")) gras_tab <- aggregate(layer~y,data=gras_tab,mean) gras_tab$name <- "Grassland" # Combine the land-use data frames df_lu <- rbind(agri_tab,for_tab,gras_tab) # Plot the data as path rf_lat_plot <- ggplot(df_lu)+ geom_path(aes(x=layer,y=y,color=name),lineend = "square")+ theme_bw(base_size=14)+ geom_map(data=WorldData, map=WorldData, aes(x=long, y=lat, group=group, map_id=region), fill=NA, colour=NA, size=0)+ annotate("text",label = 'b)', x = Inf, y = Inf, hjust = 1.25, vjust = 1.25,size=5)+ylab("")+xlab("Q10")+ scale_y_continuous(breaks=nsbrks,labels = nslbls)+ scale_x_continuous(breaks=c(2,2.5,3),limits=c(2,3),expand = c(0.01,0))+ theme(legend.title=element_blank(), legend.position=c(0.7,0.5), legend.background = element_rect(fill=NULL,linetype="solid", colour ="black"), legend.key.size = unit(1, "cm")) # Combine the global map and latitudial plot # Get the plot heights p1.common.y <- ggplot_gtable(ggplot_build(map_rf)) p2.common.y <- ggplot_gtable(ggplot_build(rf_lat_plot)) # Copy the plot height from p1 to p2 p2.common.y$heights <- p1.common.y$heights ## Uncertainty Maps # https://stats.stackexchange.com/questions/56895/do-the-predictions-of-a-random-forest-model-have-a-prediction-interval pc_tab_global <- as.data.frame(pca_preds,xy=TRUE) pc_tab_global <- data.frame(na.omit(pc_tab_global)) rf_pred_ci <- predict(rfmod$finalModel,newdata=pc_tab_global,predict.all=TRUE) rf_pred_ci2 <- predict(rfmod$finalModel,newdata=pc_tab_global,predict.all=TRUE,interval="confidence") rf_pred_ci3 <- predict(rfmod$finalModel,newdata=pc_tab_global,predict.all=TRUE,interval="prediction") tab_ci <- data.frame(rf_pred_ci2$individual) tab_ci$mean <- rowMeans(tab_ci[,c(1:500)]) tab_ci$sd <- apply(tab_ci[,1:500],1,sd) tab_ci$ratio <- tab_ci$sd/tab_ci$mean tab_ci$relunc <- tab_ci$ratio*100 tab_ci2 <- data.frame(rf_pred_ci3$individual) pred.rf.int <- apply(rf_pred_ci$individual, 1, function(x) { c(mean(x) + c(-1.96, 1.96) * 0.608928, quantile(x, c(0.05, 0.95))) }) pred.rf.int <- t(pred.rf.int) mean.rf <- rf_pred_ci$aggregate sd.rf <- mean(sqrt(rfmod$finalModel$mse)) pred.rf.int3 <- cbind.data.frame(lower=mean.rf - 1.96*sd.rf, upper=mean.rf + 1.96*sd.rf,pred=mean.rf) pred.rf.int3$Mean <- (pred.rf.int3$lower+pred.rf.int3$upper)/2 # Ratio between prediction interval and mean prediction (Domke et al. 2017 - Towards inventory- based estimates of soil organic carbon in forests of the United States) pred.rf.int3$ratio <- (pred.rf.int3$pred/pred.rf.int3$upper)*(pred.rf.int3$lower/pred.rf.int3$pred) #uncer_tab <- cbind(pc_tab_global,pred.rf.int3) uncer_tab <- cbind(pc_tab_global,tab_ci$relunc) #uncer_rast <- rasterFromXYZ(uncer_tab[,c(1,2,15)],crs = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0") uncer_rast <- rasterFromXYZ(uncer_tab[,c(1,2,11)],crs = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0") clim_rast <- raster("E:/Masterarbeit/Statistik/Raster/1km/Clim_raster.tif") clim_mat <- matrix(c(0,1,2,3,4,NA,1,1,1,1),ncol=2) clim_rast<- projectRaster(clim_rast,uncer_rast,method="ngb") clim_rast <- reclassify(clim_rast,clim_mat) p_class <- P p_class <- projectRaster(p_class,uncer_rast,method="ngb") p_class[!is.na(p_class)] <- 1 uncer_rast <- calc(stack(uncer_rast,clim_rast,p_class),fun=function(x){x[1]*x[2]*x[3]}) uncer_tab <- as.data.frame(uncer_rast,xy=TRUE) #uncer_tab$layer <- uncer_tab$layer*100 uncer_map <- ggplot()+geom_map(data=WorldData, map=WorldData, aes(x=long, y=lat, group=group, map_id=region), fill="grey", colour="grey", size=0.5)+ geom_tile(data=uncer_tab,aes(x=x,y=y,fill=layer))+ annotate("text",label = 'c)', x = Inf, y = Inf, hjust = 1.25, vjust = 1.25,size=5)+ labs(fill="Relative Uncertainty [%]")+xlab("")+ylab("")+ scale_fill_gradientn(colours = rev(rainbow(7)),na.value = NA, guide = guide_colourbar(direction = "horizontal", title.position = "top", title.hjust=0.5))+theme_bw()+ scale_x_continuous(breaks = ewbrks, labels = ewlbls, expand = c(0, 0)) + scale_y_continuous(breaks = nsbrks, labels = nslbls)+ theme(legend.position="bottom", legend.key.width=unit(2.25,"cm"), legend.margin=ggplot2::margin(-10,-10,0,0)) ################################################################################ # Preparation for latitudial plot # Load Global Land-Use map lu <- raster("E:/Masterarbeit/Statistik/Raster/1km/Land_Use_small.tif") lu <- projectRaster(lu,mean_rf_oP,method = "ngb") #lu <- setExtent(lu,bb,keepres = TRUE) # Separate the data in Agriculture, Forest and Grassland mat_agri <- matrix(c(1,2,3,4,1,NA,NA,NA),ncol=2) lu_agri <- reclassify(lu,mat_agri) # e <- extent(-180, 180, -55.63135, 83.86865) lu_agriuncer_rast <- projectRaster(lu_agri, uncer_rast) agri_tab_unc <- calc(stack(lu_agriuncer_rast,uncer_rast),fun=function(x){x[1]*x[2]}) #agri_tab <- raster("E:/Masterarbeit/Statistik/Raster/1km/PC_1km/pred_agri_1km.tif") agri_tab_unc <- as.data.frame(as(agri_tab_unc,"SpatialPixelsDataFrame")) agri_tab_unc <- aggregate(layer~y,data=agri_tab_unc,mean) agri_tab_unc$name <- "Agriculture" mat_for <- matrix(c(1,2,3,4,NA,1,NA,NA),ncol=2) lu_for <- reclassify(lu,mat_for) # e <- extent(-180, 180, -55.63135, 83.86865) lu_foruncer_rast <- projectRaster(lu_for, uncer_rast) for_tab_unc <- calc(stack(lu_foruncer_rast,uncer_rast),fun=function(x){x[1]*x[2]}) for_tab_unc <- as.data.frame(as(for_tab_unc,"SpatialPixelsDataFrame")) for_tab_unc <- aggregate(layer~y,data=for_tab_unc,mean) for_tab_unc$name <- "Forest" mat_gras <- matrix(c(1,2,3,4,NA,NA,1,NA),ncol=2) lu_gras <- reclassify(lu,mat_gras) # e <- extent(-180, 180, -55.63135, 83.86865) lu_grasuncer_rast <- projectRaster(lu_gras, uncer_rast) gras_tab_unc <- calc(stack(lu_grasuncer_rast,uncer_rast),fun=function(x){x[1]*x[2]}) gras_tab_unc <- as.data.frame(as(gras_tab_unc,"SpatialPixelsDataFrame")) gras_tab_unc <- aggregate(layer~y,data=gras_tab_unc,mean) gras_tab_unc$name <- "Grassland" # Combine the land-use data frames df_lu_unc <- rbind(agri_tab_unc,for_tab_unc,gras_tab_unc) # Plot the data as path rf_lat_unc_plot <- ggplot(df_lu_unc)+ geom_path(aes(x=layer,y=y,color=name),lineend = "square")+ theme_bw(base_size=14)+ geom_map(data=WorldData, map=WorldData, aes(x=long, y=lat, group=group, map_id=region), fill=NA, colour=NA, size=0)+ annotate("text",label = 'd)', x = Inf, y = Inf, hjust = 1.25, vjust = 1.25,size=5)+ylab("")+xlab("Q10")+ scale_y_continuous(breaks=nsbrks,labels = nslbls)+ scale_x_continuous(limits=c(22,40),expand = c(0.01,0))+ theme(legend.title=element_blank(), legend.position=c(0.3,0.5), legend.background = element_rect(fill=NULL,linetype="solid", colour ="black"), legend.key.size = unit(1, "cm")) # Combine the global map and latitudial plot # Get the plot heights p3.common.y <- ggplot_gtable(ggplot_build(uncer_map)) p4.common.y <- ggplot_gtable(ggplot_build(rf_lat_unc_plot)) # Copy the plot height from p1 to p2 p4.common.y$heights <- p3.common.y$heights # Arrange the plots library(gridExtra) grid.arrange(p1.common.y,p2.common.y,p3.common.y,p4.common.y,ncol=2,widths=c(5,2)) map_plot <- arrangeGrob(p1.common.y,p2.common.y,ncol=2,widths=c(5,2)) map_plot_uncer <- arrangeGrob(p1.common.y,p2.common.y,p3.common.y,p4.common.y,ncol=2,widths=c(5,2)) # Save the plot (Figure 2) ggsave("uncer_map2.pdf",map_plot_uncer, width=40,height=30,unit="cm",dpi=600) ggsave("uncer_map2.tiff",map_plot_uncer, width=40,height=30,unit="cm",dpi=600) ################################################################################ # Test for Europe with and without Lab or Field Studies df_europe <- cbind.data.frame(df[c(8,9,16,17,18:32)],df_treat_all[,c(11:14)]) df_europe$Sequential.warming <- ifelse(df_europe$Sequential.warming=="warmed consecutively ",1,0) df_europe$Treated.Added <- ifelse(df_europe$Treated.Added=="None",1,0) df_europe <- df_europe[-which(df_europe$Autotrophic.Heterotrophic=="Autotrophic"),] df_europe$Autotrophic.Heterotrophic <- ifelse(df_europe$Autotrophic.Heterotrophic=="Both",1,0) # Load map of Europe euro <- shapefile("E:/Masterarbeit/GIS/Europe/Europe_2.shp") # Make data frame to point shapefile coordinates(df_europe) <- ~WGS.84.E+WGS.84.N proj4string(df_europe) <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0") # Get Points in Europe polygon df_euro_only <- as.data.frame(df_europe[euro,]) # Get Field and Lab data frame df_euro_Field <- df_euro_only[which(df_euro_only$Type=="Field"),] # n = 549 df_euro_Lab <- df_euro_only[which(df_euro_only$Type=="Lab"),] # n = 237 mean(df_euro_only$Q10) sd(df_euro_only$Q10) mean(df_euro_Field$Q10) sd(df_euro_Field$Q10) mean(df_euro_Lab$Q10) sd(df_euro_Lab$Q10) # Define data frames for PCA pred.wI_euro <- df_euro_only[,c(3,5:19,21:23)] pred.wI_euro$'CN:P' <- pred.wI_euro$CN/pred.wI_euro$P pred.wI_euro$'Clay:MAT' <- pred.wI_euro$MAT/pred.wI_euro$Clay pred.wI_euro$'PET:NDVI' <- pred.wI_euro$PET/pred.wI_euro$NDVI pred.wI_euro$'MAP:PET' <- pred.wI_euro$MAP/pred.wI_euro$PET pred.wI_euro$PET <- NULL pred.wI_euro$'Bsat:CEC' <- pred.wI_euro$Bsat/pred.wI_euro$CEC pred.wI_euro$'Bsat:pH' <- pred.wI_euro$Bsat/pred.wI_euro$pH pred.wI_euro$'CEC:Clay' <- pred.wI_euro$CEC/pred.wI_euro$Clay pred.wI_euro$'MAT:MAP' <- pred.wI_euro$MAT/pred.wI_euro$MAP sdx_euro <- as.matrix(pred.wI_euro) nc_euro <- ncol(sdx_euro) nr_euro <- nrow(sdx_euro) rmin_euro <- 1 rmax_euro <- nc_euro sdx_euro <- scale(sdx_euro) cx_euro <- cov(sdx_euro) # Covariance Matrix ea_euro <- svd(cx_euro, nv=0) nopc_euro <- ncol(ea_euro$u) sdevalues_euro <- matrix(rep(sqrt(ea_euro$d), nopc_euro), nrow = nopc_euro, ncol=nopc_euro,byrow = TRUE) loadings_euro <- ea_euro$u*sdevalues_euro scores_euro <- sdx_euro %*% ea_euro$u scmx_euro <- matrix(rep(colMeans(scores_euro), nr_euro), nrow = nr_euro, ncol = nc_euro, byrow = TRUE) scsx_euro <- matrix(rep(sd(scores_euro[,]), nr_euro), nrow = nr_euro, ncol = nc_euro, byrow = TRUE) scsdx_euro <- (scores_euro-scmx_euro)/scsx_euro scsdx_euro <- scsdx_euro[1:nr_euro,1:nc_euro] varex_euro <- ea_euro$d / sum(ea_euro$d) * 100 pc_or_euro <- as.data.frame(matrix(NA, ncol=4, nrow=rmax_euro)) colnames(pc_or_euro) <- c("PC","EV","Variance","cumvar") for(i in 1:rmax_euro){ pc_or_euro[i,1] <- i pc_or_euro[i,2] <- round(ea_euro$d[i],3) pc_or_euro[i,3] <- round(varex_euro[i],3) pc_or_euro[i,4] <- round(sum(varex_euro[seq(1,i)]),3) } pc_or_euro$EV <- as.numeric(pc_or_euro$EV) # Rotation of PCA with VARIMAX vm_euro <- varimax(loadings_euro[,1:8],normalize=TRUE) vm_euro$rotmat <- vm_euro$rotmat[,order(colSums(vm_euro$loadings^2), decreasing = TRUE)] rloadings_euro <- loadings_euro[,1:8] %*% vm_euro$rotmat library(corpcor) rcoef_euro <- pseudoinverse(as.matrix(cx_euro)) %*% rloadings_euro rscores_euro <- as.matrix(sdx_euro) %*% rcoef_euro rscmx_euro <- matrix(rep(colMeans(rscores_euro), nrow(rscores_euro)), nrow = nrow(rscores_euro), ncol = ncol(rscores_euro), byrow = TRUE) rscsx_euro <- matrix(rep(sd(rscores_euro[,]), nrow(rscores_euro)), nrow = nrow(rscores_euro), ncol = ncol(rscores_euro), byrow = TRUE) rscsdx_euro <- (rscores_euro-rscmx_euro)/rscsx_euro rscores_euro <- rscsdx_euro[1:nrow(rscores_euro),1:8] rvalues_euro <- colSums(rloadings_euro^2) vmt_euro <- varimax(loadings_euro, normalize=TRUE) vmtloadings_euro <- loadings_euro %*% vmt_euro$rotmat allrvalues_euro <- colSums(vmtloadings_euro^2) rvarex_euro <- rvalues_euro / sum(allrvalues_euro) * 100 pc_wr_euro <- as.data.frame(matrix(NA, ncol=4, nrow=8)) colnames(pc_wr_euro) <- c("PC", "Eigenvalue", "Variance", "Cumulative") for(i in 1:8){ pc_wr_euro[i,1] <- i pc_wr_euro[i,2] <- round(rvalues_euro[i],3) pc_wr_euro[i,3] <- round(rvarex_euro[i], 3) pc_wr_euro[i,4] <- round(sum(rvarex_euro[seq(1,i)]),3) } library(lessR) colnames(rscores_euro) <- to("PC", ncol(rscores_euro),same.size=FALSE) colnames(rloadings_euro) <- to("PC", ncol(rloadings_euro),same.size=FALSE) row.names(rloadings_euro) <- colnames(pred.wI_euro) pca_wr.new_euro <- data.frame(t(pc_wr_euro[,-1])) colnames(pca_wr.new_euro) <- to("PC", ncol(pca_wr.new_euro),same.size=FALSE) pca_wr.new_euro <- do.call(cbind,lapply(pca_wr.new_euro,function(x)as.numeric(as.character(x)))) pca.allbind_euro <- rbind.data.frame(pca_wr.new_euro, round(rloadings_euro,3)) row.names(pca.allbind_euro)[1:3] <- c("Eigenvalue", "Variability (%)", "Cumulative (%)") Q10_euro <- df_euro_only$Q10 all_data_mod_euro <- cbind.data.frame(Q10_euro,rscores_euro) colnames(all_data_mod_euro)[1] <- "Q10" n_euro <- to("PC", ncol(rscores_euro),same.size=FALSE) # Define formula form2_euro <- as.formula(paste("Q10~",paste(n_euro, collapse='+'))) library(caret);library(randomForest) # Define Resampling method dp_euro <- createDataPartition(all_data_mod_euro$Q10, p=.80, times=20) # Define cross-validation method tr_euro <- trainControl(method = "LGOCV",p=0.8, number=100,index=dp_euro, preProcOptions=list(center=T,scale=T), selectionFunction = "oneSE") # Random Forest (RF) rfmod_euro <- train(form2_euro, data=all_data_mod_euro, method="rf",metric="RMSE", trControl=tr_euro,ntrees=1000, importance=TRUE) arg_rf_euro <- rfmod_euro$results[is.element(rownames(rfmod_euro$results), rownames(rfmod_euro$bestTune)),] # Predicted vs observed (Figure 1a) pred_rf_euro <- predict(rfmod_euro, all_data_mod_euro) # Predict Q10 # Combine predicted and observed Q10 data pred_rf_euro <- data.frame(pred = pred_rf_euro, Q10 = all_data_mod_euro$Q10) # Plot the results rfplot_PCA_euro <- ggplot(data = pred_rf_euro, aes(y=pred, x= Q10))+ geom_point()+ geom_abline(intercept=0,slope=1,size=1.5,color="red")+ # 1:1-Line geom_smooth(method='lm',formula=y~x,level=0.99)+ # Regression line annotate("text",label = 'a)', x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5)+ xlim(1,4.5)+ylim(1,4.5)+theme_bw()+ labs(x="Q10 observed",y="Q10 predicted")+ annotate("text",label=paste("RMSE = ",round(arg_rf_euro$RMSE,2),"\nrRMSE = ", round(arg_rf_euro$RMSE/mean(Q10)*100,0),"%","\nR² = ", round(arg_rf_euro$Rsquared,2)), x=1,y=4.3,hjust=0,size=5)+ theme(axis.text=element_text(size=12), axis.title=element_text(size=14)) ## Field # Define data frames for PCA pred.wI_field <- df_euro_Field[,c(3,5:19,21:23)] pred.wI_field$'CN:P' <- pred.wI_field$CN/pred.wI_field$P pred.wI_field$'Clay:MAT' <- pred.wI_field$MAT/pred.wI_field$Clay pred.wI_field$'PET:NDVI' <- pred.wI_field$PET/pred.wI_field$NDVI pred.wI_field$'MAP:PET' <- pred.wI_field$MAP/pred.wI_field$PET pred.wI_field$PET <- NULL pred.wI_field$'Bsat:CEC' <- pred.wI_field$Bsat/pred.wI_field$CEC pred.wI_field$'Bsat:pH' <- pred.wI_field$Bsat/pred.wI_field$pH pred.wI_field$'CEC:Clay' <- pred.wI_field$CEC/pred.wI_field$Clay pred.wI_field$'MAT:MAP' <- pred.wI_field$MAT/pred.wI_field$MAP sdx_field <- as.matrix(pred.wI_field) nc_field <- ncol(sdx_field) nr_field <- nrow(sdx_field) rmin_field <- 1 rmax_field <- nc_field sdx_field <- scale(sdx_field) cx_field <- cov(sdx_field) # Covariance Matrix ea_field <- svd(cx_field, nv=0) nopc_field <- ncol(ea_field$u) sdevalues_field <- matrix(rep(sqrt(ea_field$d), nopc_field), nrow = nopc_field, ncol=nopc_field,byrow = TRUE) loadings_field <- ea_field$u*sdevalues_field scores_field <- sdx_field %*% ea_field$u scmx_field <- matrix(rep(colMeans(scores_field), nr_field), nrow = nr_field, ncol = nc_field, byrow = TRUE) scsx_field <- matrix(rep(sd(scores_field[,]), nr_field), nrow = nr_field, ncol = nc_field, byrow = TRUE) scsdx_field <- (scores_field-scmx_field)/scsx_field scsdx_field <- scsdx_field[1:nr_field,1:nc_field] varex_field <- ea_field$d / sum(ea_field$d) * 100 pc_or_field <- as.data.frame(matrix(NA, ncol=4, nrow=rmax_field)) colnames(pc_or_field) <- c("PC","EV","Variance","cumvar") for(i in 1:rmax_field){ pc_or_field[i,1] <- i pc_or_field[i,2] <- round(ea_field$d[i],3) pc_or_field[i,3] <- round(varex_field[i],3) pc_or_field[i,4] <- round(sum(varex_field[seq(1,i)]),3) } pc_or_field$EV <- as.numeric(pc_or_field$EV) # Rotation of PCA with VARIMAX vm_field <- varimax(loadings_field[,1:8],normalize=TRUE) vm_field$rotmat <- vm_field$rotmat[,order(colSums(vm_field$loadings^2), decreasing = TRUE)] rloadings_field <- loadings_field[,1:8] %*% vm_field$rotmat library(corpcor) rcoef_field <- pseudoinverse(as.matrix(cx_field)) %*% rloadings_field rscores_field <- as.matrix(sdx_field) %*% rcoef_field rscmx_field <- matrix(rep(colMeans(rscores_field), nrow(rscores_field)), nrow = nrow(rscores_field), ncol = ncol(rscores_field), byrow = TRUE) rscsx_field <- matrix(rep(sd(rscores_field[,]), nrow(rscores_field)), nrow = nrow(rscores_field), ncol = ncol(rscores_field), byrow = TRUE) rscsdx_field <- (rscores_field-rscmx_field)/rscsx_field rscores_field <- rscsdx_field[1:nrow(rscores_field),1:8] rvalues_field <- colSums(rloadings_field^2) vmt_field <- varimax(loadings_field, normalize=TRUE) vmtloadings_field <- loadings_field %*% vmt_field$rotmat allrvalues_field <- colSums(vmtloadings_field^2) rvarex_field <- rvalues_field / sum(allrvalues_field) * 100 pc_wr_field <- as.data.frame(matrix(NA, ncol=4, nrow=8)) colnames(pc_wr_field) <- c("PC", "Eigenvalue", "Variance", "Cumulative") for(i in 1:8){ pc_wr_field[i,1] <- i pc_wr_field[i,2] <- round(rvalues_field[i],3) pc_wr_field[i,3] <- round(rvarex_field[i], 3) pc_wr_field[i,4] <- round(sum(rvarex_field[seq(1,i)]),3) } library(lessR) colnames(rscores_field) <- to("PC", ncol(rscores_field),same.size=FALSE) colnames(rloadings_field) <- to("PC", ncol(rloadings_field),same.size=FALSE) row.names(rloadings_field) <- colnames(pred.wI_field) pca_wr.new_field <- data.frame(t(pc_wr_field[,-1])) colnames(pca_wr.new_field) <- to("PC", ncol(pca_wr.new_field),same.size=FALSE) pca_wr.new_field <- do.call(cbind,lapply(pca_wr.new_field,function(x)as.numeric(as.character(x)))) pca.allbind_field <- rbind.data.frame(pca_wr.new_field, round(rloadings_field,3)) row.names(pca.allbind_field)[1:3] <- c("Eigenvalue", "Variability (%)", "Cumulative (%)") Q10_field <- df_euro_Field$Q10 all_data_mod_field <- cbind.data.frame(Q10_field,rscores_field) colnames(all_data_mod_field)[1] <- "Q10" n_field <- to("PC", ncol(rscores_field),same.size=FALSE) # Define formula form2_field <- as.formula(paste("Q10~",paste(n_field, collapse='+'))) library(caret);library(randomForest) # Define Resampling method dp_field <- createDataPartition(all_data_mod_field$Q10, p=.80, times=20) # Define cross-validation method tr_field <- trainControl(method = "LGOCV",p=0.8, number=100,index=dp_field, preProcOptions=list(center=T,scale=T), selectionFunction = "oneSE") # Random Forest (RF) rfmod_field <- train(form2_field, data=all_data_mod_field, method="rf",metric="RMSE", trControl=tr_field,ntrees=1000, importance=TRUE) arg_rf_field <- rfmod_field$results[is.element(rownames(rfmod_field$results), rownames(rfmod_field$bestTune)),] # Predicted vs observed (Figure 1a) pred_rf_field <- predict(rfmod_field, all_data_mod_field) # Predict Q10 # Combine predicted and observed Q10 data pred_rf_field <- data.frame(pred = pred_rf_field, Q10 = all_data_mod_field$Q10) # Plot the results rfplot_PCA_field <- ggplot(data = pred_rf_field, aes(y=pred, x= Q10))+ geom_point()+ geom_abline(intercept=0,slope=1,size=1.5,color="red")+ # 1:1-Line geom_smooth(method='lm',formula=y~x,level=0.99)+ # Regression line annotate("text",label = 'b)', x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5)+ xlim(1,4.5)+ylim(1,4.5)+theme_bw()+ labs(x="Q10 observed",y="Q10 predicted")+ annotate("text",label=paste("RMSE = ",round(arg_rf_field$RMSE,2),"\nrRMSE = ", round(arg_rf_field$RMSE/mean(Q10)*100,0),"%","\nR² = ", round(arg_rf_field$Rsquared,2)), x=1,y=4.3,hjust=0,size=5)+ theme(axis.text=element_text(size=12), axis.title=element_text(size=14)) ## Lab # Define data frames for PCA pred.wI_lab <- df_euro_Lab[,c(3,5:19,21:23)] pred.wI_lab$'CN:P' <- pred.wI_lab$CN/pred.wI_lab$P pred.wI_lab$'Clay:MAT' <- pred.wI_lab$MAT/pred.wI_lab$Clay pred.wI_lab$'PET:NDVI' <- pred.wI_lab$PET/pred.wI_lab$NDVI pred.wI_lab$'MAP:PET' <- pred.wI_lab$MAP/pred.wI_lab$PET pred.wI_lab$PET <- NULL pred.wI_lab$'Bsat:CEC' <- pred.wI_lab$Bsat/pred.wI_lab$CEC pred.wI_lab$'Bsat:pH' <- pred.wI_lab$Bsat/pred.wI_lab$pH pred.wI_lab$'CEC:Clay' <- pred.wI_lab$CEC/pred.wI_lab$Clay pred.wI_lab$'MAT:MAP' <- pred.wI_lab$MAT/pred.wI_lab$MAP sdx_lab <- as.matrix(pred.wI_lab) nc_lab <- ncol(sdx_lab) nr_lab <- nrow(sdx_lab) rmin_lab <- 1 rmax_lab <- nc_lab sdx_lab <- scale(sdx_lab) cx_lab <- cov(sdx_lab) # Covariance Matrix ea_lab <- svd(cx_lab, nv=0) nopc_lab <- ncol(ea_lab$u) sdevalues_lab <- matrix(rep(sqrt(ea_lab$d), nopc_lab), nrow = nopc_lab, ncol=nopc_lab,byrow = TRUE) loadings_lab <- ea_lab$u*sdevalues_lab scores_lab <- sdx_lab %*% ea_lab$u scmx_lab <- matrix(rep(colMeans(scores_lab), nr_lab), nrow = nr_lab, ncol = nc_lab, byrow = TRUE) scsx_lab <- matrix(rep(sd(scores_lab[,]), nr_lab), nrow = nr_lab, ncol = nc_lab, byrow = TRUE) scsdx_lab <- (scores_lab-scmx_lab)/scsx_lab scsdx_lab <- scsdx_lab[1:nr_lab,1:nc_lab] varex_lab <- ea_lab$d / sum(ea_lab$d) * 100 pc_or_lab <- as.data.frame(matrix(NA, ncol=4, nrow=rmax_lab)) colnames(pc_or_lab) <- c("PC","EV","Variance","cumvar") for(i in 1:rmax_lab){ pc_or_lab[i,1] <- i pc_or_lab[i,2] <- round(ea_lab$d[i],3) pc_or_lab[i,3] <- round(varex_lab[i],3) pc_or_lab[i,4] <- round(sum(varex_lab[seq(1,i)]),3) } pc_or_lab$EV <- as.numeric(pc_or_lab$EV) # Rotation of PCA with VARIMAX vm_lab <- varimax(loadings_lab[,1:8],normalize=TRUE) vm_lab$rotmat <- vm_lab$rotmat[,order(colSums(vm_lab$loadings^2), decreasing = TRUE)] rloadings_lab <- loadings_lab[,1:8] %*% vm_lab$rotmat library(corpcor) rcoef_lab <- pseudoinverse(as.matrix(cx_lab)) %*% rloadings_lab rscores_lab <- as.matrix(sdx_lab) %*% rcoef_lab rscmx_lab <- matrix(rep(colMeans(rscores_lab), nrow(rscores_lab)), nrow = nrow(rscores_lab), ncol = ncol(rscores_lab), byrow = TRUE) rscsx_lab <- matrix(rep(sd(rscores_lab[,]), nrow(rscores_lab)), nrow = nrow(rscores_lab), ncol = ncol(rscores_lab), byrow = TRUE) rscsdx_lab <- (rscores_lab-rscmx_lab)/rscsx_lab rscores_lab <- rscsdx_lab[1:nrow(rscores_lab),1:8] rvalues_lab <- colSums(rloadings_lab^2) vmt_lab <- varimax(loadings_lab, normalize=TRUE) vmtloadings_lab <- loadings_lab %*% vmt_lab$rotmat allrvalues_lab <- colSums(vmtloadings_lab^2) rvarex_lab <- rvalues_lab / sum(allrvalues_lab) * 100 pc_wr_lab <- as.data.frame(matrix(NA, ncol=4, nrow=8)) colnames(pc_wr_lab) <- c("PC", "Eigenvalue", "Variance", "Cumulative") for(i in 1:8){ pc_wr_lab[i,1] <- i pc_wr_lab[i,2] <- round(rvalues_lab[i],3) pc_wr_lab[i,3] <- round(rvarex_lab[i], 3) pc_wr_lab[i,4] <- round(sum(rvarex_lab[seq(1,i)]),3) } library(lessR) colnames(rscores_lab) <- to("PC", ncol(rscores_lab),same.size=FALSE) colnames(rloadings_lab) <- to("PC", ncol(rloadings_lab),same.size=FALSE) row.names(rloadings_lab) <- colnames(pred.wI_lab) pca_wr.new_lab <- data.frame(t(pc_wr_lab[,-1])) colnames(pca_wr.new_lab) <- to("PC", ncol(pca_wr.new_lab),same.size=FALSE) pca_wr.new_lab <- do.call(cbind,lapply(pca_wr.new_lab,function(x)as.numeric(as.character(x)))) pca.allbind_lab <- rbind.data.frame(pca_wr.new_lab, round(rloadings_lab,3)) row.names(pca.allbind_lab)[1:3] <- c("Eigenvalue", "Variability (%)", "Cumulative (%)") Q10_lab <- df_euro_Lab$Q10 all_data_mod_lab <- cbind.data.frame(Q10_lab,rscores_lab) colnames(all_data_mod_lab)[1] <- "Q10" n_lab <- to("PC", ncol(rscores_lab),same.size=FALSE) # Define formula form2_lab <- as.formula(paste("Q10~",paste(n_lab, collapse='+'))) library(caret);library(randomForest) # Define Resampling method dp_lab <- createDataPartition(all_data_mod_lab$Q10, p=.80, times=20) # Define cross-validation method tr_lab <- trainControl(method = "LGOCV",p=0.8, number=100,index=dp_lab, preProcOptions=list(center=T,scale=T), selectionFunction = "oneSE") # Random Forest (RF) rfmod_lab <- train(form2_lab, data=all_data_mod_lab, method="rf",metric="RMSE", trControl=tr_lab,ntrees=1000, importance=TRUE) arg_rf_lab <- rfmod_lab$results[is.element(rownames(rfmod_lab$results), rownames(rfmod_lab$bestTune)),] # Predicted vs observed (Figure 1a) pred_rf_lab <- predict(rfmod_lab, all_data_mod_lab) # Predict Q10 # Combine predicted and observed Q10 data pred_rf_lab <- data.frame(pred = pred_rf_lab, Q10 = all_data_mod_lab$Q10) # Plot the results rfplot_PCA_lab <- ggplot(data = pred_rf_lab, aes(y=pred, x= Q10))+ geom_point()+ geom_abline(intercept=0,slope=1,size=1.5,color="red")+ # 1:1-Line geom_smooth(method='lm',formula=y~x,level=0.99)+ # Regression line annotate("text",label = 'c)', x = Inf, y = -Inf, hjust = 1.25, vjust = -0.25,size=5)+ xlim(1,4.5)+ylim(1,4.5)+theme_bw()+ labs(x="Q10 observed",y="Q10 predicted")+ annotate("text",label=paste("RMSE = ",round(arg_rf_lab$RMSE,2),"\nrRMSE = ", round(arg_rf_lab$RMSE/mean(Q10)*100,0),"%","\nR² = ", round(arg_rf_lab$Rsquared,2)), x=1,y=4.3,hjust=0,size=5)+ theme(axis.text=element_text(size=12), axis.title=element_text(size=14)) # Arrange plots library(gridExtra) euro_plot_fig <- arrangeGrob(rfplot_PCA_euro,rfplot_PCA_field,rfplot_PCA_lab ,nrow=1) # Save plots # with experiment data ggsave("euro_mod_lf.pdf",euro_plot_fig, width=42,height=17, unit="cm",dpi=600 ) ggsave("euro_mod_lf.tiff",euro_plot_fig, width=42,height=17, unit="cm",dpi=600 ) ############################################################################## # Analyses of soil taxonomy library(Rmisc); library(ggplot2); library(gridExtra) library(pgirmess); library(multcompView) # Load table with Q10 and soil taxonomy data q10_tax <- read.csv("E:/Masterarbeit/Statistik/Raster/Soil_Taxonomy/Q10_soiltaxo.csv",header=TRUE,sep=";") q10_tax2 <- q10_tax[,c(1,13,17,35,36)] legend <- read.csv("E:/Masterarbeit/Statistik/Raster/Soil_Taxonomy/Taxo_legend.csv",header=TRUE,sep=";",stringsAsFactors = FALSE,na.string="") # Reclassify the subgroup to bigger groups q10_tax2$USDA <- legend$USDA[match(q10_tax2$TAXOUSDA,legend$Number_USDA)] q10_tax2$WRB<- legend$WRB[match(q10_tax2$TAXNWRB,legend$Number_WRB)] # USDA kmc_usda <- kruskalmc(Q10~USDA,data=q10_tax2) test_usda <- kmc_usda$dif.com$difference names(test_usda) <- row.names(kmc_usda$dif.com) usda_sum <- summarySE(q10_tax2, measurevar="Q10", groupvars="USDA") # Define Letters let_usda <- multcompLetters(test_usda, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_usda <- data.frame(let_usda$Letters) # Define text for figure letters_usda$Zones <- row.names(letters_usda) usda_all <- merge(usda_sum,letters_usda,by.x = "USDA", by.y = "Zones") # WRB kmc_wrb <- kruskalmc(Q10~WRB,data=q10_tax2) test_wrb <- kmc_wrb$dif.com$difference names(test_wrb) <- row.names(kmc_wrb$dif.com) wrb_sum <- summarySE(q10_tax2, measurevar="Q10", groupvars="WRB") # Define Letters let_wrb <- multcompLetters(test_wrb, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_wrb <- data.frame(let_wrb$Letters) # Define text for figure letters_wrb$Zones <- row.names(letters_wrb) wrb_all <- merge(wrb_sum,letters_wrb,by.x = "WRB", by.y = "Zones") # Landnutzungen q10_tax2$Vegetation <- as.character(q10_tax2$Vegetation) # 1 Forest q10_taxfor <- q10_tax2[which(q10_tax2$Vegetation=="forest"),] # USDA kmc_usda_for <- kruskalmc(Q10~USDA,data=q10_taxfor) test_usda_for <- kmc_usda_for$dif.com$difference names(test_usda_for) <- row.names(kmc_usda_for$dif.com) usda_sum_for <- summarySE(q10_taxfor, measurevar="Q10", groupvars="USDA") # Define Letters let_usda_for <- multcompLetters(test_usda_for, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_usda_for <- data.frame(let_usda_for$Letters) # Define text for figure letters_usda_for$Zones <- row.names(letters_usda_for) usda_for <- merge(usda_sum_for,letters_usda_for,by.x = "USDA", by.y = "Zones") # WRB kmc_wrb_for <- kruskalmc(Q10~WRB,data=q10_taxfor) test_wrb_for <- kmc_wrb_for$dif.com$difference names(test_wrb_for) <- row.names(kmc_wrb_for$dif.com) wrb_sum_for <- summarySE(q10_taxfor, measurevar="Q10", groupvars="WRB") # Define Letters let_wrb_for <- multcompLetters(test_wrb_for, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_wrb_for <- data.frame(let_wrb_for$Letters) # Define text for figure letters_wrb_for$Zones <- row.names(letters_wrb_for) wrb_for <- merge(wrb_sum_for,letters_wrb_for,by.x = "WRB", by.y = "Zones") # 2 Grassland q10_taxgras <- q10_tax2[which(q10_tax2$Vegetation=="grassland"),] # USDA kmc_usda_gras <- kruskalmc(Q10~USDA,data=q10_taxgras) test_usda_gras <- kmc_usda_gras$dif.com$difference names(test_usda_gras) <- row.names(kmc_usda_gras$dif.com) usda_sum_gras <- summarySE(q10_taxgras, measurevar="Q10", groupvars="USDA") # Define Letters let_usda_gras <- multcompLetters(test_usda_gras, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_usda_gras <- data.frame(let_usda_gras$Letters) # Define text gras figure letters_usda_gras$Zones <- row.names(letters_usda_gras) usda_gras <- merge(usda_sum_gras,letters_usda_gras,by.x = "USDA", by.y = "Zones") # WRB kmc_wrb_gras <- kruskalmc(Q10~WRB,data=q10_taxgras) test_wrb_gras <- kmc_wrb_gras$dif.com$difference names(test_wrb_gras) <- row.names(kmc_wrb_gras$dif.com) wrb_sum_gras <- summarySE(q10_taxgras, measurevar="Q10", groupvars="WRB") # Define Letters let_wrb_gras <- multcompLetters(test_wrb_gras, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_wrb_gras <- data.frame(let_wrb_gras$Letters) # Define text gras figure letters_wrb_gras$Zones <- row.names(letters_wrb_gras) wrb_gras <- merge(wrb_sum_gras,letters_wrb_gras,by.x = "WRB", by.y = "Zones") # 3 Agriculture q10_taxagri <- q10_tax2[which(q10_tax2$Vegetation=="agriculture"),] # USDA kmc_usda_agri <- kruskalmc(Q10~USDA,data=q10_taxagri) test_usda_agri <- kmc_usda_agri$dif.com$difference names(test_usda_agri) <- row.names(kmc_usda_agri$dif.com) usda_sum_agri <- summarySE(q10_taxagri, measurevar="Q10", groupvars="USDA") # Define Letters let_usda_agri <- multcompLetters(test_usda_agri, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_usda_agri <- data.frame(let_usda_agri$Letters) # Define text agri figure letters_usda_agri$Zones <- row.names(letters_usda_agri) usda_agri <- merge(usda_sum_agri,letters_usda_agri,by.x = "USDA", by.y = "Zones") # WRB kmc_wrb_agri <- kruskalmc(Q10~WRB,data=q10_taxagri) test_wrb_agri <- kmc_wrb_agri$dif.com$difference names(test_wrb_agri) <- row.names(kmc_wrb_agri$dif.com) wrb_sum_agri <- summarySE(q10_taxagri, measurevar="Q10", groupvars="WRB") # Define Letters let_wrb_agri <- multcompLetters(test_wrb_agri, compare="<", threshold=0.05, Letters=c(letters, LETTERS, "."), reversed = FALSE) letters_wrb_agri <- data.frame(let_wrb_agri$Letters) # Define text agri figure letters_wrb_agri$Zones <- row.names(letters_wrb_agri) wrb_agri <- merge(wrb_sum_agri,letters_wrb_agri,by.x = "WRB", by.y = "Zones")