################################################################################
# R-Script
# Global patterns of geo-ecological controls on the response of soil respiration to warming
#
# Author information will be added once review process is completed
# 
#
# Details and constraints of the parameters of the model, statistics and caveats
# can be found in the online-only methods section of this manuscript and its sup-
# plement. All files needed to re-construct our workflow are available in the 
# supplement. Abbreviations of variables are explained in the extended data 
# "SRRW_database.xls" file
################################################################################
# Load Q10 data (Best and Generalized tables; Generalized always with _gl)

df <- read.csv("Q10_local_test.csv",header=TRUE,sep=";")
colnames(df)[c(16,18:32)] <- c("Tmean","pH","C","N","CN","CEC","Bsat",
                               "MAT","MAP","Sand","Clay","Silt","P","NDVI",
                               "PET","Humidity")
df_gl <- read.csv("Q10_global_test.csv",header=TRUE,sep=";")
colnames(df_gl)[c(16,18:32)] <- c("Tmean","pH","C","N","CN","CEC","Bsat","MAT",
                                  "MAP","Sand","Clay","Silt","P","NDVI","PET",
                                  "Humidity")

# Replace pH < 3 with 3

df$pH <- ifelse(df$pH > 3, df$pH, 3)
df_gl$pH <- ifelse(df_gl$pH > 3, df_gl$pH, 3)

# The following script shows the procedure with the best data approach. It can 
# be reproduced interchangeable for the generalized data approach by replacing
# input file with the according "_gl" equivalents. Note that in the following
# script all regions that are classified as subpolar and covered by our data 
# are grouped together with boreal data and named "boreal". 
#
# For reproduction
set.seed(138)

# Data map (Figure 1S)
# Needed Packages and Functions:
library(maps);library(maptools);library(ggsn);library(ggplot2)

# Functions for changing the x and y-axis labels of the map:
scale_x_longitude <- function(xmin=-180, xmax=180, step=1, ...) {
  xbreaks <- seq(xmin,xmax,step)
  xlabels <- unlist(lapply(xbreaks, function(x) 
    ifelse(x < 0, parse(text=paste0(x,"^o", " *W")), 
           ifelse(x > 0, parse(text=paste0(x,"^o", " *E")),x))))
  return(scale_x_continuous("Longitude", 
                            breaks = xbreaks, 
                            labels = xlabels, expand = c(0, 0), ...))
}
scale_y_latitude <- function(ymin=-90, ymax=90, step=0.5, ...) {
  ybreaks <- seq(ymin,ymax,step)
  ylabels <- unlist(lapply(ybreaks, function(x) 
    ifelse(x < 0, parse(text=paste0(x,"^o", " *S")), 
           ifelse(x > 0, parse(text=paste0(x,"^o", " *N")),x))))
  return(scale_y_continuous("Latitude", breaks = ybreaks, 
                            labels = ylabels, expand = c(0, 0), ...))
}
# Get global map:
mapWorld <- borders("world")

mp <- ggplot() + mapWorld 

# Plot data points on global map and change axis labels:

map <- mp + geom_point(df,mapping=aes(x=WGS.84.E, y=WGS.84.N,
                                      shape=Type,color=Type))+
  coord_cartesian(xlim=c(-180, 190), ylim=c(-75,90))+
  geom_jitter(width = 1, height = 1)+theme_bw(base_size = 14)+
  scale_x_longitude(xmin=-180, xmax=180, step=40) +
  scale_y_latitude(ymin=-75, ymax=80, step=50) +
  labs(shape="Study Type",color="Study Type",
       caption="Coordinate system: WGS84")+
  labs(x="", y="")+guides(shape=guide_legend(override.aes=list(size=4)))+
  theme(legend.background = element_rect(fill=NULL,size=0.5,linetype="solid",
                                         colour ="black"),
        legend.justification=c(0,0), 
        legend.position=c(0.05,0.05))

# Save the map in pdf and tif format
ggsave("2019_05_07254A_SRRW/map_all.tiff",map,
       dpi=600, width = 25, height=20, units = "cm")
ggsave("2019_05_07254A_SRRW/map_all.pdf",map,
       dpi=600, width = 25, height=20, units = "cm")

# ANOVA/Kruskal-Wallis-Test for Climatezones and Land-use 
# Needed packages:

library(Rmisc); library(ggplot2); library(gridExtra)
library(pgirmess); library(multcompView);library(rstatix)

# Climatezones
# Descriptive statitstics 

sumdata_clim <- summarySE(df, measurevar="Q10", groupvars="Climatezone")

# Test for Normality and Homogeinity of Variance

shapiro.test(df$Q10[df$Climatezone=="Boreal"])
shapiro.test(df$Q10[df$Climatezone=="Temperate"])
shapiro.test(df$Q10[df$Climatezone=="Subtropical"])
shapiro.test(df$Q10[df$Climatezone=="Tropical"])
fligtest_clim <- fligner.test(Q10 ~ Climatezone, data=df)
# All test with p < 0.05 ==> Kruskal-Willis-Test:

kruskal.test(Q10~Climatezone,data=df)

# Effect size of Kruskal-Wallis Test
kw_effsize_veg <- kruskal_effsize(Q10~Climatezone,data=df)

# Post-Hoc Test for Kruskal-Wallis-test

kmc_clim <- kruskalmc(Q10~Climatezone,data=df)
test_clim <- kmc_clim$dif.com$difference 
names(test_clim) <- row.names(kmc_clim$dif.com)

# Define Letters for figure

let_clim <- multcompLetters(test_clim, compare="<", threshold=0.05,
                            Letters=c(letters, LETTERS, "."),
                            reversed = FALSE)
letters_clim <- data.frame(let_clim$Letters)
letters_clim$Zones <- row.names(letters_clim)

# Define text for figure

clim_order <- c("Boreal","Temperate","Subtropical","Tropical")
sumdata_clim$ext <- c("15 %","5 %","10 %","4 %") # External calculation
# of excluded data points

# Plot the data (Figure 2S a)

clim_plot_fig <- ggplot(data=sumdata_clim, aes(y=Q10,x=Climatezone))+
  geom_point(size=4)+
  geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), 
                position = position_dodge(0.9),width = 0.25)+
  theme_bw(base_size = 14)+labs(x="Climate Zones", y = "Mean Q10")+
  scale_fill_manual(values=c("#FFCC00","#FF9900","#339300","#3399FF"))+
  geom_text(aes(label=paste("n =",N),x=Climatezone,y=3.6), 
            position = position_dodge(0.9)) +
  geom_text(aes(x=Zones,y=3.8,
                label=let_clim.Letters),data=letters_clim)+
  geom_text(aes(x=Climatezone,y=1.2,label=ext),data=sumdata_clim)+
  scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+
  scale_x_discrete(expand=c(0,0),limits=clim_order)+
  annotate("text",x = Inf, y = -Inf, 
           hjust = 1.25, vjust = -0.25,size=5,label="a)")

# Land-Use (same procedure as for climate zones)

sumdata_veg <- summarySE(df, measurevar="Q10", groupvars="Vegetation")
shapiro.test(df$Q10[df$Vegetation=="agriculture"])
shapiro.test(df$Q10[df$Vegetation=="forest"])
shapiro.test(df$Q10[df$Vegetation=="grassland"])
shapiro.test(df$Q10[df$Vegetation=="wetland"])
fligtest_veg <- fligner.test(Q10 ~ Vegetation, data=df)
# All test with p < 0.05 ==> Kruskal-Willis-Test:

kruskal.test(Q10~Vegetation,data=df)

# Test for effect size of Kruskal-Wallis Test

kw_effsize_veg <- kruskal_effsize(Q10~Vegetation,data=df)

# Post-Hoc Test for Kruskal-Wallis-test

kmc_veg <- kruskalmc(Q10~Vegetation,data=df)
test_veg <- kmc_veg$dif.com$difference 
names(test_veg) <- row.names(kmc_veg$dif.com)

# Define Letters

let_veg <- multcompLetters(test_veg, compare="<", threshold=0.05,
                           Letters=c(letters, LETTERS, "."),
                           reversed = FALSE)
letters_veg <- data.frame(let_veg$Letters)

# Define text for figure

letters_veg$Zones <- row.names(letters_veg)
sumdata_veg$ext <- c("7 %","9 %","7 %","17 %") # External calculation
# of excluded data points.

# Plot the data (Figure 2S b) 

veg_plot_fig <- ggplot(data=sumdata_veg, aes(y=Q10,x=Vegetation))+
  geom_point(size=4)+
  geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), 
                position = position_dodge(0.9),width = 0.25)+
  theme_bw(base_size = 14)+labs(x="Land-Use", y = "Mean Q10")+
  scale_fill_manual(values=c("#FFCC00","#FF9900","#339300","#3399FF"))+
  geom_text(aes(label=paste("n =",N),x=Vegetation,y=3.6), 
            position = position_dodge(0.9)) +
  geom_text(aes(x=Zones,y=3.8,
                label=let_veg.Letters),data=letters_veg)+
  geom_text(aes(x=Vegetation,y=1.2,label=ext),data=sumdata_veg)+
  scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+
  annotate("text", x = Inf, y = -Inf, 
           hjust = 1.25, vjust = -0.25,size=5,label="b)")

# Combine the climate zone and land-use Plots

climveg_fig <- arrangeGrob(clim_plot_fig, veg_plot_fig)

# Save plots for onlz onlz climate yones and vegetation

ggsave("climveg_fig_new.pdf",
       climveg_fig,dpi=600)
ggsave("climveg_fig_new.tiff",
       climveg_fig,dpi=600)

################################################################################
# Analyse experiment techniques
library(Rmisc); library(ggplot2); library(gridExtra)
library(pgirmess); library(multcompView)
# Load extra data set
df_treat <- read.csv("Q10_treatment_db.csv",header=TRUE,sep=";")
df_treat <- df_treat[,-(c(15:30))]

# Remove autotrophic studies (n = 13)
df_treat <- df_treat[-which(df_treat$Autotrophic.Heterotrophic=="Autotrophic"),]

# Sequential Warming: Consecutively vs Paralell
df_warm_sum <- summarySE(df_treat, measurevar="Q10", groupvars="Sequential.warming")

df_treat$Sequential.warming <- as.character(df_treat$Sequential.warming)
shapiro.test(df_treat$Q10[df_treat$Sequential.warming=="parallel independent samples "])
# p.value < 2.2e-16 
shapiro.test(df_treat$Q10[df_treat$Sequential.warming=="warmed consecutively "])
# p.value < 2.2e-16 
fligtest_warm <- fligner.test(Q10 ~ Sequential.warming, data=df_treat)
# p-value = 0.5175

kruskal.test(Q10~Sequential.warming,data=df_treat)

# Test for effect size of Kruskal-Wallis Test

kruskal_effsize(Q10~Sequential.warming,data=df_treat)

# Post-Hoc Test for Kruskal-Wallis-test

kmc_warm <- kruskalmc(Q10~Sequential.warming,data=df_treat)
test_warm <- kmc_warm$dif.com$difference 
names(test_warm) <- row.names(kmc_warm$dif.com)

# Define Letters

let_warm <- multcompLetters(test_warm, compare="<", threshold=0.05,
                           Letters=c(letters, LETTERS, "."),
                           reversed = FALSE)
letters_warm <- data.frame(let_warm$Letters)

# Define text for figure

letters_warm$Zones <- row.names(letters_warm)

# Plot the data (Figure 2S d) 

warm_plot_fig <- ggplot(data=df_warm_sum, aes(y=Q10,x=Sequential.warming))+
  geom_point(size=4)+
  geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), 
                position = position_dodge(0.9),width = 0.25)+
  theme_bw(base_size=14)+labs(x="Sequential/parallel warming", y = "Q10")+
  scale_fill_manual(values=c("#FFCC00","#FF9900","#339300","#3399FF"))+
  geom_text(aes(label=paste("n =",N),x=Sequential.warming,y=3.6), 
            position = position_dodge(0.9)) +
  geom_text(aes(x=Zones,y=3.8,
                label=let_warm.Letters),data=letters_warm)+
  scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+
  scale_x_discrete(label=c("Parallel","Sequential"))+
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  annotate("text",label = 'd)', x = Inf, y = -Inf, 
           hjust = 1.25, vjust = -0.25,size=5)

# Treatment vs. None Treatment
df_treat$Treated.Added <- as.character(df_treat$Treated.Added)
df_treat$treat <- ifelse(df_treat$Treated.Added=="None","None","Treated")
df_treat_sum <- summarySE(df_treat, measurevar="Q10", groupvars="treat")

shapiro.test(df_treat$Q10[df_treat$treat=="None"])
# p.value < 2.2e-16 
shapiro.test(df_treat$Q10[df_treat$treat=="Treated"])
# p.value < 2.2e-16 
fligtest_treat <- fligner.test(Q10 ~ treat, data=df_treat)
# p-value = 0.4466

kruskal.test(Q10~treat,data=df_treat)

# Test for effect size of Kruskal-Wallis Test

kruskal_effsize(Q10~treat,data=df_treat)

# Post-Hoc Test for Kruskal-Wallis-test

kmc_treat <- kruskalmc(Q10~treat,data=df_treat)
test_treat <- kmc_treat$dif.com$difference 
names(test_treat) <- row.names(kmc_treat$dif.com)

# Define Letters

let_treat <- multcompLetters(test_treat, compare="<", threshold=0.05,
                            Letters=c(letters, LETTERS, "."),
                            reversed = FALSE)
letters_treat <- data.frame(let_treat$Letters)

# Define text for figure

letters_treat$Zones <- row.names(letters_treat)

# Plot the data (Figure 2S e) 

treat_plot_fig <- ggplot(data=df_treat_sum, aes(y=Q10,x=treat))+
  geom_point(size=4)+
  geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), 
                position = position_dodge(0.9),width = 0.25)+
  theme_bw(base_size = 14)+labs(x="Pre-Treatment", y = "Mean Q10")+
  scale_fill_manual(values=c("#FFCC00","#FF9900","#339300","#3399FF"))+
  geom_text(aes(label=paste("n =",N),x=treat,y=3.6), 
            position = position_dodge(0.9)) +
  geom_text(aes(x=Zones,y=3.8,
                label=let_treat.Letters),data=letters_treat)+
  scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+
  scale_x_discrete(label=c("None","Treatment"))+
  annotate("text",label = 'e)', x = Inf, y = -Inf, 
           hjust = 1.25, vjust = -0.25,size=5)

# Hetero vs Auto
df_treat$Autotrophic.Heterotrophic <- as.character(df_treat$Autotrophic.Heterotrophic)
df_auhe_sum <- summarySE(df_treat, measurevar="Q10", groupvars="Autotrophic.Heterotrophic")

shapiro.test(df_treat$Q10[df_treat$Autotrophic.Heterotrophic=="Both"])
# p.value < 2.2e-16 
shapiro.test(df_treat$Q10[df_treat$Autotrophic.Heterotrophic=="Heterotrophic"])
# p.value = 2.206e-06 
shapiro.test(df_treat$Q10[df_treat$Autotrophic.Heterotrophic=="Autotrophic"])
# p.value p-value = 0.1332
fligtest_treat <- fligner.test(Q10 ~ Autotrophic.Heterotrophic, data=df_treat)
# p-value = 1.069e-07

kruskal.test(Q10~Autotrophic.Heterotrophic,data=df_treat)

# Test for effect size of Kruskal-Wallis Test

kruskal_effsize(Q10~Autotrophic.Heterotrophic,data=df_treat)

# Post-Hoc Test for Kruskal-Wallis-test

kmc_auhe <- kruskalmc(Q10~Autotrophic.Heterotrophic,data=df_treat)
test_auhe <- kmc_auhe$dif.com$difference 
names(test_auhe) <- row.names(kmc_auhe$dif.com)

# Define Letters

let_auhe <- multcompLetters(test_auhe, compare="<", threshold=0.05,
                             Letters=c(letters, LETTERS, "."),
                             reversed = FALSE)
letters_auhe <- data.frame(let_auhe$Letters)

# Define text for figure

letters_auhe$Zones <- row.names(letters_auhe)

# Plot the data (Figure 2S f) 

auhe_plot_fig <- ggplot(data=df_auhe_sum, aes(y=Q10,x=Autotrophic.Heterotrophic))+
  geom_point(size=4)+
  geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), 
                position = position_dodge(0.9),width = 0.25)+
  theme_bw(base_size = 14)+labs(x="Mixed vs. Heterotrophic", y = "Mean Q10")+
  scale_fill_manual(values=c("#FFCC00","#FF9900","#339300","#3399FF"))+
  geom_text(aes(label=paste("n =",N),x=Autotrophic.Heterotrophic,y=3.6), 
            position = position_dodge(0.9)) +
  geom_text(aes(x=Zones,y=3.8,
                label=let_auhe.Letters),data=letters_auhe)+
  scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+
  scale_x_discrete(label=c("Mixed","Heteorotophic"))+
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())+
  annotate("text",label = 'f)', x = Inf, y = -Inf, 
           hjust = 1.25, vjust = -0.25,size=5)


# Lab versus Field 
df_treat$ohneauto <- ifelse(df_treat$Autotrophic.Heterotrophic=="Autotrophic",NA,df_treat$Sequential.warming)
df_treat_bh <- na.omit(df_treat)

df_lf_sum <- summarySE(df_treat_bh, measurevar="Q10", groupvars="Type")

kruskal.test(Q10~Type,data=df_treat_bh)

# Test for effect size of Kruskal-Wallis Test

kruskal_effsize(Q10~Type,data=df_treat_bh)

kmc_lf <- kruskalmc(Q10~Type,data=df_treat_bh)
test_lf <- kmc_lf$dif.com$difference 
names(test_lf) <- row.names(kmc_lf$dif.com)

# Define Letters

let_lf <- multcompLetters(test_lf, compare="<", threshold=0.05,
                           Letters=c(letters, LETTERS, "."),
                           reversed = FALSE)
letters_lf <- data.frame(let_lf$Letters)

# Define text for figure

letters_lf$Zones <- row.names(letters_lf)

# Plot the data (Figure 2Sb) 

lf_plot_fig <- ggplot(data=df_lf_sum, aes(y=Q10,x=Type))+
  geom_point(size=4)+
  geom_errorbar(aes(ymin = Q10-sd, ymax = Q10+sd), 
                position = position_dodge(0.9),width = 0.25)+
  theme_bw(base_size = 14)+labs(x="Lab vs. Field", y = "Mean Q10")+
  geom_text(aes(label=paste("n =",N),x=Type,y=3.6), 
            position = position_dodge(0.9)) +
  geom_text(aes(x=Zones,y=3.8,
                label=let_lf.Letters),data=letters_lf)+
  scale_y_continuous(expand=c(0,0))+expand_limits(y=c(1,3.9))+
  scale_x_discrete(expand = c(0,0))+
  annotate("text",label = 'c)', x = Inf, y = -Inf, 
           hjust = 1.25, vjust = -0.25,size=5)

# Combine the experiment plots
library(gridExtra)
com_plot <- grid.arrange(lf_plot_fig,warm_plot_fig,treat_plot_fig,auhe_plot_fig,all_plot_fig,
                         layout_matrix = rbind(c(1, 2, 3,4),
                                               c(5, 5, 5,5)))
com_plot <- grid.arrange(climveg_fig,lf_plot_fig,warm_plot_fig,treat_plot_fig,auhe_plot_fig,
                         layout_matrix = rbind(c(1, 1),c(1, 1),c(2, 3),
                                               c(4,5)))
com_plot <- arrangeGrob(climveg_fig,lf_plot_fig,warm_plot_fig,treat_plot_fig,auhe_plot_fig,
                        layout_matrix = rbind(c(1, 1),c(1, 1),c(2, 3),
                                              c(4,5)))
ggsave("Kruskall_wallis.pdf",
       com_plot,width=20,height=30, unit="cm",dpi=600)
ggsave("Kruskall_wallis.tiff",
       com_plot,width=20,height=30, unit="cm",dpi=600)


#############################################################################
# Q10 prediction modelling (shown for local studies (df); same approach with
# with generalized table (df_gl), replace df with df_gl)
#
# Individual Variables (no changes)

# Define Formula
form_all2 <- as.formula(Q10~pH+C+N+CN+CEC+Bsat+MAT+MAP+Sand+Silt+Clay+NDVI+P+
                          Tmean+PET+Humidity) 
library(caret)

# Create Resample (80 % of input data and 20 Resamples)
dp_all <- createDataPartition(df$Q10, p=.80, times=20) 

# Define Monte-Carlo cross-validation and model selection 
# LGOCV = Leave-one-group-out-cross-validation

tr_all <- trainControl(method = "LGOCV",p=0.8, number=100,index=dp_all,
                       preProcOptions=list(center=T,scale=T),
                       selectionFunction = "oneSE") 

# Define predictors

impVars <- df[,c("pH","C","N","CN","CEC","Bsat","MAT","MAP","Sand",
                 "Silt","Clay","Tmean","P","NDVI","PET","Humidity")]

# Variable Inflation Factor

library(car)
all_vifs_test <- try(vif(lm(form_all2, data=df)), silent=TRUE)
if (class(all_vifs_test) == "try-error"){
  lm_alias <- alias(lm(form_all2, data=df))
  broken <- data.frame(lm_alias$Complete)
  broken_var <- row.names(broken)
  nam_var <- names(impVars) %in% broken_var
  impVars <- impVars[!nam_var]
  form_all_new <- as.formula(paste("Q10 ~ ", 
                                   paste(names(impVars), collapse=" + "),
                                   sep=""))
  all_vifs <- vif(lm(form_all_new, data=df))
} else {
  all_vifs <- all_vifs_test
}

if(any(all_vifs>5)){
  all_vifs <- as.data.frame(all_vifs)  
  while((nrow(all_vifs) > 2)& (max(all_vifs[, 1]) > 5)  &  
        (class(all_vifs) != "try-error")) {
    remove_var <- rownames(all_vifs)[which(all_vifs[, 1] == max(all_vifs[, 1]))]  
    impVars <- impVars[!names(impVars) %in% remove_var]  
    fullForm <- paste ("Q10 ~ ", paste (names(impVars), collapse=" + "), sep="")  
    fullMod <- lm(as.formula(fullForm), data=df)  
    all_vifs <- try(as.data.frame(vif(fullMod)), silent=TRUE) 
  }
  vif_filtered_variables <- names(fullMod$model)[!names(fullMod$model) %in% 
                                                   "Q10"]
} else {
  all_vifs <- as.data.frame(all_vifs)
  vif_filtered_variables <- rownames(all_vifs)[!names(all_vifs) %in% "Q10"]
}

# Filtered Formula

form <- as.formula(paste("Q10~",paste(vif_filtered_variables, collapse='+')))



################################################################################
# Rotated Principal Components Analysis as Preprocessing
# Estimation of defined Interactions
# pred.wI <- df_gl[,c(16,18:32)] (for generalized data set)

# Best data approach with Experimental data
pred.wI <- df[,c(16,18:32)]

pred.wI$'CN:P' <- pred.wI$CN/pred.wI$P
pred.wI$'Clay:MAT' <- pred.wI$MAT/pred.wI$Clay
pred.wI$'PET:NDVI' <- pred.wI$PET/pred.wI$NDVI
pred.wI$'MAP:PET' <- pred.wI$MAP/pred.wI$PET
pred.wI$PET <- NULL
pred.wI$'Bsat:CEC' <- pred.wI$Bsat/pred.wI$CEC
pred.wI$'Bsat:pH' <- pred.wI$Bsat/pred.wI$pH
pred.wI$'CEC:Clay' <- pred.wI$CEC/pred.wI$Clay
pred.wI$'MAT:MAP' <- pred.wI$MAT/pred.wI$MAP

# Remove Tmean for analyses without experiment modifier
#pred.wI$Tmean <- NULL
# With the experiment parameters
df_treat_all <- read.csv("Q10_treatment_db.csv",header=TRUE,sep=";")
df_treat_all <- df_treat_all[,c(1:14)]

df_all_data <- cbind.data.frame(df,df_treat_all[,c(12:14)])
write.csv(df_all_data,file="SRRW_Rscript/Database_best_wExp.csv",row.names = FALSE)


df_merged <- cbind.data.frame(df[c(16,18:32)],df_treat_all[,c(11:14)])
df_merged$Sequential.warming <- ifelse(df_merged$Sequential.warming=="warmed consecutively ",1,0)
df_merged$Treated.Added <- ifelse(df_merged$Treated.Added=="None",1,0)
df_merged <- df_merged[-which(df_merged$Autotrophic.Heterotrophic=="Autotrophic"),]
df_merged$Autotrophic.Heterotrophic <- ifelse(df_merged$Autotrophic.Heterotrophic=="Both",1,0)
df_merged$Type <- ifelse(df_merged$Type=="Field",1,0)
pred.wI <- df_merged

#pred.wI$'N:P' <- pred.wI$N/pred.wI$P
#pred.wI$MAT <- NULL
# Without experimental data
pred.wI <- pred.wI[,c(2:15,20:27)]

# Calculation of unrotated PCA and selection of PCs amount

sdx <- as.matrix(pred.wI)
nc <- ncol(sdx)
nr <- nrow(sdx)
rmin <- 1
rmax <- nc
sdx <- scale(sdx)
cx <- cov(sdx) # Covariance Matrix
ea <- svd(cx, nv=0)
nopc <- ncol(ea$u)
sdevalues <- matrix(rep(sqrt(ea$d), nopc), nrow = nopc, ncol=nopc,byrow = TRUE)
loadings <- ea$u*sdevalues
scores <- sdx %*% ea$u
scmx <- matrix(rep(colMeans(scores), nr), nrow = nr, ncol = nc, byrow = TRUE) 
scsx <- matrix(rep(sd(scores[,]), nr), nrow = nr, ncol = nc, byrow = TRUE) 
scsdx <- (scores-scmx)/scsx
scsdx <- scsdx[1:nr,1:nc]
varex <- ea$d / sum(ea$d) * 100
pc_or <- as.data.frame(matrix(NA, ncol=4, nrow=rmax))
colnames(pc_or) <- c("PC","EV","Variance","cumvar")
for(i in 1:rmax){
  pc_or[i,1] <- i
  pc_or[i,2] <- round(ea$d[i],3)
  pc_or[i,3] <- round(varex[i],3)
  pc_or[i,4] <- round(sum(varex[seq(1,i)]),3)
}
pc_or$EV <- as.numeric(pc_or$EV)

# Rotation of PCA with VARIMAX

vm <- varimax(loadings[,1:8],normalize=TRUE)
vm$rotmat <- vm$rotmat[,order(colSums(vm$loadings^2), decreasing = TRUE)]
rloadings <- loadings[,1:8] %*% vm$rotmat
library(corpcor)
rcoef <- pseudoinverse(as.matrix(cx)) %*% rloadings
rscores <- as.matrix(sdx) %*% rcoef
rscmx <- matrix(rep(colMeans(rscores), nrow(rscores)), nrow = nrow(rscores), 
                ncol = ncol(rscores), byrow = TRUE) 
rscsx <- matrix(rep(sd(rscores[,]), nrow(rscores)), nrow = nrow(rscores), 
                ncol = ncol(rscores), byrow = TRUE) 
rscsdx <- (rscores-rscmx)/rscsx
rscores <- rscsdx[1:nrow(rscores),1:8]
rvalues <- colSums(rloadings^2)
vmt <- varimax(loadings, normalize=TRUE)
vmtloadings <- loadings %*% vmt$rotmat
allrvalues <- colSums(vmtloadings^2)
rvarex <- rvalues / sum(allrvalues) * 100

pc_wr <- as.data.frame(matrix(NA, ncol=4, nrow=8))
colnames(pc_wr) <- c("PC", "Eigenvalue", "Variance", "Cumulative")
for(i in 1:8){
  pc_wr[i,1] <- i
  pc_wr[i,2] <- round(rvalues[i],3)
  pc_wr[i,3] <- round(rvarex[i], 3)
  pc_wr[i,4] <- round(sum(rvarex[seq(1,i)]),3)
}
library(lessR)
colnames(rscores) <- to("PC", ncol(rscores),same.size=FALSE)
colnames(rloadings) <- to("PC", ncol(rloadings),same.size=FALSE)
row.names(rloadings) <- colnames(pred.wI)
pca_wr.new <- data.frame(t(pc_wr[,-1]))
colnames(pca_wr.new) <- to("PC", ncol(pca_wr.new),same.size=FALSE)
pca_wr.new <- do.call(cbind,lapply(pca_wr.new,function(x)as.numeric(as.character(x))))
pca.allbind <- rbind.data.frame(pca_wr.new, round(rloadings,3))
row.names(pca.allbind)[1:3] <- c("Eigenvalue", "Variability (%)", "Cumulative (%)")

# Modelling approach with rPCs 
# Packages for prediction modelling

library(lars); library(leaps); library(Cubist);library(gbm)
library(elasticnet);library(ipred); library(e1071); 
library(randomForest);library(rpart)

# Modelling processes and variable importance estimation (varImp())
# Procdure for all models similiar:
# 1. Train the model with tuning (sometimes before defined) and the before
# defined cross-validation and model selection parameters 
# 2. Get metrics of selected model 
# 3. Calculate Variable Importance
# 4. Save the metrics and variable importance 

# Combine Q10 values without autotrophic respiration and rotated scores

Q10 <- df_treat$Q10
all_data_mod <- cbind.data.frame(Q10,rscores)
n <- to("PC", ncol(rscores),same.size=FALSE)

# Define formula
form2 <- as.formula(paste("Q10~",paste(n, collapse='+')))

# Define Resampling method
dp <- createDataPartition(all_data_mod$Q10, p=.80, times=20)

# Define cross-validation method
tr <- trainControl(method = "LGOCV",p=0.8, number=100,index=dp,
                   preProcOptions=list(center=T,scale=T),
                   selectionFunction = "oneSE",savePredictions = TRUE)

#Linear Regression (LM)

linreg <- train(form2, data=all_data_mod, method="lm",metric="RMSE", 
                trControl=tr)
res_lm <- linreg$results[,-1]
coef_lm <- as.data.frame(t(varImp(linreg$finalModel)))
endtab_lm <- merge(res_lm, coef_lm)
colnames(endtab_lm) <- paste("LM", colnames(endtab_lm), sep="_")

# LEAPS

leaps <- train(form2, data=all_data_mod ,method = "leapSeq",metric="RMSE",
               trControl=tr)
ss <- summary(leaps$finalModel)
leaps_reduced <- train(update(form2,as.formula(paste0("~ . - ",paste(
  colnames(ss$which)[!ss$which[leaps$finalModel$tuneValue[[1]],]],
  collapse="-")))),data=all_data_mod,method = "lm",trControl=tr)
arg_leaps <- leaps_reduced$results[,-1]
varleaps <- varImp(leaps_reduced$finalModel)
coef_leaps <- as.data.frame(t(varleaps))
endtab_leaps <- merge(arg_leaps, coef_leaps)
colnames(endtab_leaps) <- paste("LEAPS", colnames(endtab_leaps), sep = "_")

# Penalizing Models:
# LARS

lasso <- train(form2, data=all_data_mod, method = "lars",metric="RMSE",
               trControl=tr,
               tuneGrid = data.frame(fraction = seq(0.01,1,0.01)))
arg_lasso <- lasso$results[is.element(lasso$results$fraction,lasso$bestTune),]
coef_lasso <- as.matrix(coef(lasso$finalModel, mode = "fraction",
                             s= lasso$bestTune$fraction)*lasso$finalModel$normx)
coef_lasso <- as.data.frame(t(coef_lasso))
endtab_lasso <- merge(arg_lasso, coef_lasso)
colnames(endtab_lasso) <- paste("LARS", colnames(endtab_lasso), sep = "_")

# Elastic Net (ENET)

enetgrid <- expand.grid(.lambda= c(0, 0.01, .1),
                        .fraction=seq(.05,1,length=20))
enetmod <- train(form2, data=all_data_mod, method = "enet",metric="RMSE",
                 trControl=tr,tuneGrid=enetgrid)
arg_enet <- enetmod$results[is.element(rownames(enetmod$results),
                                       rownames(enetmod$bestTune)),]
coef_enet <- predict(enetmod$finalModel, type="coef",mode="fraction",
                     s=c(enetmod$bestTune$fraction,enetmod$bestTune$lampda))
varimp_enet <- as.data.frame(t(coef_enet$coefficients*enetmod$finalModel$normx))
endtab_enet <- merge(arg_enet, varimp_enet)
colnames(endtab_enet) <- paste("ENET", colnames(endtab_enet), sep="_")

# Tree models:
# Random Forest (RF)

rfmod <- train(form2, data=all_data_mod, method="rf",metric="RMSE",
               trControl=tr,ntrees=1000,
               importance=TRUE)

# rfmod2 <- train(form2, data=all_data_mod, method="rf",metric="RMSE",
#                trControl=tr,ntrees=1000,
#                importance=TRUE)

arg_rf <- rfmod$results[is.element(rownames(rfmod$results),
                                   rownames(rfmod$bestTune)),]
var_rfmod <- varImp(rfmod, scale=FALSE)
coef_rf <- as.data.frame(t(var_rfmod$importance))
endtab_rf <- merge(arg_rf,coef_rf)
colnames(endtab_rf) <- paste("RF", colnames(endtab_rf),sep="_")

# xyplot(Q10~predict(rfmod),xlab="Predicted Q10",ylab="Observed Q10",main="RF")
# xyplot(resid(rfmod)~predict(rfmod))

# Bagged CART (BAGGED)

treebagmod <- train(form2, data=all_data_mod, method="treebag", 
                    metric="RMSE",trControl=tr)
arg_treebag <- treebagmod$results
var_treebag <- varImp(treebagmod, scale=FALSE)
coef_treebag <- as.data.frame(t(var_treebag$importance))
endtab_treebag <- merge(arg_treebag,coef_treebag)
colnames(endtab_treebag) <- paste("BAGTREE", colnames(endtab_treebag), sep="_")

# Boosted CART (BOOSTED)

boostedmod <- train(form2, all_data_mod, metric="RMSE", method="gbm",
                    verbose=FALSE,
                    tuneGrid=expand.grid(.interaction.depth = seq(1,7,by=2),
                                         .n.trees = seq(10,100,by=10),
                                         .shrinkage = c(0.01, 0.1),
                                         .n.minobsinnode=5),
                    trControl=tr)
arg_boost <- boostedmod$results[is.element(rownames(boostedmod$results),
                                           rownames(boostedmod$bestTune)),]
var_boost <- varImp(boostedmod, scale=FALSE)
coef_boost <- as.data.frame(t(var_boost$importance))
endtab_boost <- merge(arg_boost,coef_boost)
colnames(endtab_boost) <- paste("BOOST", colnames(endtab_boost), sep="_")

# Cubist (CUBIST)

cubistmod <- train(form2,data=all_data_mod,method = "cubist", metric="RMSE",
                   trControl = tr, 
                   tuneGrid = expand.grid(.committees=c(1,5,10,50,75,100),
                                          .neighbors=c(1,3,5,7,9)))
arg_cubist <- cubistmod$results[is.element(rownames(cubistmod$results),
                                           rownames(cubistmod$bestTune)),]
var_cubist <- varImp(cubistmod, scale=FALSE)
coef_cubist <- as.data.frame(t(var_cubist$importance))
endtab_cubist <- merge(arg_cubist,coef_cubist)
colnames(endtab_cubist) <- paste("CUBIST", colnames(endtab_cubist), sep="_")

# Save results

results <- resamples(list("LM"=linreg,
                          "LEAPS"=leaps,
                          "LARS"=lasso,
                          "ENET"=enetmod,
                          "RF"=rfmod,
                          "BAGGED"=treebagmod,
                          "BOOSTED" = boostedmod,
                          "CUBIST"=cubistmod))
endtab <- cbind(endtab_lm, endtab_treebag, endtab_rf,
                endtab_lasso, endtab_leaps,endtab_boost,
                endtab_cubist, endtab_enet)
compare <- subset(endtab, select=c(LARS_RMSE, LARS_Rsquared, LEAPS_RMSE,
                                   LEAPS_Rsquared, CUBIST_RMSE, CUBIST_Rsquared,
                                   LM_RMSE, LM_Rsquared,ENET_RMSE, ENET_Rsquared,
                                   BAGTREE_RMSE,BAGTREE_Rsquared,BOOST_RMSE,
                                   BOOST_Rsquared,RF_RMSE,RF_Rsquared))

################################################################################
# Result Plots (Figure 1)

# Predicted vs observed (Figure 1a)

pred_rf <- predict(rfmod, all_data_mod) # Predict Q10

# Combine predicted and observed Q10 data

pred_rf <- data.frame(pred = pred_rf, Q10 = all_data_mod$Q10)

# Plot the results

rfplot_PCA <- ggplot(data = pred_rf, aes(y=pred, x= Q10))+
  geom_point()+
  geom_abline(intercept=0,slope=1,size=1.5,color="red")+ # 1:1-Line
  geom_smooth(method='lm',formula=y~x,level=0.99)+ # Regression line
  annotate("text",label = 'a)', x = Inf, y = -Inf, 
           hjust = 1.25, vjust = -0.25,size=5)+
  xlim(1,4.5)+ylim(1,4.5)+theme_bw()+
  labs(x="Q10 observed",y="Q10 predicted")+
  annotate("text",label=paste("RMSE = ",round(arg_rf$RMSE,2),"\nrRMSE = ",
                              round(arg_rf$RMSE/mean(Q10)*100,0),"%","\nR� = ",
                              round(arg_rf$Rsquared,2)),
           x=1,y=4.3,hjust=0,size=5)+
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14))

# Variable Importance Plot (only random forest)

importance_coef <- rbind(coef_rf, coef_boost, coef_cubist)
row.names(importance_coef) <- c("RF","Boosted","Cubist")
importance_coef <- data.frame(t(importance_coef))
importance_coef$pRF <- importance_coef$RF / sum(importance_coef$RF) * 100
importance_coef$rfround <- round(importance_coef$pRF,2)

# Best data
# Names of rPcs with experiment modifier data
importance_coef$names <- c("Plant growth conditions",
                           "Soil organic matter & fertility",
                           "Experiment specific modifiers","Clay & weathering",
                           "Soil solution chemistry","Soil CNP stoichiometry",
                           "Textural coarseness","Precipitation & Evapotranspiration")

# Names of rPcs without experiment modifier data
importance_coef$names <- c("Plant growth conditions",
                           "Texture & weathering","Temperature & weathering",
                           "Soil solution chemistry",
                           "Soil nutrient stoichiometry",
                           "Soil organic matter",
                           "Coarseness","Water use efficiency")

importance_coef$rfround <- round(importance_coef$rfround,1)

varI_plot_pca <- ggplot(importance_coef,aes(x=reorder(names, pRF),y=pRF))+
  geom_bar(stat="identity",width=0.8,fill="grey")+
  geom_text(aes(label=paste(rfround,"%",sep=" ")),hjust=1,size=5)+
  xlab("")+ylab("Relative Importance [%]")+
  annotate("text",label = 'b)', x = -Inf, y = Inf, 
           hjust = 1.25, vjust = -0.25,size=5)+
  coord_flip(ylim=c(5,max(importance_coef$pRF) + 0.2))+theme_bw()+
  scale_y_continuous(breaks = seq(5, 25, by = 5),expand=c(0,0))+
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14))

# Partial Dependence Plots
# Best data approach
library(pdp)
dpd_1_rf <- partial(rfmod, pred.var="PC1")
dpd_2_rf <- partial(rfmod, pred.var="PC2")
dpd_3_rf <- partial(rfmod, pred.var="PC3")
dpd_4_rf <- partial(rfmod, pred.var="PC4")
dpd_5_rf <- partial(rfmod, pred.var="PC5")
dpd_6_rf <- partial(rfmod, pred.var="PC6")
dpd_7_rf <- partial(rfmod, pred.var="PC7")
dpd_8_rf <- partial(rfmod, pred.var="PC8")


dpd_table <- as.data.frame(mapply(c, dpd_1_rf[,c(1,2)],dpd_2_rf[,c(1,2)],
                                  dpd_3_rf[,c(1,2)], dpd_4_rf[,c(1,2)],
                                  dpd_5_rf[,c(1,2)],dpd_6_rf[,c(1,2)],
                                  dpd_7_rf[,c(1,2)],dpd_8_rf[,c(1,2)]))
library(reshape2)

# Naming and inversion of rPCs with experiment data
dpd_table$x <- rep(seq(1,51,1),8)
dpd_table$x[c(1:51,307:357)] <- seq(51,1,-1)
pca_names <- c("Plant growth conditions",
           "Soil organic matter & fertility",
           "Experiment specific modifiers","Clay & weathering",
           "Soil solution chemistry","Soil CNP stoichiometry",
           "Textural coarseness","Precipitation &\nEvapotranspiration")

# Naming and inversion of rPCs without experiment data
dpd_table$x <- rep(seq(1,51,1),8)
dpd_table$x[c(103:153,256:306,358:408)] <- seq(51,1,-1)
pca_names <- c("Plant growth conditions",
                           "Texture & weathering","Temperature & weathering",
                           "Soil solution chemistry",
                           "Soil nutrient stoichiometry",
                           "Soil organic matter",
                           "Coarseness","Water use efficiency")

# Defining colours for plotting
cols <- c("#E31A1C","#FFD92F","#8DA0CB","#E76AC3","#FB9A99","#FF7F00","#1F78B4",
          "#33A02C")

# Add rPC names and prepare for plotting
dpd_table$names <- rep(pca_names,each=51)
dpd_table$names <- factor(dpd_table$names, levels = pca_names)

# Plot Best data results (Figure 1c)
dpd_pca_plot <- ggplot(dpd_table,aes(x=x,y=yhat,color=names))+
  geom_line(size=1.25)+theme_bw(base_size=14)+ylab("Predicted Q10")+
  xlab("rPC relative range")+
  guides(color=guide_legend(ncol=2))+scale_color_manual(values = cols)+
  annotate("text",label = 'c)', x = Inf, y = -Inf, hjust = 1.25, 
           vjust = -0.25,size=5)+
  scale_y_continuous(limits=c(1.7,3),expand=c(0,0))+
  # geom_segment(aes(x = 22, y = 2, xend = 22, yend = 2.8),linetype="dashed",
  #              size=1,colour="black")+
  # annotate("text",x=22.5,y=2.75,label="10�C \nIncubation temperature",
  #          hjust=0,size=5)+
  scale_x_continuous(breaks=c(min(dpd_table$x),max(dpd_table$x)),
                     labels=c("Low","High"),expand=c(0.03,0.03))+
  theme(legend.title=element_blank(),legend.justification=c(0,0), 
        legend.position=c(0.02,0.05),
        legend.background = element_rect(fill=NULL,size=0.5,linetype="solid",
                                         colour ="black"),
        axis.title.x=element_text(size=14))

# Generalized PCA results (Figure 3Sc)
dpd_pca_plot <- ggplot(dpd_table,aes(x=x,y=yhat,color=names))+
  geom_line(size=1.25)+theme_bw(base_size=14)+ylab("Predicted Q10")+
  xlab("rPC relative range")+
  guides(color=guide_legend(ncol=2))+scale_color_manual(values = cols)+
  annotate("text",label = 'c)', x = Inf, y = -Inf, hjust = 1.25, 
           vjust = -0.25,size=5)+
  scale_y_continuous(limits=c(1.9,2.8),expand=c(0,0))+ 
  geom_segment(aes(x = 33, y = 2, xend = 33, yend = 2.8),linetype="dashed",
               size=1,colour="black")+
  annotate("text",x=33.5,y=2.75,label="10�C \nIncubation temperature",
           hjust=0,size=4)+
  scale_x_continuous(breaks=c(min(dpd_table$x),max(dpd_table$x)),
                     labels=c("Low","High"),expand=c(0.03,0.03))+
  theme(legend.title=element_blank(),legend.justification=c(0,0), 
        legend.position=c(0.02,0.05),
        legend.background = element_rect(fill=NULL,size=0.5,linetype="solid",
                                         colour ="black"),
        axis.title.x=element_text(size=14))

# Arrange plots
library(gridExtra)

res_plot_fig <- arrangeGrob(rfplot_PCA,varI_plot_pca ,dpd_pca_plot,nrow=1)

# Save plots
# with experiment data
ggsave("res_experimentdata.pdf",res_plot_fig, 
       width=42,height=17, unit="cm",dpi=600 )
ggsave("res_experimentdata.tiff",res_plot_fig, 
       width=42,height=17, unit="cm",dpi=600 )

#without experiment data
# with experiment data
ggsave("res_without_experimentdata.pdf",res_plot_fig, 
       width=42,height=17, unit="cm",dpi=600 )
ggsave("res_without_experimentdata.tiff",res_plot_fig, 
       width=42,height=17, unit="cm",dpi=600 )

# Generalized data
ggsave("res_new_gl.pdf",res_plot_fig, 
       width=43,height=17, unit="cm",dpi=600 )
ggsave("res_new_gl.tiff",res_plot_fig, 
       width=43,height=17, unit="cm",dpi=600 )


# Test for Spatial Autocorrelation
# Get spatial data 

spdat <- SpatialPointsDataFrame(cbind(df$WGS.84.E, df$WGS.84.N), df)

# Weights matrix with the 10 nearest neighbors, in weights list form 
library(spdep)
lstw  <- nb2listw(knn2nb(knearneigh(spdat, k = 10)))

# Moran's test using the residuals of the model
# only shown for linear models (non-linear models not affected by spatial
# autocorrelation)

moran.test(residuals(linreg_all), lstw)
moran.test(residuals(lasso_all),lstw)
moran.test(residuals(enetmod_all),lstw)
moran.test(residuals(leaps_all),lstw)
moran.test(residuals(lasso),lstw)
moran.test(residuals(enetmod),lstw)
moran.test(residuals(leaps),lstw)
moran.test(residuals(linreg), lstw)

################################################################################
# Global Predictions with raster data

# Required packages
library(raster); library(rgdal)

# Global approach depending on individual model results
# Load of the individual raster files (see Tab. 1S for more sources)
# and adjustments for further processing (projecting to 0.5� resolution). Third
# party source files for global datasets: See methods.
C <- raster("1km/C.tif")
C <- projectRaster(C,N,method = "ngb")
CEC <- raster("1km/CEC.tif")
CEC <- projectRaster(CEC,N,method = "ngb")
Silt <- raster("1km/Silt.tif")
Silt <- projectRaster(Silt,N,method = "ngb")
Sand <- raster("1km/Sand.tif")
Sand <- projectRaster(Sand,N,method = "ngb")
Clay <- raster("1km/Clay.tif")
Clay <- projectRaster(Clay,N,method = "ngb")
pH <- raster("1km/pH.tif")
pH <- projectRaster(pH,N,method = "ngb")
N <- raster("1km/N.tif")
N <- raster("N.tif")
CN <- raster("1km/CN.tif")
CN <- raster("CN.tif")
Bsat <- raster("1km/Bsat.tif")
Bsat <- raster("Bsat.tif")
NDVI <- raster("1km/NDVI.tif")
NDVI <- projectRaster(NDVI,N,method = "ngb")
MAT <- raster("1km/MAT.tif")
MAT <- projectRaster(MAT,N,method = "ngb")
MAP <- raster("1km/MAP.tif")
MAP <- projectRaster(MAP,N,method = "ngb")
PET <- raster("1km/PET.tif")
PET <- projectRaster(PET,N,method = "ngb")
P <- raster("1km/P.tif")
P <- raster("E:/Masterarbeit/GIS/P.tif")
Humidity <- raster("1km/Humidity.tif")
Humidity <- projectRaster(Humidity,N,method = "ngb")

# RF-Model saved and loaded (if needed)
# rfmod_exp <- rfmod #with experimental data
# save(rfmod,file="rfmod.Rdata")
# load("2019_05_07254A_SRRW/rfmod.RData")
#
# Stack the loaded raster files (without Tmean)

preds <- stack(C,N,CN,Bsat,CEC,Silt,Sand,Clay,pH,NDVI,
               MAT,MAP,PET,P,Humidity)

# Remove files to save memory space
rm(C,N,CN,Bsat,CEC,Silt,Sand,Clay,pH,NDVI,
   MAT,MAP,PET,P,Humidity)

# Save raster as tif-file

mean_rf_1km <- raster("2019_05_07254A_SRRW/mean_rf_GIS.tif")

#Mapping PCA as input
pca_tab <- data.frame(pca.allbind[-c(1:3),])

CN_P <- calc(stack(CN,P),fun=function(x){x[1]/x[2]})
Clay_MAT <- calc(stack(Clay,MAT),fun=function(x){x[1]/x[2]})
PET_NDVI <- calc(stack(PET,NDVI),fun=function(x){x[1]/x[2]})
MAP_PET <- calc(stack(MAP,PET),fun=function(x){x[1]/x[2]})
Bsat_CEC <- calc(stack(Bsat,CEC),fun=function(x){x[1]/x[2]})
Bsat_pH <- calc(stack(Bsat,pH),fun=function(x){x[1]/x[2]})
CEC_Clay <- calc(stack(CEC,Clay),fun=function(x){x[1]/x[2]})
MAT_MAP <- calc(stack(MAT,MAP),fun=function(x){x[1]/x[2]})

# 1km rasters made in ArcMAP (much faster)
CN_P <- raster("1km/Ratios/CN_P.tif")
Clay_MAT <- raster("1km/Ratios/Clay_MAT.tif")
PET_NDVI <- raster("1km/Ratios/PET_NDVI.tif")
PET_NDVI <- crop(PET_NDVI,N)
MAP_PET  <- raster("1km/Ratios/MAP_PET.tif")
MAP_PET <- crop(MAP_PET,N)
Bsat_CEC <- raster("1km/Ratios/Bsat_CEC.tif")
Bsat_pH  <- raster("1km/Ratios/Bsat_pH.tif")
MAT_MAP <- raster("1km/Ratios/MAT_MAP.tif")
MAT_MAP <- crop(MAT_MAP,N)
CEC_Clay <- raster("1km/Ratios/CEC_Clay.tif")

e <- extent(-180, 180, -56.00083, 83.99917)
MAT <- setExtent(MAT, e, keepres=TRUE)
MAP <- setExtent(MAP, e, keepres=TRUE)
PET_NDVI <- setExtent(PET_NDVI, e, keepres=TRUE)
MAP_PET <- setExtent(MAP_PET, e, keepres=TRUE)
MAT_MAP <- setExtent(MAT_MAP,e,keepres = TRUE)
CEC_Clay <- setExtent(CEC_Clay,e,keepres=TRUE)
preds_pca <- stack(pH,C,N,CN,CEC,Bsat,MAT,MAP,Sand,Clay,Silt,P,
                   NDVI,Humidity,CN_P,Clay_MAT,PET_NDVI,MAP_PET,
                   Bsat_CEC,Bsat_pH,CEC_Clay,MAT_MAP)
pca_tab$names <- row.names(pca_tab)
pca_tab$names[15:22]<- c("CN_P","Clay_MAT","PET_NDVI","MAP_PET",
                         "Bsat_CEC","Bsat_pH","CEC_Clay","MAT_MAP")
PC1 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,1]+
    x[2]*pca_tab[2,1]+x[3]*pca_tab[3,1]+x[4]*pca_tab[4,1]+x[1]*pca_tab[4,1]+
    +x[5]*pca_tab[5,1]+x[6]*pca_tab[6,1]+x[7]*pca_tab[7,1]+x[8]*pca_tab[8,1]
  +x[9]*pca_tab[9,1]+x[10]*pca_tab[10,1]+x[11]*pca_tab[11,1]+x[12]*pca_tab[12,1]+x[13]*pca_tab[13,1]
  +x[14]*pca_tab[14,1]+x[15]*pca_tab[15,1]+x[16]*pca_tab[16,1]+x[17]*pca_tab[17,1]
  +x[18]*pca_tab[18,1]+x[19]*pca_tab[19,1]+x[20]*pca_tab[20,1]+x[21]*pca_tab[21,1]+x[22]*pca_tab[22,1]})
PC2 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,2]+
    x[2]*pca_tab[2,2]+x[3]*pca_tab[3,2]+x[4]*pca_tab[4,2]+x[1]*pca_tab[4,2]+
    +x[5]*pca_tab[5,2]+x[6]*pca_tab[6,2]+x[7]*pca_tab[7,2]+x[8]*pca_tab[8,2]
  +x[9]*pca_tab[9,2]+x[10]*pca_tab[10,2]+x[11]*pca_tab[11,2]+x[12]*pca_tab[12,2]+x[13]*pca_tab[13,2]
  +x[14]*pca_tab[14,2]+x[15]*pca_tab[15,2]+x[16]*pca_tab[16,2]+x[17]*pca_tab[17,2]
  +x[18]*pca_tab[18,2]+x[19]*pca_tab[19,2]+x[20]*pca_tab[20,2]+x[21]*pca_tab[21,2]+x[22]*pca_tab[22,2]})
PC3 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,3]+
    x[2]*pca_tab[2,3]+x[3]*pca_tab[3,3]+x[4]*pca_tab[4,3]+x[1]*pca_tab[4,3]+
    +x[5]*pca_tab[5,3]+x[6]*pca_tab[6,3]+x[7]*pca_tab[7,3]+x[8]*pca_tab[8,3]
  +x[9]*pca_tab[9,3]+x[10]*pca_tab[10,3]+x[11]*pca_tab[11,3]+x[12]*pca_tab[12,3]+x[13]*pca_tab[13,3]
  +x[14]*pca_tab[14,3]+x[15]*pca_tab[15,3]+x[16]*pca_tab[16,3]+x[17]*pca_tab[17,3]
  +x[18]*pca_tab[18,3]+x[19]*pca_tab[19,3]+x[20]*pca_tab[20,3]+x[21]*pca_tab[21,3]+x[22]*pca_tab[22,3]})
PC4 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,4]+
    x[2]*pca_tab[2,4]+x[3]*pca_tab[3,4]+x[4]*pca_tab[4,4]+x[1]*pca_tab[4,4]+
    +x[5]*pca_tab[5,4]+x[6]*pca_tab[6,4]+x[7]*pca_tab[7,4]+x[8]*pca_tab[8,4]
  +x[9]*pca_tab[9,4]+x[10]*pca_tab[10,4]+x[11]*pca_tab[11,4]+x[12]*pca_tab[12,4]+x[13]*pca_tab[13,4]
  +x[14]*pca_tab[14,4]+x[15]*pca_tab[15,4]+x[16]*pca_tab[16,4]+x[17]*pca_tab[17,4]
  +x[18]*pca_tab[18,4]+x[19]*pca_tab[19,4]+x[20]*pca_tab[20,4]+x[21]*pca_tab[21,4]+x[22]*pca_tab[22,1]})
PC5 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,5]+
    x[2]*pca_tab[2,5]+x[3]*pca_tab[3,5]+x[4]*pca_tab[4,5]+x[1]*pca_tab[4,5]+
    +x[5]*pca_tab[5,5]+x[6]*pca_tab[6,5]+x[7]*pca_tab[7,5]+x[8]*pca_tab[8,5]
  +x[9]*pca_tab[9,5]+x[10]*pca_tab[10,5]+x[11]*pca_tab[11,5]+x[12]*pca_tab[12,5]+x[13]*pca_tab[13,5]
  +x[14]*pca_tab[14,5]+x[15]*pca_tab[15,5]+x[16]*pca_tab[16,5]+x[17]*pca_tab[17,5]
  +x[18]*pca_tab[18,5]+x[19]*pca_tab[19,5]+x[20]*pca_tab[20,5]+x[21]*pca_tab[21,5]+x[22]*pca_tab[22,5]})
PC6 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,6]+
    x[2]*pca_tab[2,6]+x[3]*pca_tab[3,6]+x[4]*pca_tab[4,6]+x[1]*pca_tab[4,6]+
    +x[5]*pca_tab[5,6]+x[6]*pca_tab[6,6]+x[7]*pca_tab[7,6]+x[8]*pca_tab[8,6]
  +x[9]*pca_tab[9,6]+x[10]*pca_tab[10,6]+x[11]*pca_tab[11,6]+x[12]*pca_tab[12,6]+x[13]*pca_tab[13,6]
  +x[14]*pca_tab[14,6]+x[15]*pca_tab[15,6]+x[16]*pca_tab[16,6]+x[17]*pca_tab[17,6]
  +x[18]*pca_tab[18,6]+x[19]*pca_tab[19,6]+x[20]*pca_tab[20,6]+x[21]*pca_tab[21,6]+x[22]*pca_tab[22,6]})
PC7 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,7]+
    x[2]*pca_tab[2,7]+x[3]*pca_tab[3,7]+x[4]*pca_tab[4,7]+x[1]*pca_tab[4,7]+
    +x[5]*pca_tab[5,7]+x[6]*pca_tab[6,7]+x[7]*pca_tab[7,7]+x[8]*pca_tab[8,7]
  +x[9]*pca_tab[9,7]+x[10]*pca_tab[10,7]+x[11]*pca_tab[11,7]+x[12]*pca_tab[12,7]+x[13]*pca_tab[13,7]
  +x[14]*pca_tab[14,7]+x[15]*pca_tab[15,7]+x[16]*pca_tab[16,7]+x[17]*pca_tab[17,7]
  +x[18]*pca_tab[18,7]+x[19]*pca_tab[19,7]+x[20]*pca_tab[20,7]+x[21]*pca_tab[21,7]+x[22]*pca_tab[22,7]})
PC8 <- calc(preds_pca,fun=function(x){x[1]*pca_tab[1,8]+
    x[2]*pca_tab[2,8]+x[3]*pca_tab[3,8]+x[4]*pca_tab[4,8]+x[1]*pca_tab[4,8]+
    +x[5]*pca_tab[5,8]+x[6]*pca_tab[6,8]+x[7]*pca_tab[7,8]+x[8]*pca_tab[8,8]
  +x[9]*pca_tab[9,8]+x[10]*pca_tab[10,8]+x[11]*pca_tab[11,8]+x[12]*pca_tab[12,8]+x[13]*pca_tab[13,8]
  +x[14]*pca_tab[14,8]+x[15]*pca_tab[15,8]+x[16]*pca_tab[16,8]+x[17]*pca_tab[17,8]
  +x[18]*pca_tab[18,8]+x[19]*pca_tab[19,8]+x[20]*pca_tab[20,8]+x[21]*pca_tab[21,8]+x[22]*pca_tab[22,8]})


PC1[is.infinite(PC1)] <- NA
PC2[is.infinite(PC2)] <- NA
PC3[is.infinite(PC3)] <- NA
PC4[is.infinite(PC4)] <- NA
PC5[is.infinite(PC5)] <- NA
PC6[is.infinite(PC6)] <- NA
PC7[is.infinite(PC7)] <- NA
PC8[is.infinite(PC8)] <- NA

#Load PCA raster 1km made in ArcMap
PC1 <- raster("1km/PC_1km/PC1.tif")
PC2 <- raster("1km/PC_1km/PC2.tif")
PC3 <- raster("1km/PC_1km/PC3.tif")
PC4 <- raster("1km/PC_1km/PC4.tif")
PC5 <- raster("1km/PC_1km/PC5.tif")
PC6 <- raster("1km/PC_1km/PC6.tif")
PC7 <- raster("1km/PC_1km/PC7.tif")
PC8 <- raster("1km/PC_1km/PC8.tif")

pca_preds <- stack(PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8)
names(pca_preds) <- c("PC1","PC2","PC3","PC4","PC5","PC6","PC7","PC8")

rf_pred_pca <- raster::predict(object=pca_preds,model=rfmod)
#writeRaster(rf_pred_pca,"rf_pred_map.tif")
#rf_pred_pca <- raster("rf_pred_map.tif")
# Further processing again only shown for local values 
# Global Q10 map and Latitude plot

WorldData <- ggplot2::map_data("world")
library(dplyr)
WorldData %>% filter(region != "Antarctica") -> WorldData # Global shapefile
WorldData <- fortify(WorldData)
ewbrks <- c(seq(-180,180,40),0)
nsbrks <- seq(-60,80,20)
ewlbls <- unlist(lapply(ewbrks, function(x) 
  ifelse(x < 0, gsub("[-]","",paste(x, "�W")), ifelse(x > 0, paste(x, "�E"),x))))
nslbls <- unlist(lapply(nsbrks, function(x) 
  ifelse(x < 0, gsub("[-]","",paste(x, "�S")), ifelse(x > 0, paste(x, "�N"),x))))

# Climatezones Map derivaded from FAO Agrizones (prepared in ArcMAP 10.4)
# Remove polar and desert regions

clim_rast <- raster("E:/Masterarbeit/Statistik/Raster/1km/Clim_raster.tif") 
clim_mat <- matrix(c(0,1,2,3,4,NA,1,1,1,1),ncol=2)
clim_rast <- reclassify(clim_rast,clim_mat)
clim_rast <- setExtent(clim_rast,rf_pred_pca)
clim_rast<- projectRaster(clim_rast,rf_pred_pca,method="ngb")
p_class <- P
p_class[!is.na(p_class)] <- 1
mean_rf_oP <- calc(stack(rf_pred_pca,mean_rf_oP,p_class),fun=function(x){x[1]*x[2]*x[3]})
#mean_rf_oP <- calc(stack(rf_pred_pca,mean_rf_oP),fun=function(x){x[1]*x[2]})

# Load the 1km file made with ArcMAP
mean_rf_1km <- raster("E:/Masterarbeit/Statistik/Raster/1km/PC_1km/Pred_RF_1km_class.tif")

# Q10 raster as data frame for plotting

rf_df <- as(mean_rf_oP, "SpatialPixelsDataFrame")
rf_df <- as.data.frame(rf_df)
colnames(rf_df) <- c("value", "x", "y")

# Plot the Q10 with global map

map_rf <- ggplot()+geom_map(data=WorldData, map=WorldData,
                            aes(x=long, y=lat, group=group, map_id=region),
                            fill="grey", colour="grey", size=0.5)+
  geom_tile(data=rf_df, aes(x=x, y=y, fill=value), alpha=0.8)+
  annotate("text",label = 'a)', x = Inf, y = Inf, 
           hjust = 1.25, vjust = 1.25,size=5)+
  labs(fill="Q10")+xlab("")+ylab("")+
  scale_fill_gradientn(colours = rev(rainbow(5)),breaks=c(1.75,2,2.25,2.5,2.75,3),
                       values = c(0,0.1,0.2,0.25,0.3,0.35,0.4,0.5,0.75,1),
                       guide = guide_colourbar(direction = "horizontal",
                                               title.position = "top",
                                               title.hjust=0.5))+
  theme_bw()+
  scale_x_continuous(breaks = ewbrks, labels = ewlbls, expand = c(0, 0)) +
  scale_y_continuous(breaks = nsbrks, labels = nslbls)+
  theme(legend.position="bottom",
        legend.key.width=unit(2.25,"cm"),
        legend.margin=ggplot2::margin(-10,-10,0,0))


################################################################################
# Calculate differences between 1km map (extra made) and 0.5� map for assessment
# of potential bias due to data aggregation at coarser resolution.

mean_rf_1km <- projectRaster(mean_rf_1km,mean_rf_oP,method = "ngb")
Q10_dif <- calc(stack(mean_rf_oP,mean_rf_1km),
                fun=function(x){x[1]-x[2]})
Q10_mean <- cellStats(Q10_dif,mean)
Q10_sd <- cellStats(Q10_dif,sd)
Q10_per <- calc(stack(mean_rf_oP,mean_rf_1km),
                fun=function(x){(x[1]-x[2])/x[1]})
Q10_per_1 <- calc(Q10_per, fun = function(x){x[x < 0] <- x*(-1); return(x)})
Q10_m_per <- cellStats(Q10_per_1,mean)
Q10_sd_per <- cellStats(Q10_per_1,sd)

# Calculate differences between Hashimoto et al. 2015 Q10 map and the newly
# predicted Q10 map

Q10_hash <- raster("E:/Masterarbeit/Statistik/Raster/Respiration_Hashimoto/Q10_H15.tif")
Q10_hash <- projectRaster(Q10_hash,mean_rf_oP,method="ngb")
Q10_dif_H <- calc(stack(mean_rf_oP,Q10_hash),fun=function(x){(x[1]-x[2])/x[1]})
Q10_mean_H <- cellStats(Q10_dif_H,mean)
Q10_sd_H <- cellStats(Q10_dif_H,sd)

################################################################################
# Preparation for latitudial plot

# Load Global Land-Use map 

lu <- raster("E:/Masterarbeit/Statistik/Raster/1km/Land_Use_small.tif")
lu <- projectRaster(lu,mean_rf_oP,method = "ngb")
#lu <- setExtent(lu,bb,keepres = TRUE)

# Separate the data in Agriculture, Forest and Grassland

mat_agri <- matrix(c(1,2,3,4,1,NA,NA,NA),ncol=2)
lu_agri <- reclassify(lu,mat_agri)
# e <- extent(-180, 180, -55.63135, 83.86865)
# lu_agri <- setExtent(lu_agri, e, keepres=TRUE)
agri_tab <- calc(stack(lu_agri,mean_rf_oP),fun=function(x){x[1]*x[2]})

#agri_tab <- raster("E:/Masterarbeit/Statistik/Raster/1km/PC_1km/pred_agri_1km.tif")
agri_tab <- as.data.frame(as(agri_tab,"SpatialPixelsDataFrame"))
agri_tab <- aggregate(layer~y,data=agri_tab,mean)
agri_tab$name <- "Agriculture"

mat_for <- matrix(c(1,2,3,4,NA,1,NA,NA),ncol=2)
lu_for <- reclassify(lu,mat_for)
# e <- extent(-180, 180, -55.63135, 83.86865)
# lu_for <- setExtent(lu_for, e, keepres=TRUE)
for_tab <- calc(stack(lu_for,mean_rf_oP),fun=function(x){x[1]*x[2]})
for_tab <- as.data.frame(as(for_tab,"SpatialPixelsDataFrame"))
for_tab <- aggregate(layer~y,data=for_tab,mean)
for_tab$name <- "Forest"

mat_gras <- matrix(c(1,2,3,4,NA,NA,1,NA),ncol=2)
lu_gras <- reclassify(lu,mat_gras)
# e <- extent(-180, 180, -55.63135, 83.86865)
# lu_gras <- setExtent(lu_gras, e, keepres=TRUE)
gras_tab <- calc(stack(lu_gras,mean_rf_oP),fun=function(x){x[1]*x[2]})
gras_tab <- as.data.frame(as(gras_tab,"SpatialPixelsDataFrame"))
gras_tab <- aggregate(layer~y,data=gras_tab,mean)
gras_tab$name <- "Grassland"

# Combine the land-use data frames

df_lu <- rbind(agri_tab,for_tab,gras_tab)

# Plot the data as path

rf_lat_plot <- ggplot(df_lu)+
  geom_path(aes(x=layer,y=y,color=name),lineend = "square")+
  theme_bw(base_size=14)+
  geom_map(data=WorldData, map=WorldData,
           aes(x=long, y=lat, group=group, map_id=region),
           fill=NA, colour=NA, size=0)+
  annotate("text",label = 'b)', x = Inf, y = Inf, hjust = 1.25,
           vjust = 1.25,size=5)+ylab("")+xlab("Q10")+
  scale_y_continuous(breaks=nsbrks,labels = nslbls)+
  scale_x_continuous(breaks=c(2,2.5,3),limits=c(2,3),expand = c(0.01,0))+
  theme(legend.title=element_blank(), legend.position=c(0.7,0.5),
        legend.background = element_rect(fill=NULL,linetype="solid",
                                         colour ="black"),
        legend.key.size = unit(1, "cm"))

# Combine the global map and latitudial plot
# Get the plot heights 

p1.common.y <- ggplot_gtable(ggplot_build(map_rf))
p2.common.y <- ggplot_gtable(ggplot_build(rf_lat_plot))

# Copy the plot height from p1 to p2
p2.common.y$heights <- p1.common.y$heights

## Uncertainty Maps
# https://stats.stackexchange.com/questions/56895/do-the-predictions-of-a-random-forest-model-have-a-prediction-interval
pc_tab_global <- as.data.frame(pca_preds,xy=TRUE)
pc_tab_global <- data.frame(na.omit(pc_tab_global))
rf_pred_ci <- predict(rfmod$finalModel,newdata=pc_tab_global,predict.all=TRUE)

rf_pred_ci2 <- predict(rfmod$finalModel,newdata=pc_tab_global,predict.all=TRUE,interval="confidence")
rf_pred_ci3 <- predict(rfmod$finalModel,newdata=pc_tab_global,predict.all=TRUE,interval="prediction")
tab_ci <- data.frame(rf_pred_ci2$individual)
tab_ci$mean <- rowMeans(tab_ci[,c(1:500)])
tab_ci$sd <- apply(tab_ci[,1:500],1,sd)
tab_ci$ratio <- tab_ci$sd/tab_ci$mean
tab_ci$relunc <- tab_ci$ratio*100
tab_ci2 <- data.frame(rf_pred_ci3$individual)

pred.rf.int <- apply(rf_pred_ci$individual, 1, function(x) {
  c(mean(x) + c(-1.96, 1.96) * 0.608928, 
    quantile(x, c(0.05, 0.95)))
})
pred.rf.int <- t(pred.rf.int)

mean.rf <- rf_pred_ci$aggregate
sd.rf <- mean(sqrt(rfmod$finalModel$mse))
pred.rf.int3 <- cbind.data.frame(lower=mean.rf - 1.96*sd.rf, upper=mean.rf + 1.96*sd.rf,pred=mean.rf)
pred.rf.int3$Mean <- (pred.rf.int3$lower+pred.rf.int3$upper)/2
# Ratio between prediction interval and mean prediction (Domke et al. 2017 - Towards inventory- based estimates of soil organic carbon  in forests of the United States)
pred.rf.int3$ratio <- (pred.rf.int3$pred/pred.rf.int3$upper)*(pred.rf.int3$lower/pred.rf.int3$pred)


#uncer_tab <- cbind(pc_tab_global,pred.rf.int3)
uncer_tab <- cbind(pc_tab_global,tab_ci$relunc)
#uncer_rast <- rasterFromXYZ(uncer_tab[,c(1,2,15)],crs = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
uncer_rast <- rasterFromXYZ(uncer_tab[,c(1,2,11)],crs = "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
clim_rast <- raster("E:/Masterarbeit/Statistik/Raster/1km/Clim_raster.tif") 
clim_mat <- matrix(c(0,1,2,3,4,NA,1,1,1,1),ncol=2)
clim_rast<- projectRaster(clim_rast,uncer_rast,method="ngb")
clim_rast <- reclassify(clim_rast,clim_mat)
p_class <- P
p_class <- projectRaster(p_class,uncer_rast,method="ngb")
p_class[!is.na(p_class)] <- 1
uncer_rast <- calc(stack(uncer_rast,clim_rast,p_class),fun=function(x){x[1]*x[2]*x[3]})
uncer_tab <- as.data.frame(uncer_rast,xy=TRUE)
#uncer_tab$layer <- uncer_tab$layer*100

uncer_map <- ggplot()+geom_map(data=WorldData, map=WorldData,
                               aes(x=long, y=lat, group=group, map_id=region),
                               fill="grey", colour="grey", size=0.5)+
  geom_tile(data=uncer_tab,aes(x=x,y=y,fill=layer))+
  annotate("text",label = 'c)', x = Inf, y = Inf, 
           hjust = 1.25, vjust = 1.25,size=5)+
  labs(fill="Relative Uncertainty [%]")+xlab("")+ylab("")+
  scale_fill_gradientn(colours = rev(rainbow(7)),na.value = NA,
                       guide = guide_colourbar(direction = "horizontal",
                                               title.position = "top",
                                               title.hjust=0.5))+theme_bw()+
  scale_x_continuous(breaks = ewbrks, labels = ewlbls, expand = c(0, 0)) +
  scale_y_continuous(breaks = nsbrks, labels = nslbls)+
  theme(legend.position="bottom",
        legend.key.width=unit(2.25,"cm"),
        legend.margin=ggplot2::margin(-10,-10,0,0))

################################################################################
# Preparation for latitudial plot

# Load Global Land-Use map 

lu <- raster("E:/Masterarbeit/Statistik/Raster/1km/Land_Use_small.tif")
lu <- projectRaster(lu,mean_rf_oP,method = "ngb")
#lu <- setExtent(lu,bb,keepres = TRUE)

# Separate the data in Agriculture, Forest and Grassland

mat_agri <- matrix(c(1,2,3,4,1,NA,NA,NA),ncol=2)
lu_agri <- reclassify(lu,mat_agri)
# e <- extent(-180, 180, -55.63135, 83.86865)
lu_agriuncer_rast <- projectRaster(lu_agri, uncer_rast)
agri_tab_unc <- calc(stack(lu_agriuncer_rast,uncer_rast),fun=function(x){x[1]*x[2]})

#agri_tab <- raster("E:/Masterarbeit/Statistik/Raster/1km/PC_1km/pred_agri_1km.tif")
agri_tab_unc <- as.data.frame(as(agri_tab_unc,"SpatialPixelsDataFrame"))
agri_tab_unc <- aggregate(layer~y,data=agri_tab_unc,mean)
agri_tab_unc$name <- "Agriculture"

mat_for <- matrix(c(1,2,3,4,NA,1,NA,NA),ncol=2)
lu_for <- reclassify(lu,mat_for)
# e <- extent(-180, 180, -55.63135, 83.86865)
lu_foruncer_rast <- projectRaster(lu_for, uncer_rast)
for_tab_unc <- calc(stack(lu_foruncer_rast,uncer_rast),fun=function(x){x[1]*x[2]})
for_tab_unc <- as.data.frame(as(for_tab_unc,"SpatialPixelsDataFrame"))
for_tab_unc <- aggregate(layer~y,data=for_tab_unc,mean)
for_tab_unc$name <- "Forest"

mat_gras <- matrix(c(1,2,3,4,NA,NA,1,NA),ncol=2)
lu_gras <- reclassify(lu,mat_gras)
# e <- extent(-180, 180, -55.63135, 83.86865)
lu_grasuncer_rast <- projectRaster(lu_gras, uncer_rast)
gras_tab_unc <- calc(stack(lu_grasuncer_rast,uncer_rast),fun=function(x){x[1]*x[2]})
gras_tab_unc <- as.data.frame(as(gras_tab_unc,"SpatialPixelsDataFrame"))
gras_tab_unc <- aggregate(layer~y,data=gras_tab_unc,mean)
gras_tab_unc$name <- "Grassland"

# Combine the land-use data frames

df_lu_unc <- rbind(agri_tab_unc,for_tab_unc,gras_tab_unc)

# Plot the data as path

rf_lat_unc_plot <- ggplot(df_lu_unc)+
  geom_path(aes(x=layer,y=y,color=name),lineend = "square")+
  theme_bw(base_size=14)+
  geom_map(data=WorldData, map=WorldData,
           aes(x=long, y=lat, group=group, map_id=region),
           fill=NA, colour=NA, size=0)+
  annotate("text",label = 'd)', x = Inf, y = Inf, hjust = 1.25,
           vjust = 1.25,size=5)+ylab("")+xlab("Q10")+
  scale_y_continuous(breaks=nsbrks,labels = nslbls)+
  scale_x_continuous(limits=c(22,40),expand = c(0.01,0))+
  theme(legend.title=element_blank(), legend.position=c(0.3,0.5),
        legend.background = element_rect(fill=NULL,linetype="solid",
                                         colour ="black"),
        legend.key.size = unit(1, "cm"))

# Combine the global map and latitudial plot
# Get the plot heights 

p3.common.y <- ggplot_gtable(ggplot_build(uncer_map))
p4.common.y <- ggplot_gtable(ggplot_build(rf_lat_unc_plot))

# Copy the plot height from p1 to p2
p4.common.y$heights <- p3.common.y$heights

# Arrange the plots

library(gridExtra)
grid.arrange(p1.common.y,p2.common.y,p3.common.y,p4.common.y,ncol=2,widths=c(5,2))
map_plot <- arrangeGrob(p1.common.y,p2.common.y,ncol=2,widths=c(5,2))
map_plot_uncer <- arrangeGrob(p1.common.y,p2.common.y,p3.common.y,p4.common.y,ncol=2,widths=c(5,2))
# Save the plot (Figure 2)

ggsave("uncer_map2.pdf",map_plot_uncer,
       width=40,height=30,unit="cm",dpi=600)
ggsave("uncer_map2.tiff",map_plot_uncer,
       width=40,height=30,unit="cm",dpi=600)


################################################################################
# Test for Europe with and without Lab or Field Studies 
df_europe <- cbind.data.frame(df[c(8,9,16,17,18:32)],df_treat_all[,c(11:14)])

df_europe$Sequential.warming <- ifelse(df_europe$Sequential.warming=="warmed consecutively ",1,0)
df_europe$Treated.Added <- ifelse(df_europe$Treated.Added=="None",1,0)
df_europe <- df_europe[-which(df_europe$Autotrophic.Heterotrophic=="Autotrophic"),]
df_europe$Autotrophic.Heterotrophic <- ifelse(df_europe$Autotrophic.Heterotrophic=="Both",1,0)

# Load map of Europe
euro <- shapefile("E:/Masterarbeit/GIS/Europe/Europe_2.shp")

# Make data frame to point shapefile
coordinates(df_europe) <- ~WGS.84.E+WGS.84.N
proj4string(df_europe) <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")

# Get Points in Europe polygon
df_euro_only <- as.data.frame(df_europe[euro,])

# Get Field and Lab data frame
df_euro_Field <- df_euro_only[which(df_euro_only$Type=="Field"),] # n = 549
df_euro_Lab <- df_euro_only[which(df_euro_only$Type=="Lab"),] # n = 237

mean(df_euro_only$Q10)
sd(df_euro_only$Q10)

mean(df_euro_Field$Q10)
sd(df_euro_Field$Q10)

mean(df_euro_Lab$Q10)
sd(df_euro_Lab$Q10)

# Define data frames for PCA
pred.wI_euro <- df_euro_only[,c(3,5:19,21:23)]

pred.wI_euro$'CN:P' <- pred.wI_euro$CN/pred.wI_euro$P
pred.wI_euro$'Clay:MAT' <- pred.wI_euro$MAT/pred.wI_euro$Clay
pred.wI_euro$'PET:NDVI' <- pred.wI_euro$PET/pred.wI_euro$NDVI
pred.wI_euro$'MAP:PET' <- pred.wI_euro$MAP/pred.wI_euro$PET
pred.wI_euro$PET <- NULL
pred.wI_euro$'Bsat:CEC' <- pred.wI_euro$Bsat/pred.wI_euro$CEC
pred.wI_euro$'Bsat:pH' <- pred.wI_euro$Bsat/pred.wI_euro$pH
pred.wI_euro$'CEC:Clay' <- pred.wI_euro$CEC/pred.wI_euro$Clay
pred.wI_euro$'MAT:MAP' <- pred.wI_euro$MAT/pred.wI_euro$MAP

sdx_euro <- as.matrix(pred.wI_euro)
nc_euro <- ncol(sdx_euro)
nr_euro <- nrow(sdx_euro)
rmin_euro <- 1
rmax_euro <- nc_euro
sdx_euro <- scale(sdx_euro)
cx_euro <- cov(sdx_euro) # Covariance Matrix
ea_euro <- svd(cx_euro, nv=0)
nopc_euro <- ncol(ea_euro$u)
sdevalues_euro <- matrix(rep(sqrt(ea_euro$d), nopc_euro), nrow = nopc_euro, ncol=nopc_euro,byrow = TRUE)
loadings_euro <- ea_euro$u*sdevalues_euro
scores_euro <- sdx_euro %*% ea_euro$u
scmx_euro <- matrix(rep(colMeans(scores_euro), nr_euro), nrow = nr_euro, ncol = nc_euro, byrow = TRUE) 
scsx_euro <- matrix(rep(sd(scores_euro[,]), nr_euro), nrow = nr_euro, ncol = nc_euro, byrow = TRUE) 
scsdx_euro <- (scores_euro-scmx_euro)/scsx_euro
scsdx_euro <- scsdx_euro[1:nr_euro,1:nc_euro]
varex_euro <- ea_euro$d / sum(ea_euro$d) * 100
pc_or_euro <- as.data.frame(matrix(NA, ncol=4, nrow=rmax_euro))
colnames(pc_or_euro) <- c("PC","EV","Variance","cumvar")
for(i in 1:rmax_euro){
  pc_or_euro[i,1] <- i
  pc_or_euro[i,2] <- round(ea_euro$d[i],3)
  pc_or_euro[i,3] <- round(varex_euro[i],3)
  pc_or_euro[i,4] <- round(sum(varex_euro[seq(1,i)]),3)
}
pc_or_euro$EV <- as.numeric(pc_or_euro$EV)

# Rotation of PCA with VARIMAX

vm_euro <- varimax(loadings_euro[,1:8],normalize=TRUE)
vm_euro$rotmat <- vm_euro$rotmat[,order(colSums(vm_euro$loadings^2), decreasing = TRUE)]
rloadings_euro <- loadings_euro[,1:8] %*% vm_euro$rotmat
library(corpcor)
rcoef_euro <- pseudoinverse(as.matrix(cx_euro)) %*% rloadings_euro
rscores_euro <- as.matrix(sdx_euro) %*% rcoef_euro
rscmx_euro <- matrix(rep(colMeans(rscores_euro), nrow(rscores_euro)), nrow = nrow(rscores_euro), 
                ncol = ncol(rscores_euro), byrow = TRUE) 
rscsx_euro <- matrix(rep(sd(rscores_euro[,]), nrow(rscores_euro)), nrow = nrow(rscores_euro), 
                ncol = ncol(rscores_euro), byrow = TRUE) 
rscsdx_euro <- (rscores_euro-rscmx_euro)/rscsx_euro
rscores_euro <- rscsdx_euro[1:nrow(rscores_euro),1:8]
rvalues_euro <- colSums(rloadings_euro^2)
vmt_euro <- varimax(loadings_euro, normalize=TRUE)
vmtloadings_euro <- loadings_euro %*% vmt_euro$rotmat
allrvalues_euro <- colSums(vmtloadings_euro^2)
rvarex_euro <- rvalues_euro / sum(allrvalues_euro) * 100

pc_wr_euro <- as.data.frame(matrix(NA, ncol=4, nrow=8))
colnames(pc_wr_euro) <- c("PC", "Eigenvalue", "Variance", "Cumulative")
for(i in 1:8){
  pc_wr_euro[i,1] <- i
  pc_wr_euro[i,2] <- round(rvalues_euro[i],3)
  pc_wr_euro[i,3] <- round(rvarex_euro[i], 3)
  pc_wr_euro[i,4] <- round(sum(rvarex_euro[seq(1,i)]),3)
}
library(lessR)
colnames(rscores_euro) <- to("PC", ncol(rscores_euro),same.size=FALSE)
colnames(rloadings_euro) <- to("PC", ncol(rloadings_euro),same.size=FALSE)
row.names(rloadings_euro) <- colnames(pred.wI_euro)
pca_wr.new_euro <- data.frame(t(pc_wr_euro[,-1]))
colnames(pca_wr.new_euro) <- to("PC", ncol(pca_wr.new_euro),same.size=FALSE)
pca_wr.new_euro <- do.call(cbind,lapply(pca_wr.new_euro,function(x)as.numeric(as.character(x))))
pca.allbind_euro <- rbind.data.frame(pca_wr.new_euro, round(rloadings_euro,3))
row.names(pca.allbind_euro)[1:3] <- c("Eigenvalue", "Variability (%)", "Cumulative (%)")

Q10_euro <- df_euro_only$Q10
all_data_mod_euro <- cbind.data.frame(Q10_euro,rscores_euro)
colnames(all_data_mod_euro)[1] <- "Q10"
n_euro <- to("PC", ncol(rscores_euro),same.size=FALSE)

# Define formula
form2_euro <- as.formula(paste("Q10~",paste(n_euro, collapse='+')))

library(caret);library(randomForest)
# Define Resampling method
dp_euro <- createDataPartition(all_data_mod_euro$Q10, p=.80, times=20)

# Define cross-validation method
tr_euro <- trainControl(method = "LGOCV",p=0.8, number=100,index=dp_euro,
                   preProcOptions=list(center=T,scale=T),
                   selectionFunction = "oneSE")

# Random Forest (RF)

rfmod_euro <- train(form2_euro, data=all_data_mod_euro, method="rf",metric="RMSE",
               trControl=tr_euro,ntrees=1000,
               importance=TRUE)
arg_rf_euro <- rfmod_euro$results[is.element(rownames(rfmod_euro$results),
                                   rownames(rfmod_euro$bestTune)),]

# Predicted vs observed (Figure 1a)

pred_rf_euro <- predict(rfmod_euro, all_data_mod_euro) # Predict Q10

# Combine predicted and observed Q10 data

pred_rf_euro <- data.frame(pred = pred_rf_euro, Q10 = all_data_mod_euro$Q10)

# Plot the results

rfplot_PCA_euro <- ggplot(data = pred_rf_euro, aes(y=pred, x= Q10))+
  geom_point()+
  geom_abline(intercept=0,slope=1,size=1.5,color="red")+ # 1:1-Line
  geom_smooth(method='lm',formula=y~x,level=0.99)+ # Regression line
  annotate("text",label = 'a)', x = Inf, y = -Inf, 
           hjust = 1.25, vjust = -0.25,size=5)+
  xlim(1,4.5)+ylim(1,4.5)+theme_bw()+
  labs(x="Q10 observed",y="Q10 predicted")+
  annotate("text",label=paste("RMSE = ",round(arg_rf_euro$RMSE,2),"\nrRMSE = ",
                              round(arg_rf_euro$RMSE/mean(Q10)*100,0),"%","\nR� = ",
                              round(arg_rf_euro$Rsquared,2)),
           x=1,y=4.3,hjust=0,size=5)+
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14))

## Field
# Define data frames for PCA
pred.wI_field <- df_euro_Field[,c(3,5:19,21:23)]

pred.wI_field$'CN:P' <- pred.wI_field$CN/pred.wI_field$P
pred.wI_field$'Clay:MAT' <- pred.wI_field$MAT/pred.wI_field$Clay
pred.wI_field$'PET:NDVI' <- pred.wI_field$PET/pred.wI_field$NDVI
pred.wI_field$'MAP:PET' <- pred.wI_field$MAP/pred.wI_field$PET
pred.wI_field$PET <- NULL
pred.wI_field$'Bsat:CEC' <- pred.wI_field$Bsat/pred.wI_field$CEC
pred.wI_field$'Bsat:pH' <- pred.wI_field$Bsat/pred.wI_field$pH
pred.wI_field$'CEC:Clay' <- pred.wI_field$CEC/pred.wI_field$Clay
pred.wI_field$'MAT:MAP' <- pred.wI_field$MAT/pred.wI_field$MAP

sdx_field <- as.matrix(pred.wI_field)
nc_field <- ncol(sdx_field)
nr_field <- nrow(sdx_field)
rmin_field <- 1
rmax_field <- nc_field
sdx_field <- scale(sdx_field)
cx_field <- cov(sdx_field) # Covariance Matrix
ea_field <- svd(cx_field, nv=0)
nopc_field <- ncol(ea_field$u)
sdevalues_field <- matrix(rep(sqrt(ea_field$d), nopc_field), nrow = nopc_field, ncol=nopc_field,byrow = TRUE)
loadings_field <- ea_field$u*sdevalues_field
scores_field <- sdx_field %*% ea_field$u
scmx_field <- matrix(rep(colMeans(scores_field), nr_field), nrow = nr_field, ncol = nc_field, byrow = TRUE) 
scsx_field <- matrix(rep(sd(scores_field[,]), nr_field), nrow = nr_field, ncol = nc_field, byrow = TRUE) 
scsdx_field <- (scores_field-scmx_field)/scsx_field
scsdx_field <- scsdx_field[1:nr_field,1:nc_field]
varex_field <- ea_field$d / sum(ea_field$d) * 100
pc_or_field <- as.data.frame(matrix(NA, ncol=4, nrow=rmax_field))
colnames(pc_or_field) <- c("PC","EV","Variance","cumvar")
for(i in 1:rmax_field){
  pc_or_field[i,1] <- i
  pc_or_field[i,2] <- round(ea_field$d[i],3)
  pc_or_field[i,3] <- round(varex_field[i],3)
  pc_or_field[i,4] <- round(sum(varex_field[seq(1,i)]),3)
}
pc_or_field$EV <- as.numeric(pc_or_field$EV)

# Rotation of PCA with VARIMAX

vm_field <- varimax(loadings_field[,1:8],normalize=TRUE)
vm_field$rotmat <- vm_field$rotmat[,order(colSums(vm_field$loadings^2), decreasing = TRUE)]
rloadings_field <- loadings_field[,1:8] %*% vm_field$rotmat
library(corpcor)
rcoef_field <- pseudoinverse(as.matrix(cx_field)) %*% rloadings_field
rscores_field <- as.matrix(sdx_field) %*% rcoef_field
rscmx_field <- matrix(rep(colMeans(rscores_field), nrow(rscores_field)), nrow = nrow(rscores_field), 
                     ncol = ncol(rscores_field), byrow = TRUE) 
rscsx_field <- matrix(rep(sd(rscores_field[,]), nrow(rscores_field)), nrow = nrow(rscores_field), 
                     ncol = ncol(rscores_field), byrow = TRUE) 
rscsdx_field <- (rscores_field-rscmx_field)/rscsx_field
rscores_field <- rscsdx_field[1:nrow(rscores_field),1:8]
rvalues_field <- colSums(rloadings_field^2)
vmt_field <- varimax(loadings_field, normalize=TRUE)
vmtloadings_field <- loadings_field %*% vmt_field$rotmat
allrvalues_field <- colSums(vmtloadings_field^2)
rvarex_field <- rvalues_field / sum(allrvalues_field) * 100

pc_wr_field <- as.data.frame(matrix(NA, ncol=4, nrow=8))
colnames(pc_wr_field) <- c("PC", "Eigenvalue", "Variance", "Cumulative")
for(i in 1:8){
  pc_wr_field[i,1] <- i
  pc_wr_field[i,2] <- round(rvalues_field[i],3)
  pc_wr_field[i,3] <- round(rvarex_field[i], 3)
  pc_wr_field[i,4] <- round(sum(rvarex_field[seq(1,i)]),3)
}
library(lessR)
colnames(rscores_field) <- to("PC", ncol(rscores_field),same.size=FALSE)
colnames(rloadings_field) <- to("PC", ncol(rloadings_field),same.size=FALSE)
row.names(rloadings_field) <- colnames(pred.wI_field)
pca_wr.new_field <- data.frame(t(pc_wr_field[,-1]))
colnames(pca_wr.new_field) <- to("PC", ncol(pca_wr.new_field),same.size=FALSE)
pca_wr.new_field <- do.call(cbind,lapply(pca_wr.new_field,function(x)as.numeric(as.character(x))))
pca.allbind_field <- rbind.data.frame(pca_wr.new_field, round(rloadings_field,3))
row.names(pca.allbind_field)[1:3] <- c("Eigenvalue", "Variability (%)", "Cumulative (%)")

Q10_field <- df_euro_Field$Q10
all_data_mod_field <- cbind.data.frame(Q10_field,rscores_field)
colnames(all_data_mod_field)[1] <- "Q10"
n_field <- to("PC", ncol(rscores_field),same.size=FALSE)

# Define formula
form2_field <- as.formula(paste("Q10~",paste(n_field, collapse='+')))

library(caret);library(randomForest)
# Define Resampling method
dp_field <- createDataPartition(all_data_mod_field$Q10, p=.80, times=20)

# Define cross-validation method
tr_field <- trainControl(method = "LGOCV",p=0.8, number=100,index=dp_field,
                        preProcOptions=list(center=T,scale=T),
                        selectionFunction = "oneSE")

# Random Forest (RF)

rfmod_field <- train(form2_field, data=all_data_mod_field, method="rf",metric="RMSE",
                    trControl=tr_field,ntrees=1000,
                    importance=TRUE)
arg_rf_field <- rfmod_field$results[is.element(rownames(rfmod_field$results),
                                             rownames(rfmod_field$bestTune)),]

# Predicted vs observed (Figure 1a)

pred_rf_field <- predict(rfmod_field, all_data_mod_field) # Predict Q10

# Combine predicted and observed Q10 data

pred_rf_field <- data.frame(pred = pred_rf_field, Q10 = all_data_mod_field$Q10)

# Plot the results

rfplot_PCA_field <- ggplot(data = pred_rf_field, aes(y=pred, x= Q10))+
  geom_point()+
  geom_abline(intercept=0,slope=1,size=1.5,color="red")+ # 1:1-Line
  geom_smooth(method='lm',formula=y~x,level=0.99)+ # Regression line
  annotate("text",label = 'b)', x = Inf, y = -Inf, 
           hjust = 1.25, vjust = -0.25,size=5)+
  xlim(1,4.5)+ylim(1,4.5)+theme_bw()+
  labs(x="Q10 observed",y="Q10 predicted")+
  annotate("text",label=paste("RMSE = ",round(arg_rf_field$RMSE,2),"\nrRMSE = ",
                              round(arg_rf_field$RMSE/mean(Q10)*100,0),"%","\nR� = ",
                              round(arg_rf_field$Rsquared,2)),
           x=1,y=4.3,hjust=0,size=5)+
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14))

## Lab
# Define data frames for PCA
pred.wI_lab <- df_euro_Lab[,c(3,5:19,21:23)]

pred.wI_lab$'CN:P' <- pred.wI_lab$CN/pred.wI_lab$P
pred.wI_lab$'Clay:MAT' <- pred.wI_lab$MAT/pred.wI_lab$Clay
pred.wI_lab$'PET:NDVI' <- pred.wI_lab$PET/pred.wI_lab$NDVI
pred.wI_lab$'MAP:PET' <- pred.wI_lab$MAP/pred.wI_lab$PET
pred.wI_lab$PET <- NULL
pred.wI_lab$'Bsat:CEC' <- pred.wI_lab$Bsat/pred.wI_lab$CEC
pred.wI_lab$'Bsat:pH' <- pred.wI_lab$Bsat/pred.wI_lab$pH
pred.wI_lab$'CEC:Clay' <- pred.wI_lab$CEC/pred.wI_lab$Clay
pred.wI_lab$'MAT:MAP' <- pred.wI_lab$MAT/pred.wI_lab$MAP

sdx_lab <- as.matrix(pred.wI_lab)
nc_lab <- ncol(sdx_lab)
nr_lab <- nrow(sdx_lab)
rmin_lab <- 1
rmax_lab <- nc_lab
sdx_lab <- scale(sdx_lab)
cx_lab <- cov(sdx_lab) # Covariance Matrix
ea_lab <- svd(cx_lab, nv=0)
nopc_lab <- ncol(ea_lab$u)
sdevalues_lab <- matrix(rep(sqrt(ea_lab$d), nopc_lab), nrow = nopc_lab, ncol=nopc_lab,byrow = TRUE)
loadings_lab <- ea_lab$u*sdevalues_lab
scores_lab <- sdx_lab %*% ea_lab$u
scmx_lab <- matrix(rep(colMeans(scores_lab), nr_lab), nrow = nr_lab, ncol = nc_lab, byrow = TRUE) 
scsx_lab <- matrix(rep(sd(scores_lab[,]), nr_lab), nrow = nr_lab, ncol = nc_lab, byrow = TRUE) 
scsdx_lab <- (scores_lab-scmx_lab)/scsx_lab
scsdx_lab <- scsdx_lab[1:nr_lab,1:nc_lab]
varex_lab <- ea_lab$d / sum(ea_lab$d) * 100
pc_or_lab <- as.data.frame(matrix(NA, ncol=4, nrow=rmax_lab))
colnames(pc_or_lab) <- c("PC","EV","Variance","cumvar")
for(i in 1:rmax_lab){
  pc_or_lab[i,1] <- i
  pc_or_lab[i,2] <- round(ea_lab$d[i],3)
  pc_or_lab[i,3] <- round(varex_lab[i],3)
  pc_or_lab[i,4] <- round(sum(varex_lab[seq(1,i)]),3)
}
pc_or_lab$EV <- as.numeric(pc_or_lab$EV)

# Rotation of PCA with VARIMAX

vm_lab <- varimax(loadings_lab[,1:8],normalize=TRUE)
vm_lab$rotmat <- vm_lab$rotmat[,order(colSums(vm_lab$loadings^2), decreasing = TRUE)]
rloadings_lab <- loadings_lab[,1:8] %*% vm_lab$rotmat
library(corpcor)
rcoef_lab <- pseudoinverse(as.matrix(cx_lab)) %*% rloadings_lab
rscores_lab <- as.matrix(sdx_lab) %*% rcoef_lab
rscmx_lab <- matrix(rep(colMeans(rscores_lab), nrow(rscores_lab)), nrow = nrow(rscores_lab), 
                      ncol = ncol(rscores_lab), byrow = TRUE) 
rscsx_lab <- matrix(rep(sd(rscores_lab[,]), nrow(rscores_lab)), nrow = nrow(rscores_lab), 
                      ncol = ncol(rscores_lab), byrow = TRUE) 
rscsdx_lab <- (rscores_lab-rscmx_lab)/rscsx_lab
rscores_lab <- rscsdx_lab[1:nrow(rscores_lab),1:8]
rvalues_lab <- colSums(rloadings_lab^2)
vmt_lab <- varimax(loadings_lab, normalize=TRUE)
vmtloadings_lab <- loadings_lab %*% vmt_lab$rotmat
allrvalues_lab <- colSums(vmtloadings_lab^2)
rvarex_lab <- rvalues_lab / sum(allrvalues_lab) * 100

pc_wr_lab <- as.data.frame(matrix(NA, ncol=4, nrow=8))
colnames(pc_wr_lab) <- c("PC", "Eigenvalue", "Variance", "Cumulative")
for(i in 1:8){
  pc_wr_lab[i,1] <- i
  pc_wr_lab[i,2] <- round(rvalues_lab[i],3)
  pc_wr_lab[i,3] <- round(rvarex_lab[i], 3)
  pc_wr_lab[i,4] <- round(sum(rvarex_lab[seq(1,i)]),3)
}
library(lessR)
colnames(rscores_lab) <- to("PC", ncol(rscores_lab),same.size=FALSE)
colnames(rloadings_lab) <- to("PC", ncol(rloadings_lab),same.size=FALSE)
row.names(rloadings_lab) <- colnames(pred.wI_lab)
pca_wr.new_lab <- data.frame(t(pc_wr_lab[,-1]))
colnames(pca_wr.new_lab) <- to("PC", ncol(pca_wr.new_lab),same.size=FALSE)
pca_wr.new_lab <- do.call(cbind,lapply(pca_wr.new_lab,function(x)as.numeric(as.character(x))))
pca.allbind_lab <- rbind.data.frame(pca_wr.new_lab, round(rloadings_lab,3))
row.names(pca.allbind_lab)[1:3] <- c("Eigenvalue", "Variability (%)", "Cumulative (%)")

Q10_lab <- df_euro_Lab$Q10
all_data_mod_lab <- cbind.data.frame(Q10_lab,rscores_lab)
colnames(all_data_mod_lab)[1] <- "Q10"
n_lab <- to("PC", ncol(rscores_lab),same.size=FALSE)

# Define formula
form2_lab <- as.formula(paste("Q10~",paste(n_lab, collapse='+')))

library(caret);library(randomForest)
# Define Resampling method
dp_lab <- createDataPartition(all_data_mod_lab$Q10, p=.80, times=20)

# Define cross-validation method
tr_lab <- trainControl(method = "LGOCV",p=0.8, number=100,index=dp_lab,
                         preProcOptions=list(center=T,scale=T),
                         selectionFunction = "oneSE")

# Random Forest (RF)

rfmod_lab <- train(form2_lab, data=all_data_mod_lab, method="rf",metric="RMSE",
                     trControl=tr_lab,ntrees=1000,
                     importance=TRUE)
arg_rf_lab <- rfmod_lab$results[is.element(rownames(rfmod_lab$results),
                                               rownames(rfmod_lab$bestTune)),]

# Predicted vs observed (Figure 1a)

pred_rf_lab <- predict(rfmod_lab, all_data_mod_lab) # Predict Q10

# Combine predicted and observed Q10 data

pred_rf_lab <- data.frame(pred = pred_rf_lab, Q10 = all_data_mod_lab$Q10)

# Plot the results

rfplot_PCA_lab <- ggplot(data = pred_rf_lab, aes(y=pred, x= Q10))+
  geom_point()+
  geom_abline(intercept=0,slope=1,size=1.5,color="red")+ # 1:1-Line
  geom_smooth(method='lm',formula=y~x,level=0.99)+ # Regression line
  annotate("text",label = 'c)', x = Inf, y = -Inf, 
           hjust = 1.25, vjust = -0.25,size=5)+
  xlim(1,4.5)+ylim(1,4.5)+theme_bw()+
  labs(x="Q10 observed",y="Q10 predicted")+
  annotate("text",label=paste("RMSE = ",round(arg_rf_lab$RMSE,2),"\nrRMSE = ",
                              round(arg_rf_lab$RMSE/mean(Q10)*100,0),"%","\nR� = ",
                              round(arg_rf_lab$Rsquared,2)),
           x=1,y=4.3,hjust=0,size=5)+
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14))

# Arrange plots
library(gridExtra)

euro_plot_fig <- arrangeGrob(rfplot_PCA_euro,rfplot_PCA_field,rfplot_PCA_lab ,nrow=1)

# Save plots
# with experiment data
ggsave("euro_mod_lf.pdf",euro_plot_fig, 
       width=42,height=17, unit="cm",dpi=600 )
ggsave("euro_mod_lf.tiff",euro_plot_fig, 
       width=42,height=17, unit="cm",dpi=600 )

##############################################################################
# Analyses of soil taxonomy
library(Rmisc); library(ggplot2); library(gridExtra)
library(pgirmess); library(multcompView)
# Load table with Q10 and soil taxonomy data

q10_tax <- read.csv("E:/Masterarbeit/Statistik/Raster/Soil_Taxonomy/Q10_soiltaxo.csv",header=TRUE,sep=";")

q10_tax2 <- q10_tax[,c(1,13,17,35,36)]

legend <- read.csv("E:/Masterarbeit/Statistik/Raster/Soil_Taxonomy/Taxo_legend.csv",header=TRUE,sep=";",stringsAsFactors = FALSE,na.string="")
# Reclassify the subgroup to bigger groups
q10_tax2$USDA <- legend$USDA[match(q10_tax2$TAXOUSDA,legend$Number_USDA)]
q10_tax2$WRB<- legend$WRB[match(q10_tax2$TAXNWRB,legend$Number_WRB)]

# USDA
kmc_usda <- kruskalmc(Q10~USDA,data=q10_tax2)
test_usda <- kmc_usda$dif.com$difference 
names(test_usda) <- row.names(kmc_usda$dif.com)

usda_sum <- summarySE(q10_tax2, measurevar="Q10", groupvars="USDA")
# Define Letters

let_usda <- multcompLetters(test_usda, compare="<", threshold=0.05,
                            Letters=c(letters, LETTERS, "."),
                            reversed = FALSE)
letters_usda <- data.frame(let_usda$Letters)

# Define text for figure

letters_usda$Zones <- row.names(letters_usda)

usda_all <- merge(usda_sum,letters_usda,by.x = "USDA",
                  by.y = "Zones")

# WRB
kmc_wrb <- kruskalmc(Q10~WRB,data=q10_tax2)
test_wrb <- kmc_wrb$dif.com$difference 
names(test_wrb) <- row.names(kmc_wrb$dif.com)

wrb_sum <- summarySE(q10_tax2, measurevar="Q10", groupvars="WRB")
# Define Letters

let_wrb <- multcompLetters(test_wrb, compare="<", threshold=0.05,
                           Letters=c(letters, LETTERS, "."),
                           reversed = FALSE)
letters_wrb <- data.frame(let_wrb$Letters)

# Define text for figure

letters_wrb$Zones <- row.names(letters_wrb)

wrb_all <- merge(wrb_sum,letters_wrb,by.x = "WRB",
                 by.y = "Zones")

# Landnutzungen
q10_tax2$Vegetation <- as.character(q10_tax2$Vegetation)
# 1 Forest

q10_taxfor <- q10_tax2[which(q10_tax2$Vegetation=="forest"),]

# USDA
kmc_usda_for <- kruskalmc(Q10~USDA,data=q10_taxfor)
test_usda_for <- kmc_usda_for$dif.com$difference 
names(test_usda_for) <- row.names(kmc_usda_for$dif.com)

usda_sum_for <- summarySE(q10_taxfor, measurevar="Q10", groupvars="USDA")
# Define Letters

let_usda_for <- multcompLetters(test_usda_for, compare="<", threshold=0.05,
                                Letters=c(letters, LETTERS, "."),
                                reversed = FALSE)
letters_usda_for <- data.frame(let_usda_for$Letters)

# Define text for figure

letters_usda_for$Zones <- row.names(letters_usda_for)

usda_for <- merge(usda_sum_for,letters_usda_for,by.x = "USDA",
                  by.y = "Zones")

# WRB
kmc_wrb_for <- kruskalmc(Q10~WRB,data=q10_taxfor)
test_wrb_for <- kmc_wrb_for$dif.com$difference 
names(test_wrb_for) <- row.names(kmc_wrb_for$dif.com)

wrb_sum_for <- summarySE(q10_taxfor, measurevar="Q10", groupvars="WRB")
# Define Letters

let_wrb_for <- multcompLetters(test_wrb_for, compare="<", threshold=0.05,
                               Letters=c(letters, LETTERS, "."),
                               reversed = FALSE)
letters_wrb_for <- data.frame(let_wrb_for$Letters)

# Define text for figure

letters_wrb_for$Zones <- row.names(letters_wrb_for)

wrb_for <- merge(wrb_sum_for,letters_wrb_for,by.x = "WRB",
                 by.y = "Zones")
# 2 Grassland

q10_taxgras <- q10_tax2[which(q10_tax2$Vegetation=="grassland"),]

# USDA
kmc_usda_gras <- kruskalmc(Q10~USDA,data=q10_taxgras)
test_usda_gras <- kmc_usda_gras$dif.com$difference 
names(test_usda_gras) <- row.names(kmc_usda_gras$dif.com)

usda_sum_gras <- summarySE(q10_taxgras, measurevar="Q10", groupvars="USDA")
# Define Letters

let_usda_gras <- multcompLetters(test_usda_gras, compare="<", threshold=0.05,
                                 Letters=c(letters, LETTERS, "."),
                                 reversed = FALSE)
letters_usda_gras <- data.frame(let_usda_gras$Letters)

# Define text gras figure

letters_usda_gras$Zones <- row.names(letters_usda_gras)

usda_gras <- merge(usda_sum_gras,letters_usda_gras,by.x = "USDA",
                   by.y = "Zones")

# WRB
kmc_wrb_gras <- kruskalmc(Q10~WRB,data=q10_taxgras)
test_wrb_gras <- kmc_wrb_gras$dif.com$difference 
names(test_wrb_gras) <- row.names(kmc_wrb_gras$dif.com)

wrb_sum_gras <- summarySE(q10_taxgras, measurevar="Q10", groupvars="WRB")
# Define Letters

let_wrb_gras <- multcompLetters(test_wrb_gras, compare="<", threshold=0.05,
                                Letters=c(letters, LETTERS, "."),
                                reversed = FALSE)
letters_wrb_gras <- data.frame(let_wrb_gras$Letters)

# Define text gras figure

letters_wrb_gras$Zones <- row.names(letters_wrb_gras)

wrb_gras <- merge(wrb_sum_gras,letters_wrb_gras,by.x = "WRB",
                  by.y = "Zones")

# 3 Agriculture

q10_taxagri <- q10_tax2[which(q10_tax2$Vegetation=="agriculture"),]

# USDA
kmc_usda_agri <- kruskalmc(Q10~USDA,data=q10_taxagri)
test_usda_agri <- kmc_usda_agri$dif.com$difference 
names(test_usda_agri) <- row.names(kmc_usda_agri$dif.com)

usda_sum_agri <- summarySE(q10_taxagri, measurevar="Q10", groupvars="USDA")
# Define Letters

let_usda_agri <- multcompLetters(test_usda_agri, compare="<", threshold=0.05,
                                 Letters=c(letters, LETTERS, "."),
                                 reversed = FALSE)
letters_usda_agri <- data.frame(let_usda_agri$Letters)

# Define text agri figure

letters_usda_agri$Zones <- row.names(letters_usda_agri)

usda_agri <- merge(usda_sum_agri,letters_usda_agri,by.x = "USDA",
                   by.y = "Zones")
# WRB
kmc_wrb_agri <- kruskalmc(Q10~WRB,data=q10_taxagri)
test_wrb_agri <- kmc_wrb_agri$dif.com$difference 
names(test_wrb_agri) <- row.names(kmc_wrb_agri$dif.com)

wrb_sum_agri <- summarySE(q10_taxagri, measurevar="Q10", groupvars="WRB")
# Define Letters

let_wrb_agri <- multcompLetters(test_wrb_agri, compare="<", threshold=0.05,
                                Letters=c(letters, LETTERS, "."),
                                reversed = FALSE)
letters_wrb_agri <- data.frame(let_wrb_agri$Letters)

# Define text agri figure

letters_wrb_agri$Zones <- row.names(letters_wrb_agri)

wrb_agri <- merge(wrb_sum_agri,letters_wrb_agri,by.x = "WRB",
                  by.y = "Zones")