################################################## # R code for analysis of CERES dataset ## # ## ################################################## library(readxl) library(gplots) #library(car) library(MASS) library(klaR) #library(DAAG) #library(ggfortify) # Read data data1 <- read_excel("F:/Hohenheim_Feb_2021/Hohenheim_May_2019/Dataset/CERES/Summary 2018-19-7.xlsx", sheet = "Data") var <- c("...1", "Assessm", "1subcen", "2subrat2", "3maxcen", "4maxrat3", "5sumcen", "6sumrat2") data <- data1[,var] # correct clas for sample 5 and 14 data[data$...1==5,]$Assessm <- "Drift" data[data$...1==14,]$Assessm <- "Drift" # PCA d <- data.frame(Assessm = data$Assessm, scale(data[,c(3:8)])) d$Assessm <- as.factor(d$Assessm) heatmap.2(as.matrix(d[,2:7]), density.info="none", trace="none", xlab = "Variables", ylab="", labRow = paste(row.names(d), d[,1], sep = " ") , cexCol = 1, cexRow = 0.7, margins = c(4,5), srtCol = 0, key.title = "", keysize = 1) pc <- prcomp(d[,2:7]) plot(pc) summary(pc) biplot(pc, col=c("red", "green", "blue")) p <- autoplot(pc, data = d, colour = 'Assessm', size=0, type=19, label=T, label.size=5, label.show.legend=T, asp = 1, loadings=T, loadings.colour="grey", loadings.label=T, loadings.label.colour='grey', loadings.label.vjust = 0.95, loadings.label.repel=T) p + theme(legend.position = "none") p + scale_y_continuous(limits=c(-0.75, 0.40), breaks=seq(-0.75, 0.5, 0.25)) + scale_x_continuous(limits=c(-0.55, 0.25), breaks=seq(-0.50, 0.25, 0.25)) + theme_bw() + theme(legend.position = "bottom", panel.grid.minor.y = element_blank() , panel.grid.minor.x = element_blank(), panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank() ) # ANOVA data$Assessm[data$Assessm == "Unclear"] <- "Drift" data$Assessm <- as.factor(data$Assessm) names(data) <- c("Assessm", "v1", "v2", "v3", "v4", "v5", "v6") anova(lm(v3 ~ Assessm, data=data)) ########## lda ################### d <- data names(d) <- c("samp", "Assessm", "X1subcen", "X2subrat2", "X3maxcen", "X4maxrat3", "X5sumcen", "X6sumrat2") train <- subset(d, Assessm != "Unclear") train <- train[-c(42, 48),] test <- subset(d, Assessm == "Unclear") test <- rbind(d[c(42, 48),], test) train$Assessm <- as.factor(train$Assessm) test$Assessm <- as.factor(test$Assessm) lda1 <- lda(Assessm ~ X2subrat2 + X3maxcen + X4maxrat3 + X6sumrat2, data=train) confusion(train$Assessm, predict(lda1)$class) errormatrix(train$Assessm, predict(lda1, train)$class) plot(lda1) partimat(data=train, Assessm ~ X2subrat2 + X3maxcen + X4maxrat3 + X6sumrat2, method = "lda", plot.matrix = TRUE, imageplot = T) # CV lda2 <- lda(Assessm ~ X2subrat2 + X3maxcen + X4maxrat3 + X6sumrat2, data=train, CV=T) confusion(train$Assessm , lda2$class) errormatrix(train$Assessm, lda2$class)