#PREPROCESSING # Set directory setwd("DIR RAW DATA/DATA") # Get data library(GEOquery) # Get supplemental files getGEOSuppFiles("GSE80081") # Download normalised data for pheno data data1 <- getGEO("GSE80081");data1=data1[[1]]; # Extract pheno data pheno1=pData(data1) setwd("DIR RAW DATA/DATA/GSE80081") write.table(pheno1,"GSE80081_pheno.txt", sep = '\t') # -> Unpack data in windows explorer ################ setwd("DIR RAW DATA/DATA/GSE80081") library(limma) #Annotation: is in BGX file # raw data: is in IDAT files bgxfile = dir(pattern="bgx") idatfiles = dir(pattern="idat") x <- read.idat(idatfiles, bgxfile) x.norm <- neqc(x) ############### #### Determine mean of each gene #### x.normE=x.norm$E SYMBOL=x.norm$genes$Symbol y.norm=aggregate(x.normE,by=list(SYMBOL),FUN="mean") SYMBOL=y.norm$Group.1 y.norm=y.norm[,2:7] # PCA plot jpeg('pca.jpg') color=c('Orange','Orange','Orange','Red','Red','Red') data.PC = prcomp(t(y.norm),scale.=TRUE) plot(data.PC$x,col=color);dev.off() # scale individual expression data scaled=data.frame(scale(y.norm)) # Analyse data # create sample source data SKR=control, RXF=cachectic samples=c("SKR","SKR","SKR","RXF","RXF","RXF") # Set names for scaled values colnames(scaled) <- samples # convert into factors samples <- as.factor(samples) # set up the experimental design design <- model.matrix(~0 + samples) colnames(design) <- levels(samples) # fit the linear model to the filtered expression set fit <- lmFit(y.norm, design) # set up a contrast matrix to compare different groups contrast.matrix <- makeContrasts( Tumor = RXF - SKR, levels=design) # Now the contrast matrix is combined with the per-probeset linear model fit. tumor_fits <- contrasts.fit(fit, contrast.matrix) tumor_ebFit <- eBayes(tumor_fits) source("IBMT.R") tumor_ibmtFit = IBMT(tumor_ebFit) # IBMT correctie tumor_dfFit = data.frame(tumor_ibmtFit) # Omzetten naar data frame fdr.fun = function(x) p.adjust(x, method = 'fdr') # FDR adj.p = apply(tumor_dfFit[c("p.value")], 2, fdr.fun) # FDR apply on p-values colnames(adj.p) = paste('adj.', colnames(adj.p), sep = '') # rename adj.p.imbt = apply(tumor_dfFit[c("IBMT.p")], 2, fdr.fun) # FDR apply on IBMT p-values colnames(adj.p.imbt) = paste('adj.', colnames(adj.p.imbt), sep = '') tumor_dfFit_adj = cbind(tumor_dfFit, adj.p, adj.p.imbt) # bind columns resultsdf <- cbind(tumor_dfFit_adj,SYMBOL, scaled) setwd("DIR PROCESSED DATA/DATA/") write.table(resultsdf,"GSE80081_genemean.txt", sep = '\t')