diff --git a/tools/microarray/R/calculate-fold-change.R b/tools/microarray/R/calculate-fold-change.R index b4a744f59..5af588e04 100644 --- a/tools/microarray/R/calculate-fold-change.R +++ b/tools/microarray/R/calculate-fold-change.R @@ -1,28 +1,32 @@ -# TOOL calculate-fold-change.R: "Calculate fold change" (Calculates a geometric average of gene expression for replicate chips and then -# calculates a difference or ratio between the averages. The output fold change will be represented in log2 scale. +# TOOL calculate-fold-change.R: "Calculate fold change" (Calculates a geometric or arithmetic average of gene expression for replicate chips and then +# calculates a difference or ratio between the averages. The output fold change can be represented either in log2 or linear scale. # Note that the tool is applicable only if you have defined two groups of samples, and the reference group is determined by the smaller group number.) # INPUT normalized.tsv: normalized.tsv TYPE GENE_EXPRS # INPUT META phenodata.tsv: phenodata.tsv TYPE GENERIC # OUTPUT fold-change.tsv: fold-change.tsv # PARAMETER column: Column TYPE METACOLUMN_SEL DEFAULT group (Phenodata column describing the groups. Samples of which group attribute is NA are excluded from the analysis.) +# PARAMETER geometric: "Which mean to use" TYPE [geometric: geometric, arithmetic: arithmetic] DEFAULT geometric (Should the geometric or arithmetic mean be used in the calculation of average expression for the sample groups?) +# PARAMETER outscale: Scale TYPE [log2: log2, linear: linear] DEFAULT log2 (What scale to use for expressing the results. Log2 yields a symmetric distribution around zero with no change being equal to 0, up-regulation taking positive values and down-regulation negative values. Conversely, in linear scale up-regulation is represented by values greater than 1 and down-regulation values being between 0 and 1.) # JTT 30.7.2007, Calculate fold changes between groups of samples # MG, 3.5.2011, added parameters for choosing aritmetic or geometric mean and for choosing linear or log scale # OH, 30.11.2011, support for names as group categories # MG, 30.11.2011, added parameter do define reference group # ML, 18.10.2016 -# ES, 27.06.2021 set mean to geometric and scale to log2 - # PARAMETER OPTIONAL geometric: "Which mean to use" TYPE [yes:geometric, no:arithmetic] DEFAULT yes (Should the geometric or arithmetic mean be used in the calculation of average expression for the sample groups?) - # PARAMETER OPTIONAL scale: Scale TYPE [log2, linear] DEFAULT log2 (What scale to use for expressing the results. Log2 yields a symmetric distribution around zero with no change being equal to 0, up-regulation taking positive values and down-regulation negative values. Conversely, in linear scale - # up-regulation is represented by values greater than 1 and down-regulation values being between 0 and 1.) +# OH, 09.09.2021, additional parameters for phenodata read.table +# OH, 09.09.2021, option geometric or arithmetic mean and option output log2 or linear + +if( ! exists("geometric", mode="character") ) { + geometric <- "geometric" +} +if( ! exists("outscale", mode="character") ) { + outscale <- "log2" +} - -geometric<- "yes" -scale<- "log2" # Loads the normalized data and phenodata file<-c("normalized.tsv") dat<-read.table(file, header=T, sep="\t", row.names=1) -phenodata<-read.table("phenodata.tsv", header=T, sep="\t") +phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='') #remove rows that are NAs na_samples <- phenodata[which(is.na(phenodata[,pmatch(column,colnames(phenodata))])), 1] @@ -46,10 +50,10 @@ if(length(unique(groups))>2) { stop("CHIPSTER-NOTE: You have more than two groups! I don't know how to calculate fold change.") } -# If arithmetic mean, then transform values to linear scale, arithmetic option removed -#if (geometric == "no") { - #"dat2 <- as.data.frame (2^dat2) -#} +# If arithmetic mean, then transform values to linear scale +if (geometric == "arithmetic") { + dat2 <- as.data.frame (2^dat2) +} # Calculating averages columnnames<-c() @@ -70,21 +74,21 @@ rownames(dat3)<-rownames(dat2) # Calculating the fold change # Treatment divided by the control -if (geometric == "yes") { +if (geometric == "geometric") { FC <- dat3[,2]-dat3[,1] -} #else { - #FC <- dat3[,2] / dat3 [,} - +} else { + FC <- dat3[,2] / dat3[,1] +} -# If arithmetic mean and log2 scale, then transform values back to log2 scale, arithmetic mean removed -#if (geometric == "no" && scale == "log2") { -# FC <- log2(FC) -#} +# If arithmetic mean and log2 scale, then transform values back to log2 scale +if (geometric == "arithmetic" && outscale == "log2") { + FC <- log2(FC) +} -# If geometric mean and linear scale, then transform values to linear scale, linear scale removed -#if (geometric == "yes" && scale == "linear") { -# FC <- 2^FC -#} +# If geometric mean and linear scale, then transform values to linear scale +if (geometric == "geometric" && outscale == "linear") { + FC <- 2^FC +} # Saving the results write.table(data.frame(dat, FC=FC), file="fold-change.tsv", sep="\t", row.names=T, col.names=T, quote=F)