chipster · oheil · Sep 3, 2021 · Sep 8, 2021 · Sep 8, 2021 · Sep 8, 2021
diff --git a/tools/microarray/R/average-replicates.R b/tools/microarray/R/average-replicates.R
@@ -13,11 +13,12 @@
 # rewritten by IS, 6.9.2010
 # modified by IS, 16.2.2011
 # modified by IS,12.10.2012
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # load inputs
 file <- 'normalized.tsv'
 dat <- read.table(file, header=TRUE, sep='\t', quote='', row.names=1, check.names=FALSE)
-phenodata <- read.table('phenodata.tsv', header=TRUE, sep='\t', as.is=TRUE)
+phenodata <- read.table('phenodata.tsv', header=TRUE, sep='\t', quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 # identify replicates
 replicates <- unique(phenodata[duplicated(phenodata[,column]), column])

diff --git a/tools/microarray/R/calculate-fold-change.R b/tools/microarray/R/calculate-fold-change.R
@@ -1,28 +1,32 @@
-# TOOL calculate-fold-change.R: "Calculate fold change" (Calculates a geometric average of gene expression for replicate chips and then
-# calculates a difference or ratio between the averages. The output fold change will be represented in log2 scale. 
+# TOOL calculate-fold-change.R: "Calculate fold change" (Calculates a geometric or arithmetic average of gene expression for replicate chips and then
+# calculates a difference or ratio between the averages. The output fold change can be represented either in log2 or linear scale. 
 # Note that the tool is applicable only if you have defined two groups of samples, and the reference group is determined by the smaller group number.)
 # INPUT normalized.tsv: normalized.tsv TYPE GENE_EXPRS 
 # INPUT META phenodata.tsv: phenodata.tsv TYPE GENERIC 
 # OUTPUT fold-change.tsv: fold-change.tsv 
 # PARAMETER column: Column TYPE METACOLUMN_SEL DEFAULT group (Phenodata column describing the groups. Samples of which group attribute is NA are excluded from the analysis.)
+# PARAMETER geometric: "Which mean to use" TYPE [geometric: geometric, arithmetic: arithmetic] DEFAULT geometric (Should the geometric or arithmetic mean be used in the calculation of average expression for the sample groups?)
+# PARAMETER outscale: Scale TYPE [log2: log2, linear: linear] DEFAULT log2 (What scale to use for expressing the results. Log2 yields a symmetric distribution around zero with no change being equal to 0, up-regulation taking positive values and down-regulation negative values. Conversely, in linear scale up-regulation is represented by values greater than 1 and down-regulation values being between 0 and 1.)
 
 # JTT 30.7.2007, Calculate fold changes between groups of samples
 # MG, 3.5.2011, added parameters for choosing aritmetic or geometric mean and for choosing linear or log scale
 # OH, 30.11.2011, support for names as group categories
 # MG, 30.11.2011, added parameter do define reference group
 # ML, 18.10.2016
-# ES, 27.06.2021 set mean to geometric and scale to log2
-	# PARAMETER OPTIONAL geometric: "Which mean to use" TYPE [yes:geometric, no:arithmetic] DEFAULT yes (Should the geometric or arithmetic mean be used in the calculation of average expression for the sample groups?)
-	# PARAMETER OPTIONAL scale: Scale TYPE [log2, linear] DEFAULT log2 (What scale to use for expressing the results. Log2 yields a symmetric distribution around zero with no change being equal to 0, up-regulation taking positive values and down-regulation negative values. Conversely, in linear scale 
-	# up-regulation is represented by values greater than 1 and down-regulation values being between 0 and 1.)
+# OH, 09.09.2021, additional parameters for phenodata read.table
+# OH, 09.09.2021, option geometric or arithmetic mean and option output log2 or linear
+
+if( ! exists("geometric", mode="character") ) {
+	geometric <- "geometric"
+}
+if( ! exists("outscale", mode="character") ) {
+	outscale <- "log2"
+}
 
-
-geometric<- "yes"
-scale<- "log2"
 # Loads the normalized data and phenodata
 file<-c("normalized.tsv")
 dat<-read.table(file, header=T, sep="\t", row.names=1)
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 #remove rows that are NAs
 na_samples <- phenodata[which(is.na(phenodata[,pmatch(column,colnames(phenodata))])), 1]
@@ -46,10 +50,10 @@ if(length(unique(groups))>2) {
 	stop("CHIPSTER-NOTE: You have more than two groups! I don't know how to calculate fold change.")
 }
 
-# If arithmetic mean, then transform values to linear scale, arithmetic option removed
-#if (geometric == "no") {
-	#"dat2 <- as.data.frame (2^dat2)
-#}
+# If arithmetic mean, then transform values to linear scale
+if (geometric == "arithmetic") {
+	dat2 <- as.data.frame (2^dat2)
+}
 
 # Calculating averages
 columnnames<-c()
@@ -70,21 +74,21 @@ rownames(dat3)<-rownames(dat2)
 
 # Calculating the fold change
 # Treatment divided by the control
-if (geometric == "yes") {
+if (geometric == "geometric") {
 	FC <- dat3[,2]-dat3[,1]
-} #else {
-	#FC <- dat3[,2] / dat3 [,}
-
+} else {
+	FC <- dat3[,2] / dat3 [,1]
+}
 
-# If arithmetic mean and log2 scale, then transform values back to log2 scale, arithmetic mean removed 
-#if (geometric == "no" && scale == "log2") {
-#	FC <- log2(FC)
-#}
+# If arithmetic mean and log2 scale, then transform values back to log2 scale
+if (geometric == "arithmetic" && outscale == "log2") {
+	FC <- log2(FC)
+}
 
-# If geometric mean and linear scale, then transform values to linear scale,  linear scale removed
-#if (geometric == "yes" && scale == "linear") {
-#	FC <- 2^FC
-#}
+# If geometric mean and linear scale, then transform values to linear scale
+if (geometric == "geometric" && outscale == "linear") {
+	FC <- 2^FC
+}
 
 # Saving the results
 write.table(data.frame(dat, FC=FC), file="fold-change.tsv", sep="\t", row.names=T, col.names=T, quote=F)

diff --git a/tools/microarray/R/classification-knn.R b/tools/microarray/R/classification-knn.R
@@ -10,6 +10,8 @@
 
 # JTT 26.6.2006: KNN classification created
 # MK 02.07.2013: Bugs fixed, added group and traning column selection to the header section
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
+
 
 # Parameter settings (default) for testing purposes
 #number.of.nearest.neighbors<-2
@@ -29,7 +31,7 @@ file<-c("normalized.tsv")
 dat<-read.table(file, sep="\t", header=T, row.names=1)
 
 # Reads the phenodata table
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 # Checks whether the training variable is present if phenodata
 # If training part of phenodata has not been filled, but the column (header) is present, 

diff --git a/tools/microarray/R/classification.R b/tools/microarray/R/classification.R
@@ -16,6 +16,7 @@
 # JTT 22.01.2009
 # MK 18.06.2013 Chip-prediction table added to the output
 # Prediction has not yet been implemented!
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # Parameter settings (default) for testing purposes
 #method<-"knn"
@@ -43,7 +44,7 @@ if(nrow(dat) > max.genes) {
 }
 
 # Reads the phenodata table
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 # Separates expression values and flags
 calls<-dat[,grep("flag", names(dat))]

diff --git a/tools/microarray/R/correlation-analysis-mirna.R b/tools/microarray/R/correlation-analysis-mirna.R
@@ -17,6 +17,7 @@
 # IS, 8.6.2010, bugfix
 # IS, 28.7.2010, now allows additional samples in the two data sets, but only those ones with a pair are used in the analysis
 # MG, 10.2.2011, now gets the gene symbols from the org.HS.eg.db package and adds rownames the output tables to allow use of Venn diagram
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # Loads the libraries
 library(RmiR)
@@ -25,8 +26,8 @@ library(org.Hs.eg.db)
 # Loads the normalized data and phenodata files
 data_1 <- read.table(file="normalized_mirna.tsv", header=T, sep="\t", row.names=1)
 data_2 <- read.table(file="normalized_gene.tsv", header=T, sep="\t", row.names=1)
-phenodata_1 <- read.table("phenodata_mirna.tsv", header=T, sep="\t")
-phenodata_2 <- read.table("phenodata_gene.tsv", header=T, sep="\t")
+phenodata_1 <- read.table("phenodata_mirna.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
+phenodata_2 <- read.table("phenodata_gene.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 # Figure out which is the miRNA data
 if (phenodata_1$chiptype[1] == "miRNA") {
@@ -150,4 +151,4 @@ results.negative.significant <- results.negative[results.negative[,5]<=p.value.t
 write.table(results.positive.significant, file="mirna-gene-positive-correlation.tsv", sep="\t", quote=FALSE, row.names=TRUE)
 write.table(results.negative.significant, file="mirna-gene-negative-correlation.tsv", sep="\t", quote=FALSE, row.names=TRUE)
 
-# EOF
+# EOF
diff --git a/tools/microarray/R/delete-and-subtract-columns.R b/tools/microarray/R/delete-and-subtract-columns.R
@@ -8,6 +8,7 @@
 
 # Deletes columns from data and subtacts their (average values) from other columns which they are associated with
 # MK 13.08.2013
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # Sanity checks
 if(column1=="EMPTY" || column2=="EMPTY") {
@@ -17,7 +18,7 @@ if(column1=="EMPTY" || column2=="EMPTY") {
 # Loads the data
 file<-c("normalized.tsv")
 dat<-read.table(file, sep="\t", header=T, row.names=1)
-phenodata <- read.table('phenodata.tsv', header=TRUE, sep='\t', as.is=TRUE)
+phenodata <- read.table('phenodata.tsv', header=TRUE, sep='\t', quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 # identify matrices (chip, flag, segmented, ...) present in the data
 datnames <- colnames(dat)

diff --git a/tools/microarray/R/merge-datasets.R b/tools/microarray/R/merge-datasets.R
@@ -9,6 +9,8 @@
 # Ilari Scheinin <[email protected]>
 # 2012-10-12
 
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
+
 # read data set 1
 file1 <- 'normalized_1.tsv'
 dat <- read.table(file1, header=TRUE, sep='\t', quote='', row.names=1, check.names=FALSE)
@@ -60,8 +62,8 @@ if (ncol(ratios) == ncol(calls))
   dat <- cbind(dat, calls)
 
 # process phenodata
-phenodata1 <- read.table('phenodata_1.tsv', header=TRUE, sep='\t', as.is=TRUE)
-phenodata2 <- read.table('phenodata_2.tsv', header=TRUE, sep='\t', as.is=TRUE)
+phenodata1 <- read.table('phenodata_1.tsv', header=TRUE, sep='\t', quote='', as.is=TRUE, check.names=FALSE, comment.char='')
+phenodata2 <- read.table('phenodata_2.tsv', header=TRUE, sep='\t', quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 # fill in columns present only in one phenodata table
 for (col in setdiff(colnames(phenodata1), colnames(phenodata2)))

diff --git a/tools/microarray/R/norm-combat.R b/tools/microarray/R/norm-combat.R
@@ -9,6 +9,7 @@
 # ComBat batch correction analysis
 # MK 25.06.2013
 # ML 24.08.2016 Fixed (batch as.numeric)
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # Loading libraries
 library(sva)
@@ -19,7 +20,7 @@ dat <- read.table(file, header=T, sep="\t", row.names=1)
 
 # Loads Batch information
 file <- "phenodata.tsv";
-phenodata <- read.table(file, header=T, sep="\t")
+phenodata <- read.table(file, header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 # Merge batches to matrix
 if(batch1 == "EMPTY" & batch2 == "EMPTY" & batch3 == "EMPTY") { stop("CHIPSTER-NOTE: no batches defined"); }

diff --git a/tools/microarray/R/norm-lme.R b/tools/microarray/R/norm-lme.R
@@ -10,6 +10,8 @@
 # JTT: 12.7.2006 Crated linear Mixed Model
 # JTT: 19.10.2006: Modified to use nlme library
 # MK: 22.05.2013: Noise factor parameter added 
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
+
 
 #column.groups<-"group"
 #column.random<-"age"
@@ -26,7 +28,7 @@ calls<-dat[,grep("flag", names(dat))]
 dat2<-dat[,grep("chip", names(dat))]
 
 # Loads phenodata
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 # Test needs a parameter "groups" that specifies the grouping of the samples
 # and a parameter random that specifies the random effect such as day or technician

diff --git a/tools/microarray/R/norm-specific-samples.R b/tools/microarray/R/norm-specific-samples.R
@@ -7,6 +7,7 @@
 
 # Normalize the data to specific samples
 # JTT 5.12.2008
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # Loads the data file
 file<-c("normalized.tsv")
@@ -16,7 +17,7 @@ dat<-read.table(file, header=T, sep="\t", row.names=1)
 dat2<-dat[,grep("chip", names(dat))]
 
 # Loads phenodata
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 # Extract the data from the phenodata column
 extract<-phenodata[,which(column.to.normalize.by==colnames(phenodata))]

diff --git a/tools/microarray/R/plot-annoheatmap.R b/tools/microarray/R/plot-annoheatmap.R
@@ -15,6 +15,7 @@
 
 # MK 17.10.2013 Annotated Heatmap
 # ML 29.07.2019 Add option to use gene symbols as row names
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 library("Heatplus")
 
@@ -40,7 +41,7 @@ if (ncol(dat2) > 20000) {
 }
 
 # Read phenodata and generate annotation features
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 colnames(dat2)<-gsub(" ", "", phenodata$description)
 if(column != "EMPTY") {
 	groups<-phenodata[,pmatch(column,colnames(phenodata))]

diff --git a/tools/microarray/R/plot-boxplot.R b/tools/microarray/R/plot-boxplot.R
@@ -9,6 +9,7 @@
 # JTT 02.10.2007: Boxplot
 # MG 15.09.2011: Updated colors and legend
 # MK 10.10.2013: User can choose which phenodata column is used for coloring bars
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # Renaming variables
 w<-image.width
@@ -23,7 +24,7 @@ calls<-dat[,grep("flag", names(dat))]
 dat2<-dat[,grep("chip", names(dat))]
 
 # Loads phenodata
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 if(length(grep(column, colnames(phenodata))) == 0) {
 	stop("CHIPSTER-NOTE: You need to select phenodata column to run this analysis")
 }

diff --git a/tools/microarray/R/plot-dendrogram.R b/tools/microarray/R/plot-dendrogram.R
@@ -11,6 +11,7 @@
 
 # JTT 03.10.2007: Dendrogram
 # MG 25.11.2010: Increased the gene/sample limit to 20000
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # Renaming variables
 w<-image.width
@@ -28,7 +29,7 @@ file<-c("normalized.tsv")
 dat<-read.table(file, header=T, sep="\t", row.names=1)
 
 # Loads the phenodata
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 groups<-phenodata[,pmatch(column,colnames(phenodata))]
 
 # Separates expression values and flags

diff --git a/tools/microarray/R/sample-size-with-bh.R b/tools/microarray/R/sample-size-with-bh.R
@@ -19,9 +19,11 @@
 # Ilari Scheinin <[email protected]>
 # 2012-10-12
 
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
+
 file <- 'normalized.tsv'
 dat <- read.table(file, header=TRUE, sep='\t', quote='', row.names=1, check.names=FALSE)
-phenodata <- read.table("phenodata.tsv", header=TRUE, sep="\t")
+phenodata <- read.table("phenodata.tsv", header=TRUE, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 groups <- phenodata[,column]
 
 if(length(unique(groups[!is.na(groups)]))!=2)

diff --git a/tools/microarray/R/sort-samples.R b/tools/microarray/R/sort-samples.R
@@ -7,12 +7,13 @@
 
 # JTT, 06.02.2008, Sort samples
 # MG,  16.11.2010, modified to also generate a re-ordered phenodata file to reflect the re-ordered data
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # Default parameters
 #column<-"group"
 
 # Loads libraries
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 
 # Loads data (which file to search)
 file<-c("normalized.tsv")

diff --git a/tools/microarray/R/stat-chisq-snp.R b/tools/microarray/R/stat-chisq-snp.R
@@ -7,6 +7,7 @@
 
 # JTT: 25.4.2008: Association analysis with normalized SNP data
 # MG, 14.10.2009: modified 
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # Read in data
 dat<-read.table("normalized.tsv", header=T, sep="\t", row.names=1, comment.char = "")
@@ -16,7 +17,7 @@ calls<-dat[,grep("flag", names(dat))]
 dat2<-dat[,grep("chip", names(dat))]
 
 # Test needs a parameter "groups" that specifies the grouping of the samples
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 groups<-phenodata[,pmatch(column,colnames(phenodata))]
 
 # Select only samples in group 1 (controls)

diff --git a/tools/microarray/R/stat-correlate-with-phenodata.R b/tools/microarray/R/stat-correlate-with-phenodata.R
@@ -9,6 +9,7 @@
 
 # Find genes that correlate with phenodata
 # JTT 19.7.2007
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 # Parameter settings (default) for testing purposes
 #correlation.cutoff<-c(0.95)
@@ -28,7 +29,7 @@ calls<-dat[,grep("flag", names(dat))]
 dat2<-dat[,grep("chip", names(dat))]
 
 # Test needs a parameter "groups" that specifies the grouping of the samples
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 groups<-phenodata[,pmatch(column,colnames(phenodata))]
 
 # Sanity checks

diff --git a/tools/microarray/R/stat-geneset.R b/tools/microarray/R/stat-geneset.R
@@ -16,14 +16,15 @@
 # MG 03.05.2010: to exclude single gene genesets and added group labels in plots. Added more info columns in results table
 # MK 03.09.2013: updated to R.3.0 which does not support globaltest / geneplot functions
 # EK 05.11.2014: clarified the text.
+# modified by OH,09.09.2021, additional parameters for phenodata read.table
 
 #column<-"group"
 #pathway.or.genelist <-"KEGG"
 #use.multiple.testing.correction <-"yes"
 #number.of.groups.to.visualize <-16
 
 # Loading the libraries
-phenodata<-read.table("phenodata.tsv", header=T, sep="\t")
+phenodata<-read.table("phenodata.tsv", header=T, sep="\t", quote='', as.is=TRUE, check.names=FALSE, comment.char='')
 if(phenodata$chiptype[1]!="cDNA" | phenodata$chiptype[1]!="Illumina") {
    # Saves the chiptype into object lib
    lib<-phenodata$chiptype[1]