diff --git a/CytoSpill.Rproj b/CytoSpill.Rproj index 497f8bf..270314b 100644 --- a/CytoSpill.Rproj +++ b/CytoSpill.Rproj @@ -18,3 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/DESCRIPTION b/DESCRIPTION index d6bf752..f2c3f21 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,9 +10,7 @@ Encoding: UTF-8 LazyData: true biocViews: Imports: - flowCore, - CATALYST, nloptr, nnls, flexmix -RoxygenNote: 6.1.1 +RoxygenNote: 7.1.1 diff --git a/NAMESPACE b/NAMESPACE index d75f824..70aa46b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1 +1,4 @@ -exportPattern("^[[:alpha:]]+") +# Generated by roxygen2: do not edit by hand + +export(GetSpillMat) +export(SpillComp) diff --git a/R/DeriveCutoffs.R b/R/DeriveCutoffs.R index b033dc8..d535ad7 100644 --- a/R/DeriveCutoffs.R +++ b/R/DeriveCutoffs.R @@ -1,7 +1,6 @@ .DeriveCutoffs <- function(data, cols, n, flexrep){ data_sample <- data[sample(1:nrow(data), n, replace = FALSE), ] data_work <- data_sample[,cols] - # cutoffs <- .DeriveCutoffsHelper(x = data_work, emt = 0.001, quantile = 0.1) cutoffs <- .DeriveCutoffsHelper(x = data_work, quantile = 0.1, flexrep = flexrep) return(cutoffs) } @@ -12,8 +11,6 @@ for (i in 1:dim(x)[2]) { s <- x[, i][which(x[, i] > 0)] - # s <- asinh(s/5) - model_fit <- flexmix::initFlexmix(s~1, k = c(1,2,3,4,5), nrep = flexrep) model_chosen <- flexmix::getModel(model_fit, which = "ICL") @@ -50,16 +47,6 @@ func_list = list(d1,d2,d3,d4,d5) } - - # i_min <- which.min(param[1,]) - # sum_func <- function (x){sum(sapply(func_list, function(f) f(x)))} - # type1 <- 0.05 - # min_modal <- eval(parse(text=paste("d",i_min,sep = ""))) - # p <- function(x) { - # min_modal(x)/(sum_func(x))-1+type1 - # } - # interval <- c(0, min(param[1,][param[1,]!=min(param[1,])])) - i_min <- which.min(param[1,]) i_max <- which.max(param[1,]) min_modal <- eval(parse(text=paste("d",i_min,sep = ""))) @@ -73,7 +60,6 @@ interval <- c(0, max(param[1,])) if (p(0)>0){ - # if (p(0) < (1-type1)){ root <- uniroot(p,interval)$root[1] cutoffs[i] <- root } else{ @@ -84,111 +70,7 @@ cutoffs[i] <- quantile(s, probs = quantile) print("using quantile") } - # cutoffs[i] <- sinh(cutoffs[i])*5 print(cutoffs[i]) } return(cutoffs) } -# -# .DeriveCutoffsHelper <- function(x, quantile){ -# cutoffs <- rep(NA, dim(x)[2]) -# -# for (i in 1:dim(x)[2]) { -# s <- x[, i][which(x[, i] > 0)] -# s_asinh <- asinh(s / 5) -# q = quantile(s_asinh, 0.999) -# s_asinh[s_asinh > q] = q -# model_fit <- flexmix::initFlexmix(s_asinh~1, k = c(1,2,3,4,5)) -# model_chosen <- flexmix::getModel(model_fit, which = "ICL") -# -# if (length(model_chosen@prior)>1){ -# param_list <- list() -# param <- flexmix::parameters(model_chosen) -# func_list <- list() -# #get parameters for each component, prior probability, mean, sd -# k <- length(model_chosen@prior) -# if (k == 2){ -# d1 <- function(x) model_chosen@prior[1]*dnorm(x, param[1,1], param[2,1]) -# d2 <- function(x) model_chosen@prior[2]*dnorm(x, param[1,2], param[2,2]) -# func_list = list(d1,d2) -# } -# if (k == 3){ -# d1 <- function(x) model_chosen@prior[1]*dnorm(x, param[1,1], param[2,1]) -# d2 <- function(x) model_chosen@prior[2]*dnorm(x, param[1,2], param[2,2]) -# d3 <- function(x) model_chosen@prior[3]*dnorm(x, param[1,3], param[2,3]) -# func_list = list(d1,d2,d3) -# } -# if (k == 4){ -# d1 <- function(x) model_chosen@prior[1]*dnorm(x, param[1,1], param[2,1]) -# d2 <- function(x) model_chosen@prior[2]*dnorm(x, param[1,2], param[2,2]) -# d3 <- function(x) model_chosen@prior[3]*dnorm(x, param[1,3], param[2,3]) -# d4 <- function(x) model_chosen@prior[4]*dnorm(x, param[1,4], param[2,4]) -# func_list = list(d1,d2,d3,d4) -# } -# if (k == 5){ -# d1 <- function(x) model_chosen@prior[1]*dnorm(x, param[1,1], param[2,1]) -# d2 <- function(x) model_chosen@prior[2]*dnorm(x, param[1,2], param[2,2]) -# d3 <- function(x) model_chosen@prior[3]*dnorm(x, param[1,3], param[2,3]) -# d4 <- function(x) model_chosen@prior[4]*dnorm(x, param[1,4], param[2,4]) -# d5 <- function(x) model_chosen@prior[5]*dnorm(x, param[1,5], param[2,5]) -# func_list = list(d1,d2,d3,d4,d5) -# } -# # for (i in seq_along(model_chosen@prior)){ -# # #prior, mean, sd -# # param_list[[i]] <- c(model_chosen@prior[i], param[1,i], param[2,i]) -# # var <- paste("d", i, sep = "") -# # assign(var, (function(x) model_chosen@prior[i]*dnorm(x, param[1,i], param[2,i]))) -# # # assign(var, (function(x) substitute(model_chosen@prior[i]*dnorm(x, param[1,i], param[2,i]), list(i=i)))) -# # func_list[[i]] <- eval(parse(text=var)) -# # } -# i_min <- which.min(param[1,]) -# sum_func <- function (x){sum(sapply(func_list, function(f) f(x)))} -# type1 <- 0.05 -# min_modal <- eval(parse(text=paste("d",i_min,sep = ""))) -# p <- function(x) { -# min_modal(x)/(sum_func(x))-1+type1 -# } -# interval <- c(0, min(param[1,][param[1,]!=min(param[1,])])) -# if (p(0)>0){ -# root <- uniroot(p,interval)$root -# cutoffs[i] <- (sinh(root)) * 5 -# } else{ -# cutoffs[i] <- quantile(s, probs = quantile) -# } -# } else{ -# cutoffs[i] <- quantile(s, probs = quantile) -# } -# } -# return(cutoffs) -# } - -# .DeriveCutoffsHelper <- function(x, emt, quantile) { -# # arguments: x: data matrix, emt: EM criterion -# # em, cutoff are the functions from package cutoff which are edited to suit the current R version -# cutoffs <- rep(NA, dim(x)[2]) -# for (i in 1:dim(x)[2]) { -# a <- x[, i][which(x[, i] > 0)] -# a_asinh <- asinh(a / 5) -# if (diptest::dip.test(a_asinh)[2] > 0.05) { -# cutoff_channeli <- quantile(a_asinh, probs = quantile) -# } else { -# fit_channeli <- try(em(a_asinh, "normal", "log-normal", t = 0.001), silent=FALSE) -# if (isTRUE(class(fit_channeli) == "try-error")) { -# cutoff_channeli <- quantile(a_asinh, probs = quantile) -# } else { -# cutoff_channeli <- try(cutoff(fit_channeli, t = 0.001, nb = 10, distr = 1, type1 = 0.05, level = 0.95)) -# if (isTRUE(class(cutoff_channeli) == "try-error")) { -# cutoff_channeli <- fit_channeli$param[1] + fit_channeli$param[2] -# } else{ -# cutoff_channeli <- cutoff_channeli[1] -# } -# } -# } -# cutoff_channeli <- (sinh(cutoff_channeli)) * 5 -# if (cutoff_channeli <= min(a)){ -# cutoff_channeli <- quantile(a[which(a > 0)], probs = quantile) -# } -# cutoffs[i] <- cutoff_channeli -# } -# return(cutoffs) -# } diff --git a/R/EstimateSpill.R b/R/EstimateSpill.R index 8906c59..b8e67b7 100644 --- a/R/EstimateSpill.R +++ b/R/EstimateSpill.R @@ -1,37 +1,11 @@ .EstimateSpill <- function(data, cutoffs, cols, upperbound, neighbor) { results <- list() data <- data[,cols] - spill_cols <- .SpillColsData(data, l= CATALYST::isotope_list, nneighbor=neighbor) + spill_cols <- .SpillColsData(data, l= CytoSpill::isotope_list, nneighbor=neighbor) xcols <- .GetFmla(data, spill_cols = spill_cols) - # #old method# - # for (i in 1:ncol(data)) { - # if (!is.na(xcols[[i]][1])) { - # A = as.matrix(data[which(data[,i] < cutoffs[i]), xcols[[i]]]) - # b = data[which(data[,i] < cutoffs[i]), i] - # x0 = runif(ncol(A), min = 0, max = upperbound) - # fn = function(x) { - # vec = A%*%x-b - # norm(vec, type = "2") - # } - # result = try(nloptr::slsqp(x0, fn, lower=rep(0, length(x0)), upper = rep(upperbound, length(x0)))) - # if (isTRUE(class(result) == "try-error")) { - # result <- NULL - # xcols[[i]] <- NA - # } else{ - # result <- result$par - # } - # results[[i]] <- result - # } else { - # results[[i]] <- NULL - # } - # } - - - for (i in 1:ncol(data)) { if (!is.na(xcols[[i]][1])) { - # A = as.matrix(data[which(data[,i] < cutoffs[i]), xcols[[i]]]) A = as.matrix(data[which(data[,i] < cutoffs[i]), c(i,xcols[[i]])]) for (j in seq_along(xcols[[i]])){ @@ -65,28 +39,8 @@ result <- result$par } results[[i]] <- result - - # ## - # print(dim(A)) - # ## - # model <- nnls::nnls(A=A,b=b) - # ## print("sqp modelling done") - # ## - # print(model$x) - # results[[i]] <- model$x } - # ## - # print(dim(A)) - # ## - # - # # b = data[which(data[,i] < cutoffs[i]), i] - # model <- nnls::nnls(A=A,b=b) - # ## - # print("nnls modelling done") - # ## - # print(model$x) - # results[[i]] <- model$x } else { results[[i]] <- NA print("no spillover cols for this channel") @@ -95,84 +49,6 @@ return(list(results, xcols)) } -# -# .EstimateSpill <- function(data, cutoffs, cols, upperbound = 0.1) { -# results <- list() -# data <- data[,cols] -# xcols <- .GetFmla(data, spill_cols = .SpillColsData(data)) -# # for (i in 1:ncol(data)) { -# # if (!is.na(xcols[[i]][1])) { -# # A = as.matrix(data[which(data[,i] < cutoffs[i]), xcols[[i]]]) -# # b = data[which(data[,i] < cutoffs[i]), i] -# # x0 = runif(ncol(A), min = 0, max = upperbound) -# # fn = function(x) { -# # vec = A%*%x-b -# # norm(vec, type = "2") -# # } -# # result = try(nloptr::slsqp(x0, fn, lower=rep(0, length(x0)), upper = rep(upperbound, length(x0)))) -# # if (isTRUE(class(result) == "try-error")) { -# # result <- NULL -# # xcols[[i]] <- NA -# # } else{ -# # result <- result$par -# # } -# # results[[i]] <- result -# # } else { -# # results[[i]] <- NULL -# # } -# -# for (i in 1:ncol(data)) { -# if (!is.na(xcols[[i]][1])) { -# # A = as.matrix(data[which(data[,i] < cutoffs[i]), xcols[[i]]]) -# -# A = as.matrix(data[which(data[,i] < cutoffs[i]), c(i,xcols[[i]])]) -# for (j in seq_along(xcols[[i]])){ -# A = A[which(A[,j+1]>=cutoffs[xcols[[i]][j]]),] -# if (is.null(dim(A))) {A = matrix(A, nrow = 1)} -# } -# ## -# print(dim(A)) -# ## -# if (nrow(A) == 0) { #where there is only 1 row remains -# results[[i]] <- NULL -# print("no datapoint available for this channel") -# } else { -# if (nrow(A) == 1) { -# b = A[,1] -# A = matrix(A[,-1], nrow = 1) -# } else { -# b = A[,1] -# A = as.matrix(A[,-1]) -# } -# ## -# print(dim(A)) -# ## -# model <- nnls::nnls(A=A,b=b) -# ## -# print("nnls modelling done") -# ## -# print(model$x) -# results[[i]] <- model$x -# } -# # ## -# # print(dim(A)) -# # ## -# # -# # # b = data[which(data[,i] < cutoffs[i]), i] -# # model <- nnls::nnls(A=A,b=b) -# # ## -# # print("nnls modelling done") -# # ## -# # print(model$x) -# # results[[i]] <- model$x -# } else { -# results[[i]] <- NULL -# print("no spillover cols for this channel") -# } -# } -# return(list(results, xcols)) -# } - .GetFmla <- function(data, spill_cols) { fmlacols <- list() cs <- c(1:ncol(data)) @@ -226,15 +102,6 @@ spill_cols[[i]] <- unique(c(m1, m2, p1, p2, iso, ox)) } } - # #only consider +-1 neighboring channel - # for (i in seq_along(ms)) { - # p1 <- m1 <- ox <- iso <- NULL - # if ((ms[i] + 1) %in% ms) p1 <- which(ms == (ms[i] + 1)) - # if ((ms[i] - 1) %in% ms) m1 <- which(ms == (ms[i] - 1)) - # if ((ms[i] + 16) %in% ms) ox <- which(ms == (ms[i] + 16)) - # iso <- l[[mets[i]]] - # iso <- which(ms %in% iso[iso != ms[i]]) - # spill_cols[[i]] <- unique(c(m1, p1, iso, ox)) - # } + return(spill_cols) } diff --git a/R/SpillComp.R b/R/SpillComp.R index c5982eb..c965264 100644 --- a/R/SpillComp.R +++ b/R/SpillComp.R @@ -11,5 +11,5 @@ SpillComp <- function(file = NULL, data = NULL, cols, n, output = NULL, threshol data[,cols] <- data_compensated colnames(data) <- data_colnames if (!is.null(output)) {flowCore::write.FCS(flowCore::flowFrame(data), filename = output)} - return(list(flowFrame(data), spillmat, cutoffs)) + return(list(flowCore::flowFrame(data), spillmat, cutoffs)) } diff --git a/data/160805_Exp3_cells-only.fcs b/data/160805_Exp3_cells-only.fcs deleted file mode 100755 index d56eab6..0000000 Binary files a/data/160805_Exp3_cells-only.fcs and /dev/null differ diff --git a/data/160805_Exp3_spillover_matrix.csv b/data/160805_Exp3_spillover_matrix.csv deleted file mode 100755 index af61e08..0000000 --- a/data/160805_Exp3_spillover_matrix.csv +++ /dev/null @@ -1,62 +0,0 @@ -"","Time","Event_length","Y89Di","Pd102Di","Rh103Di","Pd104Di","Pd105Di","Pd106Di","Pd108Di","Pd110Di","In113Di","In115Di","Ba138Di","La139Di","Ce140Di","Pr141Di","Nd142Di","Nd143Di","Nd144Di","Nd145Di","Nd146Di","Sm147Di","Nd148Di","Sm149Di","Nd150Di","Eu151Di","Sm152Di","Eu153Di","Sm154Di","Gd155Di","Gd156Di","Gd157Di","Gd158Di","Tb159Di","Gd160Di","Dy161Di","Dy162Di","Dy163Di","Dy164Di","Ho165Di","Er166Di","Er167Di","Er168Di","Tm169Di","Er170Di","Yb171Di","Yb172Di","Yb173Di","Yb174Di","Lu175Di","Yb176Di","Ir191Di","Ir193Di","Pt195Di","Pt196Di","Pb208Di","Center","Offset","Width","Residual","FileNum" -"Time",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Event_length",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Y89Di",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Pd102Di",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Rh103Di",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Pd104Di",0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Pd105Di",0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Pd106Di",0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Pd108Di",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Pd110Di",0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"In113Di",0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"In115Di",0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Ba138Di",0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"La139Di",0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Ce140Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Pr141Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,7.9908427864359e-05,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0186207291181022,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Nd142Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,8.27197722399494e-25,9.05435452967847e-05,1,0.0038261373638954,0.0021993017885316,0.000144648022115939,0.00040317330106028,5.48018289472137e-23,4.23501798558815e-22,4.68585271761626e-25,3.97032936148889e-23,9.17755607102664e-27,9.40529801140778e-27,1.68383400380533e-28,2.6414007661244e-25,6.17758522295187e-21,5.61161062666916e-21,4.01326163894745e-20,0.0172519435974768,0,-6.36192759860725e-21,1.41548175405507e-21,4.30750616477746e-21,8.96270685994473e-23,1.98793423932528e-23,1.18071786515873e-26,5.28786515330316e-25,1.41036246271437e-26,1.33753698648404e-27,2.17269996257912e-29,1.10548121134583e-25,4.31043528587768e-24,5.29324277948634e-24,-1.50822281711423e-23,7.59174990906231e-21,-4.44130167991427e-23,-3.46389684870541e-23,0,0,0,0,0,0,0,0,0,0 -"Nd143Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.28105609403712e-26,3.18873139264348e-22,0.0077520037370099,1,0.0204099038271791,0.00153275121159246,0.00119938127260696,6.93475740718979e-23,0.000100119365306874,-1.21976085206497e-23,2.98448283841025e-05,-4.66296767948361e-24,4.64009128618001e-26,-2.6995854461378e-26,3.33255634926876e-26,3.45507980232527e-23,5.30136890187071e-23,-3.54458498384418e-22,2.8560286903302e-20,0.0176497912734162,1.08413506523222e-19,7.88115095660097e-21,3.94057547830048e-21,2.59831693309575e-22,-6.5983098902106e-22,-1.98451130110393e-25,-2.07656801630777e-22,7.00496351313024e-25,-4.70430939729769e-25,-1.31920857171271e-28,1.70925570831301e-26,1.62091034058804e-24,-1.72286424763558e-24,-1.14933390815375e-24,-7.84027692865325e-23,-4.26837926172127e-21,-1.55901454376814e-22,0,0,0,0,0,0,0,0,0,0 -"Nd144Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.42386968302591e-27,5.48461564758921e-23,0.0021165091762623,0.00288259075965678,1,0.0151765210190999,0.00436176373643952,5.00528131554953e-22,0.000170207097860445,-2.65964919983848e-23,8.15258474112294e-05,-1.05432427013686e-23,7.17817203485503e-25,-1.36650008656394e-26,4.59280530637854e-25,1.69377522708577e-21,5.75241086047729e-25,3.38696091257462e-21,-5.40706779607161e-20,-2.59842382472163e-20,0.0181946550405376,5.42006387409299e-20,2.71003193704649e-20,9.0842842140396e-22,-7.92242670123037e-22,-5.08246047417815e-25,-8.88131703674978e-22,4.16678770987521e-24,-1.67462299182129e-24,-6.66860132441142e-27,3.31239578719004e-24,7.94107310491791e-23,1.5880328599738e-22,1.0586070700312e-22,2.11713630511414e-22,6.57046963625669e-24,3.38773024903017e-20,0,0,0,0,0,0,0,0,0,0 -"Nd145Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.51435298453234e-27,5.5150624957719e-23,0.001699862529576,0.00131689367120854,0.0108078217797081,1,0.0389869335903057,5.08183100396079e-21,0.000445400079556869,4.84252463986876e-24,0.000312997593205496,-1.74798002427363e-23,-3.34282685509031e-24,-1.35349627831414e-25,4.94555402552879e-24,-1.4664713406618e-24,4.9218229698864e-23,-2.78760223432381e-23,7.19536719792676e-21,-8.79585212151291e-21,2.20913059801754e-20,0.0184298324242591,0,8.58623618903537e-21,7.80940840793745e-21,4.35076870572179e-25,-1.37999576589142e-21,2.95213072516609e-24,-1.73703647012087e-24,-2.8094019569918e-28,1.39625210137667e-25,9.39398544255412e-25,6.47640665763704e-25,-9.03343113784942e-26,3.75092862359944e-25,-2.44376046055222e-24,1.0815477753767e-22,0,0,0,0,0,0,0,0,0,0 -"Nd146Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.57505312054278e-27,2.76091682980173e-23,0.00102504712298509,0.000662352056371072,0.00212028991971963,0.00179230219963919,1,0.000306879090032487,0.00170686493092499,1.48226325512821e-21,0.000412875235453553,3.5238499201793e-23,4.2350378717949e-22,4.6837615131737e-25,2.11751893589745e-22,4.19276403090797e-24,1.26809704867984e-23,3.16269688477787e-24,2.41984983682973e-21,-2.82240640721395e-21,-3.34775473156662e-22,-2.12167811365295e-21,0.0179869511231023,-1.31286174025642e-19,-1.70066450520714e-20,1.83550824086158e-23,-1.36709690172517e-21,1.64226057487206e-23,-3.35510754439487e-24,9.75863153128566e-28,4.77674857585683e-26,2.15345763073303e-25,7.09348858898046e-25,3.19404608815554e-25,-2.14223199477122e-25,2.35195298439258e-25,5.51919020452387e-23,0,0,0,0,0,0,0,0,0,0 -"Sm147Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-4.09048305325468e-28,6.300347641697e-24,0,0,0.000539834924650822,-2.71047442378096e-20,0.000252363206640893,1,0.0226471910291985,0.00475609425593607,0.000879858753608515,-3.87697460101724e-22,0.00316815887390685,-1.8572523391043e-22,0.00193459115584339,-3.47423419030828e-22,8.41837696036076e-25,1.97196493193474e-24,4.20654138565456e-22,1.12391479128532e-22,2.35466438552915e-21,1.0640413109651e-21,5.65553471362027e-21,0.0038493451250623,-2.16837953902477e-19,6.83842348852945e-21,-5.88545406213382e-21,5.78574676657191e-23,-2.12323509639217e-21,1.13527379144721e-26,-4.18371399849492e-22,-3.28005534340563e-25,2.32161395201103e-25,1.16208228705934e-25,-4.23683392303578e-24,8.71428659390513e-25,2.10211440354529e-23,0,0,0,0,0,0,0,0,0,0 -"Nd148Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.06923525136962e-26,4.27653288004707e-22,0.013060165338281,0.00812239154799306,0.0157104477872686,0.00522421725345571,0.0170009576077901,0.000136208708729933,1,0.000300253417119193,0.0136633048344199,-3.67333102815157e-21,2.11753252839933e-22,-2.35794860026185e-23,1.05876626419967e-22,7.6883661656493e-23,1.22385367674545e-22,8.35435795663042e-23,7.75577214143651e-20,-6.47372562457804e-20,-3.3880691926898e-20,1.21969904472098e-19,8.13136158329824e-20,1.8100482098016e-25,0.0159510163260436,2.75279228691913e-21,-3.52847507145776e-20,1.6773338609387e-21,1.02328601647329e-22,-1.8648273611175e-26,8.9503960965275e-24,1.29264292281868e-24,-9.1762741145388e-25,1.60262497347192e-24,-7.96660214654596e-23,-6.13971367440671e-23,3.49061872088419e-22,0,0,0,0,0,0,0,0,0,0 -"Sm149Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,5.02849314489956e-28,-2.59761502914748e-25,-4.06573139722326e-20,-3.38810949768605e-20,-6.7762189953721e-20,-1.69405474884303e-20,-2.71048759814884e-20,0.00132001930978002,0.00721420723524335,1,0.0158553067220669,-2.64360580752094e-21,0.00353345779204876,1.98522039871183e-23,0.00175916299996313,-8.40535940508769e-23,2.51788468445635e-25,1.02032969329841e-24,-2.61264832992309e-22,-2.09115553495391e-22,-5.75461097873106e-22,1.03486720571308e-22,1.16387173062925e-21,6.07451345679681e-22,-2.75242537928498e-22,0.00396911009608645,-1.3713240843857e-19,7.14679323111152e-22,1.10510601152658e-21,2.48152400753056e-24,-1.04555045530921e-21,6.83399905407891e-25,1.50434784872627e-26,8.63008152336939e-28,-2.1408913023677e-25,6.43014437072526e-25,-4.38427073924328e-25,0,0,0,0,0,0,0,0,0,0 -"Nd150Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-4.22863093793529e-27,1.42430957804695e-24,0.00270875239813948,0.0013426050644215,0.00400880865419268,0.00105564033400292,0.00253574228646426,3.12042041259696e-22,0.00156586913813053,0.000166964383843921,1,0.000345679695384729,3.97046685063387e-23,-1.27054939220284e-21,1.56104724255398e-22,7.25023303924086e-24,4.18451654376138e-23,-8.51850688033819e-23,4.72158629189227e-21,-1.00237421235219e-20,-1.24048601241971e-20,-3.61684746148523e-21,2.56620145808159e-22,2.38228059416034e-22,5.13576262231758e-22,-1.27046448426549e-21,0.0137878411628682,-2.22147620292965e-20,5.97385888354157e-22,-4.56593768299619e-25,2.69992737193611e-22,7.45924411188609e-25,3.81194122282762e-25,7.23798023018011e-25,4.70526289520269e-24,-6.08685742970479e-24,1.43903373745842e-22,0,0,0,0,0,0,0,0,0,0 -"Eu151Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-8.22880037415418e-31,-3.37214412566545e-27,5.27998136552854e-23,-3.97711912047006e-23,2.89544011913348e-25,-2.59580511230002e-23,-5.36382869439354e-23,-5.29368922194555e-23,-1.05873784438911e-22,-1.05873784438911e-22,6.56317189449197e-05,1,0.0003418332342938,0.00810215289875525,-1.27048541326693e-21,2.53683962439837e-25,9.68047262514465e-28,2.55436637131677e-28,2.297797169771e-25,-4.65855136599217e-25,1.53726099069944e-26,-8.90127495055422e-25,-1.01842426613791e-24,-2.78552558680004e-26,1.27525567309449e-24,-1.76545051909896e-25,-1.69275702210822e-22,0.000106652374547705,3.17621353316733e-22,-4.23486104594679e-22,2.6207388898265e-23,-3.01300365757612e-26,-6.55391106274609e-28,1.31507903530649e-29,3.30594353144784e-27,-6.28364566580538e-28,5.08268240261967e-27,0,0,0,0,0,0,0,0,0,0 -"Sm152Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.85808924561299e-29,-1.18449770911728e-25,-4.5272190006117e-22,-2.31250708454625e-22,8.37292483979195e-22,1.47325529462921e-21,-2.16351632028281e-21,0.000420859723058296,0.00071127939817252,0.000805819822966716,0.000997021118034681,0.000297327230679652,1,0.000359484190808316,0.00657114818542586,-4.23519400757089e-22,2.06773363211593e-25,-3.76950682277733e-26,-1.3757768223452e-24,-1.78639829053827e-23,1.40290158391545e-23,5.96699381663312e-23,2.39573154813217e-23,-4.97684980049725e-22,-6.58870978511852e-22,3.13494247507811e-21,-7.27471148312674e-21,-4.8702053431458e-21,0.00264100240263453,2.64687737248244e-23,6.77589194659565e-21,4.43728786497453e-24,2.12256396724507e-26,-2.75397982848362e-27,-9.1638573008437e-26,-1.21876899523953e-26,4.09014499296734e-26,0,0,0,0,0,0,0,0,0,0 -"Eu153Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.60954496076076e-33,-9.7331700021016e-29,-4.85363579444917e-25,-4.73679477673454e-25,-4.06754822846774e-25,1.28652305130387e-25,4.1476265704232e-25,-2.64697793669262e-23,-5.29395587338524e-23,-1.05879117467705e-22,1.05879117467705e-22,0.00620541140666512,7.218350176936e-05,1,0.000315654192747803,-5.29395587338524e-23,-3.87740908695208e-26,-2.31729423030253e-26,-3.41169386184934e-26,-1.88518716968528e-26,-7.34108543018742e-27,-9.96553031995114e-27,3.40878678230821e-27,1.71059428506004e-25,-4.30288474652516e-25,-3.49278862027204e-25,-1.09956452673827e-23,1.3648479950051e-22,-1.04024179684388e-22,0.000124882990049272,-3.17637352403114e-22,1.33684750598174e-25,1.22264966100346e-26,2.72079365669078e-28,2.53393222868749e-27,-4.36255958491641e-29,-8.00644167837176e-29,0,0,0,0,0,0,0,0,0,0 -"Sm154Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.24630666125525e-29,-6.78684049158515e-26,-4.44937044059562e-21,-2.96583904024816e-21,-1.47741248441248e-21,-1.36786201401322e-21,-7.00025951561095e-21,0.000406380653833024,0.000585911429111397,0.000629856895018289,0.000452621705225047,-9.26442255186518e-22,0.00577915339100072,2.80984262116964e-05,1,0.000281020337775546,0,-5.29395585766622e-23,-4.21862107407777e-23,3.5769343583532e-24,2.43062397896419e-23,1.30468774900994e-23,-6.86559925261705e-23,-1.35507358511817e-22,-5.29395585766622e-21,1.97404763223971e-21,-3.38813174890638e-21,9.49934454359571e-27,-2.03287835406632e-20,-2.11757524370321e-22,0.00204182624256885,4.23516468613298e-22,-8.81133532091927e-24,-4.84746634314719e-25,3.12169424759834e-24,-2.29763571058148e-26,-3.54081929846175e-25,0,0,0,0,0,0,0,0,0,0 -"Gd155Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.44887271613247e-33,5.8385456276383e-30,1.03522200895736e-25,1.07652999703237e-26,8.00731738065287e-26,2.92913655807623e-26,-1.32774899541436e-25,3.29021523894557e-24,9.91382948367126e-24,6.61744490042421e-24,1.32472282840419e-23,7.08646210139766e-26,2.11659533877855e-22,2.0640960763369e-25,4.22507710773413e-05,1,0.000932253911416254,0.000456237193785056,1.58927880362987e-05,1.75774542232494e-24,2.11756624571968e-22,5.76869084224364e-28,-6.11936074446367e-27,-1.94343395367096e-26,2.65067369840427e-28,-1.46477427517326e-26,3.08415138056198e-27,-1.60302808158134e-23,-8.48259734396077e-24,-5.87919861773244e-25,-1.30529107852965e-21,0.00445536158591966,-1.07148852033416e-19,-4.71310912020453e-21,-1.69380529857093e-21,1.22842355180668e-24,2.07808490077665e-22,0,0,0,0,0,0,0,0,0,0 -"Gd156Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.74151774039192e-34,-2.84192479425517e-29,5.80271750004805e-26,-5.65728721211873e-26,3.72983028643847e-25,3.7631703454157e-26,5.55327340697293e-26,-1.98252453731841e-23,-1.31536804629171e-23,5.40205092573484e-26,1.33429894656974e-23,3.61858712313911e-26,-1.04148543267174e-22,-4.11889727789036e-25,0.000132805102609032,0.00816334855374289,1,0.0272220465716867,0.00769848491938228,3.03769860760827e-33,0.0015804722938024,3.22488146485715e-27,3.01970943318369e-27,-4.85027871063872e-26,3.35332357412368e-25,2.07889688155694e-26,3.30851517248696e-24,-3.3272787675796e-24,-1.69929273422699e-24,1.76787747903939e-27,-6.7621029069242e-22,-4.03625166295136e-20,0.00385312260939823,-7.26187814562311e-20,-4.23245604332107e-20,-1.32346968208914e-23,-4.23510298268525e-21,0,0,0,0,0,0,0,0,0,0 -"Gd157Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -"Gd158Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-2.06528787795291e-36,-1.9051339420938e-32,3.75295568271122e-28,-1.73831669018355e-28,2.27866676012162e-28,-3.95735905926764e-29,-4.26244562764457e-28,-1.9455886322484e-26,7.82772846688102e-27,3.07679788246401e-27,4.14972888867549e-26,3.07301067175415e-29,1.25900684422493e-25,1.00660448262587e-27,5.64308598148649e-38,0.000464952396521893,0.00170490574617957,0.00954236566967761,1,0.000322350922363812,0.00561880107806126,2.39355222829834e-30,1.65707386519046e-29,4.67300638063508e-30,1.5331974629339e-29,-4.98928410496644e-29,3.73168579945066e-27,6.21812485759433e-27,-4.44375127612354e-26,1.68152586889064e-27,3.30901589674705e-24,6.7807641567293e-23,8.27964330499792e-22,-1.46322083231213e-21,0.00312539494550552,-4.23502914534642e-22,1.01638995480014e-20,0,0,0,0,0,0,0,0,0,0 -"Tb159Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-3.18419875976777e-40,2.20900945716168e-36,-3.23790340496834e-32,5.39462107975049e-33,2.3845426386946e-32,1.41899971064241e-32,-7.21766085290342e-32,-6.09942227846142e-30,-2.73385980084831e-30,3.15544362089139e-30,-2.94263255099609e-30,6.84655427389137e-32,7.90710109035815e-29,-1.70950369214426e-31,7.52134439337964e-39,-2.64697790580843e-23,-2.64697790580843e-23,0,1.62571591093267e-05,1,0.000244036475047962,-6.16371181206507e-31,2.54208945251949e-31,4.47353513236754e-30,-2.08451441309349e-28,-2.83801643279449e-30,-6.38092991961852e-27,1.40374522983686e-27,1.41704029984267e-28,3.7866477662426e-28,1.24077089334768e-24,1.81986788159824e-23,-1.40617172678319e-23,7.4442725033108e-24,5.2939558116164e-23,0.00333252717787338,-5.29395581161686e-22,0,0,0,0,0,0,0,0,0,0 -"Gd160Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-2.76307403810238e-36,7.73856499915201e-33,1.34603827873197e-28,1.0168981444484e-29,6.01622751578908e-28,1.82637406982516e-28,8.92893416259881e-29,-3.04012226894697e-26,-2.11057780250409e-26,2.89007915204688e-27,-7.33213801424806e-27,2.00973058795226e-28,1.99148085361459e-25,-2.9621422055281e-28,-1.24080375394142e-36,0.000436736373326646,0.000956824374419318,0.00103018754403815,0.00569707242446339,8.41558754357159e-05,1,7.06819371078002e-28,1.32528632077125e-28,1.99187378568299e-27,3.01175260699793e-26,-3.15544646145376e-28,-1.54970625355822e-24,-4.38292104135244e-24,-3.30872478845183e-24,-3.59199949918497e-25,7.13606210109971e-36,-6.77626357803402e-21,-3.38813178901719e-21,3.38813178901719e-21,1.28129846876617e-33,-8.47032947254242e-22,0.00322131071682414,0,0,0,0,0,0,0,0,0,0 -"Dy161Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-2.96282293261725e-39,2.5353555471574e-35,-1.27352398441301e-31,-3.52590866556497e-31,-9.82297266078443e-32,-1.40746603494938e-31,-9.32328505831429e-31,1.39685397253207e-29,2.66081768635957e-29,7.56728466602539e-29,2.80346795274881e-29,5.91837070334647e-31,7.83395406701567e-28,3.15152128693117e-30,1.80760755783355e-25,1.93332931698171e-25,8.47419613117697e-22,6.36597675228586e-22,1.69314058887948e-21,1.06160955711759e-22,0.00184282298826477,1,0.026633108691999,0.00657204161264673,0.00300632134625427,-2.00428514351544e-22,1.14326547221252e-25,2.17654719176092e-27,-2.63646431697048e-28,6.57317950145734e-29,7.26494772413455e-25,5.23832413113146e-24,3.82068853092264e-24,7.60084985460036e-24,5.97112482895086e-23,7.37371747999039e-25,1.35280774777061e-21,0,0,0,0,0,0,0,0,0,0 -"Dy162Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-7.94399050193187e-40,-4.28306878759958e-36,1.0861012608573e-32,-1.32004196311311e-32,5.40648959387924e-34,-4.29986962613783e-33,-6.78338362959386e-32,-4.65206812644129e-30,-2.95485798057532e-30,9.90901981034071e-31,-4.6331558025435e-30,2.72078237858699e-32,2.81034327110941e-31,-5.83713657988087e-32,-7.63756734703251e-28,1.28871261815235e-23,3.87372293272327e-23,6.21212565370243e-23,2.53059865318068e-23,1.01142529982619e-23,0.000163124376665773,0.0153039819834704,1,0.0408416992995325,0.00656906903944576,4.23293924983663e-22,4.13372973616859e-25,6.12303986414382e-27,4.08709630091787e-27,-4.39953814258755e-29,3.42117498906898e-26,2.98424370188728e-25,5.44734617208202e-25,4.34760634345848e-25,2.77157052105492e-24,1.29466128611648e-25,1.96594090821647e-22,0,0,0,0,0,0,0,0,0,0 -"Dy163Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-2.18658527032102e-40,-8.15310691074945e-37,-5.60316976452706e-33,6.51342559657981e-34,4.63214193551813e-32,-5.85348646758713e-35,1.35152965444966e-32,-1.03714442644412e-31,-9.37058912506019e-31,-1.60348612730929e-30,-3.02824914293756e-31,1.09644330405762e-32,-5.09380166034275e-30,-1.88959341179514e-31,-3.14168796429004e-27,-6.89071209223369e-25,1.14918576563928e-24,-1.07170818714345e-23,5.12370223150799e-23,3.9330369798864e-24,7.49311990257317e-05,0.0029837424769349,0.0117551373814532,1,0.0159549714763509,0,0,3.01536528366328e-26,-5.33548299532353e-28,2.87745677032466e-30,4.69164748032419e-26,3.49489396358786e-25,6.58340054327534e-25,3.28954386483025e-25,1.72959378565344e-24,-2.06237186618959e-26,5.97729331704814e-23,0,0,0,0,0,0,0,0,0,0 -"Dy164Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.12761373237379e-39,-3.34172366552293e-36,-5.10308856537366e-32,-2.58747407195903e-32,2.54985099796021e-32,-1.50531345683036e-32,-5.84873532553467e-32,-3.54328144932883e-30,-9.80231130504126e-30,-1.54510085860575e-29,-1.65150122476662e-29,1.11044093147109e-32,-9.07025868894409e-29,9.01835554220743e-33,-2.5080329961771e-26,-5.28336348140092e-23,-1.01510729654254e-22,3.16014133419931e-24,-4.13679548869962e-22,1.39159003417875e-23,0.000288327529840711,0.0049071762630251,0.00800342057836766,0.0287846146546147,1,0.000632444746126355,0,8.17958925862313e-25,4.39212549409815e-25,7.75304462893074e-30,-5.30411434750365e-26,1.22449941957663e-24,-8.37446372342022e-25,9.35860560023062e-25,1.04993822131447e-23,2.31092635640222e-25,1.01145173345457e-22,0,0,0,0,0,0,0,0,0,0 -"Ho165Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.78699155031782e-48,7.81066026596478e-59,-1.67384253096461e-39,-3.14864938980399e-41,-2.83003209281338e-40,-7.20393261941418e-40,-1.14363315413743e-39,-9.40395480657831e-38,-1.23019076696192e-37,-7.59993026633006e-38,-1.01284430223385e-37,-6.20545595450473e-40,-1.7364689980958e-36,-4.97186148441115e-39,-1.11134015878131e-34,6.67311952368372e-31,2.4309790570528e-31,2.21346510708543e-47,8.00774342842046e-30,1.8589546749539e-31,1.61558713389263e-27,2.58493941422821e-26,7.9658321741506e-27,1.55096364853693e-25,0,1,0.000413046092265253,-1.99360855720825e-21,-2.71935610662321e-22,-7.75482006321686e-26,-1.44563442608239e-23,-7.09858909098845e-27,-1.64563770743466e-27,-4.52855436829822e-29,-3.22446876087865e-29,-2.15704141113824e-32,6.31088687144445e-30,0,0,0,0,0,0,0,0,0,0 -"Er166Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.6609389235788e-44,-3.20118179149215e-43,7.59277550613367e-37,9.44351206384202e-38,4.64096059399049e-36,-1.30287022599833e-36,2.06427778020601e-37,2.30502887661468e-38,1.61682516302353e-34,-1.73285957107813e-34,-2.3896600713668e-34,-1.73119078505564e-36,-2.28240837661456e-33,-1.20432185164475e-35,-7.17764460741214e-31,-2.57780733719698e-28,-6.97998582721773e-27,-3.20273305497303e-30,4.38883950868535e-27,-3.64562562324099e-29,1.6575118468027e-27,6.45971661465333e-26,-1.05877992148633e-22,4.33237635182863e-25,3.54098583096357e-05,0.000142039986285829,1,0.0139757370047105,0.002705370127583,-2.35369166011262e-22,0.000250441430758587,-2.73706024222019e-23,-4.17813021180727e-24,-1.57528961505089e-25,-1.06039027120881e-25,-1.09041156557913e-29,-2.85122224685273e-26,0,0,0,0,0,0,0,0,0,0 -"Er167Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-2.34339268875604e-46,1.79756666882007e-42,1.26623202296909e-38,6.45973561843144e-39,-1.64523694879978e-38,-1.09136782289748e-38,-1.38868939995219e-37,1.5296367943684e-36,-3.74482291626199e-36,-1.48363217450866e-37,-3.22592261159426e-36,-4.7082144743442e-39,-7.73315516176225e-36,-1.7856540367636e-37,6.56443466926327e-34,4.51843247008305e-31,-1.71347480601724e-29,-9.85148326810438e-30,1.51617594560803e-28,-2.77269210429128e-30,5.25874237526951e-27,9.95460368561652e-25,-1.37490592870075e-24,1.45620691437303e-24,1.46438685975107e-22,-4.26324006174456e-22,0.0131154476716887,1,0.0332360504571742,-1.69377909570407e-21,0.000794678679520609,-9.88248339771941e-23,-5.38494926279889e-24,-1.82022462573162e-25,-5.22231856206357e-26,9.99504472956954e-29,-1.23307255776519e-25,0,0,0,0,0,0,0,0,0,0 -"Er168Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-5.2684364166439e-47,3.93827793069135e-43,3.14418371616873e-40,-1.35009542252364e-38,-2.28808980601984e-38,-3.82106534681823e-38,-4.38642053690576e-38,7.59917134489356e-37,-1.00457691032791e-36,-4.3631837250611e-37,8.91122325557438e-37,-7.85099082086526e-40,4.91866092930483e-36,-1.07949098582551e-37,-9.24530156897391e-35,-5.40040557980182e-30,-5.27780566105158e-30,-9.27875738509501e-30,-1.85341849668959e-29,-5.07977538377077e-31,1.12177465986259e-26,3.62996870617772e-25,2.36468613893057e-25,6.18010659461143e-24,2.64693379820118e-23,-8.18769147488944e-23,0.00167371471601072,0.00497923381117102,1,0.000289324035122413,0.00169489539461858,2.11754703856094e-22,-1.14869662689868e-23,-7.72610737432732e-25,-5.47349811790157e-26,3.40069875690472e-28,-2.20512720502946e-25,0,0,0,0,0,0,0,0,0,0 -"Tm169Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-4.38172886409448e-47,-7.73854739702072e-43,-7.38280440374518e-39,3.61757542416622e-41,-6.30375010342659e-40,-7.58924057279173e-39,-1.24514306428508e-39,3.763876434245e-37,-4.55874361898758e-37,-2.38001253100036e-37,3.76387643424501e-37,-2.32814078306663e-39,3.00926553810506e-36,5.1944671844776e-38,1.98647816635109e-33,8.13468979352619e-30,1.57772181044202e-29,8.96647027662853e-30,2.52281507602895e-29,1.03818271021065e-30,1.15670095277607e-26,5.9951575180958e-26,3.10318871817351e-25,1.72139265715693e-24,0,2.26021869413013e-25,-2.54109774980388e-21,-1.69406516653592e-21,0.000304992907356411,1,0.000771394559229795,0,-1.18446149127837e-23,-5.76939280298895e-25,-4.3445912524318e-26,9.03672402403543e-29,-1.40108393302582e-25,0,0,0,0,0,0,0,0,0,0 -"Er170Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.48448327071632e-43,-1.46932311637575e-39,5.23424263897422e-36,6.6082084197879e-36,3.49464901443872e-35,-5.61306396516328e-36,3.97633161541383e-36,1.45754783169249e-38,-4.63174399592066e-34,-9.69272811976575e-34,-1.27581977033882e-33,-8.29444177482987e-36,-2.04023345408349e-32,-2.41588200801208e-35,-1.9065793903745e-30,-6.65243432897818e-28,-1.42996119082427e-26,-1.29245489809442e-26,-5.69740233721258e-26,5.69471283195204e-28,6.61745325797484e-24,2.11762711766918e-22,-7.36150670289276e-25,1.69409852726401e-21,0.000134843941386211,-2.14738494127474e-22,0.0069969461657012,0.00520344477950038,0.00979301170540169,0.000557067521133949,1,0.000736200796408511,-6.77625777802317e-21,-2.11758055563224e-22,2.11758055563224e-22,-2.01633202671861e-25,5.2939513890806e-23,0,0,0,0,0,0,0,0,0,0 -"Yb171Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-8.59147682026926e-47,4.45579263387604e-43,3.34133986896601e-38,1.26049437114291e-38,3.83423271532267e-38,-4.92134347620388e-39,2.99922933036456e-39,-1.14256405289131e-36,-1.08786481952321e-36,-1.67972009694446e-36,-3.94161377205819e-37,-4.59750005714557e-40,-1.23275032330528e-35,-9.95118423852166e-38,2.55409418022813e-34,4.47468273159708e-30,1.89893818271006e-29,3.97872281517456e-30,-5.04924716199659e-29,-6.0498787512051e-31,1.4364033693266e-26,5.63199392825376e-25,6.26497314961257e-25,2.0159317300406e-24,4.89339669775663e-25,9.99999454010723e-25,-4.4126379182319e-25,-5.06685903229861e-21,-3.35500051048988e-21,-3.18730559641516e-22,0.00117592548589702,1,0.0380344448841669,0.00216012985768397,0.00244225654637358,-4.06485478663199e-22,0.00040666712426585,0,0,0,0,0,0,0,0,0,0 -"Yb172Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.30879055006137e-47,4.51290841185552e-44,3.05225288525313e-39,2.19439957820772e-39,3.73385281517201e-39,-3.36357091994355e-39,-3.84721009925556e-39,-3.81651112017144e-37,-8.20745072586965e-37,-3.77209127148542e-37,-4.76624584928899e-37,-8.03057010324497e-40,-7.67819823980947e-36,-2.66802586489027e-38,-1.00499453202649e-33,3.05375772052435e-30,1.67136819813044e-30,1.97365444063894e-30,-4.16259453754629e-31,3.83694098527346e-31,5.5533541329682e-27,1.80553709244456e-25,1.62199679761919e-25,1.21720709632097e-24,1.29863952190366e-23,1.48148156383866e-25,-7.91848499926783e-24,3.74138329105189e-22,8.35686629874066e-22,-3.0068049008639e-23,0.000352091763092858,0.00806245316140492,1,0.0377088908410302,0.0100768859322136,-1.5720516236457e-21,0.000650933333204864,0,0,0,0,0,0,0,0,0,0 -"Yb173Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,4.201272741662e-49,-1.3113960996352e-43,1.753405543339e-39,1.28417123929997e-39,5.41697502935811e-39,-7.04989908713753e-40,1.04778928530244e-40,8.55569793468745e-38,-2.77964265694419e-38,4.69987108521985e-38,-1.01552673715441e-37,-3.42778673366765e-40,-1.65323484345622e-36,-5.79327752604172e-39,1.29347774799184e-34,1.82832491490723e-30,-1.50072586104772e-30,-1.13343562387949e-30,-1.68367237447072e-31,1.40810308079936e-31,-8.17534584733633e-29,4.95704495589577e-26,5.61916338547073e-26,3.85327936646656e-25,-6.41779645084512e-24,1.14886721007482e-25,-2.11540556730404e-22,-2.20775277683848e-22,4.26774179467359e-22,-2.78603265411039e-23,0.000221352193971226,0.00274549971162162,0.0148280190641988,1,0.0328948841202106,-3.81091343350843e-21,0.00210043672185884,0,0,0,0,0,0,0,0,0,0 -"Yb174Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,3.83003923844191e-49,-3.33861781883967e-45,-6.10969821353746e-41,-3.36616239700028e-41,-4.20041260144264e-41,4.76018026628266e-42,-3.52207206018356e-42,5.36791576760996e-39,2.20572090512289e-39,2.07889345466243e-39,2.32903207690307e-39,7.49510012251011e-42,3.18677984337525e-38,1.07803644341178e-41,1.21469754386324e-35,1.46998442086164e-32,4.99854269334863e-32,-3.3747439372377e-32,-1.73050283587567e-31,2.47469084668407e-33,5.84731556872359e-30,5.69183543176577e-28,3.04581568266065e-28,-1.06235289158677e-27,8.83347828177893e-26,1.31490565903075e-27,5.43250450434387e-24,7.14423184402332e-24,2.54382757985505e-24,5.80431398407878e-25,8.4682884225049e-22,0.000364201672830282,0.00166216681132378,0.00559814073424309,1,0.000618988440305126,0.0044387313762992,0,0,0,0,0,0,0,0,0,0 -"Lu175Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.31568683611633e-52,-9.49144560105106e-46,-1.80036089873099e-42,-5.71626309854219e-42,1.05401816865642e-41,-3.67360739437605e-42,-2.12386878982359e-41,3.69602317405123e-40,-4.74682815464376e-40,-3.88392486326718e-40,1.38477285897007e-39,3.28319955750939e-42,1.39872792838781e-39,2.567310567484e-41,9.34825430499473e-37,6.16771608343505e-33,-6.14401477645557e-33,-1.37926241803155e-32,-9.28926113723464e-32,-5.66149925209145e-34,-7.36374255545241e-31,2.32075330187105e-28,2.85455587044237e-28,1.33095547313821e-27,-2.58096299476144e-26,1.54509781610842e-28,8.2972552101176e-25,-4.3601860994426e-25,-1.65436122510606e-24,-2.73521291962813e-25,-2.11757776810795e-22,-1.69406221448636e-21,-6.77624885794545e-21,-3.38812442897272e-21,0.00038457569303972,1,0.00345045641258079,0,0,0,0,0,0,0,0,0,0 -"Yb176Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1.72324147805635e-50,1.11731735129191e-43,4.06004891230003e-39,4.38977360916896e-40,8.27729639463722e-39,-3.17143919854914e-39,-2.54166217471371e-39,-6.15155888522515e-39,-4.61455120113159e-37,-4.24240525975443e-37,1.8313962087418e-37,-7.30574100284121e-41,-3.10991069480472e-36,-1.3439381323208e-38,6.90857978863114e-35,1.92461934637561e-30,7.39632296425557e-32,-5.84208211631381e-31,-1.28088927172631e-29,4.27520083943875e-32,3.62900343611673e-27,2.20130978751329e-25,2.62024208600717e-25,1.13012691209082e-24,6.18962278690081e-26,4.68350621821966e-26,-8.28811153520133e-25,-8.63495137825545e-22,-4.75937528107202e-24,-8.08495535317171e-23,0.000298468302013127,0.00366748489510392,0.00678881600716015,0.00452694924854097,0.0195372793962441,0.000629569381463226,1,0,0,0,0,0,0,0,0,0,0 -"Ir191Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 -"Ir193Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 -"Pt195Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 -"Pt196Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0 -"Pb208Di",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 -"Center",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 -"Offset",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 -"Width",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 -"Residual",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 -"FileNum",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 diff --git a/data/Ana_labeled.fcs b/data/Ana_labeled.fcs deleted file mode 100644 index ba6203a..0000000 Binary files a/data/Ana_labeled.fcs and /dev/null differ diff --git a/data/Ana_marker_population.Rdata b/data/Ana_marker_population.Rdata deleted file mode 100644 index bd616ff..0000000 Binary files a/data/Ana_marker_population.Rdata and /dev/null differ diff --git a/data/Bo_862043_labeled.fcs b/data/Bo_862043_labeled.fcs deleted file mode 100644 index 0d6bfbd..0000000 Binary files a/data/Bo_862043_labeled.fcs and /dev/null differ diff --git a/data/Bo_marker.csv b/data/Bo_marker.csv deleted file mode 100644 index 7422c60..0000000 --- a/data/Bo_marker.csv +++ /dev/null @@ -1,52 +0,0 @@ -BCKG190Di -CD11b -Cd112Di -Ce140Di -Center -Beta7 -CXCR3 -CCR10 -CD73 -NKG2D -CXCR5 -CD160 -CD39 -CCR9 -BTLA -Event_length -CCR7 -CD49F -2B4 -PD-1 -CD25 -CD57 -Ir191Di -Ir193Di -Lu175Di -CD27 -Nd143Di -PANKIR -CD45RA -CD28 -CCR2 -CD26 -Offset -CCR6 -Pt194Di -Pt195Di -HLA-DR -Residual -Rh103Di -CD127 -CCR4 -TIGIT -CCR5 -ICOS -DNAM-1 -CD161 -KLRG1 -CD95 -CD56 -CD38 -Time -label \ No newline at end of file diff --git a/data/Levine32_example.Rdata b/data/Levine32_example.Rdata deleted file mode 100644 index eb30278..0000000 Binary files a/data/Levine32_example.Rdata and /dev/null differ diff --git a/data/Levine_32dim_colnames.Rdata b/data/Levine_32dim_colnames.Rdata deleted file mode 100644 index 95737c9..0000000 Binary files a/data/Levine_32dim_colnames.Rdata and /dev/null differ diff --git a/data/Levine_32dim_notransform.fcs b/data/Levine_32dim_notransform.fcs deleted file mode 100755 index 1306481..0000000 Binary files a/data/Levine_32dim_notransform.fcs and /dev/null differ diff --git a/data/Samusik_01_notransform.fcs b/data/Samusik_01_notransform.fcs deleted file mode 100755 index cb8eb2d..0000000 Binary files a/data/Samusik_01_notransform.fcs and /dev/null differ diff --git a/data/Samusik_colnames_population.Rdata b/data/Samusik_colnames_population.Rdata deleted file mode 100644 index e646db1..0000000 Binary files a/data/Samusik_colnames_population.Rdata and /dev/null differ diff --git a/data/ff_list.Rdata b/data/ff_list.Rdata deleted file mode 100644 index b3fbe18..0000000 Binary files a/data/ff_list.Rdata and /dev/null differ diff --git a/data/isotope_list.rda b/data/isotope_list.rda new file mode 100644 index 0000000..6a32dbf Binary files /dev/null and b/data/isotope_list.rda differ diff --git a/scripts/Ana_analysis.Rmd b/scripts/Ana_analysis.Rmd deleted file mode 100644 index 077080d..0000000 --- a/scripts/Ana_analysis.Rmd +++ /dev/null @@ -1,404 +0,0 @@ ---- -title: "Ana_analysis" -author: "Qi Miao" -output: html_document ---- - -#Analysis of healthy human cord blood data, from which generated Figure 3 - -```{r} -library(CytoSpill) -library(flowCore) -library(ggplot2) -library(Rtsne) -library(dplyr) -library(RColorBrewer) -library(reshape2) -library(Rphenograph) -``` - - -#read data -```{r} -#read expression -data_Ana <- flowCore::exprs(flowCore::read.FCS("/Users/qmiao/CytoSpill copy/data/Ana_labeled.fcs", transformation = FALSE, truncate_max_range = FALSE)) -#remove negative values if any -data_Ana[which(data_Ana<0)] <- 0 - -load(file = "/Users/qmiao/CytoSpill copy/data/Ana_marker_population.Rdata") -Ana_marker -population_name_Ana -``` - -###select channels used for compensation and analysis -```{r} -data_Ana_temp <- data_Ana[,3:46] -#remove duplicates -duplicates_id <- duplicated(data_Ana_temp) -data_Ana_temp <- data_Ana_temp[!duplicates_id,] -``` - -###use CytoSpill for compensation -```{r} -Ana_results <- SpillComp(data = data_Ana_temp, cols = 1:44, n = 20000, threshold = 0.1, flexrep = 5, neighbor = 1) -compensated_Ana <- Ana_results[[1]] -``` - - -### add back population label -```{r} -##compensated exprs -compensated_Ana_exprs <- as.data.frame(flowCore::exprs(compensated_Ana)) -compensated_Ana_exprs[,"label"] <- as.factor(data_Ana[,"label"][!duplicates_id]) -##uncompensated exprs -data_Ana_temp <- as.data.frame(data_Ana_temp) -data_Ana_temp[,"label"] <- as.factor(data_Ana[,"label"][!duplicates_id]) -``` - -### downsample -```{r} - -# downsample for faster calculation, plotting -nsample = 20000 -# subsample -set.seed(123) -rowsample <- sample(nrow(data_Ana_temp), nsample) -compensated_Ana_exprs_downsample <- compensated_Ana_exprs[rowsample,] -data_Ana_temp_downsample <- data_Ana_temp[rowsample,] - -#function to censor data, for clear heatmap -censor_dat <- function (x, a = 0.99){ - q = quantile(x, a) - x[x > q] = q - return(x) -} - -#function for arcsinh transform -transf <- function (x){asinh(x/5)} -``` - -#Run Rphenograph -```{r} -calculate_pheno <- function (data, cols, asinhtransfer = T){ - if (asinhtransfer <- T) { - data[,cols] <- transf(data[,cols]) - } - pheno_out <- Rphenograph::Rphenograph(data[,cols]) - cluster <- igraph::membership(pheno_out[[2]]) - return(cluster) -} -uncompensated_pheno <- calculate_pheno(data_Ana_temp_downsample, cols = 1:44) -compensated_pheno <- calculate_pheno(compensated_Ana_exprs_downsample, cols = 1:44) -``` - -### calculate tsne maps on uncompensated data -```{r} -calculate_tsne <- function (data, cols, asinhtransfer = T, verAnase = T, dims = 2, seed = 123){ - set.seed(seed) - tsne_dat <- data[,cols] - #asinh/5 transfer - if (asinhtransfer) {tsne_dat <- transf(tsne_dat)} - tsne_out <- Rtsne::Rtsne(tsne_dat, verAnase = verAnase, dims = dims) - return(tsne_out) -} - -#calculate based on uncompensated data -uncompensated_tsne = calculate_tsne(data_Ana_temp_downsample, cols = 1:44) - -# Setup some colors for plotting -qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] -col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))) -``` - -###plot population label on uncompensated tsne -```{r} -levels(data_Ana_temp_downsample$label) <- c(population_name_Ana) -tclust = data_Ana_temp_downsample[,"label"] - -tsne_coor <- uncompensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=1, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "cell type")+ - ggtitle('Ana uncompensated data tsne')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - - -###plot Rphenograhp cluster on uncompensated tsne -```{r} -tclust = uncompensated_pheno - -tsne_coor <- uncompensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "Phenograph cluster")+ - ggtitle('Ana uncompensated data tsne with Phenograph clusters')+ - guides(color=guide_legend(override.aes=list(size=5),ncol=2))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - - -###calculate tsne on compensated data -```{r} -compensated_tsne = calculate_tsne(compensated_Ana_exprs_downsample, cols = 1:44) -``` - -### plot population on compensated tsne -```{r} -levels(compensated_Ana_exprs_downsample$label) <- c(population_name_Ana) -tclust = compensated_Ana_exprs_downsample[,"label"] - -tsne_coor <- compensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=1, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "cell type")+ - ggtitle('Ana compensated data tsne')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - - -###plot Rphenograhp cluster on compensated tsne -```{r} -tclust = compensated_pheno - -tsne_coor <- compensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "Phenograph cluster")+ - ggtitle('Ana compensated data tsne with Phenograph clusters')+ - guides(color=guide_legend(override.aes=list(size=5),ncol=2))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - - -### uncompensated marker density plots - -the following plots the asinh(x/5) transformed intensities were normalized between 0-1 by using the 0.99 percentile of the data. -```{r fig.height=8, fig.width=15} -pdat <- transf(data_Ana_temp_downsample[,-45]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -### add colnames -dimnames(censor_pdat)[[2]] <- unname(Ana_marker[3:46]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- uncompensated_tsne$Y[,1] -censor_pdat$tsne_2 <- uncompensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Compensated Ana markers")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p - #ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Ana_uncompensated_marker.png", plot = p,width=15, height=8, dpi = 300) -``` - -#compensatd marker density plot -```{r fig.height=8, fig.width=15} -pdat <- transf(compensated_Ana_exprs_downsample[,-45]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -dimnames(censor_pdat)[[2]] <- unname(Ana_marker[3:46]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- uncompensated_tsne$Y[,1] -censor_pdat$tsne_2 <- uncompensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Compensated Ana markers")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p - - #ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Ana_compensated_marker.png", plot = p,width=15, height=8, dpi = 300) - -``` - -#compensatd marker density plot on compensated tsne -```{r fig.height=8, fig.width=15} -pdat <- transf(compensated_Ana_exprs_downsample[,-45]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -dimnames(censor_pdat)[[2]] <- unname(Ana_marker[3:46]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- compensated_tsne$Y[,1] -censor_pdat$tsne_2 <- compensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Compensated Ana markers on compensated tsne")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p - - #ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Ana_compensated_marker on compensated tsne.png", plot = p,width=15, height=8, dpi = 300) - -``` - - -#draw histograms -```{r} -library(reshape2) - -plot_multi_histogram <- function(df, feature, label_column, cutoff) { - plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) + - geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") + - geom_density(alpha=0.2) + - geom_vline(aes(xintercept=cutoff), color="black", linetype="dashed", size=1) + - labs(x=feature, y = "Density") + - theme_classic() - plt + guides(fill=guide_legend(title=label_column)) + - scale_fill_discrete(labels = c("Uncompensated", "Compensated")) -} - -prep_hist_pdata <- function(df1, df2, feature, cutoffs) { - exprs1 <- df1[feature] - exprs1 <- exprs1[which(exprs10),] - exprs2 <- df2[feature] - exprs2 <- exprs2[which(exprs20),] - label <- c(rep("dat1", length(exprs1)), rep("dat2", length(exprs2))) - df <- cbind(c(exprs1,exprs2), label) - colnames(df) <- c("value", "label") - df <- as.data.frame(df) - df[,1] <- as.numeric(as.character(df[,1])) - df[,1] <- asinh(df[,1]/5) - cutoff <- asinh(cutoffs[match(feature, colnames(df1))]/5) - return(list(df,cutoff)) -} - -ana_pt194di <- prep_hist_pdata(data_Ana_temp_downsample, compensated_Ana_exprs_downsample, feature = "Pt194Di", cutoffs = Ana_results[[3]]) -plot_multi_histogram(ana_pt194di[[1]], feature="value", label_column = "label", cutoff = ana_pt194di[[2]]) - -# ana_er166di <- prep_hist_pdata(data_Ana_temp_downsample, compensated_Ana_exprs_downsample, feature = "Er166Di", cutoffs = Ana_results[[3]]) -# plot_multi_histogram(ana_er166di[[1]], feature="value", label_column = "label", cutoff = ana_er166di[[2]]) - -``` - - -```{r} -write.FCS(flowFrame(as.matrix(data_Ana_temp[,-45])), filename = "~/CytoSpill copy/data/flowSOM/data_Ana_temp.fcs") -write.FCS(flowFrame(as.matrix(compensated_Ana_exprs[,-45])), filename = "~/CytoSpill copy/data/flowSOM/compensated_Ana_exprs.fcs") -``` - -```{r} -save.image("~/CytoSpill copy/data/Ana_analysis.Rdata") -sessionInfo() -``` diff --git a/scripts/Bo_analysis.Rmd b/scripts/Bo_analysis.Rmd deleted file mode 100644 index 18ffcc6..0000000 --- a/scripts/Bo_analysis.Rmd +++ /dev/null @@ -1,403 +0,0 @@ ---- -title: "Bo_analysis" -author: "Qi Miao" -output: html_document ---- - -#Analysis of leukemia patient blood data, from which generated Figure 4 - -```{r} -library(CytoSpill) -library(flowCore) -library(ggplot2) -library(Rtsne) -library(dplyr) -library(RColorBrewer) -library(reshape2) -library(Rphenograph) -``` - -#read data -```{r} -#read expression -data_Bo <- flowCore::exprs(flowCore::read.FCS("/Users/qmiao/CytoSpill copy/data/Bo_862043_labeled.fcs", transformation = FALSE, truncate_max_range = FALSE)) -#remove negative values if any -data_Bo[which(data_Bo<0)] <- 0 - -Bo_marker <- as.character(read.csv(file="/Users/qmiao/CytoSpill copy/data/Bo_marker.csv", header=F)$V1) -names(Bo_marker) <- colnames(data_Bo) - -population_names <- c("B_Cells", "CD4_T_cells", "CD8_T_cells", "NK cells") - -``` - -###select channels used for compensation and analysis -```{r} -data_Bo_temp <- data_Bo[,c(1:4,6:15,17:32,34:37,39:50)] -#remove duplicates -duplicates_id <- duplicated(data_Bo_temp) -data_Bo_temp <- data_Bo_temp[!duplicates_id,] -``` - -###use CytoSpill for compensation -```{r} -Bo_results <- SpillComp(data = data_Bo_temp, cols = 1:46, n = 20000, threshold = 0.1, flexrep = 5, neighbor = 1) -compensated_Bo <- Bo_results[[1]] -``` - - -### add back population label -```{r} -##compensated exprs -compensated_Bo_exprs <- as.data.frame(flowCore::exprs(compensated_Bo)) -compensated_Bo_exprs[,"label"] <- as.factor(data_Bo[,"label"][!duplicates_id]) -##uncompensated exprs -data_Bo_temp <- as.data.frame(data_Bo_temp) -data_Bo_temp[,"label"] <- as.factor(data_Bo[,"label"][!duplicates_id]) -``` - -### downsample -```{r} - -# downsample for faster calculation, plotting -nsample = 20000 -# subsample -set.seed(123) -rowsample <- sample(nrow(data_Bo_temp), nsample) -compensated_Bo_exprs_downsample <- compensated_Bo_exprs[rowsample,] -data_Bo_temp_downsample <- data_Bo_temp[rowsample,] - -#function to censor data, for clear heatmap -censor_dat <- function (x, a = 0.99){ - q = quantile(x, a) - x[x > q] = q - return(x) -} - -#function for arcsinh transform -transf <- function (x){asinh(x/5)} -``` - - -#Run Rphenograph -```{r} -calculate_pheno <- function (data, cols, asinhtransfer = T){ - if (asinhtransfer <- T) { - data[,cols] <- transf(data[,cols]) - } - pheno_out <- Rphenograph::Rphenograph(data[,cols]) - cluster <- igraph::membership(pheno_out[[2]]) - return(cluster) -} -uncompensated_pheno <- calculate_pheno(data_Bo_temp_downsample, cols = 1:46) -compensated_pheno <- calculate_pheno(compensated_Bo_exprs_downsample, cols = 1:46) -``` - - -### calculate tsne maps on uncompensated data -```{r} -calculate_tsne <- function (data, cols, asinhtransfer = T, verbose = T, dims = 2, seed = 123){ - set.seed(seed) - tsne_dat <- data[,cols] - #asinh/5 transfer - if (asinhtransfer) {tsne_dat <- transf(tsne_dat)} - tsne_out <- Rtsne::Rtsne(tsne_dat, verbose = verbose, dims = dims) - return(tsne_out) -} - -#calculate based on uncompensated data -uncompensated_tsne = calculate_tsne(data_Bo_temp_downsample, cols = 1:46) - -# Setup some colors for plotting -qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] -col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))) -``` - -###plot population label on uncompensated tsne -```{r} -levels(data_Bo_temp_downsample$label) <- c(population_names) -tclust = data_Bo_temp_downsample[,"label"] - -tsne_coor <- uncompensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#1965B0", "#882E72", - "#FF7F00", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=1, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "cell type")+ - ggtitle('Bo uncompensated data tsne')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - - -###plot Rphenograhp cluster on uncompensated tsne -```{r} -tclust = uncompensated_pheno - -tsne_coor <- uncompensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "Phenograph cluster")+ - ggtitle('Bo uncompensated data tsne with Phenograph clusters')+ - guides(color=guide_legend(override.aes=list(size=5),ncol=2))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - - -###calculate tsne on compensated data -```{r} -compensated_tsne = calculate_tsne(compensated_Bo_exprs_downsample, cols = 1:46) -``` - -### plot population on compensated tsne -```{r} -levels(compensated_Bo_exprs_downsample$label) <- c(population_names) -tclust = compensated_Bo_exprs_downsample[,"label"] - -tsne_coor <- compensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#1965B0", "#882E72", - "#FF7F00", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=1, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "cell type")+ - ggtitle('Bo compensated data tsne')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - -###plot Rphenograhp cluster on compensated tsne -```{r} -tclust = compensated_pheno - -tsne_coor <- compensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "Phenograph cluster")+ - ggtitle('Bo compensated data tsne with Phenograph clusters')+ - guides(color=guide_legend(override.aes=list(size=5),ncol=2))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - -### uncompensated marker density plots - -the following plots the asinh(x/5) transformed intensities were normalized between 0-1 by using the 0.99 percentile of the data. -```{r fig.height=8, fig.width=15} -pdat <- transf(data_Bo_temp_downsample[,-47]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -### add colnames -for (i in seq_along(Bo_marker)){ - names(Bo_marker)[i] <- paste(names(Bo_marker)[i], Bo_marker[i], sep ='-') -} -dimnames(censor_pdat)[[2]] <- names(Bo_marker[c(1:4,6:15,17:32,34:37,39:50)]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- uncompensated_tsne$Y[,1] -censor_pdat$tsne_2 <- uncompensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Uncomepensated Bo markers")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p - #ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Bo_uncompensated_marker.png", plot = p,width=15, height=8, dpi = 300) -``` - - -#compensatd marker density plot -```{r fig.height=8, fig.width=15} -pdat <- transf(compensated_Bo_exprs_downsample[,-47]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -dimnames(censor_pdat)[[2]] <- names(Bo_marker[c(1:4,6:15,17:32,34:37,39:50)]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- uncompensated_tsne$Y[,1] -censor_pdat$tsne_2 <- uncompensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Compensated Bo markers")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p - - #ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Bo_compensated_marker.png", plot = p,width=15, height=8, dpi = 300) -``` - -#compensatd marker density plot on compensated tsne plot -```{r fig.height=8, fig.width=15} -pdat <- transf(compensated_Bo_exprs_downsample[,-47]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -dimnames(censor_pdat)[[2]] <- names(Bo_marker[c(1:4,6:15,17:32,34:37,39:50)]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- compensated_tsne$Y[,1] -censor_pdat$tsne_2 <- compensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Compensated Bo markers on compensated tsne")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p - #ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Bo_compensated_marker_on_compensated_tsne.png", plot = p,width=15, height=8, dpi = 300) -``` - -#draw histograms -```{r} -library(reshape2) - -plot_multi_histogram <- function(df, feature, label_column, cutoff) { - plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) + - geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") + - geom_density(alpha=0.2) + - geom_vline(aes(xintercept=cutoff), color="black", linetype="dashed", size=1) + - labs(x=feature, y = "Density") + - theme_classic() - plt + guides(fill=guide_legend(title=label_column)) + - scale_fill_discrete(labels = c("Uncompensated", "Compensated")) -} - -prep_hist_pdata <- function(df1, df2, feature, cutoffs) { - exprs1 <- df1[feature] - exprs1 <- exprs1[,] - # exprs1 <- exprs1[which(exprs1>0),] - exprs2 <- df2[feature] - exprs2 <- exprs2[,] - # exprs2 <- exprs2[which(exprs2>0),] - label <- c(rep("dat1", length(exprs1)), rep("dat2", length(exprs2))) - df <- cbind(c(exprs1,exprs2), label) - colnames(df) <- c("value", "label") - df <- as.data.frame(df) - df[,1] <- as.numeric(as.character(df[,1])) - df[,1] <- asinh(df[,1]/5) - cutoff <- asinh(cutoffs[match(feature, colnames(df1))]/5) - return(list(df,cutoff)) -} - -bo_pt194di <- prep_hist_pdata(data_Bo_temp_downsample, compensated_Bo_exprs_downsample, feature = "Pt194Di", cutoffs = Bo_results[[3]]) -plot_multi_histogram(bo_pt194di[[1]], feature="value", label_column = "label", cutoff = bo_pt194di[[2]]) - - - -``` - -```{r} -write.FCS(flowFrame(as.matrix(data_Bo_temp[,-47])), filename = "~/CytoSpill copy/data/flowSOM/data_Bo_temp.fcs") -write.FCS(flowFrame(as.matrix(compensated_Bo_exprs[,-47])), filename = "~/CytoSpill copy/data/flowSOM/compensated_Bo_exprs.fcs") -``` - -```{r} -save.image("~/CytoSpill copy/data/Bo_analysis.Rdata") -sessionInfo() -``` diff --git a/scripts/Comparison_with_singlestained.Rmd b/scripts/Comparison_with_singlestained.Rmd deleted file mode 100644 index 6041f80..0000000 --- a/scripts/Comparison_with_singlestained.Rmd +++ /dev/null @@ -1,490 +0,0 @@ ---- -title: "comparison_catalyst_data" -author: "Qi Miao" -output: html_document ---- - -###Comparison with single-stained dependent compensation -###Figure 3 - -#Code adapted from correlation_analysis.Rmd Vito Zanotelli, et al. https://github.com/BodenmillerGroup/cyTOFcompensation/blob/master/scripts/correlation_analysis.Rmd - -```{r Libraries} -library(CytoSpill) -library(flowCore) -library(Rtsne) -library(Rphenograph) -library(CATALYST) -library(dplyr) -library(data.table) -library(dtplyr) -library(RColorBrewer) -library(gplots) -library(cba) -library(Hmisc) -library(mclust) -``` - -### Define some helper functions -```{r Define some helper functions functions} -flowFrame2dt <- function (datFCS){ - # converts a flow frame to a data table - dt = flowCore::exprs(datFCS) - colnames(dt) = make.names(make.unique(colnames(dt))) - fil = !is.na(datFCS@parameters@data$desc) - colnames(dt[, fil]) <- datFCS@parameters@data$desc[fil] - dt <- data.table::data.table(dt) - uninames = make.names(make.unique(datFCS@parameters@data$name)) - uni_newnames = make.names(make.unique(datFCS@parameters@data$desc)) - data.table::setnames(dt, uninames[fil], uni_newnames[fil]) - return(dt) -} - -censor_dat <- function (x, quant = 0.999){ - # censors the upper limit vector's value using the provided quantile - q = quantile(x, quant) - x[x > q] = q - return(x) -} - -``` - - -### Set the configuration variables -```{r Files} -# the FCS file used -fn_cells = '../data/160805_Exp3_cells-only.fcs' -name_condition = 'Exp3' - -# the file containing the spillover matirx generated by CATALYST -fn_sm = c('../data/160805_Exp3_spillover_matrix.csv') - -folder_out = '../data/CATALYST_comparison/' -do_load = T - -# subsample cellnumber -nsamp = 20000 - -transf = function(x) asinh(x/5) - -# defines some channel names that should not be considered -bad_channels = c( "CD3" , "CD45","SampleID","Time", "Event_length", "MCB102", "MCB104", "MCB105", "MCB106", - "MCB108","MCB110","MCB113","MCB115" ,"Beads140" , "File.Number", - "DNA193", "DNA191" , "Live194", "Live195" , "beadDist", "barcode", - "89Y" , "102Pd_BC1" , "104Pd_BC2" ,"105Pd_BC3" ,"106Pd_BC4","108Pd_BC5", "110Pd_BC6" , - "113In_BC7", "115In_BC8", "138Ba","140Ce", "191Ir_DNA1" , - "193Ir_DNA2","195Pt","196Pt","208Pb", "Center","Offset","Width", - "Residual" , "102Pd" , "103Rh" , "104Pd" , "105Pd" , "106Pd","108Pd", - "110Pd", "113In","115In" ,"157Gd" ,"155Gd-CD273",'File-Number' -) - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") -``` - -### Read the data as a flow frame -```{r Load the single cell data} -ff <- flowCore::read.FCS(fn_cells) -``` - -### Subsamples to speed the calculations up -```{r Subsample the data to speed the calculatios up} -set.seed(1234) -if (!is.na(nsamp)){ - ff = ff[sample(nrow(ff), nsamp)] -} -``` - -### Load the spillover matrixes generated and saved by CATALYST -```{r Load SM} -dat_sms = lapply(fn_sm, function(fn) read.csv(fn, row.names = 1)) -names(dat_sms) = sapply(fn_sm, function(fn) basename(fn)) -``` - -### Load the spillover matrix generated by CytoSpill -```{r} -es <- flowCore::exprs(ff) -###only column 15 to 51 were used as the same as CATALYST -# spillmat_results <- GetSpillMat(es, cols=15:51) -set.seed(2) -spillmat_cutoffs <- CytoSpill::.DeriveCutoffs(es, cols=15:51, n = nrow(es), flexrep = 30) -model <- .EstimateSpill(data=es, cutoffs=spillmat_cutoffs, cols=15:51, upperbound = 0.1, neighbor = 1) -threshold = 0.1 -estimates <- model[[1]] -xcols <- model[[2]] -spillmat <- diag(length(xcols)) -for (i in 1:length(xcols)) { - if (!is.na(xcols[[i]][1])) { - for (j in 1:length(xcols[[i]])) { - if (!is.na(estimates[[i]][j])){ - spillmat[xcols[[i]][j],i] <- min(estimates[[i]][j],threshold) - } - } - } -} - -sm_cytospill <- dat_sms -#substitute spill matrix values with spill matrix generated with CytoSpill -sm_cytospill[[1]][15:51,15:51] <- spillmat -names(sm_cytospill) <- "cytospill_estimated_spillover_matrix" -``` - -```{r} -#list of two spillover matrix one from single label control, one from CytoSpill -dat_sms[[2]] <- sm_cytospill[[1]] -names(dat_sms)[2] <- "cytospill_estimated_spillover_matrix" -``` - -### some notes here! -here the catalyst spillover matrix has all the colunms, not only the three sources. -### Compensate the data - -```{r Compensate the data using all the compensation matrices} -dats_compmeta = data.table(compensation= c('raw', paste(names(dat_sms),'_nnls'))) -dats_compmeta[, is_raw := compensation == 'raw'] -ff_list = list() -ff_list[[1]] = ff - -#source modified scripts from CATALSYT package, since the original version has error occurs. -source("../scripts/adaptSpillmat_(remove check_sm).R") -source("../scripts/helpers-compensation.R") - -for (i in seq_along(dat_sms)){ - tsm = dat_sms[[i]] - # helps with numerical instabilities sometimes observed during saving/loading - tsm = round(tsm, digits=8) - ff_list[[i+1]] = CATALYST::compCytof(ff, tsm, method='nnls') -} - -# for (i in seq_along(dat_sms)){ -# tsm = dat_sms[[i]] -# # There seem some numerical instabilities when loading the data (e.g. -5.596894e-33). -# # rounding fixes this issue. -# tsm = round(tsm, digits=8) -# ff_list[[i+(length(dat_sms)+1)]] = CATALYST::compCytof(ff, tsm, method='nnls') -# } - -names(ff_list) <- dats_compmeta$compensation -# ff_list with raw, flow compensated, nnls compensated -``` - -### Tidy the data into data.tables - -```{r Make a tidy data.table out of the data} -tidy_ff <- function(x, nm_condition){ - x = flowFrame2dt(x) - x[, id:=paste(as.character(1:.N), nm_condition)] - x = melt.data.table(x, id.vars = 'id',variable.name = 'channel', value.name='counts', variable.factor = F) - x[, counts_transf:= transf(counts)] - - setkey(x, id) - - return(x) -} - -dats= lapply(ff_list, function(x) tidy_ff(x, name_condition)) -``` - -### Clean up some channel names -```{r Clean channel names} -clean_channels = function(x){ - x[, channel :=as.character(gsub('X','',as.character(channel))) , by=channel] - x[, channel :=gsub('\\.','-',as.character(.BY)) , by=channel] - x[, channel :=gsub(' ','-',as.character(.BY)) , by=channel] - return(x) -} -dats= lapply(dats, clean_channels) -``` - - -### Create a metadata table -```{r Metadata} - -# for additional, global cell specific information, e.g. condition -datcell = dats[['raw']] %>% - dplyr::select(-c(counts, counts_transf, channel)) %>% - dplyr::filter(!duplicated(id)) %>% - mutate(condition= name_condition) -datcell <- as.data.table(datcell) -setkey(datcell, id) - -datmeta = data.table(name=ff@parameters$name, channel=ff@parameters$desc) -datmeta = clean_channels(datmeta) -setkey(datmeta, channel) - -good_channels =unique(datmeta$channel)[!unique(datmeta$channel) %in% bad_channels] - -``` - -### Add information which channels contain antibodies against the same target -```{r Add grouping antibody information} -datmeta[,antibody:=gsub('.....-','',channel)] - -#datmeta[antibody=='CD8b',antibody:='CD8'] -datmeta[,n_chan := length(unique(channel)), by=antibody] -datmeta[,metal:=substr(channel, 4, 5)] -datmeta[metal =='', metal:= as.character(1:.N)] -``` - -## Clustering and dimensionality reduction using TSNE -### Cluster the data using phenograph -```{r Define a helper function to do phenograph with the data structure used} - -# adapted from https://github.com/lmweber/FlowSOM-Rtsne-example/blob/master/FlowSOM_Rtsne_example.R - -do_phenograph<- function(data, channels, valuevar= 'counts_transf', seed=1234, subsample=FALSE){ - # A helper function - - pheno_dat = data.table::dcast.data.table(data[channel %in% channels, ],id~channel,value.var = valuevar) - all_ids = pheno_dat$id - - if (subsample == FALSE){ - subsample=1 - } - - - - sampids = pheno_dat[, sample(id, floor(.N*subsample),replace = F)] - pheno_dat_samp = pheno_dat[id %in%sampids, ] - ids =pheno_dat_samp$id - pheno_dat_samp[, id:=NULL] - set.seed(seed) - rpheno_out = Rphenograph::Rphenograph(pheno_dat_samp) - cluster = igraph::membership(rpheno_out[[2]]) - pheno_clust = data.table::data.table(x=ids) - setnames(pheno_clust, 'x', 'id') - pheno_clust[, cluster:=factor(cluster[as.character(seq(length(ids)))])] - data.table::setkey(pheno_clust, 'id') - pheno_clust = pheno_clust[all_ids ,] - - return(pheno_clust) -} -``` - -```{r Run phenograph on the data before and after compensation} -datspheno= lapply(dats, function(dat) do_phenograph(dat, good_channels, valuevar = 'counts_transf', seed=1234)) -``` - -### calculate tsne maps - -```{r} -calc_tsne <- function (input_dat, channels, value_var = "counts", channel_var = "channel", - id_var = "id", scale = F, verbose = T, dims = 2, seed=1234,...){ - set.seed(seed) - ids = input_dat[!duplicated(get(id_var)), get(id_var)] - dt = data.table::dcast(input_dat[(get(channel_var) %in% - channels) & get(id_var) %in% ids], formula = as.formula(paste(id_var, - "~", channel_var)), value.var = value_var) - if (scale) { - tsnedat = scale(dt[, channels, with = F]) - } - else { - tsnedat = dt[, channels, with = F] - } - tsne_out <- Rtsne::Rtsne(tsnedat, verbose = verbose, dims = dims, - ...) - tsne_out$Y = data.table(tsne_out$Y) - setnames(tsne_out$Y, names(tsne_out$Y), paste("bh", names(tsne_out$Y), - sep = "_")) - tsne_out$Y$id = dt[, get(id_var)] - data.table::setnames(tsne_out$Y, "id", id_var) - data.table::setkeyv(tsne_out$Y, id_var) - tsne_out$channels = channels - tsne_out$scale = F - return(tsne_out) -} - -# Setup some colors for plotting -qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] -col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))) - -``` - -The TSNE map is calculated on the uncompensated values -```{r Calculate the tsne map} -ttsne = calc_tsne(dats[['raw']], good_channels, value_var = 'counts_transf', channel_var = "channel") -``` - -### The phenograph clusters are plotted on the TSNE map - - -```{r Plot the phenograph cluster on the tsne map} - -tclust = datspheno[['raw']] -tdat = dats[['raw']] - -p = ggplot(subset(tdat, !duplicated(id))[ttsne$Y][tclust], aes(x=bh_V1, y=bh_V2))+ - geom_point(size=0.3, alpha=1, aes(color=as.factor(cluster)))+ - scale_color_manual(values = col_list)+ - ggtitle('Uncompensated data tsne with Phenograph clusters')+ - guides(color=guide_legend(override.aes=list(size=5), ncol=2))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p - -``` - -### select phenograph clustering to be used for all downstream analysis -The phenograph based on the uncompensated data is used for all future plots - -```{r select the phenograph clustering} -clusterdat = datspheno[['raw']] -``` - - -### Map the markers on the tsne map - -In the following plots the asinh(x/5) transformed intensities were normalized between 0-1 by using the 0.99 percentile of the data. - -```{r, fig.height=6, fig.width=15} -ptsne = ttsne -pdats = dats - -for (tdatname in names(pdats)){ - tdat = pdats[[tdatname]] - tdat[, c_counts := censor_dat(counts_transf,0.99), by=channel] - tdat[, c_counts_scaled := c_counts, by=channel] - tdat[, c_counts_scaled := (c_counts_scaled/max(c_counts_scaled)), by=channel] - p = ggplot(subset(tdat, channel %in% good_channels)[ptsne$Y], - aes(x=bh_V1, y=bh_V2, color=c_counts_scaled))+ - facet_wrap(~channel, scales = "free", ncol = 9)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle(tdatname)+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) - print(p) - #ggsave(filename = file.path(folder_out, paste(tdatname, 'tsne_all_markers.png',sep='_')), plot = p,width=15, height=6, dpi = 300) - -} - -``` - - - -## Plot heatmaps of the phenograph cluster statistics - -The chunck below defines some convenience functions -```{r Helper functions} -get_summarydat <- function(ssdat, clusterdat, valuevar){ - # calculates summary statistics per cluster - summary_dat = ssdat[clusterdat][ ,list( - median_val = median(get(valuevar), na.rm=T), - mean_val = median(get(valuevar), na.rm=T), - cell_cluster=.N - ), by=.(channel,cluster)] - return(summary_dat) -} - -dat2mat <- function(data, formula, valuevar){ - # calculates a matrix suitable for a heatmap from the long data - hm_dat = dcast.data.table(data =data, formula =formula, - value.var = valuevar) - - trownames = hm_dat$cluster - - # convert to a matrix - hm_dat = as.matrix(hm_dat[,-1,with=F]) - row.names(hm_dat) = trownames - - return(hm_dat) -} - - -``` - -### Heatmap of the cluster medians - -For each phenograph cluster the cluster median is calculated and a heatmap is generated. -The all heatmap are clustered based on the uncompensated data in order to make them visually easily comparable. -The colorscale corresponds to the asinh(x/5) transformed count and is kept constant for all the plots to make them directly comparable. - -```{r} -firstclust = 1 -pdats = dats -par(cex.main = 1) -names(pdats) = c("Uncompensated", "Single-stained controls compensated", "CytoSpill compensated") -for (i in seq_along(pdats)){ - cur_name = names(pdats)[i] - hm_dat = pdats[[i]][channel %in% good_channels,] %>% - get_summarydat(clusterdat, 'counts_transf') %>% - dat2mat('cluster ~ channel','median_val') - - if (firstclust){ - firstclust=F - tdist = as.dist(1-cor(t(hm_dat), method="pearson")) - tdist[is.na(tdist)] = 0 - hr <- hclust(tdist, method="ward.D") - co_r <- order.optimal(tdist, hr$merge) - hr$merge = co_r$merge - hr$order = co_r$order - - tdist = as.dist(1-cor((hm_dat), method="pearson")) - tdist[is.na(tdist)] = 0 - hc <- hclust(tdist, method="ward.D") - co_c <- order.optimal(tdist, hc$merge) - hc$merge = co_c$merge - hc$order = co_c$order - } - if (any(hm_dat<0)){ - cols = rev(brewer.pal(11,'RdBu')) - cmap = colorRampPalette(cols) - - } else { - cols = rev(brewer.pal(11,'RdBu')) - cmap = colorRampPalette(cols[6:11]) - - } - - heatmap.2(hm_dat, - scale ='none', - trace = "none", - # col=cmap(75), - col=rev(brewer.pal(11,'RdBu')), - Rowv=as.dendrogram(hr), - #RowSideColors=sel_col_vector[group_levels], - #dendrogram='column', - Colv=as.dendrogram(hc), - density.info ='none', - #Colv =NA, - #keyorient=2, - #sepwidth = c(0,0), - cexRow=0.9, - cexCol=0.5, - margins=c(4,4), - main=cur_name - #ylab ='Genes', - #comments = data.frame(names = row.names(tab_Prot)) - ) -} - - -``` - -These heatmaps use arcsinh(x/5) scaled data. There is clearly some spillover vanishing upon compensation. After compensation the same agentibodies stained in the different channels look much more comparable. -With the 'classical' compensation a slight overcompensation is visible that is not visible an the NNLS compensation. - - -```{r} -sessionInfo() -``` - diff --git a/scripts/Levine32_analysis.Rmd b/scripts/Levine32_analysis.Rmd deleted file mode 100644 index 31a1d1f..0000000 --- a/scripts/Levine32_analysis.Rmd +++ /dev/null @@ -1,397 +0,0 @@ ---- -title: "Levine32_analysis" -author: "Qi Miao" -output: html_document ---- - -#Analysis of healthy human bone marrow data, from which generated Figure 6. - -```{r} -library(CytoSpill) -library(flowCore) -library(ggplot2) -library(Rtsne) -library(dplyr) -library(RColorBrewer) -library(reshape2) -library(Rphenograph) -``` - -#read data -```{r} -#read expression -data_Levine32 <- flowCore::exprs(flowCore::read.FCS("/Users/qmiao/CytoSpill copy/data/Levine_32dim_notransform.fcs",transformation = FALSE,truncate_max_range = FALSE)) -#remove negative values -data_Levine32[which(data_Levine32<0)] <- 0 - -#load metals used for each channel -load("/Users/qmiao/CytoSpill copy/data/Levine_32dim_colnames.RData") -col_names - -Levine_marker <- colnames(data_Levine32) -names(Levine_marker)[1:39] <- col_names - -colnames(data_Levine32)[1:39] <- col_names - -population_names <- c("Basophils", "CD16- NK cells", "CD16+ NK cells", "CD34+CD38+CD123-_HSPCs", - "CD34+CD38+CD123+_HSPCs", "CD34+CD38lo_HSCs", "CD4_T_cells", "CD8_T_cells", - "Mature_B_cells", "Monocytes", "pDCs", "Plasma_B_cells", "Pre_B_cells", "Pro_B_cells") - -``` - -###select channels used for compensation and analysis -```{r} -data_Levine32_temp <- data_Levine32[,c(5:36)] -#remove duplicates -duplicates_id <- duplicated(data_Levine32_temp) -data_Levine32_temp <- data_Levine32_temp[!duplicates_id,] -``` - -###use CytoSpill for compensation -```{r} -# Levine_results <- SpillComp(data = data_Levine32_temp, cols = 1:32, n = 50000) -set.seed(123) -Levine_results <- SpillComp(data = data_Levine32_temp, cols = 1:32, n = 20000, threshold = 0.1, flexrep = 5, neighbor = 1) -compensated_Levine32 <- Levine_results[[1]] -``` - -### add back population label -```{r} -##compensated exprs -compensated_Levine32_exprs <- as.data.frame(flowCore::exprs(compensated_Levine32)) -compensated_Levine32_exprs[,"label"] <- as.factor(data_Levine32[,"label"][!duplicates_id]) -##uncompensated exprs -data_Levine32_temp <- as.data.frame(data_Levine32_temp) -data_Levine32_temp[,"label"] <- as.factor(data_Levine32[,"label"][!duplicates_id]) -``` - -### downsample -```{r} - -# downsample for faster calculation, plotting -nsample = 20000 -# subsample -set.seed(123) -rowsample <- sample(nrow(data_Levine32_temp), nsample) -compensated_Levine32_exprs_downsample <- compensated_Levine32_exprs[rowsample,] -data_Levine32_temp_downsample <- data_Levine32_temp[rowsample,] - -#function to censor data, for clear heatmap -censor_dat <- function (x, a = 0.99){ - q = quantile(x, a) - x[x > q] = q - return(x) -} - -#function for arcsinh transform -transf <- function (x){asinh(x/5)} -``` - -#Run Rphenograph -```{r} -calculate_pheno <- function (data, cols, asinhtransfer = T){ - if (asinhtransfer <- T) { - data[,cols] <- transf(data[,cols]) - } - pheno_out <- Rphenograph::Rphenograph(data[,cols]) - cluster <- igraph::membership(pheno_out[[2]]) - return(cluster) -} - -uncompensated_pheno <- calculate_pheno(data_Levine32_temp_downsample, cols = 1:32) -compensated_pheno <- calculate_pheno(compensated_Levine32_exprs_downsample, cols = 1:32) -``` - -### calculate tsne maps on uncompensated data -```{r} -calculate_tsne <- function (data, cols, asinhtransfer = T, verbose = T, dims = 2, seed = 123){ - set.seed(seed) - tsne_dat <- data[,cols] - #asinh/5 transfer - if (asinhtransfer) {tsne_dat <- transf(tsne_dat)} - tsne_out <- Rtsne::Rtsne(tsne_dat, verbose = verbose, dims = dims) - return(tsne_out) -} - -#calculate based on uncompensated data -uncompensated_tsne = calculate_tsne(data_Levine32_temp_downsample, cols = 1:32) - -# Setup some colors for plotting -qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] -col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))) -``` - -###plot population label on uncompensated tsne -```{r} -levels(data_Levine32_temp_downsample$label) <- c(population_names, "NA") -tclust = data_Levine32_temp_downsample[,"label"] -# nonNA <- !(data_Levine32_temp_downsample[,"label"]=="NA") - -tsne_coor <- uncompensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "gray85", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "cell type")+ - ggtitle('Levine uncompensated data tsne')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - -###plot Rphenograhp cluster on uncompensated tsne -```{r} -tclust = uncompensated_pheno - -tsne_coor <- uncompensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "Phenograph cluster")+ - ggtitle('Levine uncompensated data tsne with Phenograph clusters')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - -###calculate tsne on compensated data -```{r} -compensated_tsne = calculate_tsne(compensated_Levine32_exprs_downsample, cols = 1:32) -``` -### plot population on compensated tsne -```{r} -levels(compensated_Levine32_exprs_downsample$label) <- c(population_names, "NA") -tclust = compensated_Levine32_exprs_downsample[,"label"] -# nonNA <- !(compensated_Levine32_exprs_downsample[,"label"]=="NA") - -tsne_coor <- compensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "gray80", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "cell type")+ - ggtitle('Levine compensated data tsne')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - -###plot Rphenograhp cluster on compensated tsne -```{r} -tclust = compensated_pheno - -tsne_coor <- compensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "Phenograph cluster")+ - ggtitle('Levine compensated data tsne with Phenograph clusters')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - -### compensated pheno cluster on uncompensated tsne -```{r} -tclust = compensated_pheno - -tsne_coor <- uncompensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "Phenograph cluster")+ - ggtitle('Levine uncompensated data tsne with compensated Phenograph clusters')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` -### uncompensated marker density plots - -the following plots the asinh(x/5) transformed intensities were normalized between 0-1 by using the 0.99 percentile of the data. -```{r fig.height=6, fig.width=15} -pdat <- transf(data_Levine32_temp_downsample[,-33]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -### add colnames -for (i in seq_along(Levine_marker)){ - names(Levine_marker)[i] <- paste(names(Levine_marker)[i], Levine_marker[i], sep ='-') -} -dimnames(censor_pdat)[[2]] <- names(Levine_marker[c(5:36)]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- uncompensated_tsne$Y[,1] -censor_pdat$tsne_2 <- uncompensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Uncomepensated Levine markers")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p -# ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Levine_uncompensated_marker.png", plot = p,width=15, height=6, dpi = 300) -``` - -#compensatd marker density plot -```{r fig.height=6, fig.width=15} -pdat <- transf(compensated_Levine32_exprs_downsample[,-33]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -dimnames(censor_pdat)[[2]] <- names(Levine_marker[c(5:36)]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- uncompensated_tsne$Y[,1] -censor_pdat$tsne_2 <- uncompensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Compensated Levine markers")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p -# ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Levine_compensated_marker.png", plot = p,width=15, height=6, dpi = 300) -``` - -###compensated marker on compensated tsne plot -```{r fig.height=6, fig.width=15} -pdat <- transf(compensated_Levine32_exprs_downsample[,-33]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -dimnames(censor_pdat)[[2]] <- names(Levine_marker[c(5:36)]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- compensated_tsne$Y[,1] -censor_pdat$tsne_2 <- compensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Compensated Levine markers on compensated tsne")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p -# ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Levine_compensated_marker_on_compensated_tsne.png", plot = p,width=15, height=6, dpi = 300) -``` - -```{r} -write.FCS(flowFrame(as.matrix(data_Levine32_temp[,-33])), filename = "~/CytoSpill copy/data/flowSOM/data_Levine32_temp.fcs") -write.FCS(flowFrame(as.matrix(compensated_Levine32_exprs[,-33])), filename = "~/CytoSpill copy/data/flowSOM/compensated_Levine32_exprs.fcs") -``` - -```{r} -# save.image("~/CytoSpill copy/data/Levine32_analysis.Rdata") -sessionInfo() -``` - diff --git a/scripts/Samusik_analysis.Rmd b/scripts/Samusik_analysis.Rmd deleted file mode 100644 index 6869f88..0000000 --- a/scripts/Samusik_analysis.Rmd +++ /dev/null @@ -1,377 +0,0 @@ ---- -title: "Samusik_analysis" -author: "Qi Miao" -output: html_document ---- - -#Analysis of mouse bone marrow data. - -```{r} -library(CytoSpill) -library(flowCore) -library(ggplot2) -library(Rtsne) -library(dplyr) -library(RColorBrewer) -library(reshape2) -library(Rphenograph) -``` - -#read data -```{r} -#read expression -data_Samusik <- flowCore::exprs(flowCore::read.FCS("/Users/qmiao/CytoSpill copy/data/Samusik_01_notransform.fcs", transformation = FALSE, truncate_max_range = FALSE)) -#remove negative values if any -data_Samusik[which(data_Samusik<0)] <- 0 - -#load metals used for each channel -load("/Users/qmiao/CytoSpill copy/data/Samusik_colnames_population.Rdata") -col_names -levels(population) - -Samusik_marker <- colnames(data_Samusik) -names(Samusik_marker)[1:51] <- col_names - -colnames(data_Samusik)[1:51] <- col_names -``` - -###select channels used for compensation and analysis -```{r} -data_Samusik_temp <- data_Samusik[,9:47] -#remove duplicates -duplicates_id <- duplicated(data_Samusik_temp) -data_Samusik_temp <- data_Samusik_temp[!duplicates_id,] -``` - -###use CytoSpill for compensation -```{r} -#step by step code for detail investigation. -spillmat_results <- GetSpillMat(data = data_Samusik_temp, cols = 1:39, n = 20000, threshold = 0.1, flexrep = 5, neighbor = 1) -data <- data_Samusik_temp -cols <- 1:39 -spillmat <- spillmat_results[[1]] -cutoffs <- spillmat_results[[2]] -data_compensated <- t(apply(data[,cols], 1, function(row) nnls::nnls(t(spillmat), row)$x)) -data_colnames <- colnames(data) -data[,cols] <- data_compensated -colnames(data) <- data_colnames - -compensated_Samusik <- flowFrame(data) -``` - - -### add back population label -```{r} -##compensated exprs -compensated_Samusik_exprs <- as.data.frame(flowCore::exprs(compensated_Samusik)) -compensated_Samusik_exprs[,"label"] <- as.factor(data_Samusik[,"label"][!duplicates_id]) -##uncompensated exprs -data_Samusik_temp <- as.data.frame(data_Samusik_temp) -data_Samusik_temp[,"label"] <- as.factor(data_Samusik[,"label"][!duplicates_id]) -``` - -### downsample -```{r} - -# downsample for faster calculation, plotting -nsample = 20000 -# subsample -set.seed(123) -rowsample <- sample(nrow(data_Samusik_temp), nsample) -compensated_Samusik_exprs_downsample <- compensated_Samusik_exprs[rowsample,] -data_Samusik_temp_downsample <- data_Samusik_temp[rowsample,] - -#function to censor data, for clear heatmap -censor_dat <- function (x, a = 0.99){ - q = quantile(x, a) - x[x > q] = q - return(x) -} - -#function for arcsinh transform -transf <- function (x){asinh(x/5)} -``` - -#Run Rphenograph -```{r} -calculate_pheno <- function (data, cols, asinhtransfer = T){ - if (asinhtransfer <- T) { - data[,cols] <- transf(data[,cols]) - } - pheno_out <- Rphenograph::Rphenograph(data[,cols]) - cluster <- igraph::membership(pheno_out[[2]]) - return(cluster) -} -uncompensated_pheno <- calculate_pheno(data_Samusik_temp_downsample, cols = 1:39) -compensated_pheno <- calculate_pheno(compensated_Samusik_exprs_downsample, cols = 1:39) -``` - -### calculate tsne maps on uncompensated data -```{r} -calculate_tsne <- function (data, cols, asinhtransfer = T, verbose = T, dims = 2, seed = 123){ - set.seed(seed) - tsne_dat <- data[,cols] - #asinh/5 transfer - if (asinhtransfer) {tsne_dat <- transf(tsne_dat)} - tsne_out <- Rtsne::Rtsne(tsne_dat, verbose = verbose, dims = dims) - return(tsne_out) -} - -#calculate based on uncompensated data -uncompensated_tsne = calculate_tsne(data_Samusik_temp_downsample, cols = 1:39) - -# Setup some colors for plotting -qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',] -col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))) -``` - - -###plot population label on uncompensated tsne -```{r fig.height=3, fig.width=5} -levels(data_Samusik_temp_downsample$label) <- c(levels(population), "NA") -tclust = data_Samusik_temp_downsample[,"label"] -# nonNA <- !(data_Samusik_temp_downsample[,"label"]=="NA") - -tsne_coor <- uncompensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "gray80", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "cell type")+ - ggtitle('Samusik uncompensated data tsne')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - -###plot Rphenograhp cluster on uncompensated tsne -```{r} -tclust = uncompensated_pheno - -tsne_coor <- uncompensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "Phenograph cluster")+ - ggtitle('Samusik uncompensated data tsne with Phenograph clusters')+ - guides(color=guide_legend(override.aes=list(size=5),ncol=2))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - - -###calculate tsne on compensated data -```{r} -compensated_tsne = calculate_tsne(compensated_Samusik_exprs_downsample, cols = 1:39) -``` - -### plot population on compensated tsne -```{r fig.height=3, fig.width=5} -levels(compensated_Samusik_exprs_downsample$label) <- c(levels(population), "NA") -tclust = compensated_Samusik_exprs_downsample[,"label"] -# nonNA <- !(compensated_Samusik_exprs_downsample[,"label"]=="NA") - -tsne_coor <- compensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "gray80", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "cell type")+ - ggtitle('Samusik compensated data tsne')+ - guides(color=guide_legend(override.aes=list(size=5)))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - -###plot Rphenograhp cluster on compensated tsne -```{r} -tclust = compensated_pheno - -tsne_coor <- compensated_tsne$Y -colnames(tsne_coor) <- c("tsne_1", "tsne_2") - -col_list <- c("#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", - "#B17BA6", "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", - "#33A02C", "#B2DF8A", "#55A1B1", "#8DD3C7", "#A6761D", - "#E6AB02", "#7570B3", "#BEAED4", "#666666", "#999999", - "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3", "#808000", - "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00") - -p = ggplot(as.data.frame(tsne_coor), aes(x=tsne_1, y=tsne_2))+ - geom_point(size=0.3, alpha=0.8, aes(color=as.factor(tclust)))+ - scale_color_manual(values = col_list, name = "Phenograph cluster")+ - ggtitle('Samusik compensated data tsne with Phenograph clusters')+ - guides(color=guide_legend(override.aes=list(size=5),ncol=2))+ - theme(strip.background = element_blank(), - panel.background=element_rect(fill='white', colour = 'black'), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank(), - legend.key = element_blank()) -p -``` - -### uncompensated marker density plots - -the following plots the asinh(x/5) transformed intensities were normalized between 0-1 by using the 0.99 percentile of the data. -```{r fig.height=6, fig.width=15} -pdat <- transf(data_Samusik_temp_downsample[,-40]) -censor_pdat <- apply(pdat, 2, censor_dat) - -#normalize 0-1 -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -### add colnames -for (i in seq_along(Samusik_marker)){ - names(Samusik_marker)[i] <- paste(names(Samusik_marker)[i], Samusik_marker[i], sep ='-') -} -dimnames(censor_pdat)[[2]] <- names(Samusik_marker[9:47]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- uncompensated_tsne$Y[,1] -censor_pdat$tsne_2 <- uncompensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Uncomepensated Samusik markers")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p - #ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Samusik_uncompensated_marker.png", plot = p,width=15, height=6, dpi = 300) -``` - -#compensatd marker density plot -```{r fig.height=6, fig.width=15} -pdat <- transf(compensated_Samusik_exprs_downsample[,-40]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -dimnames(censor_pdat)[[2]] <- names(Samusik_marker[9:47]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- uncompensated_tsne$Y[,1] -censor_pdat$tsne_2 <- uncompensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Compensated Samusik markers")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p -#ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Samusik_compensated_marker.png", plot = p,width=15, height=6, dpi = 300) -``` - -#compensatd marker density plot on compensated tsne -```{r fig.height=6, fig.width=15} -pdat <- transf(compensated_Samusik_exprs_downsample[,-40]) -censor_pdat <- apply(pdat, 2, censor_dat) - -censor_pdat <- apply(censor_pdat, MARGIN = 2, FUN = function(X) (X - min(X))/diff(range(X))) - -dimnames(censor_pdat)[[2]] <- names(Samusik_marker[9:47]) - -censor_pdat <- as.data.frame(censor_pdat) -censor_pdat$tsne_1 <- compensated_tsne$Y[,1] -censor_pdat$tsne_2 <- compensated_tsne$Y[,2] - -pdat_melt <- reshape2::melt(censor_pdat, id.vars = c("tsne_1","tsne_2"), variable.name = "channel") - -p = ggplot(pdat_melt, aes(x=tsne_1, y=tsne_2, color=value))+ - facet_wrap(~channel, scales = "free", ncol = 8)+ - geom_point(alpha=0.5, size=0.3)+ - scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts', limits=c(0, 1))+ - ggtitle("Compensated Samusik markers on compensated tsne")+ - theme(strip.background = element_blank(), - strip.text.x = element_text(size = 11), - axis.line=element_blank(), - axis.text.x=element_blank(), - axis.text.y=element_blank(), - axis.ticks=element_blank(), - axis.title.x=element_blank(), - axis.title.y=element_blank(), - panel.background=element_blank(), - panel.border=element_blank(), - panel.grid.major=element_blank(), - panel.grid.minor=element_blank(), - plot.background=element_blank()) -p -#ggsave(filename = "/Users/qmiao/CytoSpill copy/scripts/plot/Samusik_compensated_marker_on_compensated_tsne.png", plot = p,width=15, height=6, dpi = 300) -``` - -```{r} -write.FCS(flowFrame(as.matrix(data_Samusik_temp[,-40])), filename = "~/CytoSpill copy/data/flowSOM/data_Samusik_temp.fcs") -write.FCS(flowFrame(as.matrix(compensated_Samusik_exprs[,-40])), filename = "~/CytoSpill copy/data/flowSOM/compensated_Samusik_exprs.fcs") -``` - -```{r} -# save.image("~/CytoSpill copy/data/Samusik_analysis.Rdata") -sessionInfo() -``` diff --git a/scripts/Supplementary1.R b/scripts/Supplementary1.R deleted file mode 100644 index 3843333..0000000 --- a/scripts/Supplementary1.R +++ /dev/null @@ -1,200 +0,0 @@ -#generate supplementary 1 results (based on reviewer 2 comment 3) -#simulation -r2c3_simulation_data <- function(){ - Nd142Di <- c(rnorm(10000, 200, 50), rep(0, 10000)) - Nd142Di[which(Nd142Di < 0)] <- 0 - - Nd144Di <- c(rnorm(10000, 300, 75), rep(0, 10000)) - Nd144Di[which(Nd144Di < 0)] <- 0 - - Nd145Di <- c(rnorm(10000, 400, 100), rep(0, 10000)) - Nd145Di[which(Nd145Di < 0)] <- 0 - - Nd143Di <- c(rep(0, 10000), rnorm(10000, 300, 75)) - Nd143Di[which(Nd143Di < 0)] <- 0 - - data_1 <- cbind(Nd142Di, Nd143Di, Nd144Di, Nd145Di) - return(data_1) -} - -set.seed(1) -data_channelwithvalue <- r2c3_simulation_data() -#different value -data_channelwithvalue_level2 <- data_channelwithvalue -data_channelwithvalue_level2[,2] <- data_channelwithvalue[,2]*0.5 - -data_channelwithvalue_level3 <- data_channelwithvalue -data_channelwithvalue_level3[,2] <- data_channelwithvalue[,2]*2 - -data_channelwithvalue_level4 <- data_channelwithvalue -data_channelwithvalue_level4[,2] <- data_channelwithvalue[,2]*0.2 - -#empty channel no value present -data_emptychannel <- data_channelwithvalue -data_emptychannel[,2] <- rep(0, 20000) - -#get_spill_cols from CATALYST package -get_spill_cols <- function(ms, mets, l=CATALYST::isotope_list) { - ms <- as.numeric(ms) - spill_cols <- vector("list", length(ms)) - for (i in seq_along(ms)) { - p1 <- m1 <- ox <- iso <- NULL - if ((ms[i] + 1) %in% ms) p1 <- which(ms == (ms[i] + 1)) - if ((ms[i] - 1) %in% ms) m1 <- which(ms == (ms[i] - 1)) - if ((ms[i] + 16) %in% ms) ox <- which(ms == (ms[i] + 16)) - iso <- l[[mets[i]]] - iso <- which(ms %in% iso[iso != ms[i]]) - spill_cols[[i]] <- unique(c(m1, p1, iso, ox)) - } - spill_cols -} - -spill_cols_data <- function(data){ - ##get ms and mets - chs <- colnames(data) - ####metal mass number like 167,*** - ms <- as.numeric(regmatches(chs, gregexpr("[0-9]+", chs))) - ####metal name - mets <- gsub("[[:digit:]]+Di", "", chs) - ##get spillover cols - spill_cols <- get_spill_cols(ms, mets) - return(spill_cols) -} - -#function generate a simulated spillover matrix -sm_simulation <- function(data, n, ...){ - ##get ms and mets - chs <- colnames(data) - ####metal mass number like 167,*** - ms <- as.numeric(regmatches(chs, gregexpr("[0-9]+", chs))) - ####metal name - mets <- gsub("[[:digit:]]+Di", "", chs) - ##get spillover cols - spill_cols <- get_spill_cols(ms, mets) - - sm <- vector("list",n) - for (i in 1:n){ - m = diag(length(spill_cols)) - for (j in seq_along(spill_cols)){ - for (k in spill_cols[[j]]){ - m[j,k] <- runif(1, min=0, max=0.1) - } - } - sm[[i]] <- m - } - return(sm) -} - -#function generate simulated data -data_simulation <- function(signal, sm, ...){ - data <- vector("list",length(sm)) - for (i in seq_along(sm)){ - data[[i]] = signal%*%sm[[i]] - colnames(data[[i]]) = colnames(signal) - } - return(data) -} - -sm_simulated <- sm_simulation(data = data_channelwithvalue, n = 10) -##simulated data with different settings -data_simulated_1 <- data_simulation(signal = data_channelwithvalue, sm = sm_simulated) -data_simulated_2 <- data_simulation(signal = data_emptychannel, sm = sm_simulated) -data_simulated_level2 <- data_simulation(signal = data_channelwithvalue_level2, sm = sm_simulated) -data_simulated_level3 <- data_simulation(signal = data_channelwithvalue_level3, sm = sm_simulated) -data_simulated_level4 <- data_simulation(signal = data_channelwithvalue_level4, sm = sm_simulated) - -estimate_sm <- function(data, spill_cols){ - my_sm = vector("list",length(data)) - my_sm_nmf = vector("list",length(data)) - for (i in seq_along(data)){ - cutoffs_sim <- .DeriveCutoffs(data=data[[i]], cols=c(1:ncol(data[[i]])), n = 20000, flexrep = 10) - threshold <- 0.1 - model <- .EstimateSpill(data=data[[i]], cutoffs=cutoffs_sim, cols=c(1:ncol(data[[i]])), upperbound = threshold, neighbor = 1) - estimates <- model[[1]] - xcols <- model[[2]] - spillmat <- diag(length(xcols)) - for (m in 1:length(xcols)) { - if (!is.na(xcols[[m]][1])) { - for (j in 1:length(xcols[[m]])) { - if (!is.na(estimates[[m]][j])){ - spillmat[xcols[[m]][j],m] <- min(estimates[[m]][j],threshold) - } - } - } - } - sm_sim <- spillmat - my_sm[[i]] <- sm_sim - } - return(my_sm) -} - -spill_cols <- spill_cols_data(data_channelwithvalue) -estimate_sm_withvalue <- estimate_sm(data_simulated_1, spill_cols) -estimate_sm_withoutvalue <- estimate_sm(data_simulated_2, spill_cols) -estimate_sm_withvalue_level2 <- estimate_sm(data_simulated_level2, spill_cols) -estimate_sm_withvalue_level3 <- estimate_sm(data_simulated_level3, spill_cols) -estimate_sm_withvalue_level4 <- estimate_sm(data_simulated_level4, spill_cols) - -channel2_comparison <- function(x, y, spill_cols){ - comparison = NULL - for (i in seq_along(x)){ - single <- NULL - for (j in c(1,3,4)){ - for (k in c(2)){ - single <- rbind(single,c(x[[i]][j,k],y[[i]][j,k])) - } - } - comparison <- rbind(comparison,single) - } - colnames(comparison) <- c('simulation','my_method') - return(comparison) -} - -withvalue_comparison <- channel2_comparison(sm_simulated, estimate_sm_withvalue, spill_cols) -withoutvalue_comparison <- channel2_comparison(sm_simulated, estimate_sm_withoutvalue, spill_cols) -withvaluevswithoutvalue <- channel2_comparison(estimate_sm_withvalue,estimate_sm_withoutvalue, spill_cols) - -withvalue_level2_comparison <- channel2_comparison(sm_simulated, estimate_sm_withvalue_level2, spill_cols) -withvalue_level3_comparison <- channel2_comparison(sm_simulated, estimate_sm_withvalue_level3, spill_cols) -withvalue_level4_comparison <- channel2_comparison(sm_simulated, estimate_sm_withvalue_level4, spill_cols) - - -ggplot(as.data.frame(withvalue_comparison), aes(x=my_method, y=simulation)) + - geom_point(size=0.1) + theme_classic() + xlab('Channel with signal estimated spillover effects') + - ylab('Simulated spillover effects') + coord_fixed() + geom_smooth() - -ggplot(as.data.frame(withoutvalue_comparison), aes(x=my_method, y=simulation)) + - geom_point(size=0.1) + theme_classic() + xlab('Channel without signal estimated spillover effects') + - ylab('Simulated spillover effects') + coord_fixed() + geom_smooth() - -# ggplot(as.data.frame(withvaluevswithoutvalue), aes(x=my_method, y=simulation)) + -# geom_point(size=0.1) + theme_classic() + xlab('Channel with signal estimated spillover effects') + -# ylab('Channel without signal estimated spillover effects') + coord_fixed() - -ggplot(as.data.frame(withvalue_level2_comparison), aes(x=my_method, y=simulation)) + - geom_point(size=0.1) + theme_classic() + xlab('Channel with signal level2 estimated spillover effects') + - ylab('Simulated spillover effects') + coord_fixed() + geom_smooth() - -ggplot(as.data.frame(withvalue_level3_comparison), aes(x=my_method, y=simulation)) + - geom_point(size=0.1) + theme_classic() + xlab('Channel with signal level3 estimated spillover effects') + - ylab('Simulated spillover effects') + coord_fixed() + geom_smooth() - -ggplot(as.data.frame(withvalue_level4_comparison), aes(x=my_method, y=simulation)) + - geom_point(size=0.1) + theme_classic() + xlab('Channel with signal level4 estimated spillover effects') + - ylab('Simulated spillover effects') + coord_fixed() + geom_smooth() - - -cor(withvalue_comparison[,1],withvalue_comparison[,2])^2 -#0.9940178 - -cor(withoutvalue_comparison[,1],withoutvalue_comparison[,2])^2 -#0.9940971 - -cor(withvalue_level2_comparison[,1],withvalue_comparison[,2])^2 -#0.9940178 - -cor(withvalue_level3_comparison[,1],withvalue_comparison[,2])^2 -#0.9940178 - -cor(withvalue_level4_comparison[,1],withvalue_comparison[,2])^2 -#0.9940178 diff --git a/scripts/adaptSpillmat_(remove check_sm).R b/scripts/adaptSpillmat_(remove check_sm).R deleted file mode 100755 index 8e4a23a..0000000 --- a/scripts/adaptSpillmat_(remove check_sm).R +++ /dev/null @@ -1,110 +0,0 @@ -#' @rdname adaptSpillmat -#' @title Adapt spillover matrix -#' -#' @description -#' This helper function adapts the columns of a provided spillover matrix -#' such that it is compatible with data having the column names provided. -#' -#' @param input_sm -#' a previously calculated spillover matrix. -#' @param out_chs -#' the column names that the prepared output spillover matrix should have. -#' Numeric names as well as names of the form MetalMass(Di), e.g. Ir191Di -#' or Ir191, will be interpreted as masses with associated metals. -#' @param isotope_list -#' named list. Used to validate the input spillover matrix. -#' Names should be metals; list elements numeric vectors of their isotopes. -#' See \code{\link{isotope_list}} for the list of isotopes used by default. -#' -#' @details -#' The rules how the spillover matrix is adapted -#' are explained in \code{\link{compCytof}}. -#' -#' @return -#' An adapted spillover matrix with column and row names -#' according to \code{out_chs}. -#' -#' @author -#' Helena Lucia Crowell \email{helena.crowell@uzh.ch} -#' and Vito RT Zanotelli \email{vito.zanotelli@uzh.ch} -#' -#' @examples -#' # get single-stained control samples -#' data(ss_exp) -#' # specify mass channels stained for -#' bc_ms <- c(139, 141:156, 158:176) -#' # debarcode -#' re <- assignPrelim(x = ss_exp, y = bc_ms) -#' re <- estCutoffs(x = re) -#' re <- applyCutoffs(x = re) -#' # estimate spillover matrix and adapt it -#' sm <- computeSpillmat(x = re) -#' chs <- flowCore::colnames(ss_exp) -#' adaptSpillmat(sm, chs) -#' -#' @importFrom flowCore flowFrame colnames exprs compensate -# ------------------------------------------------------------------------------ - -setMethod(f="adaptSpillmat", - signature=signature(input_sm="matrix", out_chs="vector"), - definition=function(input_sm, out_chs, - isotope_list=CATALYST::isotope_list) { - - # check the spillovermatrix for obvious errors - # .check_sm(input_sm, isotope_list) - # make it symmetric - #input_sm <- .make_symetric(input_sm) - # get the output names, metals and masses - n <- length(out_chs) - out_masses <- .get_ms_from_chs(out_chs) - out_metalchs <- out_chs[!is.na(out_masses)] - input_sm_chs_col <- colnames(input_sm) - input_sm_chs_row <- rownames(input_sm) - input_sm_chs <- unique(c(input_sm_chs_row, input_sm_chs_col)) - - # copy the existing spillover information into a new spillover matrix - sm <- matrix(diag(n), n, n, dimnames=list(out_chs, out_chs)) - sm_preexisting_col <- input_sm_chs_col[input_sm_chs_col %in% out_chs] - sm_preexisting_row <- input_sm_chs_row[input_sm_chs_row %in% out_chs] - sm[sm_preexisting_row, sm_preexisting_col] <- - input_sm[sm_preexisting_row, sm_preexisting_col] - - .warn_new_intearctions(out_metalchs, input_sm) - - # check for new channels - new_metalchs <- out_metalchs[!out_metalchs %in% input_sm_chs] - new_masses <- .get_ms_from_chs(new_metalchs) - - old_receiving_masses <- .get_ms_from_chs(input_sm_chs_col) - - test <- (length(new_metalchs) != 0) && - (any(inds <- old_receiving_masses %in% new_masses)) - if (test) { - # check if any new masses were already present in the old masses - # and add them to receive spillover according to the old masses - - # get the channels that correspond to the old_masses - # that have an aditional metal with the same weight - y_col <- input_sm_chs_col[inds] - names(y_col) <- as.character(old_receiving_masses[inds]) - # get all columns that are part of the affected masses - fil <- out_masses %in% old_receiving_masses[inds] - sm_col <- out_chs[fil] - sm_col_ms <- as.character(out_masses[fil]) - # add the spillover - old_rowchs <- out_chs[out_chs %in% input_sm_chs_row] - sm[old_rowchs, sm_col] <- input_sm[old_rowchs, y_col[sm_col_ms]] - for (m in unique(sm_col_ms)) { - mfil <- out_masses == m - # set the spillover between channels of the same mass to 0 - # otherwise the linear system can get singular - # diagonal elements will be set to 1 again later on - sm[mfil, mfil] <- 0 - } - } - - # assure diagonal is all 1 - diag(sm) <- 1 - return(sm) - } -) \ No newline at end of file diff --git a/scripts/default.profraw b/scripts/default.profraw deleted file mode 100644 index fb1b6c7..0000000 Binary files a/scripts/default.profraw and /dev/null differ diff --git a/scripts/helpers-compensation.R b/scripts/helpers-compensation.R deleted file mode 100755 index 3188171..0000000 --- a/scripts/helpers-compensation.R +++ /dev/null @@ -1,152 +0,0 @@ -# ============================================================================== -# get spillover columns -# ------------------------------------------------------------------------------ -.get_spill_cols <- function(ms, mets, l=CATALYST::isotope_list) { - ms <- as.numeric(ms) - spill_cols <- vector("list", length(ms)) - for (i in seq_along(ms)) { - p1 <- m1 <- ox <- iso <- NULL - if ((ms[i] + 1) %in% ms) p1 <- which(ms == (ms[i] + 1)) - if ((ms[i] - 1) %in% ms) m1 <- which(ms == (ms[i] - 1)) - if ((ms[i] + 16) %in% ms) ox <- which(ms == (ms[i] + 16)) - iso <- l[[mets[i]]] - iso <- which(ms %in% iso[iso != ms[i]]) - spill_cols[[i]] <- unique(c(m1, p1, iso, ox)) - } - spill_cols -} - -# ============================================================================== -# compute channel i to j spill -# ------------------------------------------------------------------------------ -.get_sij <- function(pos_i, neg_i, pos_j, neg_j, method, trim) { - if (length(neg_i) == 0) neg_i <- 0 - if (length(neg_j) == 0) neg_j <- 0 - if (method == "default") { - bg_j <- mean(neg_j, trim=.1) - bg_i <- mean(neg_i, trim=.1) - receiver <- pos_j - bg_j - spiller <- pos_i - bg_i - } else if (method == "classic") { - receiver <- mean(pos_j, trim) - mean(neg_j, trim) - spiller <- mean(pos_i, trim) - mean(neg_i, trim) - } else { - stop("'method = ", method, "' is not a valid option.") - } - receiver[receiver < 0] <- 0 - spiller [spiller < 0] <- 0 - sij <- receiver / spiller - sij[is.infinite(sij) | is.na(sij)] <- 0 - median(sij) -} - -# ============================================================================== -# make spillover matrix symmetrical -# ------------------------------------------------------------------------------ -.make_symetric <- function(x) { - x <- as.matrix(x) - dims <- dim(x) - i <- which.max(dim(x)) - nms <- dimnames(x)[[i]] - M <- matrix(0, dims[i], dims[i]) - rownames(M) <- colnames(M) <- nms - M[rownames(x), colnames(x)] <- as.matrix(x) - M -} - -# ============================================================================== -# check validity of input spillover matrix in compCytof() -# ------------------------------------------------------------------------------ -.check_sm <- function(sm, l=CATALYST::isotope_list) { - if (any(sm < 0)) - stop("\nThe supplied spillover matrix is invalid ", - "as it contains negative entries.\n", - "Valid spill values are non-negative and mustn't exceed 1.") - if (any(sm > 1)) - stop("\nThe supplied spillover matrix is invalid ", - "as it contains entries greater than 1.\n", - "Valid spill values are non-negative and mustn't exceed 1.") - cnames <- colnames(sm)[which(colnames(sm) %in% rownames(sm))] - sii <- sm[cbind(cnames, cnames)] - if (any(sii != 1)) - stop("\nThe supplied spillover matrix is invalid ", - "as its diagonal contains entries != 1.\n") - test <- all(rownames(sm) %in% colnames(sm)) - if (!test) - stop("\nThe supplied spillover matrix seems to be invalid.\n", - "All spill channels must appear as receiving channels:\n", - "'all(rownames(sm) %in% colnames(sm))' should return TRUE.") - isos <- paste0(gsub("[0-9]", "", names(unlist(l))), as.numeric(unlist(l))) - test <- vapply(dimnames(sm), function(chs) { - ms <- .get_ms_from_chs(chs) - mets <- .get_mets_from_chs(chs) - all(paste0(mets, ms) %in% isos) - }, logical(1)) - if (any(!test)) - stop("\nThe supplied spillover matrix seems to be invalid.\n", - "All isotopes should appear in `", deparse(substitute(l)), "`.") - sm[, colSums(sm) != 0] -} - -# ============================================================================== -# Helper functions to get mass and metal from a channel name -# ------------------------------------------------------------------------------ -.get_ms_from_chs <- function(chs) { - gsub("[[:punct:][:alpha:]]", "", chs) -} -.get_mets_from_chs <- function(chs) { - gsub("([[:punct:]]*)([[:digit:]]*)(Di)*", "", chs) -} - -# ============================================================================== -# This function compares a list of spillover channels to -# a provided spillover matrix and provides warnings for cases -# where the spillovermatrix has no information about a potential -# expected spillover among the new channels. -# ------------------------------------------------------------------------------ -.warn_new_intearctions <- function(chs_new, sm) { - chs_emitting <- rownames(sm) - chs_receiving <- colnames(sm) - chs <- list(chs_new, chs_emitting, chs_receiving) - chs <- stats::setNames(chs, c("new", "emitting", "receiving")) - - # get the metals and masses from the names - mets <- lapply(chs, .get_mets_from_chs) - ms <- lapply(chs, .get_ms_from_chs) - - # get the potential mass channels a channel could cause spillover in - spill_cols <- .get_spill_cols(ms$new, mets$new) - - first <- TRUE - for (i in order(ms$new)) { - # check if the provided spillovermatrix had the - # current channel measured as a spill emitter - is_new_emitting <- !chs$new[i] %in% chs$emitting - cur_spillms <- ms$new[spill_cols[[i]]] - - if (is_new_emitting) { - # if channel was never measured, no information is present - # for spill in any of the potential spill receiver channels - mass_new_rec <- cur_spillms - } else { - # if it has been measured, no information is only present - # for the channels which were not considered spill receivers - # in the spillover matrix provided - mass_new_rec <- cur_spillms[!cur_spillms %in% ms$receiving] - } - - if (length(mass_new_rec) > 0) { - if (first) { - message("WARNING: ", - "Compensation is likely to be inaccurate.\n", - " ", - "Spill values for the following interactions\n", - " ", - "have not been estimated:") - first <- FALSE - } - message(chs$new[i], " -> ", paste( - chs$new[ms$new %in% mass_new_rec], collapse=", ")) - } - } -} \ No newline at end of file diff --git a/scripts/simulation.Rmd b/scripts/simulation.Rmd deleted file mode 100644 index 5d2204e..0000000 --- a/scripts/simulation.Rmd +++ /dev/null @@ -1,345 +0,0 @@ ---- -title: "simulation" -author: "Qi Miao" -output: html_document ---- - -###simulation results -###Figure 2 - -```{r global_options, include=FALSE} -knitr::opts_chunk$set(echo=FALSE, warning=FALSE, message=FALSE) -``` - -###get the compensated data from CATALYST package as the true expression base for the simulation study -```{r} -colnames(comped_nnls_exprs) -#the compensated data flowframe: comped_nnls, exprs: comped_nnls_exprs - -cols_metal <- c("Pr141Di", "Nd142Di", "Nd143Di", "Nd144Di", "Nd145Di", "Nd146Di", "Sm147Di", -"Nd148Di", "Sm149Di", "Nd150Di", "Eu151Di", "Sm152Di", "Eu153Di", "Sm154Di", "Gd155Di", -"Gd156Di", "Gd158Di", "Tb159Di", "Gd160Di", "Dy161Di") - -``` - -###simulate the spillover matrix - -###some helper functions -```{r} -#get_spill_cols from CATALYST package -get_spill_cols <- function(ms, mets, l=CATALYST::isotope_list) { - ms <- as.numeric(ms) - spill_cols <- vector("list", length(ms)) - for (i in seq_along(ms)) { - p1 <- m1 <- ox <- iso <- NULL - if ((ms[i] + 1) %in% ms) p1 <- which(ms == (ms[i] + 1)) - if ((ms[i] - 1) %in% ms) m1 <- which(ms == (ms[i] - 1)) - if ((ms[i] + 16) %in% ms) ox <- which(ms == (ms[i] + 16)) - iso <- l[[mets[i]]] - iso <- which(ms %in% iso[iso != ms[i]]) - spill_cols[[i]] <- unique(c(m1, p1, iso, ox)) - } - spill_cols -} - -spill_cols_data <- function(data){ - ##get ms and mets - chs <- colnames(data) - ####metal mass number like 167,*** - ms <- as.numeric(regmatches(chs, gregexpr("[0-9]+", chs))) - ####metal name - mets <- gsub("[[:digit:]]+Di", "", chs) - ##get spillover cols - spill_cols <- get_spill_cols(ms, mets) - return(spill_cols) -} - -#function generate a simulated spillover matrix -sm_simulation <- function(data, n, ...){ - ##get ms and mets - chs <- colnames(data) - ####metal mass number like 167,*** - ms <- as.numeric(regmatches(chs, gregexpr("[0-9]+", chs))) - ####metal name - mets <- gsub("[[:digit:]]+Di", "", chs) - ##get spillover cols - spill_cols <- get_spill_cols(ms, mets) - - sm <- vector("list",n) - for (i in 1:n){ - m = diag(length(spill_cols)) - for (j in seq_along(spill_cols)){ - for (k in spill_cols[[j]]){ - m[j,k] <- runif(1, min=0, max=0.1) - } - } - sm[[i]] <- m - } - return(sm) -} - -#function generate simulated data -data_simulation <- function(signal, sm, ...){ - data <- vector("list",length(sm)) - for (i in seq_along(sm)){ - data[[i]] = signal%*%sm[[i]] - colnames(data[[i]]) = colnames(signal) - } - return(data) -} - -simulate_ture_signal <- function(){ - set.seed(1) - # single modal channel - single <- sort(sample(c(1:20), 10)) - double <- c(1:20)[-single] - mean_single <- runif(10, min = 100, max = 600) - mean_double1 <- runif(10, min = 100, max = 600) - mean_double2 <- runif(10, min = 100, max = 600) - - data <- matrix(nrow = 100000, ncol = 20) - - j = 1 - for (i in single){ - data[,i] <- sample(c(rnorm(60000, mean = mean_single[j], sd = mean_single[j]/5), rep(0, 40000)), 100000) - j = j + 1 - } - - j = 1 - for (i in double){ - data[,i] <- sample(c(rnorm(30000, mean = mean_double1[j], sd = mean_double1[j]/5), rnorm(30000, mean = mean_double2[j], sd = mean_double2[j]/5), rep(0, 40000)), 100000) - j = j + 1 - } - - colnames(data) <- cols_metal - return(data) -} - -``` -###NMF helper functions -```{r} -library(tensorflow) -tf$compat$v1$disable_v2_behavior() - -get_H_mask <- function(spill_cols){ - n_cols <- length(spill_cols) - H_mask <- matrix(0, n_cols, n_cols) - for (i in 1:n_cols){ - if (!is.null(spill_cols[[i]])){ - for (j in spill_cols[[i]]){ - H_mask[i,j] = 1 - } - } - } - return(H_mask) -} - - -get_A_mask <- function(data, cutoffs){ - A_mask <- matrix(0, dim(data)[1], dim(data)[2]) - for (i in 1:dim(data)[2]){ - A_mask[which(data[,i] < cutoffs[i]), i] = 1 - A_mask[which(data[,i] == 0), i] = 0 - } - return(A_mask) -} - -train_NMF <- function(data, spill_cols, cutoffs){ - #down sampling if necessary - # data <- data[sample(nrow(data),5000),] - tf$compat$v1$disable_v2_behavior() - - #get masks for A and H - A_mask <- get_A_mask(data, cutoffs) - H_mask <- get_H_mask(spill_cols) - - A = tf$constant(data) - shape = dim(data) - - #initializing W,H - # temp_H = matrix(rnorm(shape[2]*shape[2]),ncol=shape[2]) - # temp_H = temp_H/max(temp_H) - # - # temp_W = matrix(rnorm(shape[1]*shape[2]),ncol=shape[2]) - # temp_W = temp_W/max(temp_W) - - # temp_H = matrix(0.05, shape[2], shape[2]) * H_mask - temp_H = matrix(runif(shape[2]*shape[2],min=0,max=0.1), shape[2], shape[2]) * H_mask - temp_W = data - data*A_mask - - # W = tf$Variable(temp_W) - W = tf$constant(temp_W) - H = tf$Variable(temp_H) - - A_mask = tf$constant(A_mask) - H_mask = tf$constant(H_mask) - H_masked = tf$multiply(H,H_mask) - - WH = tf$matmul(W, H_masked) - # WH = tf$matmul(W, H) - #cost of Frobenius norm - # cost = tf$reduce_sum(tf$pow(tf$multiply(A,A_mask) - tf$multiply(WH,A_mask) - tf$multiply(W,A_mask), 2)) - cost = tf$reduce_sum(tf$multiply(tf$pow(A - WH - W, 2),A_mask)) - # cost = tf$reduce_sum(tf$norm(tf$multiply(A,A_mask) - tf$multiply(WH,A_mask))) - # Learning rate - lr = 0.001 - # Number of steps - steps = 1000 - # train_step = tf$train$GradientDescentOptimizer(lr)$minimize(cost) - train_step = tf$compat$v1$train$GradientDescentOptimizer(lr)$minimize(cost) - - # Clipping operation. This ensure that W and H learnt are non-negative - # clip_W = W$assign(tf$maximum(tf$zeros_like(W), W)) - clip_H1 = H$assign(tf$maximum(tf$zeros_like(H), H)) - H_clip2 <- matrix(0.1, shape[2], shape[2]) - clip_H2 = H$assign(tf$minimum(H_clip2, H)) - # clip = tf$group(clip_W, clip_H) - # clip = tf$group(clip_W,clip_H1, clip_H2) - clip = tf$group(clip_H1, clip_H2) - - # Launch the graph and initialize the variables. - sess = tf$compat$v1$Session() - sess$run(tf$compat$v1$global_variables_initializer()) - - for (step in 1:steps) { - sess$run(train_step) - sess$run(clip) - if (step %% 100 == 0){ - cat("\nCost:",sess$run(cost), "\n") - print("********************************") - } - } - # - # learnt_W = sess$run(W) - # learnt_H = sess$run(H) - # learnt_H_masked = learnt_H*get_H_mask(spill_cols) - learnt_H_masked = sess$run(H_masked) - return(learnt_H_masked) -} -``` - -###data simulation -```{r} -set.seed(1) - -true_signal <- simulate_ture_signal() -sm_simulated <- sm_simulation(data = true_signal, n = 20) -data_simulated <- data_simulation(signal = true_signal, sm = sm_simulated) - -``` - -###get the estimated spillover matrix based on simulated data -```{r} -library(CytoSpill) - -estimate_sm <- function(data, spill_cols){ - my_sm = vector("list",length(data)) - my_sm_nmf = vector("list",length(data)) - - for (i in seq_along(data)){ - cutoffs_sim <- .DeriveCutoffs(data=data[[i]], cols=c(1:ncol(data[[i]])), n = 10000, flexrep = 5) - threshold <- 0.1 - model <- .EstimateSpill(data=data[[i]], cutoffs=cutoffs_sim, cols=c(1:ncol(data[[i]])), upperbound = threshold, neighbor = 1) - estimates <- model[[1]] - xcols <- model[[2]] - spillmat <- diag(length(xcols)) - for (m in 1:length(xcols)) { - if (!is.na(xcols[[m]][1])) { - for (j in 1:length(xcols[[m]])) { - if (!is.na(estimates[[m]][j])){ - spillmat[xcols[[m]][j],m] <- min(estimates[[m]][j],threshold) - } - } - } - } - sm_sim <- spillmat - my_sm[[i]] <- sm_sim - - nmf_sim <- train_NMF(data[[i]], spill_cols, cutoffs = cutoffs_sim) - my_sm_nmf[[i]] <- nmf_sim - } - return(list(my_sm, my_sm_nmf)) -} - -# estimate_sm_nmf <- function(data, spill_cols) { -# my_sm_nmf = vector("list",length(data)) -# for (i in seq_along(data)){ -# cutoffs_sim <- .DeriveCutoffs(data=data[[i]], cols=c(1:ncol(data[[i]])), n = 5000) -# nmf_sim <- train_NMF(data[[i]], spill_cols, cutoffs = cutoffs_sim) -# my_sm_nmf[[i]] <- nmf_sim -# } -# return(my_sm_nmf) -# } - -set.seed(1) -estimate_sm_simulated <- estimate_sm(data = data_simulated, spill_cols = spill_cols_data(true_signal)) - -##simulated_results.Rdata - -``` - -```{r} -#number of simulated elements in each spillover matrix -spill_cols <- spill_cols_data(true_signal) -sum(sapply(spill_cols, length)) -``` - -```{r} -#trucate nmf estimated spillover effects with >0.1 or <0 value, also add diagnal to make spillover matrix, comparable to sqp results -nmf_estimate_sm <- estimate_sm_simulated[[2]] -for (i in seq_along(nmf_estimate_sm)){ - nmf_estimate_sm[[i]][which(nmf_estimate_sm[[i]]<0)] <- 0 - nmf_estimate_sm[[i]][which(nmf_estimate_sm[[i]]>0.1)] <- 0.1 - nmf_estimate_sm[[i]] <- nmf_estimate_sm[[i]] + diag(20) -} -``` - - - - -### Get comparison data -```{r} -library(ggplot2) -sm_comparison <- function(x, y, spill_cols){ - # errorlog = NULL - comparison = NULL - # errorpoint = NULL - for (i in seq_along(x)){ - single <- NULL - # errorcount = 0 - # errorcount2 = 0 - for (j in seq_along(spill_cols)){ - for (k in spill_cols[[j]]){ - single <- rbind(single,c(x[[i]][j,k],y[[i]][j,k])) - # if (y[[i]][j,k]>0.049 & y[[i]][j,k]<0.05 ) { - # errorcount = errorcount+1 - # errorpoint = rbind(errorpoint, c(j,k)) - # } - # if (y[[i]][j,k]==0) errorcount2 = errorcount2+1 - } - } - # errorlog <- rbind(errorlog,c(errorcount,errorcount2)) - comparison <- rbind(comparison,single) - } - colnames(comparison) <- c('simulation','my_method') - # return(list(comparison,errorlog,errorpoint)) - return(comparison) -} - -slsqp_comparison <- sm_comparison(sm_simulated,estimate_sm_simulated[[1]],spill_cols = spill_cols) -nmf_comparison <- sm_comparison(sm_simulated,nmf_estimate_sm,spill_cols = spill_cols) - -ggplot(as.data.frame(slsqp_comparison), aes(x=my_method, y=simulation)) + geom_point(size=0.1) + theme_classic() + xlab('Sequential quadratic programming estimated spillover effects') + ylab('Simulated spillover effects') + coord_fixed() - -ggplot(as.data.frame(nmf_comparison), aes(x=my_method, y=simulation)) + geom_point(size=0.1) + theme_classic() + xlab('NMF estimated spillover effects') + ylab('Simulated spillover effects') + coord_fixed() - -``` -```{r} - -cor(slsqp_comparison[,1],slsqp_comparison[,2])^2 -cor(nmf_comparison[,1],nmf_comparison[,2])^2 -``` - -```{r} -sessionInfo() -```