Skip to content

Latest commit

 

History

History
121 lines (114 loc) · 4.86 KB

README.org

File metadata and controls

121 lines (114 loc) · 4.86 KB

miBrain

Software for sequencing data for manuscript.

Install

Use Anaconda to create an environment

mamba env create --file=mibrain.yml

Then install

pip install .
make install

Generating DEG sheets

library(miBrain)
seq.dir="~/data/seq" ## Directory with batchName/sampleName/featureCounts inside
if (!dir.exists("miBrain")) { dir.create("miBrain") } ## output directory
se = bulk_aggregate_counts(system.file("extdata", "miBrain_SampleSheet.csv", package="miBrain"), seq.dir)
sel = bulk_aggregate_comparison(se, system.file("extdata", "miBrain_comparison.csv", package="miBrain"), logTMM=FALSE)
lapply(sel, function(obj) { 
    if (obj@metadata$comparison %in% c("Pericyte", "BMEC")) {
	  ### lower depth, use LRT+lower CPM cutoff to allow for more hypotheses
	  deg_dump(obj, "miBrain", cpm.cutoff=10, method="LRT") 
    } else {
	  deg_dump(obj, "miBrain") 
    }
})

Generating CellphoneDB data

For the same structure as before, with XLSX sheets in miBrain directory:

library(miBrain)
if (!dir.exists("miBrain/cpdb")) {
    dir.create("miBrain/cpdb", recursive=TRUE)
}
dl = lapply(list.files("miBrain", pattern="xlsx$", full.names=TRUE), deg_load)
names(dl) = sapply(dl, function(obj) { obj@metadata$comparison })
degl = dplyr::bind_rows(lapply(dl, function(obj) { obj@metadata$deg }))
colnames(degl) = gsub("^case$", "cluster", colnames(degl))
del = do.call(SummarizedExperiment::cbind, dl)
write.table(as.data.frame(SummarizedExperiment::assays(del)$counts), "miBrain/cpdb/counts.tsv", sep="\t", quote=FALSE)
write.table(as.data.frame(SummarizedExperiment::assays(del)$TMM), "miBrain/cpdb/tmm.tsv", sep="\t", quote=FALSE)
write.table(data.frame(Cell=colnames(del), cell_type=del$group), "miBrain/cpdb/meta.tsv", sep="\t", quote=FALSE, row.names=FALSE)
write.table(degl[(degl$FDR < 0.05) & (degl$log2FC > 0), c("cluster", "gene")], "miBrain/cpdb/deg_FDR005_log2FC0.tsv", sep="\t", quote=FALSE, row.names=FALSE)
write.table(degl[(degl$FDR < 0.1) & (degl$log2FC > 0), c("cluster", "gene")], "miBrain/cpdb/deg_FDR010_log2FC0.tsv", sep="\t", quote=FALSE, row.names=FALSE)
write.table(degl[(degl$FDR < 0.05) & (degl$log2FC > 0.5), c("cluster", "gene")], "miBrain/cpdb/deg_FDR005_log2FC05.tsv", sep="\t", quote=FALSE, row.names=FALSE)
write.table(degl[(degl$FDR < 0.1) & (degl$log2FC > 0.5), c("cluster", "gene")], "miBrain/cpdb/deg_FDR010_log2FC05.tsv", sep="\t", quote=FALSE, row.names=FALSE)

Then, to run cellphoneDB:

import blah

Plots

PCA plots

library(miBrain)
set_ht_opt(2)
dl = lapply(list.files("miBrain", pattern="^[A-Za-z]+[.]xlsx$", full.names=TRUE), deg_load)
sapply(dl, function(se) {
    ct = S4Vectors::metadata(se)$comparison
    se = se_tmm(se, log=TRUE)
    cd = as.data.frame(SummarizedExperiment::colData(se))
    mono = setdiff(1:nrow(cd), grep("miBrain", cd$group))
    cd$group = gsub("^mono ", "", cd$group)
    cd$group[mono] = paste0("monoculture ", cd$group[-grep("miBrain", cd$group)])
    se$group = cd$group
    se = se[,order(se$group)]
    g = PlotPCA(se)
    saveGGplot(g, paste0("miBrain/pca/", ct), w=3, h=3)
    return(g)
})

Heatmaps

Top 50 DEGs

library(miBrain)
set_ht_opt(2)
dl = lapply(list.files("miBrain", pattern="^[A-Za-z]+[.]xlsx$", full.names=TRUE), deg_load)
sapply(dl, function(se) {
    ct = S4Vectors::metadata(se)$comparison
    column_title = paste0("Top DEGs in ", ct)
    se = se_tmm(se, log=TRUE)
    cd = as.data.frame(SummarizedExperiment::colData(se))
    mono = setdiff(1:nrow(cd), grep("miBrain", cd$group))
    cd$group = gsub("^mono ", "", cd$group)
    cd$group[mono] = paste0("monoculture ", cd$group[-grep("miBrain", cd$group)])
    se$group = cd$group
    se = se[,order(se$group)]
    H = gene_heatmap(se, column_title=gsub("[.][0-9]+$", "", column_title))
    saveHeatmap(H, paste0("miBrain/heatmaps_deg/", ct))
    g = PlotPCA(se)
    saveGGplot(g, paste0("miBrain/pca/", ct), w=3, h=3)
    return(g)
})

Named DEGs

library(miBrain)
set_ht_opt(2)
df = xl_parse_gene_list(system.file("extdata", "heatmap_genes.xlsx", package="miBrain")) ## load some gene lists
sapply(split(df, df$go_name), function(gf) {
  ct = unique(gf$celltype)
  column_title=head(gf$go_name, 1)
  se = se_tmm(dl[[ct]], log=TRUE)
  cd = as.data.frame(SummarizedExperiment::colData(se))
  mono = setdiff(1:nrow(cd), grep("miBrain", cd$group))
  cd$group = gsub("^mono ", "", cd$group)
  cd$group[mono] = paste0("monoculture ", cd$group[-grep("miBrain", cd$group)])
  se$group = cd$group
  se = se[,order(se$group)]
  H = gene_heatmap(se, gene=gf$gene, column_title=gsub("[.][0-9]+$", "", column_title))
  saveHeatmap(H, paste0("miBrain/heatmaps_go/", column_title))
})