Skip to content

Commit

Permalink
Adding support for CGC w/ Automation (#8)
Browse files Browse the repository at this point in the history
* Artifact GHA

* Secondary dockerfile for CGC

* Add build context

* Align dockerfiles more closely

* Contribed example

* Missing dependency

* Adding CGC cwl file

* Update cloudApp file

* Update cgc/netbid.cwl

Co-authored-by: Andrew Thrasher <[email protected]>

* Remove unnecessary files

* Update Dockerfile.cgc
PR feedback from @adthrasher

* Adding GH Container Registry tags to automation

* Automated docker tag (no longer use latest)

* Update secret name

Co-authored-by: Andrew Thrasher <[email protected]>

Co-authored-by: Andrew Thrasher <[email protected]>
  • Loading branch information
jordan-rash and adthrasher authored Sep 14, 2021
1 parent 189618e commit 38fa69b
Show file tree
Hide file tree
Showing 7 changed files with 360 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Paths left out of the Docker build context.
.git/
demo_scripts/
docs/
inst/
man/

29 changes: 29 additions & 0 deletions .github/workflows/cgc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Rewrites the Docker image reference in cgc/netbid.cwl to the release tag and
# deploys the tool definition to the Cancer Genomics Cloud (CGC).
name: Deploy to CGC
on:
  release:
    types:
      - released
      - edited
jobs:
  cgc:
    runs-on: ubuntu-20.04
    name: Deploy
    steps:
      - uses: actions/checkout@v2
      - name: Set the docker tag from Repo Tag
        id: set_dockertag
        env:
          IMAGE: cgc-images.sbgenomics.com/stjude/netbid
          VERSION_TAG: ${{ github.event.release.tag_name }}
        # The values are read as shell variables rather than interpolated with
        # ${{ }}: the release tag is user-controlled, and expression
        # interpolation would paste it verbatim into the script text.
        # jq requires cgc/netbid.cwl to be strict JSON.
        run: |
          jq --arg image "${IMAGE}:${VERSION_TAG}" '(.requirements | .[] | select(.class == ("DockerRequirement")) | .dockerPull) |= $image' cgc/netbid.cwl > cgc/netbid.cwl.new
          mv cgc/netbid.cwl.new cgc/netbid.cwl
          cat cgc/netbid.cwl
      # Skipped when the ACT environment variable is set.
      # NOTE(review): the action reference below looks mangled (e-mail
      # obfuscation placeholder) — restore the real `owner/repo@ref` value.
      - id: cgcdeploy
        if: ${{ !env.ACT }}
        uses: jordan-rash/[email protected]
        with:
          file_location: cgc/netbid.cwl
          shortid: stjude/netbid/netbid
        env:
          CGC_TOKEN: ${{ secrets.CGC_TOKEN }}
59 changes: 59 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Builds Dockerfile.cgc and pushes the image to the GitHub Container Registry
# and the CGC registry, tagged `latest`, short commit SHA, and release tag.
name: Package
on:
  release:
    types: [released, edited]

jobs:
  push_to_registry:
    name: Push Docker image to GitHub Packages
    runs-on: ubuntu-latest
    steps:
      - name: Check out the repo
        uses: actions/checkout@v2

      # An abbreviated git SHA is a *prefix* of the full hash, so take the
      # first 7 characters; a suffix could not be resolved back to the commit.
      - name: Determine Short SHA
        run: |
          echo "SHORT_SHA=${GITHUB_SHA:0:7}" >> "$GITHUB_ENV"

      # The owner/repo name is lower-cased before being used in image tags.
      - name: Sanitize Repo Name for Tagging
        run: |
          echo "REPO_LOWER=$(echo "$GITHUB_REPOSITORY" | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_ENV"

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ secrets.GH_USERNAME }}
          password: ${{ secrets.GH_TOKEN }}

      - name: Login to Cancer Genomics Cloud Container Registry
        uses: docker/login-action@v1
        with:
          registry: cgc-images.sbgenomics.com
          username: ${{ secrets.CGC_USERNAME }}
          password: ${{ secrets.CGC_TOKEN }}

      - name: Build and push
        id: docker_build
        uses: docker/build-push-action@v2
        with:
          context: .
          file: ./Dockerfile.cgc
          push: true
          tags: |
            ghcr.io/${{ env.REPO_LOWER }}:latest
            ghcr.io/${{ env.REPO_LOWER }}:${{ env.SHORT_SHA }}
            ghcr.io/${{ env.REPO_LOWER }}:${{ github.event.release.tag_name }}
            cgc-images.sbgenomics.com/stjude/netbid:latest
            cgc-images.sbgenomics.com/stjude/netbid:${{ env.SHORT_SHA }}
            cgc-images.sbgenomics.com/stjude/netbid:${{ github.event.release.tag_name }}
          labels: |
            org.opencontainers.image.source=https://github.com/${{ github.repository }}

      - name: Image digest
        run: echo ${{ steps.docker_build.outputs.digest }}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
.Rhistory
.RData
.Ruserdata
.DS_Store
32 changes: 32 additions & 0 deletions Dockerfile.cgc
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Image that runs the NetBID cloud app script (IBC_CCDI/cloudAppNetBID.R).
# NOTE(review): the base image has no tag, so rebuilds may pick up a different
# R / OS release — consider pinning a version.
FROM r-base

# System packages installed for the R packages below; the apt package lists
# are removed in the same layer.
RUN apt-get update && apt-get install -y \
build-essential \
libcurl4-gnutls-dev \
libxml2-dev \
libssl-dev \
libgit2-dev \
libcairo2-dev \
libxt-dev \
xvfb \
pandoc \
xauth \
xfonts-base \
libssl1.1 \
&& rm -rf /var/lib/apt/lists/*

# install R packages available in CRAN
# NOTE(review): presumably a failed package install only raises an R warning,
# so this step may succeed with a package missing — verify.
RUN R -e 'install.packages(c("optparse", "dplyr", "devtools", "BiocManager", "plot3D"))'

# install R packages via Bioconductor
RUN R -e 'BiocManager::install(c("biomaRt", "GSVA", "rhdf5", "ComplexHeatmap", "ConsensusClusterPlus", "DESeq2", "tximport", "impute", "limma", "GEOquery"))'

# install NetBID2 from GitHub master branch
# The install runs under xvfb-run (a virtual X display) — presumably because a
# dependency needs a display while loading; verify.
RUN xvfb-run R -e 'devtools::install_github("jyyulab/NetBID", ref="master", dependencies="Depends")'

# The contents of IBC_CCDI (including cloudAppNetBID.R) are copied into /data,
# which is also the working directory.
WORKDIR /data
COPY IBC_CCDI .

# Default command is `Rscript cloudAppNetBID.R`.
ENTRYPOINT ["Rscript"]
CMD ["cloudAppNetBID.R"]
#CMD ["-q", "-e", "packageVersion(\"NetBID2\")"]
132 changes: 132 additions & 0 deletions IBC_CCDI/cloudAppNetBID.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#################
##[email protected]
##2021-07-26
#################
library(NetBID2)
library(optparse)
#### Required inputs: command-line interface ####

# All options take a character value. The four input-file options default to
# NULL so that an omitted one can be detected after parsing.
option_list <- local({
  # Shorthand for one character-valued option with a short and a long flag.
  string_option <- function(flags, help, default = NULL) {
    make_option(
      flags,
      type = "character",
      default = default,
      help = help,
      metavar = "character"
    )
  }

  list(
    string_option(
      c("-e", "--expression-set"),
      help = "File containing gene expression data."
    ),
    string_option(
      c("-t", "--tf-set"),
      help = "Filename of the transcription factor network from SJARACNe."
    ),
    string_option(
      c("-s", "--sig-set"),
      help = "Filename of SIG network from SJARACNe"
    ),
    string_option(
      c("-m", "--metadata"),
      help = "Filename of metadata describing samples"
    ),
    string_option(
      c("-p", "--project"),
      help = "Output project name",
      default = "project"
    )
  )
})

opt_parser <- OptionParser(
  prog = "cloudAppNetBID.R",
  description = "Analyze an expression set with NetBID.",
  option_list = option_list
)
opt <- parse_args(opt_parser)

# Print the usage text and exit with status 1 unless all four input files
# were supplied.
missing_input <- is.null(opt) ||
  is.null(opt$`expression-set`) ||
  is.null(opt$`tf-set`) ||
  is.null(opt$`sig-set`) ||
  is.null(opt$`metadata`)

if (missing_input) {
  print_help(opt_parser)
  q(status = 1)
}

# Input paths taken from the command line.
exp_mat_path <- opt$`expression-set` # expression matrix; first column holds unique gene names / probe IDs
pd_path <- opt$`metadata` # sample metadata file
network.tf_path <- opt$`tf-set` # TF network built by SJARACNe
network.sig_path <- opt$`sig-set` # SIG network built by SJARACNe

#### Optional inputs ####
outdir <- "./" # output directory
project_name <- opt$`project` # user-defined, or the option's default

# Reload data into the R workspace and save it locally under the db/ directory
# for the given species and analysis level (gene level, human).
db.preload(use_level = "gene", use_spe = "human", update = FALSE)

#### Step 0: load data ####
exp_mat <- read.csv(exp_mat_path, row.names = 1)
pd <- read.csv(pd_path)

# The metadata must name each sample in a `sampleID` column. Without this
# check a missing column yields NULL, and assigning NULL row names silently
# resets them to 1..n instead of failing.
sample_ids <- pd$sampleID
if (is.null(sample_ids)) {
  stop(
    "Metadata file '", pd_path, "' must contain a 'sampleID' column.",
    call. = FALSE
  )
}
rownames(pd) <- sample_ids
cal.eset <- generate.eset(exp_mat = exp_mat, phenotype_info = pd)

# Create the project record (named "NetBID_<project>") under `outdir`,
# registering both network files.
analysis.par <- NetBID.analysis.dir.create(
  project_main_dir = outdir,
  project_name = paste0("NetBID_", project_name),
  tf.network.file = network.tf_path,
  sig.network.file = network.sig_path
)

analysis.par$cal.eset <- cal.eset # add the expression eset to analysis.par
NetBID.saveRData(analysis.par = analysis.par, step = "exp-load")

#### Step 1: build network ####
# Read the TF and SIG networks from the files registered in analysis.par.
analysis.par$tf.network <- get.SJAracne.network(
  network_file = analysis.par$tf.network.file
)
analysis.par$sig.network <- get.SJAracne.network(
  network_file = analysis.par$sig.network.file
)

# Merge the two networks before computing activity.
analysis.par$merge.network <- merge_TF_SIG.network(
  TF_network = analysis.par$tf.network,
  SIG_network = analysis.par$sig.network
)

#### Step 2: calculate activity ####
# Activity matrix computed from the merged network's target list and the
# expression values.
ac_mat <- cal.Activity(
  target_list = analysis.par$merge.network$target_list,
  cal_mat = exprs(analysis.par$cal.eset),
  es.method = "weightedmean"
)

# Wrap the activity matrix in an eset, reusing the sample metadata rows that
# match the activity matrix columns.
ac_pheno <- pData(analysis.par$cal.eset)[colnames(ac_mat), ]
analysis.par$ac.eset <- generate.eset(
  exp_mat = ac_mat,
  phenotype_info = ac_pheno,
  feature_info = NULL
)

#### Step 3: differential expression (DE) / differential activity (DA) for drivers ####

# Empty lists that will hold one result per comparison.
analysis.par$DE <- list()
analysis.par$DA <- list()

# The comparison groups come from the `comparison` column of the metadata.
pd <- pData(analysis.par$cal.eset)
if (is.null(pd$comparison)) {
  stop(
    "Sample metadata must contain a 'comparison' column defining the groups.",
    call. = FALSE
  )
}

# Group labels in order of first appearance; a missing label is not a group.
group_names <- as.character(unique(pd$comparison[!is.na(pd$comparison)]))
if (length(group_names) < 2) {
  stop(
    "The 'comparison' column must contain at least two groups; found: ",
    paste(group_names, collapse = ", "),
    call. = FALSE
  )
}
if (length(group_names) > 2) {
  # Only one two-group contrast is computed; make the choice visible.
  warning(
    "More than two groups in 'comparison'; only the first two are compared: ",
    group_names[1], " vs ", group_names[2],
    call. = FALSE
  )
}

g1_name <- group_names[1]
g0_name <- group_names[2]
comp_name <- sprintf("%s.Vs.%s", g1_name, g0_name)

# Sample names belonging to each group.
G1 <- rownames(pd)[which(pd$comparison == g1_name)]
G0 <- rownames(pd)[which(pd$comparison == g0_name)]

# Same two-group limma comparison on expression (genes) and activity (drivers).
DE_gene_limma <- getDE.limma.2G(
  eset = analysis.par$cal.eset,
  G1 = G1, G0 = G0, G1_name = g1_name, G0_name = g0_name
)
DA_driver_limma <- getDE.limma.2G(
  eset = analysis.par$ac.eset,
  G1 = G1, G0 = G0, G1_name = g1_name, G0_name = g0_name
)

# Store both results in analysis.par under the comparison name.
analysis.par$DE[[comp_name]] <- DE_gene_limma
analysis.par$DA[[comp_name]] <- DA_driver_limma

#### Step 4: generate master table ####
# Names of every comparison stored in analysis.par$DE.
all_comp <- names(analysis.par$DE)

# NOTE(review): `tf_sigs` is not assigned anywhere in this script; presumably
# db.preload() defines it in the global environment — verify.
analysis.par$final_ms_tab <- generate.masterTable(
  use_comp = all_comp,
  DE = analysis.par$DE,
  DA = analysis.par$DA,
  target_list = analysis.par$merge.network$target_list,
  tf_sigs = tf_sigs,
  z_col = "Z-statistics",
  display_col = c("logFC", "P.Value"),
  main_id_type = "external_gene_name"
)

# Write the master table to "<project name>_ms_tab.xlsx" in the DATA directory.
out_file <- sprintf(
  "%s/%s_ms_tab.xlsx",
  analysis.par$out.dir.DATA,
  analysis.par$project.name
)
out2excel(analysis.par$final_ms_tab, out.xlsx = out_file)

# Save analysis.par as RData, ESSENTIAL
NetBID.saveRData(analysis.par = analysis.par, step = "ms-tab")

#### Plot NetBID drivers (NetBID_top30.pdf) ####
top30_pdf <- sprintf("%s/NetBID_top30.pdf", analysis.par$out.dir.PLOT)
draw.NetBID(
  DA_list = analysis.par$DA,
  DE_list = analysis.par$DE,
  pdf_file = top30_pdf,
  text_cex = 0.8,
  col_srt = 0
)
101 changes: 101 additions & 0 deletions cgc/netbid.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
{
"class": "CommandLineTool",
"cwlVersion": "v1.2",
"baseCommand": [],
"inputs": [
{
"id": "expression_set",
"type": "File",
"inputBinding": {
"prefix": "-e",
"shellQuote": false,
"position": 1
},
"label": "Gene Expression Matrix",
"doc": "comma-delimited expression matrix file with columns as samples, rows as genes."

},
{
"id": "tf",
"type": "File",
"inputBinding": {
"prefix": "-t",
"shellQuote": false,
"position": 1
},
"label": "Transcription Factor Network",
"doc": "file with each row an edge from the TF network constructed using SJARACNe (https://github.com/jyyulab/SJARACNe)"

},
{
"id": "sig",
"type": "File",
"inputBinding": {
"prefix": "-s",
"shellQuote": false,
"position": 1
},
"label": "Signaling Network",
"doc": "file with each row an edge from the SIG network constructed using SJARACNe (https://github.com/jyyulab/SJARACNe)"

},
{
"id": "metadata",
"type": "File",
"inputBinding": {
"prefix": "-m",
"shellQuote": false,
"position": 3
},
"label": "Sample Grouping File",
"doc": "comma-delimited file with a `sampleID` column (sample names) and a `comparison` column (group label of each sample)."
},
{
"sbg:toolDefaultValue": "project",
"id": "project_name",
"type": "string?",
"inputBinding": {
"prefix": "-p",
"shellQuote": false,
"position": 4
}
}
],
"outputs": [
{
"id": "output",
"type": "Directory?",
"outputBinding": {
"glob": "NetBID_*",
"loadListing": "deep_listing"
}
},
{
"id": "netbid_log",
"type": "stdout",
"outputBinding": {
"glob": "$(inputs.expression_set.nameroot).log"
}
}
],
"doc": "# Description\n\nNetBID is a data-driven system biology pipeline using a data-driven network-based Bayesian inference approach to find drivers from transcriptomics, proteomics, or phosphoproteomics data. The drivers can be either transcription factors (TF) or signaling factors (SIG).\n\nNetBID2 has the following key steps to perform hidden driver analysis:\n1.\tActivity calculation of drivers based on drivers’ regulons from a pre-built or user-provided SJARACNe network;\n2.\tDiscovery of differential expressed genes and differential activated drivers;\n3.\tGeneration of the master table for drivers;\n4.\tVisualizing drivers with significance profiles and target genes.\n\n# Inputs and outputs of NetBID workflow\n## Inputs:\n*\tExpression matrix - comma-delimited expression matrix file with columns as samples, rows as genes.\n*\tMetadata file - comma-delimited file with two columns: sample and group.\n*\tSignaling (SIG) network - file with each row an edge from the SIG network constructed using SJARACNe (https://github.com/jyyulab/SJARACNe)\n*\tTranscription factor (TF) network - file with each row an edge from the TF network constructed using SJARACNe (https://github.com/jyyulab/SJARACNe)\n\n## Outputs:\n*\tExcel file with differential expressed genes and differential activated drivers\n*\tPicture file visualizing drivers with significance profiles\n\n# Common issues\n*\tThe first row and the first column of the expression matrix file must be sample names and gene names, respectively.\n*\tThe metadata file must have at least two sample groups in the 2nd column.",

"label": "netbid",
"requirements": [
{
"class": "ShellCommandRequirement"
},
{
"class": "LoadListingRequirement"
},
{
"class": "DockerRequirement",
"dockerPull": "cgc-images.sbgenomics.com/stjude/netbid:latest"
},
{
"class": "InlineJavascriptRequirement"
}
],
"stdout": "$(inputs.expression_set.nameroot).log",
"sbg:projectName": "netbid"
}

0 comments on commit 38fa69b

Please sign in to comment.