diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..7cc543c
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,6 @@
+.git/
+demo_scripts/
+docs/
+inst/
+man/
+
diff --git a/.github/workflows/cgc.yml b/.github/workflows/cgc.yml
new file mode 100644
index 0000000..d179032
--- /dev/null
+++ b/.github/workflows/cgc.yml
@@ -0,0 +1,29 @@
+name: Deploy to CGC
+on:
+  release:
+    types:
+      - released
+      - edited
+jobs:
+  cgc:
+    runs-on: ubuntu-20.04
+    name: Deploy
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set the docker tag from Repo Tag
+        id: set_dockertag
+        env:
+          IMAGE: cgc-images.sbgenomics.com/stjude/netbid
+          VERSION_TAG: ${{ github.event.release.tag_name }}
+        run: |
+          jq --arg image "${{ env.IMAGE }}:${{ env.VERSION_TAG }}" '(.requirements | .[] | select(.class == ("DockerRequirement")) | .dockerPull) |= $image' cgc/netbid.cwl > cgc/netbid.cwl.new
+          mv cgc/netbid.cwl.new cgc/netbid.cwl
+          cat cgc/netbid.cwl
+      - id: cgcdeploy
+        if: ${{ !env.ACT }}
+        uses: jordan-rash/cgc-go@v0.1.4
+        with:
+          file_location: cgc/netbid.cwl
+          shortid: stjude/netbid/netbid
+        env:
+          CGC_TOKEN: ${{ secrets.CGC_TOKEN }}
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..5ac9b91
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,59 @@
+name: Package
+on:
+  release:
+    types: [released, edited]
+
+jobs:
+  push_to_registry:
+    name: Push Docker image to GitHub Packages
+    runs-on: ubuntu-latest
+    steps:
+      -
+        name: Check out the repo
+        uses: actions/checkout@v2
+      -
+        name: Determine Short SHA
+        run: echo ${{ github.sha }} | tail -c 8 | (read; echo SHORT_SHA=$REPLY) >> $GITHUB_ENV
+      -
+        name: Sanitize Repo Name for Tagging
+        run: echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]' | (read; echo REPO_LOWER=$REPLY) >> $GITHUB_ENV
+      -
+        name: Set up QEMU
+        uses: docker/setup-qemu-action@v1
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      -
+        name: Login to GitHub Container Registry
+        uses: docker/login-action@v1
+        with:
+          registry: ghcr.io
+          username: ${{ secrets.GH_USERNAME }}
+          password: ${{ secrets.GH_TOKEN }}
+      -
+        name: Login to Cancer Genomics Cloud Container Registry
+        uses: docker/login-action@v1
+        with:
+          registry: cgc-images.sbgenomics.com
+          username: ${{ secrets.CGC_USERNAME }}
+          password: ${{ secrets.CGC_TOKEN }}
+      -
+        name: Build and push
+        id: docker_build
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          file: ./Dockerfile.cgc
+          push: true
+          tags: |
+            ghcr.io/${{ env.REPO_LOWER }}:latest
+            ghcr.io/${{ env.REPO_LOWER }}:${{ env.SHORT_SHA }}
+            ghcr.io/${{ env.REPO_LOWER }}:${{ github.event.release.tag_name }}
+            cgc-images.sbgenomics.com/stjude/netbid:latest
+            cgc-images.sbgenomics.com/stjude/netbid:${{ env.SHORT_SHA }}
+            cgc-images.sbgenomics.com/stjude/netbid:${{ github.event.release.tag_name }}
+          labels: |
+            org.opencontainers.image.source=https://github.com/${{ github.repository }}
+      -
+        name: Image digest
+        run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/.gitignore b/.gitignore
index 5b6a065..7b732e7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 .Rhistory
 .RData
 .Ruserdata
+.DS_Store
diff --git a/Dockerfile.cgc b/Dockerfile.cgc
new file mode 100644
index 0000000..1417c6a
--- /dev/null
+++ b/Dockerfile.cgc
@@ -0,0 +1,32 @@
+FROM r-base
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    libcurl4-gnutls-dev \
+    libxml2-dev \
+    libssl-dev \
+    libgit2-dev \
+    libcairo2-dev \
+    libxt-dev \
+    xvfb \
+    pandoc \
+    xauth \
+    xfonts-base \
+    libssl1.1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# install R packages available in CRAN
+RUN R -e 'install.packages(c("optparse", "dplyr", "devtools", "BiocManager", "plot3D"))'
+
+# install R packages via Bioconductor
+RUN R -e 'BiocManager::install(c("biomaRt", "GSVA", "rhdf5", "ComplexHeatmap", "ConsensusClusterPlus", "DESeq2", "tximport", "impute", "limma", "GEOquery"))'
+
+# install NetBID2 from GitHub master branch
+RUN xvfb-run R -e 'devtools::install_github("jyyulab/NetBID", ref="master", dependencies="Depends")'
+
+WORKDIR /data
+COPY IBC_CCDI .
+
+ENTRYPOINT ["Rscript"]
+CMD ["cloudAppNetBID.R"]
+#CMD ["-q", "-e", "packageVersion(\"NetBID2\")"]
diff --git a/IBC_CCDI/cloudAppNetBID.R b/IBC_CCDI/cloudAppNetBID.R
new file mode 100644
index 0000000..e315db2
--- /dev/null
+++ b/IBC_CCDI/cloudAppNetBID.R
@@ -0,0 +1,132 @@
+#################
+##Jingjing.liu@stjude.org
+##2021-07-26
+#################
+library(NetBID2)
+library(optparse)
+####input data required####
+
+option_list = list(
+  make_option(
+    c("-e", "--expression-set"),
+    type="character",
+    default=NULL,
+    help="File containing gene expression data.",
+    metavar="character"
+  ),
+  make_option(
+    c("-t", "--tf-set"),
+    type="character",
+    default=NULL,
+    help="Filename of the transcription factor network from SJARACNe.",
+    metavar="character"
+  ),
+  make_option(
+    c("-s", "--sig-set"),
+    type="character",
+    default=NULL,
+    help="Filename of SIG network from SJARACNe",
+    metavar="character"
+  ),
+  make_option(
+    c("-m", "--metadata"),
+    type="character",
+    default=NULL,
+    help="Filename of metadata describing samples",
+    metavar="character"
+  ),
+  make_option(
+    c("-p", "--project"),
+    type="character",
+    default="project",
+    help="Output project name",
+    metavar="character"
+  )
+);
+
+opt_parser = OptionParser(prog = "cloudAppNetBID.R",
+                          description = "Analyze an expression set with NetBID.",
+                          option_list = option_list);
+opt = parse_args(opt_parser);
+
+if(is.null(opt) || is.null(opt$`expression-set`) || is.null(opt$`tf-set`) || is.null(opt$`sig-set`) || is.null(opt$`metadata`)) {
+  print_help(opt_parser)
+  q(status=1)
+}
+
+exp_mat_path <- opt$`expression-set` # path to expression matrix; first column holds unique gene names/probe IDs
+pd_path<- opt$`metadata` # path to metadata file
+network.tf_path <- opt$`tf-set` # path to TF network by SJARACNe
+network.sig_path <- opt$`sig-set` # path to SIG network by SJARACNe
+
+####input data optional####
+outdir <- "./" # path of output directory
+project_name <- opt$`project` # user-defined, or the default "project"
+
+# Preload the NetBID2 database into the R workspace and save it locally under db/ for the given species and level. NOTE(review): tf_sigs, used by generate.masterTable() below, is never assigned in this script and presumably comes from this call - confirm.
+db.preload(use_level='gene',use_spe='human',update=FALSE) # defaults: gene level, human species
+
+#####step0 load data####
+exp_mat<-read.csv(exp_mat_path,row.names = 1)
+pd<-read.csv(pd_path)
+rownames(pd)<-pd$sampleID
+cal.eset<-generate.eset(exp_mat = exp_mat,phenotype_info = pd)
+
+analysis.par <- NetBID.analysis.dir.create(project_main_dir=outdir, project_name=paste("NetBID_", project_name, sep=""), tf.network.file = network.tf_path, sig.network.file = network.sig_path)
+
+analysis.par$cal.eset <- cal.eset # add expression eset to analysis.par
+NetBID.saveRData(analysis.par=analysis.par,step='exp-load')
+
+####step1. build network####
+# Get network information
+analysis.par$tf.network <- get.SJAracne.network(network_file=analysis.par$tf.network.file)
+analysis.par$sig.network <- get.SJAracne.network(network_file=analysis.par$sig.network.file)
+
+# Merge network first
+analysis.par$merge.network <- merge_TF_SIG.network(TF_network=analysis.par$tf.network,SIG_network=analysis.par$sig.network)
+
+####step2. calculate activity####
+# Get activity matrix
+ac_mat <- cal.Activity(target_list=analysis.par$merge.network$target_list,cal_mat=exprs(analysis.par$cal.eset),es.method='weightedmean')
+
+# Create eset using activity matrix
+analysis.par$ac.eset <- generate.eset(exp_mat=ac_mat,phenotype_info=pData(analysis.par$cal.eset)[colnames(ac_mat),],feature_info=NULL)
+
+###step3. Get differential expression (DE) / differential activity (DA) for drivers####
+
+# Create empty list to store comparison result
+analysis.par$DE <- list()
+analysis.par$DA <- list()
+
+# the comparison group: the first two distinct values of the metadata `comparison` column, in order of appearance
+pd<-pData(analysis.par$cal.eset)
+levels<-as.character(unique(pd$comparison))
+
+g1_name<-levels[1];g0_name<-levels[2];comp_name<-sprintf("%s.Vs.%s",g1_name,g0_name)
+
+G1<-rownames(pd)[which(pd$comparison==g1_name)];G0<-rownames(pd)[which(pd$comparison==g0_name)]
+
+DE_gene_limma <- getDE.limma.2G(eset=analysis.par$cal.eset,G1=G1,G0=G0,G1_name=g1_name,G0_name=g0_name)
+DA_driver_limma <- getDE.limma.2G(eset=analysis.par$ac.eset,G1=G1,G0=G0,G1_name=g1_name,G0_name=g0_name)
+
+# Save comparison result to list element in analysis.par, with comparison name
+analysis.par$DE[[comp_name]] <- DE_gene_limma
+analysis.par$DA[[comp_name]] <- DA_driver_limma
+
+####step4. generate master table####
+# Get all comparison names
+all_comp <- names(analysis.par$DE)
+
+analysis.par$final_ms_tab <- generate.masterTable(use_comp=all_comp,DE=analysis.par$DE,DA=analysis.par$DA,
+                                                  target_list=analysis.par$merge.network$target_list,
+                                                  tf_sigs=tf_sigs,z_col='Z-statistics',display_col=c('logFC','P.Value'),
+                                                  main_id_type='external_gene_name')
+
+out_file <- sprintf('%s/%s_ms_tab.xlsx',analysis.par$out.dir.DATA,analysis.par$project.name)
+out2excel(analysis.par$final_ms_tab,out.xlsx = out_file)
+
+# Save analysis.par as RData, ESSENTIAL
+NetBID.saveRData(analysis.par=analysis.par,step='ms-tab')
+
+####plot_TOP30 NetBID drivers####
+draw.NetBID(DA_list=analysis.par$DA,DE_list=analysis.par$DE,pdf_file =sprintf("%s/NetBID_top30.pdf",analysis.par$out.dir.PLOT),text_cex = 0.8,col_srt = 0)
diff --git a/cgc/netbid.cwl b/cgc/netbid.cwl
new file mode 100644
index 0000000..2b867e3
--- /dev/null
+++ b/cgc/netbid.cwl
@@ -0,0 +1,101 @@
+{
+    "class": "CommandLineTool",
+    "cwlVersion": "v1.2",
+    "baseCommand": [],
+    "inputs": [
+        {
+            "id": "expression_set",
+            "type": "File",
+            "inputBinding": {
+                "prefix": "-e",
+                "shellQuote": false,
+                "position": 1
+            },
+            "label": "Gene Expression Matrix",
+            "doc": "comma-delimited expression matrix file with columns as samples, rows as genes."
+
+        },
+        {
+            "id": "tf",
+            "type": "File",
+            "inputBinding": {
+                "prefix": "-t",
+                "shellQuote": false,
+                "position": 1
+            },
+            "label": "Transcription Factor Network",
+            "doc": "file with each row an edge from the TF network constructed using SJARACNe (https://github.com/jyyulab/SJARACNe)"
+
+        },
+        {
+            "id": "sig",
+            "type": "File",
+            "inputBinding": {
+                "prefix": "-s",
+                "shellQuote": false,
+                "position": 1
+            },
+            "label": "Signaling Network",
+            "doc": "file with each row an edge from the SIG network constructed using SJARACNe (https://github.com/jyyulab/SJARACNe)"
+
+        },
+        {
+            "id": "metadata",
+            "type": "File",
+            "inputBinding": {
+                "prefix": "-m",
+                "shellQuote": false,
+                "position": 3
+            },
+            "label": "Sample Grouping File",
+            "doc": "comma-delimited file with two columns: sample and group."
+        },
+        {
+            "sbg:toolDefaultValue": "project",
+            "id": "project_name",
+            "type": "string?",
+            "inputBinding": {
+                "prefix": "-p",
+                "shellQuote": false,
+                "position": 4
+            }
+        }
+    ],
+    "outputs": [
+        {
+            "id": "output",
+            "type": "Directory?",
+            "outputBinding": {
+                "glob": "NetBID_*",
+                "loadListing": "deep_listing"
+            }
+        },
+        {
+            "id": "netbid_log",
+            "type": "stdout",
+            "outputBinding": {
+                "glob": "$(inputs.expression_set.nameroot).log"
+            }
+        }
+    ],
+    "doc": "# Description\n\nNetBID is a data-driven system biology pipeline using a data-driven network-based Bayesian inference approach to find drivers from transcriptomics, proteomics, or phosphoproteomics data. The drivers can be either transcription factors (TF) or signaling factors (SIG).\n\nNetBID2 has the following key steps to perform hidden driver analysis:\n1.\tActivity calculation of drivers based on drivers’ regulons from a pre-built or user-provided SJARACNe network;\n2.\tDiscovery of differential expressed genes and differential activated drivers;\n3.\tGeneration of the master table for drivers;\n4.\tVisualizing drivers with significance profiles and target genes.\n\n# Inputs and outputs of NetBID workflow\n## Inputs:\n*\tExpression matrix - comma-delimited expression matrix file with columns as samples, rows as genes.\n*\tMetadata file - comma-delimited file with two columns: sample and group.\n*\tSignaling (SIG) network - file with each row an edge from the SIG network constructed using SJARACNe (https://github.com/jyyulab/SJARACNe)\n*\tTranscription factor (TF) network - file with each row an edge from the TF network constructed using SJARACNe (https://github.com/jyyulab/SJARACNe)\n\n## Outputs:\n*\tExcel file with differential expressed genes and differential activated drivers\n*\tPicture file visualizing drivers with significance profiles\n\n# Common issues\n*\tThe first row and the first column of the expression matrix file must be sample names and gene names, respectively.\n*\tThe metadata file must have at least two sample groups in the 2nd column.",
+
+    "label": "netbid",
+    "requirements": [
+        {
+            "class": "ShellCommandRequirement"
+        },
+        {
+            "class": "LoadListingRequirement"
+        },
+        {
+            "class": "DockerRequirement",
+            "dockerPull": "cgc-images.sbgenomics.com/stjude/netbid:latest"
+        },
+        {
+            "class": "InlineJavascriptRequirement"
+        }
+    ],
+    "stdout": "$(inputs.expression_set.nameroot).log",
+    "sbg:projectName": "netbid"
+}