diff --git a/CHANGELOG.md b/CHANGELOG.md index 42f8850..b51ad1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ ## CHARLIE development version -- Fix bug where CHARLIE was falsely throwing a file permissions error for tempdir values containing bash variables. (#118, @kelly-sovacool) +### bug fixes + +- CHARLIE was falsely throwing a file permissions error for tempdir values containing bash variables. (#118, @kelly-sovacool) +- Singularity bind paths were not being set properly. (#119, @kelly-sovacool) ## CHARLIE 0.11.0 diff --git a/charlie b/charlie index 6e324b8..ca9e827 100755 --- a/charlie +++ b/charlie @@ -220,7 +220,8 @@ sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \ -e "s/HOST/${HOST}/g" \ -e "s/ADDITIVES/${ADDITIVES}/g" \ -e "s/VIRUSES/${VIRUSES}/g" \ - ${PIPELINE_HOME}/config/$PLATFORM/config.yaml > $CONFIGFILE + ${PIPELINE_HOME}/config/$PLATFORM/config.yaml |\ + cat - ${PIPELINE_HOME}/config/containers.yaml > $CONFIGFILE fi if [ ! -f $WORKDIR/nclscan.config ];then sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" ${PIPELINE_HOME}/resources/NCLscan.config.template > $WORKDIR/nclscan.config @@ -293,7 +294,9 @@ function reconfig(){ # rebuild config file and replace the config.yaml in the WORKDIR # this is only for dev purposes when new key-value pairs are being added to the config file check_essential_files - sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" ${PIPELINE_HOME}/config/config.yaml > $WORKDIR/config.yaml + sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" \ + ${PIPELINE_HOME}/config/config.yaml |\ + cat - ${PIPELINE_HOME}/config/containers.yaml > $WORKDIR/config.yaml echo "$WORKDIR/config.yaml has been updated!" } @@ -355,21 +358,10 @@ function unlock() { # SET SINGULARITY BINDS ... 
bind required singularity folders appropriately ########################################################################################## -function set_singularity_binds(){ - # this functions tries find what folders to bind - # TODO parse config file with pyyaml to determine singularity bind paths - echo "$PIPELINE_HOME" > ${WORKDIR}/tmp1 - echo "$WORKDIR" >> ${WORKDIR}/tmp1 - grep -o '\/.*' <(cat ${WORKDIR}/config.yaml ${WORKDIR}/samples.tsv)|dos2unix|tr '\t' '\n'|grep -v ' \|\/\/'|sort|uniq >> ${WORKDIR}/tmp1 - grep gpfs ${WORKDIR}/tmp1|awk -F'/' -v OFS='/' '{print $1,$2,$3,$4,$5}'| grep "[a-zA-Z0-9]" |sort|uniq > ${WORKDIR}/tmp2 - grep -v gpfs ${WORKDIR}/tmp1|awk -F'/' -v OFS='/' '{print $1,$2,$3}'| grep "[a-zA-Z0-9]"|sort|uniq > ${WORKDIR}/tmp3 - while read a;do readlink -f $a;done < ${WORKDIR}/tmp3 | grep "[a-zA-Z0-9]"> ${WORKDIR}/tmp4 - binds=$(cat ${WORKDIR}/tmp2 ${WORKDIR}/tmp3 ${WORKDIR}/tmp4|sort|uniq |tr '\n' ',') - rm -f ${WORKDIR}/tmp? - binds=$(echo $binds|awk '{print substr($1,1,length($1)-1)}') +function set_singularity_binds() { + binds=$( $PIPELINE_HOME/workflow/scripts/set_singularity_bind_paths.py ${WORKDIR}/config.yaml ${WORKDIR}/samples.tsv) SINGULARITY_BINDS="-B $EXTRA_SINGULARITY_BINDS,$binds" } - ########################################################################################## # PRINT SINGULARITY BINDS ... 
print bound singularity folders for debugging ########################################################################################## @@ -642,7 +634,7 @@ function main(){ ;; esac done - WORKDIR=$(readlink -f "$WORKDIR") + echo "Working Dir: $WORKDIR" if [[ -z "$SING_CACHE_DIR" ]]; then diff --git a/config/biowulf/config.yaml b/config/biowulf/config.yaml index da1eb19..c1dfa91 100644 --- a/config/biowulf/config.yaml +++ b/config/biowulf/config.yaml @@ -4,7 +4,7 @@ workdir: "WORKDIR" # temporary directory for intermediate files that are not saved -tempdir: '/lscratch/$SLURM_JOB_ID' +tempdir: "/lscratch/$SLURM_JOB_ID" # tab delimited samples file ... should have the following 3 columns # sampleName path_to_R1_fastq path_to_R2_fastq @@ -112,20 +112,3 @@ fastas_gtfs_dir: "/data/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs" annotation_lookups: hg38: "PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt" mm39: "PIPELINE_HOME/resources/mm39_circBase_annotation_lookup.txt" - -containers: - base: "docker://nciccbr/ccbr_ubuntu_base_20.04:v6" - bowtie1: "docker://nciccbr/charlie_bowtie1:v0.1.0" - circexplorer: "docker://nciccbr/ccbr_circexplorer:v1.0" - circRNA_finder: "docker://nciccbr/charlie_circrna_finder:v1" - ciri: "docker://nciccbr/charlie_ciri2:v1" - clear: "docker://nciccbr/ccbr_clear:2" - cutadapt: "docker://nciccbr/charlie_cutadapt_fqfilter:v1" - dcc: "docker://nciccbr/charlie_dcc:v0.2.0" - fastqc: "docker://nciccbr/ccrgb_qctools:v4.0" - mapsplice: "docker://cgrlab/mapsplice2:latest" - multiqc: "docker://nciccbr/ccbr_multiqc_1.15:v1" - picard: "docker://nciccbr/ccbr_picard_2.27.5:v1" - R: "docker://nciccbr/ccbr_r_4.3.0:v1" - star: "docker://nciccbr/ccbr_star_2.7.6a:latest" - star_ucsc_cufflinks: "docker://nciccbr/charlie_star_ucsc_cufflinks:v0.4.0" diff --git a/config/containers.yaml b/config/containers.yaml new file mode 100644 index 0000000..baca596 --- /dev/null +++ b/config/containers.yaml @@ -0,0 +1,16 @@ +containers: + base: 
"docker://nciccbr/ccbr_ubuntu_base_20.04:v6" + bowtie1: "docker://nciccbr/charlie_bowtie1:v0.1.1" + circexplorer: "docker://nciccbr/ccbr_circexplorer:v1.0" + circRNA_finder: "docker://nciccbr/charlie_circrna_finder:v1.0.1" + ciri: "docker://nciccbr/charlie_ciri2:v1.0.1" + clear: "docker://nciccbr/ccbr_clear:v2.0.1" + cutadapt: "docker://nciccbr/charlie_cutadapt_fqfilter:v1.0.1" + dcc: "docker://nciccbr/charlie_dcc:v0.2.1" + fastqc: "docker://nciccbr/ccrgb_qctools:v4.0" + mapsplice: "docker://cgrlab/mapsplice2:latest" + multiqc: "docker://nciccbr/ccbr_multiqc_1.15:v1" + picard: "docker://nciccbr/ccbr_picard_2.27.5:v1" + R: "docker://nciccbr/ccbr_r_4.3.0:v1" + star: "docker://nciccbr/ccbr_star_2.7.6a:latest" + star_ucsc_cufflinks: "docker://nciccbr/charlie_star_ucsc_cufflinks:v0.4.1" diff --git a/config/fnlcr/config.yaml b/config/fnlcr/config.yaml index de47366..86e0f40 100644 --- a/config/fnlcr/config.yaml +++ b/config/fnlcr/config.yaml @@ -112,20 +112,3 @@ fastas_gtfs_dir: "/mnt/projects/CCBR-Pipelines/db/charlie/fastas_gtfs" annotation_lookups: hg38: "PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt" mm39: "PIPELINE_HOME/resources/mm39_circBase_annotation_lookup.txt" - -containers: - base: "docker://nciccbr/ccbr_ubuntu_base_20.04:v6" - bowtie1: "docker://nciccbr/charlie_bowtie1:v0.1.0" - circexplorer: "docker://nciccbr/ccbr_circexplorer:v1.0" - circRNA_finder: "docker://nciccbr/charlie_circrna_finder:v1" - ciri: "docker://nciccbr/charlie_ciri2:v1" - clear: "docker://nciccbr/ccbr_clear:2" - cutadapt: "docker://nciccbr/charlie_cutadapt_fqfilter:v1" - dcc: "docker://nciccbr/charlie_dcc:v0.2.0" - fastqc: "docker://nciccbr/ccrgb_qctools:v4.0" - mapsplice: "docker://cgrlab/mapsplice2:latest" - multiqc: "docker://nciccbr/ccbr_multiqc_1.15:v1" - picard: "docker://nciccbr/ccbr_picard_2.27.5:v1" - R: "docker://nciccbr/ccbr_r_4.3.0:v1" - star: "docker://nciccbr/ccbr_star_2.7.6a:latest" - star_ucsc_cufflinks: "docker://nciccbr/charlie_star_ucsc_cufflinks:v0.4.0" diff --git 
a/docker/bowtie1/Dockerfile b/docker/bowtie1/Dockerfile index 6386538..3cea9ee 100644 --- a/docker/bowtie1/Dockerfile +++ b/docker/bowtie1/Dockerfile @@ -12,6 +12,8 @@ ENV REPONAME=${REPONAME} COPY environment.txt /data2/ RUN mamba install -c conda-forge -c bioconda --file /data2/environment.txt ENV R_LIBS_USER=/opt2/conda/lib/R/library/ +ENV PATH="/opt2/conda/bin/:$PATH" +ENV PYTHONPATH="" # Save Dockerfile in the docker COPY Dockerfile /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} diff --git a/docker/bowtie1/meta.yml b/docker/bowtie1/meta.yml index 0baf464..0ad8002 100644 --- a/docker/bowtie1/meta.yml +++ b/docker/bowtie1/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: nciccbr image_name: charlie_bowtie1 -version: v0.1.0 +version: v0.1.1 container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/docker/circRNA_finder/Dockerfile b/docker/circRNA_finder/Dockerfile index 0262596..97014f2 100644 --- a/docker/circRNA_finder/Dockerfile +++ b/docker/circRNA_finder/Dockerfile @@ -12,6 +12,8 @@ ENV REPONAME=${REPONAME} COPY environment.txt /data2/ RUN mamba install -c conda-forge -c bioconda --file /data2/environment.txt ENV R_LIBS_USER=/opt2/conda/lib/R/library/ +ENV PATH="/opt2/conda/bin/:$PATH" +ENV PYTHONPATH="" # install circRNA_finder WORKDIR /opt2 diff --git a/docker/circRNA_finder/meta.yml b/docker/circRNA_finder/meta.yml index ae5bcb3..2810384 100644 --- a/docker/circRNA_finder/meta.yml +++ b/docker/circRNA_finder/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: nciccbr image_name: charlie_circrna_finder -version: v1 +version: v1.0.1 container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/docker/ciri2/Dockerfile b/docker/ciri2/Dockerfile index c8ed18e..982884c 100644 --- a/docker/ciri2/Dockerfile +++ b/docker/ciri2/Dockerfile @@ -13,6 +13,7 @@ WORKDIR /opt2 RUN wget -O CIRI.zip https://sourceforge.net/projects/ciri/files/CIRI2/CIRI_v2.0.6.zip/download && \ unzip CIRI.zip ENV PATH="/opt2/CIRI2_v2.0.6/:$PATH" +ENV 
PYTHONPATH="/usr/local/lib/python3.8/dist-packages/" # Save Dockerfile in the docker COPY Dockerfile /opt2/Dockerfile_${REPONAME}.${BUILD_TAG} diff --git a/docker/ciri2/meta.yml b/docker/ciri2/meta.yml index 65fe3ed..7996d83 100644 --- a/docker/ciri2/meta.yml +++ b/docker/ciri2/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: nciccbr image_name: charlie_ciri2 -version: v1 +version: v1.0.1 container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/docker/clear/Dockerfile b/docker/clear/Dockerfile index 9f53a10..1257399 100755 --- a/docker/clear/Dockerfile +++ b/docker/clear/Dockerfile @@ -11,6 +11,7 @@ ENV CONDA_ENV=clear RUN mamba env create -n ${CONDA_ENV} -f /data2/environment.yml && \ echo "conda activate ${CONDA_ENV}" > ~/.bashrc ENV PATH="/opt2/conda/envs/${CONDA_ENV}/bin:$PATH" +ENV PYTHONPATH="/opt2/conda/envs/${CONDA_ENV}/lib/${CONDA_ENV}/site-packages/" RUN python -m pip install git+https://github.com/YangLab/CLEAR.git RUN which circ_quant && circ_quant -h diff --git a/docker/clear/meta.yml b/docker/clear/meta.yml index b04cd5d..d4c03a7 100644 --- a/docker/clear/meta.yml +++ b/docker/clear/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: nciccbr image_name: ccbr_clear -version: 2 +version: v2.0.1 container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/docker/cutadapt_fqfilter/Dockerfile b/docker/cutadapt_fqfilter/Dockerfile index de3475e..23426c1 100644 --- a/docker/cutadapt_fqfilter/Dockerfile +++ b/docker/cutadapt_fqfilter/Dockerfile @@ -14,6 +14,7 @@ ENV CONDA_ENV=cutadapt RUN mamba env create -n ${CONDA_ENV} -f /data2/environment.yml && \ echo "conda activate ${CONDA_ENV}" > ~/.bashrc ENV PATH="/opt2/conda/envs/${CONDA_ENV}/bin:$PATH" +ENV PYTHONPATH="/opt2/conda/envs/${CONDA_ENV}/lib/${CONDA_ENV}/site-packages/" ENV R_LIBS_USER=/opt2/conda/lib/R/library/ # Save Dockerfile in the docker diff --git a/docker/cutadapt_fqfilter/meta.yml b/docker/cutadapt_fqfilter/meta.yml index ed3a6b6..3396e67 100644 --- 
a/docker/cutadapt_fqfilter/meta.yml +++ b/docker/cutadapt_fqfilter/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: nciccbr image_name: charlie_cutadapt_fqfilter -version: v1 +version: v1.0.1 container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/docker/dcc/Dockerfile b/docker/dcc/Dockerfile index 6338f48..0f9d30c 100644 --- a/docker/dcc/Dockerfile +++ b/docker/dcc/Dockerfile @@ -14,7 +14,9 @@ WORKDIR /opt2 RUN wget https://github.com/dieterich-lab/DCC/archive/${DCC_VERSION}.zip -O dcc.zip && \ unzip dcc.zip && \ cd DCC-${DCC_VERSION} && \ + pip install numpy==1.21.0 && \ python setup.py install +ENV PYTHONPATH="/usr/local/lib/python3.8/dist-packages/" RUN which DCC && DCC -h # Save Dockerfile in the docker diff --git a/docker/dcc/meta.yml b/docker/dcc/meta.yml index a3a1393..5caca40 100644 --- a/docker/dcc/meta.yml +++ b/docker/dcc/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: nciccbr image_name: charlie_dcc -version: v0.2.0 +version: v0.2.1 container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git a/docker/star_ucsc_cufflinks/Dockerfile b/docker/star_ucsc_cufflinks/Dockerfile index 39ef4e3..b81aa44 100644 --- a/docker/star_ucsc_cufflinks/Dockerfile +++ b/docker/star_ucsc_cufflinks/Dockerfile @@ -14,6 +14,7 @@ ENV CONDA_ENV=py3.6 RUN mamba env create -n ${CONDA_ENV} -f /data2/environment.yml && \ echo "conda activate ${CONDA_ENV}" > ~/.bashrc ENV PATH="/opt2/conda/envs/${CONDA_ENV}/bin:$PATH" +ENV PYTHONPATH="/opt2/conda/envs/${CONDA_ENV}/lib/${CONDA_ENV}/site-packages/" ENV R_LIBS_USER=/opt2/conda/lib/R/library/ # install find_circ diff --git a/docker/star_ucsc_cufflinks/meta.yml b/docker/star_ucsc_cufflinks/meta.yml index 12f9f38..084bc08 100644 --- a/docker/star_ucsc_cufflinks/meta.yml +++ b/docker/star_ucsc_cufflinks/meta.yml @@ -1,4 +1,4 @@ dockerhub_namespace: nciccbr image_name: charlie_star_ucsc_cufflinks -version: v0.4.0 +version: v0.4.1 container: "$(dockerhub_namespace)/$(image_name):$(version)" diff --git 
a/workflow/Snakefile b/workflow/Snakefile index 04bb067..22a9a50 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -305,7 +305,7 @@ include: "rules/post_findcircrna_processing.smk" on_finish_cmd = f""" sleep 10 -mkdir -p {WORKDIR}/log && run_jobby_on_snakemake_log {WORKDIR}/snakemake.log | tee {WORKDIR}/log/snakemake.log.jobby | cut -f2,3,18 > {WORKDIR}/log/snakemake.log.jobby.short +mkdir -p {WORKDIR}/logs/ && run_jobby_on_snakemake_log {WORKDIR}/snakemake.log | tee {WORKDIR}/logs/snakemake.log.jobby | cut -f2,3,18 > {WORKDIR}/logs/snakemake.log.jobby.short if command -v spooker &> /dev/null; then spooker {WORKDIR} CHARLIE else diff --git a/workflow/rules/align.smk b/workflow/rules/align.smk index 47f06cf..1b408a6 100644 --- a/workflow/rules/align.smk +++ b/workflow/rules/align.smk @@ -244,7 +244,7 @@ rule merge_SJ_tabs: """ set -exo pipefail cat {input} | \\ -python {params.script1} \\ +python -E {params.script1} \\ --regions {params.regions} \\ --filter1regions {params.filter1regions} \\ --filter1_noncanonical {params.filter1_noncanonical} \\ diff --git a/workflow/rules/create_index.smk b/workflow/rules/create_index.smk index 419c036..949d6af 100644 --- a/workflow/rules/create_index.smk +++ b/workflow/rules/create_index.smk @@ -33,13 +33,13 @@ samtools faidx {params.reffa} && \\ # bwa index -p ref {params.reffa} > bwa_index.log ... 
created in a separate rule # NCLscan files -python {params.script3} --ingtf {params.refgtf} --outgtf {output.fixed_gtf} +python -E {params.script3} --ingtf {params.refgtf} --outgtf {output.fixed_gtf} gffread -w {output.transcripts_fa} -g {params.reffa} {output.fixed_gtf} touch {output.lncRNA_transcripts_fa} create_reference.py -c {params.nclscan_config} gtfToGenePred -ignoreGroupsWithoutExons {output.fixed_gtf} ref.genes.genepred && \\ - python {params.script1} {output.fixed_gtf} ref.genes.genepred > {output.genepred_w_geneid} + python -E {params.script1} {output.fixed_gtf} ref.genes.genepred > {output.genepred_w_geneid} stardir=$(dirname {output.sa}) mkdir -p $stardir && \\ @@ -125,4 +125,4 @@ set -exo pipefail refdir=$(dirname {params.reffa}) cd $refdir bowtie-build {params.reffa} ref -""" +""" diff --git a/workflow/rules/findcircrna.smk b/workflow/rules/findcircrna.smk index 7edec8d..84712fe 100644 --- a/workflow/rules/findcircrna.smk +++ b/workflow/rules/findcircrna.smk @@ -141,7 +141,7 @@ def get_per_sample_files_to_merge(wildcards): # 2. parse the back_spliced_junction BED from above along with known splicing annotations to CircExplorer2 'parse' to create # a. circularRNA_known.txt ... circRNAs around known gene exons # b. low_conf_circularRNA_known.txt .... circRNAs with low confidence -# 3. parse back_spliced_junction BED along with circularRNA_known.txt and low_conf_circularRNA_known.txt to custom python script +# 3. 
parse back_spliced_junction BED along with circularRNA_known.txt and low_conf_circularRNA_known.txt to custom python script # to create an aggregated list of BSJs with following columns: # | # | ColName | # |---|-------------| @@ -305,7 +305,7 @@ perl {params.ciripl} \\ # samtools view -@{threads} -T {params.reffa} -CS {params.sample}.bwa.sam | samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O CRAM -o {output.ciribam} - samtools view -@{threads} -bS {params.sample}.bwa.sam | samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.ciribam} - rm -rf {params.sample}.bwa.sam -python {params.script} \\ +python -E {params.script} \\ --ciriout {output.ciriout} \\ --back_spliced_min_reads {params.bsj_min_nreads} \\ --host "{params.host}" \\ @@ -411,7 +411,7 @@ rule create_ciri_count_matrix: """ set -exo pipefail cd {params.outdir} -python {params.script} {params.lookup} {params.hostID} +python -E {params.script} {params.lookup} {params.hostID} """ @@ -441,8 +441,8 @@ rule create_circexplorer_count_matrix: shell: """ cd {params.outdir} -python {params.script} {params.lookup} {params.hostID} -python {params.script2} {params.lookup} {params.hostID} +python -E {params.script} {params.lookup} {params.hostID} +python -E {params.script2} {params.lookup} {params.hostID} """ @@ -563,7 +563,7 @@ set -exo pipefail find {params.cleardir} -maxdepth 1 -type d -name "quant.txt*" -exec rm -rf {{}} \; if [[ "$(cat {input.quantfile} | wc -l)" != "0" ]] then -python {params.script} {params.lookup} {input.quantfile} {params.hostID} +python -E {params.script} {params.lookup} {input.quantfile} {params.hostID} else touch {output.annotatedquantfile} fi @@ -722,10 +722,10 @@ ls -alrth {params.tmpdir} paste {output.cr} {output.linear} | cut -f1-5,9 > {params.tmpdir}/CircRNALinearCount -python {params.script} \\ +python -E {params.script} \\ --CircCoordinates {output.cc} --CircRNALinearCount {params.tmpdir}/CircRNALinearCount -o {output.ct} -python 
{params.script2} \\ +python -E {params.script2} \\ --in_dcc_counts_table {output.ct} \\ --out_dcc_filtered_counts_table {output.ctf} \\ --back_spliced_min_reads {params.bsj_min_nreads} \\ @@ -843,7 +843,7 @@ R2fn=$(basename {input.R2}) zcat {input.R1} > {params.tmpdir}/${{R1fn%.*}} zcat {input.R2} > {params.tmpdir}/${{R2fn%.*}} -python $MSHOME/mapsplice.py \\ +python -E $MSHOME/mapsplice.py \\ -1 {params.tmpdir}/${{R1fn%.*}} \\ -2 {params.tmpdir}/${{R2fn%.*}} \\ -c {params.separate_fastas} \\ @@ -862,7 +862,7 @@ else R1fn=$(basename {input.R1}) zcat {input.R1} > {params.tmpdir}/${{R1fn%.*}} -python $MSHOME/mapsplice.py \ +python -E $MSHOME/mapsplice.py \ -1 {params.tmpdir}/${{R1fn%.*}} \ -c {params.separate_fastas} \ -p {threads} \ @@ -934,7 +934,7 @@ rule mapsplice_postprocess: """ set -exo pipefail mkdir -p {params.tmpdir} -python {params.script} \\ +python -E {params.script} \\ --circularRNAstxt {input.circRNAs} \\ -o {output.ct} \\ -fo {output.ctf} \\ @@ -1027,7 +1027,7 @@ results_bn=$(basename {output.result}) if [ "{params.peorse}" == "PE" ];then NCLscan.py -c {params.nclscan_config} -pj {params.sample} -o {params.tmpdir} --fq1 {input.R1} --fq2 {input.R2} rsync -az --progress {params.tmpdir}/${{results_bn}} {output.result} -python {params.script} \\ +python -E {params.script} \\ --result {output.result} \\ -o {output.ct} \\ -fo {output.ctf} \\ @@ -1163,7 +1163,7 @@ rule find_circ: shell: """ set -exo pipefail -python --version +python -E --version which python mkdir -p {params.tmpdir} cd {params.tmpdir} @@ -1223,7 +1223,7 @@ grep CIRCULAR {params.tmpdir}/{params.sample}.splice_sites.bed | \\ > {output.find_circ_bsj_bed} echo -ne "chrom\\tstart\\tend\\tname\\tn_reads\\tstrand\\tn_uniq\\tuniq_bridges\\tbest_qual_left\\tbest_qual_right\\ttissues\\ttiss_counts\\tedits\\tanchor_overlap\\tbreakpoints\\tsignal\\tstrandmatch\\tcategory\\n" > {output.find_circ_bsj_bed_filtered} -cat {output.find_circ_bsj_bed} | python {params.collapse_script} | awk -F"\\t" -v 
m={params.min_reads} -v OFS="\\t" '{{if ($5>=m) {{print}}}}' \\ +cat {output.find_circ_bsj_bed} | python -E {params.collapse_script} | awk -F"\\t" -v m={params.min_reads} -v OFS="\\t" '{{if ($5>=m) {{print}}}}' \\ >> {output.find_circ_bsj_bed_filtered} """ @@ -1332,7 +1332,7 @@ for f in {input};do fi done -python {params.script} \\ +python -E {params.script} \\ --counttablelist $infiles \\ -o {output.matrix} \\ --minreads {params.bsj_min_nreads} diff --git a/workflow/rules/post_findcircrna_processing.smk b/workflow/rules/post_findcircrna_processing.smk index 192cc23..05fe341 100644 --- a/workflow/rules/post_findcircrna_processing.smk +++ b/workflow/rules/post_findcircrna_processing.smk @@ -7,7 +7,7 @@ def get_alignment_stats_input(wildcards): d['star2bam']=join(WORKDIR,"results",sample,"STAR2p",sample+"_p2.bam") d['star2bam_chimeric']=join(WORKDIR,"results",sample,"STAR2p",sample+"_p2.chimeric.bam") d['star2bam_non_chimeric']=join(WORKDIR,"results",sample,"STAR2p",sample+"_p2.non_chimeric.bam") - d['filtered_bam']=join(WORKDIR,"results",sample,"circExplorer",sample+".bam") + d['filtered_bam']=join(WORKDIR,"results",sample,"circExplorer",sample+".bam") d['linearbam']=join(WORKDIR,"results",sample,"circExplorer",sample+".linear.bam") d['splicedbam']=join(WORKDIR,"results",sample,"circExplorer",sample+".spliced.bam") d['BSJbam']=join(WORKDIR,"results",sample,"circExplorer",sample+".BSJ.bam") @@ -108,21 +108,21 @@ python3 {params.scriptse} \\ fi -samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.plusBSJbam} {params.tmpdir}/{params.sample}.BSJ.plus.bam -samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.minusBSJbam} {params.tmpdir}/{params.sample}.BSJ.minus.bam +samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.plusBSJbam} {params.tmpdir}/{params.sample}.BSJ.plus.bam +samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.minusBSJbam} 
{params.tmpdir}/{params.sample}.BSJ.minus.bam samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o {output.BSJbam} {params.tmpdir}/{params.sample}.BSJ.bam -for b in {output.plusBSJbam} {output.minusBSJbam} {output.BSJbam} +for b in {output.plusBSJbam} {output.minusBSJbam} {output.BSJbam} # for b in {output.plusBSJbam} {output.minusBSJbam} do bash {params.bam2bwscript} $b {params.tmpdir} done -for i in $(echo {params.host}|tr ',' ' ');do +for i in $(echo {params.host}|tr ',' ' ');do samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o ${{outdir}}/{params.sample}.${{i}}.BSJ.bam {params.tmpdir}/{params.sample}.${{i}}.BSJ.bam bash {params.bam2bwscript} ${{outdir}}/{params.sample}.${{i}}.BSJ.bam {params.tmpdir} done -for i in $(echo {params.viruses}|tr ',' ' ');do +for i in $(echo {params.viruses}|tr ',' ' ');do samtools sort -l 9 -T {params.tmpdir} --write-index -@{threads} -O BAM -o ${{outdir}}/{params.sample}.${{i}}.BSJ.bam {params.tmpdir}/{params.sample}.${{i}}.BSJ.bam bash {params.bam2bwscript} ${{outdir}}/{params.sample}.${{i}}.BSJ.bam {params.tmpdir} done @@ -253,9 +253,6 @@ rm -rf {params.tmpdir} # linear_spliced_BSJ_reads_same_strand # linear_BSJ_reads_opposite_strand # linear_spliced_BSJ_reads_opposite_strand -localrules: - create_circExplorer_merged_found_counts_table, - rule create_circExplorer_merged_found_counts_table: input: @@ -404,10 +401,6 @@ else: """ -localrules: - merge_alignment_stats, - - rule merge_alignment_stats: input: expand( @@ -432,7 +425,7 @@ for f in {input};do paste {output} {params.tmpdir}/${{count}} > {params.tmpdir}/${{count}}.tmp mv {params.tmpdir}/${{count}}.tmp {output} fi -done +done """ diff --git a/workflow/scripts/Create_circExplorer_BSJ_count_matrix.py b/workflow/scripts/Create_circExplorer_BSJ_count_matrix.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/Create_circExplorer_count_matrix.py b/workflow/scripts/Create_circExplorer_count_matrix.py old mode 100644 new mode 
100755 diff --git a/workflow/scripts/Create_ciri_count_matrix.py b/workflow/scripts/Create_ciri_count_matrix.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_add_geneid2genepred.py b/workflow/scripts/_add_geneid2genepred.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_append_splice_site_flanks_to_BSJs.py b/workflow/scripts/_append_splice_site_flanks_to_BSJs.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_bam_filter_BSJ_for_HQonly.py b/workflow/scripts/_bam_filter_BSJ_for_HQonly.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_bam_get_alignment_stats.py b/workflow/scripts/_bam_get_alignment_stats.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_bamtobed2readendsbed.py b/workflow/scripts/_bamtobed2readendsbed.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_bedintersect_to_rid2jid.py b/workflow/scripts/_bedintersect_to_rid2jid.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_bedpe2bed.py b/workflow/scripts/_bedpe2bed.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_circExplorer_BSJ_get_strand.py b/workflow/scripts/_circExplorer_BSJ_get_strand.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_circExplorer_BSJ_get_strand.sh b/workflow/scripts/_circExplorer_BSJ_get_strand.sh old mode 100644 new mode 100755 diff --git a/workflow/scripts/_collapse_find_circ.py b/workflow/scripts/_collapse_find_circ.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_compare_lists.py b/workflow/scripts/_compare_lists.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_create_circExplorer_BSJ_bam_pe.py b/workflow/scripts/_create_circExplorer_BSJ_bam_pe.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_create_circExplorer_BSJ_bam_se.py b/workflow/scripts/_create_circExplorer_BSJ_bam_se.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_create_circExplorer_BSJ_hqonly_pe.py 
b/workflow/scripts/_create_circExplorer_BSJ_hqonly_pe.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_create_circExplorer_linear_bam.v2.sh b/workflow/scripts/_create_circExplorer_linear_bam.v2.sh old mode 100644 new mode 100755 index f415ad9..cf44b03 --- a/workflow/scripts/_create_circExplorer_linear_bam.v2.sh +++ b/workflow/scripts/_create_circExplorer_linear_bam.v2.sh @@ -78,10 +78,10 @@ function printtime() { start0=$2 start=$3 msg=$4 - end=$(date +%s.%N) - runtime0=$(python -c "print(${end} - ${start0})") + end=$(date +%s.%N) + runtime0=$(python -E -c "print(${end} - ${start0})") runtime0=${runtime0%.*} - runtime=$(python -c "print(${end} - ${start})") + runtime=$(python -E -c "print(${end} - ${start})") runtime=${runtime%.*} echo "$scriptname | $runtime0 | $runtime | $msg" } @@ -156,7 +156,7 @@ start=$(date +%s.%N) bedtools bamtobed -split -i $filtered_bam > ${tmpdir}/${sample_name}.bed -python ${SCRIPT_DIR}/_process_bamtobed.py \ +python -E ${SCRIPT_DIR}/_process_bamtobed.py \ --inbed ${tmpdir}/${sample_name}.bed \ --outbed ${tmpdir}/${sample_name}.readends.bed \ --linear ${tmpdir}/${sample_name}.linear.readids.gz \ diff --git a/workflow/scripts/_extract_circExplorer_linear_reads.py b/workflow/scripts/_extract_circExplorer_linear_reads.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_filter_linear_spliced_readids_w_rid2jid.py b/workflow/scripts/_filter_linear_spliced_readids_w_rid2jid.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_make_master_counts_table.py b/workflow/scripts/_make_master_counts_table.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_make_merge_per_sample_sh.py b/workflow/scripts/_make_merge_per_sample_sh.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_merge_circExplorer_found_counts.py b/workflow/scripts/_merge_circExplorer_found_counts.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_merge_per_sample_counts_table.py 
b/workflow/scripts/_merge_per_sample_counts_table.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_multifasta2separatefastas.sh b/workflow/scripts/_multifasta2separatefastas.sh old mode 100644 new mode 100755 diff --git a/workflow/scripts/_process_bamtobed.py b/workflow/scripts/_process_bamtobed.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/_run_circExplorer_bwa.sh b/workflow/scripts/_run_circExplorer_bwa.sh old mode 100644 new mode 100755 index 1234ff3..4f7951e --- a/workflow/scripts/_run_circExplorer_bwa.sh +++ b/workflow/scripts/_run_circExplorer_bwa.sh @@ -63,12 +63,12 @@ cat back_spliced_junction.filter1.bed|tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREA # 1. both chromosomes are the same # 2. both strands are the same # 3. both coordinates are NOT the same -# as the junction file will be empy for BWA .. this strategy needs to be redone! +# as the junction file will be empty for BWA .. this strategy needs to be redone! # awk '$1==$4' junction |awk '$3==$6' | awk '$2!=$5' > junction.filter1 -# use junctions file to get the true strand (not + as reported in back_spliced_junction.bed) ... this is done +# use junctions file to get the true strand (not + as reported in back_spliced_junction.bed) ... this is done # using _circExplorer_BSJ_get_strand.sh ... 
and replace it to create new BSJ BED -# while read seq s e score name ostrand;do +# while read seq s e score name ostrand;do # strand=$(bash ${SCRIPTDIR}/_circExplorer_BSJ_get_strand.sh $seq $s $e junction.filter1) # echo -ne "$seq\t$s\t$e\t$score\t.\t$strand\n" # done < back_spliced_junction.filter2.bed > back_spliced_junction.filter2.strand_fixed.bed @@ -89,7 +89,7 @@ cp low_conf_circRNA_known.txt $LOWCONF cat $KNOWNTXT |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDKNOWNTXT cat $LOWCONF |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDLOWCONF -python ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \ +python -E ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \ --back_spliced_bed $STRANDFIXEDBSJBED \ --back_spliced_min_reads $MINREADS \ --circularRNA_known $FILTEREDKNOWNTXT \ diff --git a/workflow/scripts/_run_circExplorer_star.sh b/workflow/scripts/_run_circExplorer_star.sh old mode 100644 new mode 100755 index cd4ac9f..0ecd1fb --- a/workflow/scripts/_run_circExplorer_star.sh +++ b/workflow/scripts/_run_circExplorer_star.sh @@ -56,7 +56,7 @@ CIRCexplorer2 parse -t STAR junction > $PARSELOG 2>&1 # copy back original back_spliced BED file cp back_spliced_junction.bed $ORIGINALBSJBED -python ${SCRIPTDIR}/_circExplorer_BSJ_get_strand.py ${JUNCTIONFILE} back_spliced_junction.bed ${MINREADS} > back_spliced_junction.strand_fixed.bed +python -E ${SCRIPTDIR}/_circExplorer_BSJ_get_strand.py ${JUNCTIONFILE} back_spliced_junction.bed ${MINREADS} > back_spliced_junction.strand_fixed.bed # copy back strand_fixed BSJ BED cp back_spliced_junction.strand_fixed.bed $STRANDFIXEDBSJBED @@ -72,7 +72,7 @@ cp low_conf_circRNA_known.txt $LOWCONF cat $KNOWNTXT |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDKNOWNTXT cat $LOWCONF |tr '/' '\t'|cut -f1-3,5- |awk -v m=$MINREADS '$4>=m' > $FILTEREDLOWCONF -python ${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \ +python -E 
${SCRIPTDIR}/circExplorer_get_annotated_counts_per_sample.py \ --back_spliced_bed $STRANDFIXEDBSJBED \ --back_spliced_min_reads $MINREADS \ --circularRNA_known $FILTEREDKNOWNTXT \ diff --git a/workflow/scripts/annotate_clear_quant.py b/workflow/scripts/annotate_clear_quant.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/apply_junction_filters.py b/workflow/scripts/apply_junction_filters.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/bam_get_max_readlen.py b/workflow/scripts/bam_get_max_readlen.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/bam_split_by_regions.py b/workflow/scripts/bam_split_by_regions.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/bam_to_bigwig.sh b/workflow/scripts/bam_to_bigwig.sh old mode 100644 new mode 100755 diff --git a/workflow/scripts/circExplorer_get_annotated_counts_per_sample.py b/workflow/scripts/circExplorer_get_annotated_counts_per_sample.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/create_circExplorer_linear_bam.py b/workflow/scripts/create_circExplorer_linear_bam.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/create_circExplorer_per_sample_counts_table.py b/workflow/scripts/create_circExplorer_per_sample_counts_table.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/create_dcc_per_sample_counts_table.py b/workflow/scripts/create_dcc_per_sample_counts_table.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/create_mapsplice_per_sample_counts_table.py b/workflow/scripts/create_mapsplice_per_sample_counts_table.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/create_nclscan_per_sample_counts_table.py b/workflow/scripts/create_nclscan_per_sample_counts_table.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/filter_bam.py b/workflow/scripts/filter_bam.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/filter_bam_by_readids.py 
b/workflow/scripts/filter_bam_by_readids.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/filter_bam_for_BSJs.py b/workflow/scripts/filter_bam_for_BSJs.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/filter_bam_for_linear_reads.py b/workflow/scripts/filter_bam_for_linear_reads.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/filter_bam_for_splice_reads.py b/workflow/scripts/filter_bam_for_splice_reads.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/filter_dcc.py b/workflow/scripts/filter_dcc.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/filter_junction.py b/workflow/scripts/filter_junction.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/filter_junction_human.py b/workflow/scripts/filter_junction_human.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/fix_gtfs.py b/workflow/scripts/fix_gtfs.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/fix_refseq_gtf.py b/workflow/scripts/fix_refseq_gtf.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/gather_cluster_stats.sh b/workflow/scripts/gather_cluster_stats.sh old mode 100644 new mode 100755 diff --git a/workflow/scripts/get_index_rl.py b/workflow/scripts/get_index_rl.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/junctions2readids.py b/workflow/scripts/junctions2readids.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/make_star_index.sh b/workflow/scripts/make_star_index.sh old mode 100644 new mode 100755 diff --git a/workflow/scripts/merge_ReadsPerGene_counts.R b/workflow/scripts/merge_ReadsPerGene_counts.R old mode 100644 new mode 100755 diff --git a/workflow/scripts/merge_counts_tables_2_counts_matrix.py b/workflow/scripts/merge_counts_tables_2_counts_matrix.py old mode 100644 new mode 100755 diff --git a/workflow/scripts/reformat_hg38_2_hg19.py b/workflow/scripts/reformat_hg38_2_hg19.py old mode 100644 new mode 100755 diff --git 
#!/usr/bin/env python
# Patch metadata that preceded this module in the enclosing diff (kept verbatim):
#   a/workflow/scripts/set_singularity_bind_paths.py b/workflow/scripts/set_singularity_bind_paths.py
#   new file mode 100755
#   index 0000000..47d70ba
#   --- /dev/null
#   +++ b/workflow/scripts/set_singularity_bind_paths.py
#   @@ -0,0 +1,111 @@
"""
set_singularity_bind_paths.py

Usage: set_singularity_bind_paths.py CONFIG_YAML SAMPLES_TSV

Print, as one comma-separated line on stdout, the directories that have to be
bound into the singularity containers. The directories are derived from the
paths found in the pipeline config file and in the samples manifest.
"""
import os
import pathlib
import sys

# Config keys whose values are treated as paths that must be bound.
# "tempdir" is deliberately absent: it is excluded from the bind paths.
CONFIG_PATH_KEYS = (
    "workdir",
    "samples",
    "nclscan_config",
    "scriptsdir",
    "resourcesdir",
    "cluster",
    "adapters",
    "fastas_gtfs_dir",
)


def print_bind_paths(config_filename, samples_filename):
    """Print the bind paths for a config file and a samples manifest.

    The result is written to stdout as a single comma-separated line so that
    the calling shell script can capture it with command substitution.

    @param config_filename str: path to the pipeline YAML config file
    @param samples_filename str: path to the tab-separated samples manifest
    """
    bind_paths = resolve_additional_bind_paths(
        get_paths(config_filename, samples_filename)
    )
    print(",".join(bind_paths))


def get_paths(config_filename, samples_filename):
    """Collect every path referenced by the config file and samples manifest.

    @param config_filename str: path to the pipeline YAML config file
    @param samples_filename str: path to the tab-separated samples manifest
    @return paths set[str]: the union of the config paths and the sample paths
    """
    return get_config_paths(config_filename) | get_sample_paths(samples_filename)


def get_config_paths(config_filename):
    """Return the values of the path-like keys present in the YAML config.

    Only the keys listed in CONFIG_PATH_KEYS are consulted. Keys that are
    missing, null, or not strings are ignored so that a sparse config cannot
    crash the later path handling.

    @param config_filename str: path to the pipeline YAML config file
    @return paths set[str]: string values found under the consulted keys
    """
    # PyYAML is imported lazily so the pure path helpers in this module stay
    # importable (and testable) on systems where PyYAML is not installed.
    import yaml

    with open(config_filename, "r") as config_file:
        # An empty YAML document loads as None; treat it as an empty mapping.
        config = yaml.safe_load(config_file) or {}
    return {
        config[key]
        for key in CONFIG_PATH_KEYS
        if isinstance(config.get(key), str)
    }


def get_sample_paths(samples_filename):
    """Return the paths listed in columns 2 and 3 of the samples manifest.

    The first line is treated as a header and skipped. Column 3 is optional.
    Line terminators are removed before splitting; otherwise the last column
    would keep a trailing newline, which names a file that does not exist and
    therefore prevents symlinks from being resolved later on.

    @param samples_filename str: path to the tab-separated samples manifest
    @return paths set[str]: non-empty values of the second and third columns
    """
    paths = set()
    with open(samples_filename, "r") as samples_file:
        next(samples_file, None)  # skip header; an empty file is not an error
        for line in samples_file:
            fields = line.rstrip("\r\n").split("\t")
            paths.update(field for field in fields[1:3] if field)
    return paths


def resolve_additional_bind_paths(search_paths):
    """Adapted from RENEE

    Finds singularity bind paths from an arbitrary collection of paths. Paths
    are indexed with a composite key containing the first two directories of
    the absolute file path, to avoid issues related to shared names across the
    /gpfs shared network filesystem. For each indexed list of file paths, a
    common parent directory is found. Entries that are not absolute local
    paths (remote URIs, relative strings, non-string values) are skipped.

    @param search_paths iterable:
        Candidate paths to find common bind paths from
    @return common_paths list[str]:
        Sorted, de-duplicated list of directories to bind
    """
    indexed_paths = {}

    for ref in search_paths:
        # Only absolute local paths can be bound. This single test also skips
        # remote URIs such as sftp://, s3:// and gs://, none of which start
        # with the path separator.
        if not isinstance(ref, str) or not ref.startswith(os.sep):
            continue

        # Index both the path as given and its resolved form, taking care of
        # paths which are symlinks.
        for candidate in (ref, str(pathlib.Path(ref).resolve())):
            abs_path = os.path.abspath(candidate)
            # Composite index built from (up to) the first two directories.
            # Grouping per index prevents the common path of unrelated roots
            # such as /scratch and /data from collapsing to "/". Slicing never
            # raises, so a one-directory path simply yields a 1-tuple.
            index = tuple(abs_path.split(os.sep)[1:3])
            indexed_paths.setdefault(index, []).append(abs_path)

    # commonprefix compares character by character, so the result may end in
    # the middle of a name; dirname trims it back to a real directory.
    common_paths = {
        os.path.dirname(os.path.commonprefix(paths))
        for paths in indexed_paths.values()
    }
    return sorted(common_paths)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        # stdout is captured by the caller as the bind-path list, so the usage
        # text and the error go to stderr (sys.exit with a string does that
        # and exits with status 1).
        sys.exit((__doc__ or "") + "\nPlease provide two arguments")
    print_bind_paths(sys.argv[1], sys.argv[2])

# Patch metadata that followed this module in the enclosing diff (kept verbatim):
#   diff --git a/workflow/scripts/transcript2gene.py b/workflow/scripts/transcript2gene.py
#   old mode 100644
#   new mode 100755
#   diff --git a/workflow/scripts/validate_BSJ_reads_and_split_BSJ_bam_by_strand.py b/workflow/scripts/validate_BSJ_reads_and_split_BSJ_bam_by_strand.py
#   old mode 100644
#   new mode 100755
#   diff --git a/workflow/scripts/venn.R b/workflow/scripts/venn.R
#   old mode 100644
#   new mode 100755