Skip to content

Commit

Permalink
#36 Added and tested all annotation steps in nextflow script
Browse files Browse the repository at this point in the history
  • Loading branch information
Negin Valizadegan committed Nov 3, 2021
1 parent d9d2cde commit b189426
Showing 1 changed file with 272 additions and 1 deletion.
273 changes: 272 additions & 1 deletion filter.nf
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ process filter_seqkit {
tuple val(id), file(fasta) from fasta_Ch1

output:
tuple val(id), file('*.filtered.fasta') into filtered_file
tuple val(id), file('*.filtered.fasta') into filtered_file,filtered_file1,filtered_file2,filtered_file3,filtered_file4,filtered_file5,filtered_file6
file "${id}.filter-stats.txt"

script:
Expand Down Expand Up @@ -148,3 +148,274 @@ process blastnGRCh38 {
"""
}

/*
STEP 2.1: CREATE BLAST DATABASE (GRCh38.p0)
This process is executed only once
*/
process blastdbGRCh38p0 {
tag { "PREP:${genome2}" }
executor myExecutor
clusterOptions params.clusterAcct
cpus defaultCPU
queue params.myQueue
memory "$defaultMemory GB"
module "BLAST+/2.10.1-IGB-gcc-8.2.0"
storeDir genomeStore2

input:
file genome2 from genome_file2

output:
file "*" into genome2_db_ch

script:
"""
makeblastdb -in ${genome2} -parse_seqids -title "GRCh38.p0.no.decoy.hla" -dbtype nucl
"""
}

/*
STEP 2.2: RUN BLAST ON FILTERED READS (GRCh38.p0) -----
*/
process blastnGRCh38p0 {
tag { id }
executor myExecutor
clusterOptions params.clusterAcct
cpus defaultCPU
queue params.myQueue
memory "$defaultMemory GB"
module "BLAST+/2.10.1-IGB-gcc-8.2.0"
publishDir "${resultsPath}/blast/${params.assembler}/",mode:"copy"

input:
tuple val(id), file(filtered1) from filtered_file1
file genome2 from genome_file2
file "*" from genome2_db_ch

output:
tuple val(id), file('blast_*.GRCh38.p0.no.decoy.hla.asn')
tuple val(id), file('blast_*.GRCh38.p0.no.decoy.hla.txt')

script:
"""
# Run blast ------
blastn -db ${genome2} \
-query ${filtered1} \
-out blast_${id}.GRCh38.p0.no.decoy.hla.asn \
-outfmt 11 \
-max_target_seqs 5 \
-max_hsps 10 \
-evalue 1e-5 \
-perc_identity 90
# Change format to tabular -----
blast_formatter -archive blast_${id}.GRCh38.p0.no.decoy.hla.asn \
-outfmt "6 qseqid sseqid stitle pident length evalue qcovs bitscore sblastnames mismatch gapopen qstart qend qlen sstart send slen" \
> blast_${id}.GRCh38.p0.no.decoy.hla.txt
"""
}

/*
STEP 3.1: CREATE BLAST DATABASE (CHM13)
This process is executed only once
*/
process blastdbCHM13 {
tag { "PREP:${genome3}" }
executor myExecutor
clusterOptions params.clusterAcct
cpus defaultCPU
queue params.myQueue
memory "$defaultMemory GB"
module "BLAST+/2.10.1-IGB-gcc-8.2.0"
storeDir genomeStore3

input:
file genome3 from genome_file3

output:
file "*" into genome3_db_ch

script:
"""
makeblastdb -in ${genome3} -parse_seqids -title "CHM13.v1.1_GRCh38.p13.chrY" -dbtype nucl
"""
}

/*
STEP 3.2: RUN BLAST ON FILTERED READS (CHM13) -----
*/
process blastnCHM13 {
tag { id }
executor myExecutor
clusterOptions params.clusterAcct
cpus defaultCPU
queue params.myQueue
memory "$defaultMemory GB"
module "BLAST+/2.10.1-IGB-gcc-8.2.0"
publishDir "${resultsPath}/blast/${params.assembler}/",mode:"copy"

input:
tuple val(id), file(filtered2) from filtered_file2
file genome3 from genome_file3
file "*" from genome3_db_ch

output:
tuple val(id), file('blast_*.CHM13.v1.1_GRCh38.p13.chrY.fna.asn')
tuple val(id), file('blast_*.CHM13.v1.1_GRCh38.p13.chrY.fna.txt')

script:
"""
# Run blast ------
blastn -db ${genome3} \
-query ${filtered2} \
-out blast_${id}.CHM13.v1.1_GRCh38.p13.chrY.fna.asn \
-outfmt 11 \
-max_target_seqs 5 \
-max_hsps 10 \
-evalue 1e-5 \
-perc_identity 90
# Change format to tabular -----
blast_formatter -archive blast_${id}.CHM13.v1.1_GRCh38.p13.chrY.fna.asn \
-outfmt "6 qseqid sseqid stitle pident length evalue qcovs bitscore sblastnames mismatch gapopen qstart qend qlen sstart send slen" \
> blast_${id}.CHM13.v1.1_GRCh38.p13.chrY.fna.txt
"""
}


/*
STEP 4: RUN REPEAT MASKER ON FILTERED READS -----
*/
process repeatmasker {
tag { id }
executor myExecutor
clusterOptions params.clusterAcct
cpus defaultCPU
queue params.myQueue
memory "$defaultMemory GB"
module "RepeatMasker/4.1.2-p1-IGB-gcc-8.2.0-Perl-5.28.1"
publishDir "${resultsPath}/RepeatMasker/${params.assembler}/",mode:"copy"

input:
tuple val(id), file(filtered3) from filtered_file3

output:
file '*'

script:
"""
# Run Repeat Masker ------
RepeatMasker -species human ${filtered3}
"""
}

/*
STEP 5: RUN QUAST ON FILTERED READS -----
*/
process quast {
tag { id }
executor myExecutor
clusterOptions params.clusterAcct
cpus defaultCPU
queue params.myQueue
memory "$defaultMemory GB"
module "quast/5.0.0-IGB-gcc-4.9.4-Python-3.6.1"
publishDir "${resultsPath}/QUAST/${params.assembler}/",mode:"copy"

input:
tuple val(id), file(filtered4) from filtered_file4

output:
file '*'

script:
"""
# Run QUAST ------
quast.py ${filtered4} \
--threads 2
"""
}

/*
STEP 6: RUN BLAST ON FILTERED READS -----
*/
process blastncontam {
tag { id }
executor myExecutor
clusterOptions params.clusterAcct
cpus defaultCPU
queue params.myQueue
memory "$defaultMemory GB"
module "BLAST+/2.10.1-IGB-gcc-8.2.0"
module "ncbi-blastdb/20201212"
publishDir "${resultsPath}/blast-contam/${params.assembler}/",mode:"copy"

input:
tuple val(id), file(filtered5) from filtered_file5

output:
tuple val(id), file('blast_*.masurca.nt.asn')
tuple val(id), file('blast_*.masurca.nt.txt')

script:
"""
# Run blast ------
blastn -db nt \
-query ${filtered5} \
-out blast_${id}.masurca.nt.asn \
-outfmt 11 \
-max_target_seqs 5 \
-max_hsps 10 \
-evalue 1e-5 \
-perc_identity 90
# Change format to tabular -----
blast_formatter -archive blast_${id}.masurca.nt.asn \
-outfmt "6 qseqid sseqid stitle pident length evalue qcovs bitscore sblastnames mismatch gapopen qstart qend qlen sstart send slen" \
> blast_${id}.masurca.nt.txt
"""
}

/*
STEP 7: RUN KRAKEN ON FILTERED READS -----
*/
process kraken {
tag { id }
executor myExecutor
clusterOptions params.clusterAcct
cpus assemblerCPU
queue params.myQueue
memory "$assemblerMemory GB"
module "Kraken2/2.0.8-beta-IGB-gcc-4.9.4"
publishDir "${resultsPath}/kraken/${params.assembler}/",mode:"copy"

input:
tuple val(id), file(filtered6) from filtered_file6
file krakendb from krakendb_file

output:
tuple val(id),file('*_kraken2_report.txt')
tuple val(id),file('*_kraken2_classified.fasta')
tuple val(id),file('*_kraken2_unclassified.fasta')
tuple val(id),file('*_kraken2_output.txt')

script:
"""
# Run Kraken ------
kraken2 --use-names --threads 6 --quick \
--report ${id}_kraken2_report.txt \
--classified-out ${id}_kraken2_classified.fasta \
--unclassified-out ${id}_kraken2_unclassified.fasta \
--db ${krakendb} \
${filtered6} > ${id}_kraken2_output.txt
"""
}

0 comments on commit b189426

Please sign in to comment.