Skip to content

Commit

Permalink
#36 Created Nextflow script for annotation seqkit process
Browse files Browse the repository at this point in the history
  • Loading branch information
Negin Valizadegan committed Nov 2, 2021
1 parent 09e9e1f commit e80322c
Showing 1 changed file with 51 additions and 7 deletions.
58 changes: 51 additions & 7 deletions filter.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ params.min_read_length = '500' /*minimum length of read to be kep
/* Stage name; used below as the sub-directory of params.outputDir that holds this script's results */
stage = "annotation"

/* Results path: <params.outputDir>/<stage> */
resultsPath = "${params.outputDir}/${stage}"


Expand All @@ -27,19 +28,61 @@ assemblerMemory = '100'
params.clusterAcct = " -A h3bionet "

/*Prepare input*/
genome_file = file(params.genome)
genomeStore = genome_file.getParent()
/* Resolve the three reference genome paths and the directory that holds each one */
genome_file1 = file(params.genome1)
genome_file2 = file(params.genome2)
// Third reference is read from params.genome3; it previously re-used params.genome2,
// which made genome_file3 a duplicate of genome_file2.
// NOTE(review): confirm params.genome3 is declared alongside params.genome1/params.genome2.
genome_file3 = file(params.genome3)
genomeStore1 = genome_file1.getParent()
genomeStore2 = genome_file2.getParent()
genomeStore3 = genome_file3.getParent()

// Sanity checks
if( !genome_file.exists() ) exit 1, "Missing reference genome file: ${genome_file}"
// For each of the three reference genomes: if the file is absent, stop with exit status 1
// and a message naming the missing path.
if( !genome_file1.exists() ) exit 1, "Missing reference genome file: ${genome_file1}"
if( !genome_file2.exists() ) exit 1, "Missing reference genome file: ${genome_file2}"
if( !genome_file3.exists() ) exit 1, "Missing reference genome file: ${genome_file3}"
//if( params.assembler != "megahit" || params.assembler != "masurca" ) exit 1, "Unknown assembler: ${params.assembler}"

CRAM_Ch1 = Channel.fromFilePairs("${params.samplePath}", size: 1)

/*
Input files -----
Channel built from the files matching params.samplePath, grouped one file
per item (size: 1). It is consumed by filter_seqkit as (id, fasta).
*/
fasta_Ch1 = Channel.fromFilePairs("${params.samplePath}", size: 1)


/*
  filter_seqkit
  Filter the assembly file -----
  For every (id, fasta) item taken from fasta_Ch1:
    - writes <id>.filtered.fasta, keeping only sequences whose length is at
      least params.min_read_length and removing gap characters;
    - writes <id>.filter-stats.txt with the seqkit stats of the input followed
      by the stats row of the filtered file.
  Emits (id, filtered fasta) into the filtered_file channel.
*/
process filter_seqkit {
    tag { id }
    executor myExecutor
    clusterOptions params.clusterAcct
    cpus defaultCPU
    queue params.myQueue
    memory "$defaultMemory GB"
    module "seqkit/0.12.1"
    publishDir "${resultsPath}/seqkit/masurca/test"

    input:
    tuple val(id), file(fasta) from fasta_Ch1

    output:
    tuple val(id), file('*.filtered.fasta') into filtered_file
    file "${id}.filter-stats.txt"

    script:
    """
    # Run seqkit to drop sequences shorter than params.min_read_length (default 500) ------
    seqkit seq --min-len ${params.min_read_length} --remove-gaps ${fasta} > ${id}.filtered.fasta
    # Create seqkit stats before and after filtering ------
    seqkit stats ${fasta} > ${id}.filter-stats.txt
    # sed removes the header row so the second stats call appends only its data row
    seqkit stats ${id}.filtered.fasta | sed -e '1d' >> ${id}.filter-stats.txt
    """
}

/*
prepare_genome
This process is executed only once
*/
process prepare_genome{
tag { "PREP:${genome}" }
Expand All @@ -55,10 +98,11 @@ process prepare_genome{
file genome from genome_file
output:
file "*.fai" into genome_index_ch
file "*." into genome_index_ch
script:
"""
samtools faidx ${genome}
"""
}
}
*/

0 comments on commit e80322c

Please sign in to comment.