diff --git a/assembly-config.conf b/assembly-config.conf index bcaea70..1595e9e 100644 --- a/assembly-config.conf +++ b/assembly-config.conf @@ -1,3 +1,9 @@ +/* + * ------------------------------------------------- + * UIUC RefGraph Assembly Nextflow config file + * ------------------------------------------------- + */ + params { genome = "./GRCh38/GRCh38_full_analysis_set_plus_decoy_hla.fa" samplePath = "./crams/*.final.cram" diff --git a/filter-config.conf b/filter-config.conf index 0320113..7150bf7 100644 --- a/filter-config.conf +++ b/filter-config.conf @@ -1,3 +1,9 @@ +/* + * ------------------------------------------------- + * UIUC RefGraph Filtering Nextflow config file + * ------------------------------------------------- + */ + params { genome1 = "./GRCh38/GRCh38_full_analysis_set_plus_decoy_hla.fa" genome2 = "./GRCh38.p0/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna" diff --git a/filter.nf b/filter.nf index 915e008..b95c6cf 100644 --- a/filter.nf +++ b/filter.nf @@ -34,7 +34,9 @@ params.blastnt_filter_pident = '60' /*filtering cut off for percentag params.blastnt_filter_length = '100' /*filtering cut off for alignment length from blast NT. Default is 100*/ params.blastr_filter_pident = '95' /*filtering cut off for percentage of identical matches from blast ref genome. Default is 95*/ params.blastr_filter_qcov = '95' /*filtering cut off for query coverage from blast ref genome. Default is 95*/ -params.cdhit_identity = '0.9' /*proportion of idenitity for clustering using cdhit. Default is 0.9*/ + +/*Parameters for cdhit */ +params.cdhit_identity = '0.9' /*proportion of identity for clustering. Default is 0.9*/ params.cdhit_wordsize = '7' /*word size for cdhit. 
Default is 7*/ /*Stage*/ @@ -150,7 +152,7 @@ process blastdbCHM13 { } /* - STEP 1: FILTER BASED ON READ LENGHT + STEP 1: FILTER BASED ON READ LENGTH /* 1.1 FILTER THE ASSEMBLY FILES --- use seqkit to remove low read lengths --- @@ -449,20 +451,20 @@ process final_filtering { tuple val(id), file(filter_CHM13) from filter_CHM13_GRCH38 output: - tuple val(id), file('*_GRCH38_decoys_hla_Final_filter.fasta') - tuple val(id), file('*_GRCH38_p0_Final_filter.fasta') - tuple val(id), file('*_CHM13_Final_filter.fasta') + tuple val(id), file('*_GRCH38_decoys_hla_filter.final.fasta') + tuple val(id), file('*_GRCH38_p0_filter.final.fasta') + tuple val(id), file('*_CHM13_filter.final.fasta') script: """ # Filter the fasta using blast output (GRCh38) ------ - seqkit grep -i -v -f ${filter_GRCH38} ${blast_kn_cdhit_filtered2} > ${id}_GRCH38_decoys_hla_Final_filter.fasta + seqkit grep -i -v -f ${filter_GRCH38} ${blast_kn_cdhit_filtered2} > ${id}_GRCH38_decoys_hla_filter.final.fasta # Filter the fasta using blast output (GRCh38.p0) ------ - seqkit grep -i -v -f ${filter_GRCH38p0} ${blast_kn_cdhit_filtered2} > ${id}_GRCH38_p0_Final_filter.fasta + seqkit grep -i -v -f ${filter_GRCH38p0} ${blast_kn_cdhit_filtered2} > ${id}_GRCH38_p0_filter.final.fasta # Filter the fasta using blast output (CHM13) ------ - seqkit grep -i -v -f ${filter_CHM13} ${blast_kn_cdhit_filtered2} > ${id}_CHM13_Final_filter.fasta + seqkit grep -i -v -f ${filter_CHM13} ${blast_kn_cdhit_filtered2} > ${id}_CHM13_filter.final.fasta """ } \ No newline at end of file