Skip to content

Commit

Permalink
added a bunch of annotation and maker codes and edited filter script …
Browse files Browse the repository at this point in the history
…a little bit. changes since Dec 2021
  • Loading branch information
Negin Valizadegan committed Jan 27, 2022
1 parent 0f29654 commit 3c7310a
Show file tree
Hide file tree
Showing 16 changed files with 538 additions and 33 deletions.
174 changes: 174 additions & 0 deletions MAKER.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#!/bin/bash

#SBATCH --mem 50G
#SBATCH --job-name maker
#SBATCH --mail-user [email protected] ## CHANGE THIS TO YOUR EMAIL
#SBATCH --mail-type ALL
#SBATCH -n 24
#SBATCH -N 1
#SBATCH -A h3abionet
#SBATCH -o /home/groups/h3abionet/RefGraph/results/NeginV_Test_Summer2021/slurm_output/slurm-%j.out



# HPCBio UIUC Gene Annotation pipeline (MAKER + EVidenceModeler); Created by Negin Valizadegan Jan 18, 2022; [email protected]

##############################################################################
## ##
## GENERAL WRAPPER RELATED SCRIPTS ##
## ##
##############################################################################

# Set fancy fonts for the help message ------
# tput needs a usable terminal description. When it cannot produce one (for
# example when TERM is unset), its error output is discarded and the variable
# is left empty, so messages are printed as plain text instead of cluttering
# the job log with tput errors.
NORM=$(tput sgr0 2>/dev/null || true) # reset all text attributes
BOLD=$(tput bold 2>/dev/null || true) # bold text
REV=$(tput smso 2>/dev/null || true)  # standout mode

# Help ------
#######################################
# Print the usage text of this script and terminate.
# Globals:   BOLD, NORM, REV (read) - text formatting sequences
# Outputs:   the help text on stdout
# Exits:     always exits the calling shell with status 1
#######################################
HELP() {
  local -a help_lines=(
    ""
    "${BOLD}Help Documentation for the HPCBio UIUC Annotation (Filtering) Pipeline${NORM}"
    ""
    "The Following Options Must Be Specified:"
    "${REV}-d${NORM} The full path to the main results directory${NORM} (Required)"
    "${REV}-s${NORM} The name of the input sequence (Required)"
    "${REV}-h${NORM} Displays this help message without complaints (Optional)"
    ""
    "[ ${NORM}${BOLD}Example:${NORM} sbatch MAKER.sh -d /home/groups/h3abionet/RefGraph/results/NeginV_Test_Summer2021/results/ -s clustered_GRCH38_p0.fasta ]"
    ""
  )

  # One line of output per array element, blank elements included.
  printf '%s\n' "${help_lines[@]}"
  exit 1
}


# Check the number of arguments. If none are passed, print message and exit ------
# NUMARGS records how many command-line arguments the script received; with
# zero arguments a short notice is printed and HELP is called.
NUMARGS=$#
if (( NUMARGS == 0 )); then
  printf '\n%s\n\n' "You Did Not Pass Any Arguments. Please Specify the Arguments Below:"
  HELP
fi


# Parse the inputs
#######################################
# Parse the command-line options into global variables.
# Globals:   OPT_d (written) - value given to -d
#            OPT_s (written) - value given to -s
#            OPT_h (written) - set when -h is seen (always empty)
#            BOLD, NORM (read) - text formatting sequences
# Arguments: the script's command-line arguments ("$@")
# Outputs:   an error message on stdout for invalid usage
# Exits:     1 on an unknown option or when -d/-s is given without a value;
#            -h hands over to HELP
#######################################
parse_args() {
  local FLAG
  local OPTIND=1 # make every call start at the first argument

  # The leading ':' selects getopts' silent mode: an unknown option yields
  # FLAG='?' and a missing option argument yields FLAG=':', both with the
  # offending option letter in OPTARG.
  while getopts :d:s:h FLAG; do
    case "$FLAG" in
      d) #set option "d"
        OPT_d=$OPTARG
        ;;
      s) #set option "s"
        OPT_s=$OPTARG
        ;;
      h) #set option "h"
        OPT_h=$OPTARG # -h takes no argument, so this stays empty
        HELP
        ;;
      :) #option given without its required argument
        echo "Option -${BOLD}$OPTARG${NORM} requires an argument."
        exit 1
        ;;
      \?) #unrecognized option
        echo "Option -${BOLD}$OPTARG${NORM} not allowed."
        exit 1
        ;;
    esac
  done
}

parse_args "$@"


# Exit if necessary options are not passed ------
# Both -d (OPT_d) and -s (OPT_s) are mandatory; stop with status 1 as soon as
# one of them is found to be empty.
[[ -n "$OPT_d" ]] || {
  echo "No project directory specified, aborting script"
  exit 1
}

[[ -n "$OPT_s" ]] || {
  echo "No input sequence name is specified, aborting script"
  exit 1
}

##############################################################################
## ##
## STEP 0: LOAD MODULES ##
## ##
##############################################################################

#######################################
# Load the software environment needed by the pipeline.
# Outputs:   a status (or error) message on stdout
# Exits:     1 if the MAKER module cannot be loaded
#######################################
setup ()
{
  # Load modules ------
  # Stop right away on failure: the later annotation step needs the maker
  # program that this module provides.
  if ! module load MAKER/3.01.03-IGB-gcc-4.9.4-Perl-5.26.1-unthreaded; then
    echo "Failed to load the MAKER module, aborting script"
    exit 1
  fi

  # Create 3 control files needed for maker ----- (do not run if present; control files should be edited manually)
  # cd ${OPT_d}/../HPCBio-Refgraph_pipeline/
  # maker -CITL

  # NOTE: the generation step above is commented out, so this function does
  # not create any control files itself; the message is kept unchanged.
  echo "Control ctl files are created if not already exist. They are usually needed to be manually modified."
}


##############################################################################
## ##
## STEP 1: RUN MAKER ##
## ##
##############################################################################

#######################################
# Run MAKER gene prediction on the input sequence and report the run time.
# This function has the same name as the maker program; the program itself is
# started through mpiexec below.
# Globals:   OPT_d (read)        - main results directory
#            OPT_s (read)        - file name of the input sequence
#            SLURM_NPROCS (read) - number of MPI processes (1 when unset)
# Outputs:   progress messages on stdout
# Exits:     1 if the working directory cannot be prepared; the status of
#            the mpiexec/maker run if that run fails
#######################################
maker ()
{
  local start end elapsed runtime status

  # Set working directory -----
  # Abort on failure so that the output directory is never created in
  # whatever directory the job happened to start in.
  if ! cd "${OPT_d}/annotation"; then
    echo "Cannot change to ${OPT_d}/annotation, aborting script"
    exit 1
  fi

  # Create output directory
  if ! mkdir -p MAKER || ! cd MAKER; then
    echo "Cannot create or enter ${OPT_d}/annotation/MAKER, aborting script"
    exit 1
  fi

  printf 'Working directory is set to %s\n' "$PWD"

  # Create a temp directory ------
  # A failure here is reported by mkdir but does not stop the run.
  mkdir -p /scratch/valizad2/maker # change valizad2 to your username

  start=$(date +%s) # capture start time
  echo "Start of maker annotation"

  #export AUGUSTUS_CONFIG_PATH=/home/n-z/valizad2/NeginV_Test_Summer2021/augustus/3.2.3-IGB-gcc-4.9.4/config export PATH=$PATH:=/home/n-z/valizad2/NeginV_Test_Summer2021/augustus/3.2.3-IGB-gcc-4.9.4/bin

  # Run maker -----
  # All paths are quoted so directories containing spaces work. The process
  # count falls back to 1 when SLURM_NPROCS is not set (e.g. outside SLURM).
  mpiexec -n "${SLURM_NPROCS:-1}" maker \
    "${OPT_d}/../HPCBio-Refgraph_pipeline/maker_opts.ctl" \
    "${OPT_d}/../HPCBio-Refgraph_pipeline/maker_bopts.ctl" \
    "${OPT_d}/../HPCBio-Refgraph_pipeline/maker_exe.ctl" \
    -genome "${OPT_d}/annotation/Cluster_CDHIT/masurca/${OPT_s}" \
    -fix_nucleotides # This will change Ys to Ns
  status=$?

  # Do not announce success when the run itself failed.
  if (( status != 0 )); then
    echo "Maker gene prediction failed for ${OPT_s} (exit status ${status})"
    exit "$status"
  fi

  echo "Maker gene prediction is completed for ${OPT_s}"

  end=$(date +%s)
  elapsed=$((end - start))
  # Minutes with two (truncated) decimals, computed with shell arithmetic so
  # no external calculator is required.
  printf -v runtime '%d.%02d' "$((elapsed / 60))" "$(((elapsed % 60) * 100 / 60))"
  echo "It took $runtime minutes to run maker on ${OPT_s}"
}


##############################################################################
## ##
## MAIN ##
## ##
##############################################################################

# The main function runs each step (function) of the pipeline in turn, so that
# the user can choose to run the steps one at a time.

#######################################
# Entry point: print the run banner, then run the pipeline steps in order.
# Globals:   runtype (written) - label shown in the banner
# Outputs:   a blank line and the banner on stdout, plus whatever the
#            steps print
#######################################
main() {
  # Banner label; the alternative value is kept here for single-step runs.
  #runtype="PARTIAL"
  runtype="FULL"
  printf '\n%s\n' "*** RUNNING ${runtype} ANNOTATION PIPELINE ***"

  # Pipeline steps, executed one after the other.
  setup
  maker
}


# Start the pipeline. The command-line arguments are forwarded by convention;
# main does not read them.
main "$@"
Empty file added _Inline/.lock
Empty file.
14 changes: 14 additions & 0 deletions _Inline/config-x86_64-linux-5.026001
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
version : 0.80
languages : %
C : C
Foo : Foo
foo : Foo
types : %
C : compiled
Foo : interpreted
modules : %
C : Inline::C
Foo : Inline::Foo
suffixes : %
C : so
Foo : foo
22 changes: 22 additions & 0 deletions _Inline/lib/auto/Bio/DB/IndexedBase_168b/IndexedBase_168b.inl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
md5 : 168b5562b2d3d613c6ee4dee4c45c915
name : Bio::DB::IndexedBase_168b
version : ""
language : C
language_id : C
installed : 0
date_compiled : Wed Jan 19 11:50:34 2022
inline_version : 0.80
ILSM : %
module : Inline::C
suffix : so
type : compiled
Config : %
apiversion : ?
archname : x86_64-linux
cc : gcc
ccflags : -O2 -march=x86-64 -mtune=generic -fPIC -fwrapv -fno-strict-aliasing -pipe -fstack-protector-strong -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -D_FORTIFY_SOURCE=2
ld : gcc
osname : linux
osvers : 3.10.0-514.21.1.el7.x86_64
so : so
version : 5.26.1
Binary file not shown.
5 changes: 4 additions & 1 deletion annotation-config.conf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ params {
genome1 = "./GRCh38/GRCh38_full_analysis_set_plus_decoy_hla.fa"
genome2 = "./GRCh38.p0/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
genome3 = "./CHM13.v1.1_GRCh38.p13.chrY/CHM13.v1.1_GRCh38.p13.chrY.fna"
samplePath = "./results/filter/Final-Filtered/masurca/*_filter.final.fasta"
samplePath = "./results/filter/Final-Filtered/masurca/test/*_filter.final.fasta"
samplePath1 = "./results/filter/Final-Filtered/masurca/test/*_GRCH38_decoys_hla_filter.final.fasta"
samplePath2 = "./results/filter/Final-Filtered/masurca/test/*_GRCH38_p0_filter.final.fasta"
samplePath3 = "./results/filter/Final-Filtered/masurca/test/*_CHM13_filter.final.fasta"
myQueue = "normal"
clusterAcct = " -A h3abionet "
}
44 changes: 44 additions & 0 deletions annotation-run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash

#SBATCH --mem 18G
#SBATCH --job-name annotation
#SBATCH --mail-user [email protected] ## CHANGE THIS TO YOUR EMAIL
#SBATCH --mail-type ALL
#SBATCH -n 2
#SBATCH -N 1
#SBATCH -A h3abionet
#SBATCH -o /home/groups/h3abionet/RefGraph/results/NeginV_Test_Summer2021/slurm_output/slurm-%A.out

### This Runs Nextflow Annotation UIUC pipeline
## Date File Created: Dec 5, 2021


# Set working directory -------
# Every path handed to nextflow below is relative to this directory, so stop
# if it cannot be entered instead of running the workflow somewhere else.
cd /home/groups/h3abionet/RefGraph/results/NeginV_Test_Summer2021 || {
  echo "Cannot change to the project directory, aborting script" >&2
  exit 1
}

# Load nextflow ------
# Without the module the nextflow command below is not available.
module load nextflow/21.04.1-Java-1.8.0_152 || {
  echo "Failed to load the nextflow module, aborting script" >&2
  exit 1
}

# Run nextflow UIUC workflow -----
# -c points at the pipeline configuration, -qs sets the executor queue size,
# -resume continues from previously cached results, and the -with-* options
# write the report, timeline and trace files into nextflow_reports/.
nextflow run HPCBio-Refgraph_pipeline/annotation.nf \
  -c HPCBio-Refgraph_pipeline/annotation-config.conf \
  -qs 3 -resume \
  -with-report nextflow_reports/nf_report.html \
  -with-timeline nextflow_reports/nf_timeline.html \
  -with-trace nextflow_reports/nf_trace.txt

# -log custom.log #add this for log not hidden
# -q # Disable the printing of information to the terminal.

# -with-report nf_exec_report_annotation.html \
# -with-timeline nf_timeline_annotation.html \
# -with-trace > nf_trace_annotation.txt \ # this is the same as slurm output, if you use this, slurm output will be empty
# -with-dag nf_flowchart_annotation.pdf

#if [ echo "wc -l ${keep}" == echo "grep -E ">" ${id}_kn_filtered.fasta | wc -l" ]
# then
# echo "The filtering has not been done correctly. Please check your blastncontam script"
# fi



Loading

0 comments on commit 3c7310a

Please sign in to comment.