Skip to content

Commit

Permalink
move to coverage.pyx
Browse files Browse the repository at this point in the history
  • Loading branch information
JMencius committed Apr 15, 2024
1 parent 28dcae8 commit b664d5a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 25 deletions.
26 changes: 1 addition & 25 deletions dysgu/cluster.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1159,31 +1159,7 @@ def cluster_reads(args):
if kind2 == "stdin" or kind2 == "-" or kind2 not in opts:
raise ValueError("--ibam must be a .bam/cam/sam file")
ibam = pysam.AlignmentFile(args["ibam"], opts[kind2], threads=1,
reference_filename=None if kind2 != "cram" else args["reference"])

# Double check if bam file is sorted
# first check SO tag in HD
if "HD" in infile.header:
hd = infile.header["HD"]
if "SO" in hd:
if hd["SO"] == "unsorted":
# raise ValueError for SO:unsorted
raise ValueError("Input bam file must be sorted")
# directly check bam just check the first 1000 alignment for speed
max_alignment_check = 10**3
alignment_count = 0
prev_alignment = None
for aln in infile:
alignment_count += 1
if alignment_count > max_alignment_check:
break
if prev_alignment and prev_alignment.reference_id > aln.reference_id:
raise ValueError("Input bam file must be sorted")
prev_alignment = aln
logging.info("Check PASS : Input BAM file is sorted")
# reopen infile
infile = pysam.AlignmentFile(args["sv_aligns"], bam_mode, threads=args["procs"],
reference_filename=None if kind != "cram" else args["reference"])
reference_filename=None if kind2 != "cram" else args["reference"])

if "RG" in infile.header:
rg = infile.header["RG"]
Expand Down
17 changes: 17 additions & 0 deletions dysgu/coverage.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,24 @@ cdef class GenomeScanner:
cdef uint32_t *cigar_p
cdef int i
cdef int non_match_count, matched_bases

# first check SO tag in HD
if "HD" in file_iter.header:
hd = file_iter.header["HD"]
if "SO" in hd:
if hd["SO"] == "unsorted":
# raise ValueError for SO:unsorted
raise ValueError("Input bam file must be sorted")
else:
logging.info("Check PASS : Input BAM file has sorted header")

prev_alignment = None
for a in file_iter:
# double check with index
if prev_alignment and prev_alignment.reference_id > a.reference_id:
raise ValueError("Input bam file must be sorted")
prev_alignment = a

if ibam is None:
if a.flag & 1284 or a.mapq < self.mapq_threshold or a.cigartuples is None:
continue
Expand Down

0 comments on commit b664d5a

Please sign in to comment.