Skip to content

Commit

Permalink
try and fail to speed up GenotypesPLINKTR.read()
Browse files Browse the repository at this point in the history
  • Loading branch information
aryarm committed Aug 31, 2023
1 parent ec294bc commit 02cbc72
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions haptools/data/genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1666,25 +1666,25 @@ def read(
super().read(region,samples,variants,max_variants)
num_variants = len(self.variants)
# initialize a jagged array of allele lengths
max_num_alleles = max(map(len, self.variants["alleles"]))
allele_lens = np.empty((len(self.variants), max_num_alleles), dtype=self.data.dtype)
num_alleles = np.fromiter(map(len, self.variants["alleles"]), dtype=np.uint8, count=num_variants)
allele_lens = np.empty((num_variants, num_alleles.max()+1), dtype=self.data.dtype)
# iterate through each TR and extract the REF and ALT allele lengths
for idx, record in enumerate(self._iter_TRRecords(region, variants)):
if idx > num_variants:
# exit early if we've already found all the variants
break
# extract allele lengths from TRRecord object
allele_lens[idx, 0] = record.ref_allele_length
num_alleles = len(record.alt_allele_lengths) + 1
allele_lens[idx, 1:num_alleles] = record.alt_allele_lengths
num_alleles_rec = num_alleles[idx]
allele_lens[idx, 1:num_alleles_rec] = record.alt_allele_lengths
allele_lens[idx, num_alleles_rec] = np.iinfo(np.uint8).max
# record missing entries and then set them all to REF
missing = self.data[:, :, :2] == np.iinfo(np.uint8).max
self.data[:, :, :2][missing] = 0
breakpoint()
np.putmask(self.data[:, :, :2], missing, num_alleles)
# convert from genotype indices to allele lengths
variant_coords = np.arange(num_variants)[:, np.newaxis]
self.data[:, :, :2] = allele_lens[variant_coords, self.data[:, :, :2]]
# restore missing entries
self.data[:, :, :2][missing] = np.iinfo(np.uint8).max
# clean up memory
del missing
del allele_lens
Expand Down

0 comments on commit 02cbc72

Please sign in to comment.