diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.README.txt.crc index 075470d2e5..2f20ae91d5 100644 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.README.txt.crc and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.README.txt.crc differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.metadata.json.gz.crc index 36c953d7c3..9b0f6b88ba 100644 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.metadata.json.gz.crc and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/README.txt b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/README.txt index ba9eb29394..a40db44634 100644 --- a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/README.txt +++ b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. Written with version 0.2.128-eead8100a1c1 - Created at 2024/03/04 16:14:35 \ No newline at end of file + Created at 2024/08/29 13:43:52 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/.index.crc deleted file mode 100644 index 741666296d..0000000000 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/.index.crc and /dev/null differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/.metadata.json.gz.crc deleted file mode 100644 index ca03555fe8..0000000000 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/.metadata.json.gz.crc and /dev/null differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/index deleted file mode 100644 index 73ec2f7ff9..0000000000 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/index and /dev/null differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/metadata.json.gz deleted file mode 100644 index ecb2944baa..0000000000 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/metadata.json.gz and /dev/null differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/.index.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/.index.crc new file mode 100644 index 0000000000..644f583444 Binary files /dev/null and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/.index.crc differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000..359650e816 Binary files /dev/null and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/index b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/index new file mode 100644 index 0000000000..33d6653b42 Binary files /dev/null and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/index differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/metadata.json.gz new file mode 100644 index 0000000000..521ca22d19 Binary files /dev/null and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/index/part-0-fbbd1d66-9016-474d-b435-c7d356e21767.idx/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/metadata.json.gz index 51ee68f2c0..dcdc45d622 100644 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/metadata.json.gz and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/.metadata.json.gz.crc index 640a7e087a..c2dc85a9d8 100644 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/.metadata.json.gz.crc and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/.metadata.json.gz.crc differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/metadata.json.gz index dcf83cab03..7d7697ed0a 100644 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/metadata.json.gz and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/metadata.json.gz differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/.part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/.part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.crc deleted file mode 100644 index fcaf05107a..0000000000 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/.part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.crc and /dev/null differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/.part-0-fbbd1d66-9016-474d-b435-c7d356e21767.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/.part-0-fbbd1d66-9016-474d-b435-c7d356e21767.crc new file mode 100644 index 0000000000..b8e95019c5 Binary files /dev/null and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/.part-0-fbbd1d66-9016-474d-b435-c7d356e21767.crc differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b deleted file mode 100644 index 66c4efbd88..0000000000 Binary files a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b and /dev/null differ diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/part-0-fbbd1d66-9016-474d-b435-c7d356e21767 b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/part-0-fbbd1d66-9016-474d-b435-c7d356e21767 new file mode 100644 index 0000000000..f43efce500 Binary files /dev/null and b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/parts/part-0-fbbd1d66-9016-474d-b435-c7d356e21767 differ diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py index 2a8631efdd..d55eaf52a6 100644 --- a/hail_search/queries/snv_indel.py +++ b/hail_search/queries/snv_indel.py @@ -24,7 +24,8 @@ class SnvIndelHailTableQuery(SnvIndelHailTableQuery37): SCREEN_KEY, MOTIF_FEATURES_KEY, REGULATORY_FEATURES_KEY, ] FREQUENCY_PREFILTER_FIELDS = OrderedDict([ - (True, PREFILTER_FREQ_CUTOFF), + (True, 0.001), + ('is_gt_1_percent', PREFILTER_FREQ_CUTOFF), ('is_gt_3_percent', 0.03), ('is_gt_5_percent', 0.05), ('is_gt_10_percent', 0.1), diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index 3addc9ded2..bebb02eab9 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -96,19 +96,26 @@ def _get_gnomad_af_prefilter(self, frequencies=None, pathogenicity=None, **kwarg if af_cutoff_field is None: return False - af_filter = True if af_cutoff_field is True else lambda ht: ht[af_cutoff_field] - + clinvar_path_ht = False if af_cutoff < PATH_FREQ_OVERRIDE_CUTOFF: clinvar_path_ht = self._get_loaded_clinvar_prefilter_ht(pathogenicity) - if clinvar_path_ht is not False: - path_cutoff_field = self._get_af_prefilter_field(PATH_FREQ_OVERRIDE_CUTOFF) - non_clinvar_filter = lambda ht: hl.is_missing(clinvar_path_ht[ht.key]) - if af_filter is not True: - non_clinvar_filter = lambda ht: non_clinvar_filter(ht) & af_filter(ht) - af_filter = lambda ht: ht[path_cutoff_field] | non_clinvar_filter(ht) + + if clinvar_path_ht is not False: + path_cutoff_field = self._get_af_prefilter_field(PATH_FREQ_OVERRIDE_CUTOFF) + non_clinvar_filter = lambda ht: hl.is_missing(clinvar_path_ht[ht.key]) + if af_cutoff_field is not True: + non_clinvar_var_filter = non_clinvar_filter + non_clinvar_filter = lambda ht: non_clinvar_var_filter(ht) & self._af_prefilter(af_cutoff_field)(ht) + af_filter = lambda ht: ht[path_cutoff_field] | non_clinvar_filter(ht) + else: + af_filter = self._af_prefilter(af_cutoff_field) return af_filter + @staticmethod + def _af_prefilter(af_cutoff_field): + return True if af_cutoff_field is True else lambda ht: ht[af_cutoff_field] + def _get_af_prefilter_field(self, af_cutoff): return next((field for field, cutoff in self.FREQUENCY_PREFILTER_FIELDS.items() if af_cutoff <= cutoff), None)