Skip to content

Commit

Permalink
Make chunks have the same number of dimensions as shape
Browse files: browse the repository at this point in the history
  • Loading branch information
tomwhite committed Jul 8, 2024
1 parent 69ee0e8 commit ada71fa
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
13 changes: 11 additions & 2 deletions bio2zarr/vcf2zarr/vcz.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -98,6 +98,7 @@ def from_field(
# TODO make an option to add in the empty extra dimension
if vcf_field.summary.max_number > 1:
shape.append(vcf_field.summary.max_number)
chunks.append(vcf_field.summary.max_number)
# TODO we should really be checking this to see if the named dimensions
# are actually correct.
if vcf_field.vcf_number == "R":
Expand Down Expand Up @@ -251,7 +252,12 @@ def spec_from_field(field, array_name=None):
)

def fixed_field_spec(
name, dtype, vcf_field=None, shape=(m,), dimensions=("variants",)
name,
dtype,
vcf_field=None,
shape=(m,),
dimensions=("variants",),
chunks=None,
):
return ZarrArraySpec.new(
vcf_field=vcf_field,
Expand All @@ -260,7 +266,7 @@ def fixed_field_spec(
shape=shape,
description="",
dimensions=dimensions,
chunks=[variants_chunk_size],
chunks=chunks or [variants_chunk_size],
)

alt_field = icf.fields["ALT"]
Expand All @@ -276,12 +282,14 @@ def fixed_field_spec(
dtype="bool",
shape=(m, icf.metadata.num_filters),
dimensions=["variants", "filters"],
chunks=(variants_chunk_size, icf.metadata.num_filters),
),
fixed_field_spec(
name="variant_allele",
dtype="O",
shape=(m, max_alleles),
dimensions=["variants", "alleles"],
chunks=(variants_chunk_size, max_alleles),
),
fixed_field_spec(
name="variant_id",
Expand Down Expand Up @@ -329,6 +337,7 @@ def fixed_field_spec(
)
)
shape += [ploidy]
chunks += [ploidy]
dimensions += ["ploidy"]
array_specs.append(
ZarrArraySpec.new(
Expand Down
6 changes: 3 additions & 3 deletions tests/test_vcz.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -313,7 +313,7 @@ def test_call_genotype(self, schema):
"name": "call_genotype",
"dtype": "i1",
"shape": (9, 3, 2),
"chunks": (10000, 1000),
"chunks": (10000, 1000, 2),
"dimensions": ("variants", "samples", "ploidy"),
"description": "",
"vcf_field": None,
Expand All @@ -332,7 +332,7 @@ def test_call_genotype_mask(self, schema):
"name": "call_genotype_mask",
"dtype": "bool",
"shape": (9, 3, 2),
"chunks": (10000, 1000),
"chunks": (10000, 1000, 2),
"dimensions": ("variants", "samples", "ploidy"),
"description": "",
"vcf_field": None,
Expand All @@ -351,7 +351,7 @@ def test_call_genotype_phased(self, schema):
"name": "call_genotype_mask",
"dtype": "bool",
"shape": (9, 3, 2),
"chunks": (10000, 1000),
"chunks": (10000, 1000, 2),
"dimensions": ("variants", "samples", "ploidy"),
"description": "",
"vcf_field": None,
Expand Down

0 comments on commit ada71fa

Please sign in to comment.