Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Relax varnames #48

Merged
merged 2 commits into from
Mar 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions lib/ugrid_checks/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,15 @@
"trajectory_id",
]

# Valid cf varname regex : copied from iris.common.metadata code.
_VALID_NAME_REGEX = re.compile(r"""^[a-zA-Z][a-zA-Z0-9]*[\w.+\-@]*$""")
# Valid varname regex.
# It is not possible to use the common 'variable name' patterns,
# as we *can* legally have variables which start with a digit, etc.
# In principle, in netcdf anything but forward-slash is allowed
# - c.f. https://github.com/cf-convention/cf-conventions/issues/307
# However, clearly, we can't really handle spaces in names.
# FOR NOW: allow a Python "normal word character", followed by anything except
# the forward-slash and space.
_VALID_NAME_REGEX = re.compile(r"^\w[^ /]*$")


class Checker:
Expand Down Expand Up @@ -152,7 +159,7 @@ def check_mesh_attr_is_varlist(
"Mesh",
meshvar.name,
f'has {attrname}="{value}", '
"which is not a valid list of netcdf variable names.",
"which is not a valid list of variable names.",
)
success = False
if success:
Expand All @@ -165,7 +172,7 @@ def check_mesh_attr_is_varlist(
"Mesh",
meshvar.name,
f'has {attrname}="{varname}", '
"which is not a valid netcdf variable name.",
"which is not a valid variable name.",
)
success = False
elif varname not in self._all_vars:
Expand Down Expand Up @@ -202,7 +209,7 @@ def var_ref_problem(self, attr_value: np.ndarray) -> str:
if succeed:
var_name = property_as_single_name(attr_value)
if not _VALID_NAME_REGEX.match(var_name):
result = "is not a valid netcdf variable name"
result = "is not a valid variable name"
succeed = False
if succeed:
bounds_var = self._all_vars.get(var_name)
Expand Down
22 changes: 11 additions & 11 deletions lib/ugrid_checks/tests/check/test_check_dataset__checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def test_r105_r108_mesh_badcoordattr_empty(self, scan_2d_and_meshvar):
meshvar.attributes["node_coordinates"] = ""
msg = (
'"topology" has node_coordinates="".*'
"is not a valid list of netcdf variable"
"is not a valid list of variable names"
)
self.check(
scan,
Expand All @@ -449,7 +449,7 @@ def test_r105_r108_mesh_badcoordattr_invalidname(
meshvar.attributes["node_coordinates"] = "$123"
msg = (
r'"topology" has node_coordinates="\$123"'
".*not a valid netcdf variable name"
".*not a valid variable name"
)
self.check(
scan,
Expand Down Expand Up @@ -505,7 +505,7 @@ def test_r109_mesh_badconn_empty(self, scan_2d_and_meshvar):
"R105",
(
'"topology" has face_node_connectivity="".*'
"not a valid list of netcdf variable"
"not a valid list of variable names"
),
),
(
Expand Down Expand Up @@ -921,8 +921,8 @@ def test_r502_datavar_empty_mesh(self, scan_2d_and_datavar):

def test_r502_datavar_invalid_mesh_name(self, scan_2d_and_datavar):
scan, datavar = scan_2d_and_datavar
datavar.attributes["mesh"] = "$123"
msg = r'mesh="\$123", which is not a valid netcdf variable name\.'
datavar.attributes["mesh"] = "/bad/"
msg = r'mesh="/bad/", which is not a valid variable name\.'
self.check(scan, "R502", msg)

def test_r502_datavar_bad_mesh_dtype(self, scan_2d_and_datavar):
Expand Down Expand Up @@ -999,10 +999,10 @@ def test_r507_datavar_lis_with_location(self, scan_0d_with_lis_datavar):
def test_r508_datavar_lis_invalid(self, scan_0d_with_lis_datavar):
# The lis attribute should be a valid variable reference.
scan, data_var = scan_0d_with_lis_datavar
data_var.attributes["location_index_set"] = "$123"
data_var.attributes["location_index_set"] = "/bad/"
msg = (
r'location_index_set="\$123", '
r"which is not a valid netcdf variable name\."
r'location_index_set="/bad/", '
r"which is not a valid variable name\."
)
self.check(scan, "R508", msg)

Expand Down Expand Up @@ -1095,10 +1095,10 @@ def test_r203_coord_bounds_bad_attrdtype(self, scan_2d_and_coordvar):
def test_r203_coord_bounds_bad_name(self, scan_2d_and_coordvar):
scan, coord = scan_2d_and_coordvar
# Add an invalid bounds attribute to the node_lon coord.
coord.attributes["bounds"] = "$123"
coord.attributes["bounds"] = "/bad/"
msg = (
r'"node_lon" within topology:node_coordinates has bounds="\$123", '
"which is not a valid netcdf variable name"
r'"node_lon" within topology:node_coordinates has bounds="/bad/", '
r"which is not a valid variable name\."
)
self.check(scan, "R203", msg)

Expand Down
26 changes: 26 additions & 0 deletions lib/ugrid_checks/tests/check/test_name_regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from ...check import _VALID_NAME_REGEX as var_re


def test_valid_varnames():
# Just check a few things that should be accepted, and not
assert var_re.match("abc")
assert var_re.match("x") # single char

# various unusual chars, cannot appear at start
nonstart_chars = r"#$£+-*^%?!.:;,\()[]{}" # almost anything !!
for nonstart_char in nonstart_chars:
assert not var_re.match(nonstart_char)
# But these are all OK *after* the start position
assert var_re.match("x" + nonstart_chars)

# Examples of characters which *are* allowed at start
start_chars = "_10" # NB includes digits.
for start_char in start_chars:
assert var_re.match(start_char)

# not empty
assert not var_re.match("")
# no spaces
assert not var_re.match("space in name")
# no forward-slash
assert not var_re.match("forward/slash")