Skip to content

Commit

Permalink
Refactor validate_and_convert_config
Browse files Browse the repository at this point in the history
* Put everything in smaller validate_*() functions
* Convert the value and catch problems in these validate_*() functions
  • Loading branch information
tomschr committed Jun 21, 2024
1 parent 78f3d3a commit a902366
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 82 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import datetime
import itertools
import typing as t
Expand Down
217 changes: 171 additions & 46 deletions python-scripts/metadatavalidator/src/metadatavalidator/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@
# merged_config |= config
# return merged_config

SPLIT = re.compile(r"[;, ]")

def as_dict(config: configparser.ConfigParser):

def as_dict(config: configparser.ConfigParser) -> dict[str, t.Any]:
"""
Converts a ConfigParser object into a dictionary.
Expand All @@ -49,88 +51,211 @@ def truefalse(value: str|bool|int) -> bool:
return str(value).lower() in ("true", "yes", "1", "on")


def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any, t.Any]:
"""Validate sections, keys, and their values of the config
# def retrievekey(config: configparser.ConfigParser,
# section: str, key: str, default: t.Any = None) -> t.Any:
# """Retrieve a key from a section in a config file

:param config: the :class:`configparser.Configparser` object
:return: a dict that contains converted keys into their
respective datatypes
"""
split = re.compile(r"[;, ]")
theconfig = as_dict(config)
# :param config: the configuration object
# :param section: the section to look for
# :param key: the key to look for
# :param default: the default value if the key is not found
# :return: the value of the key
# """
# if not config.has_section(section):
# raise MissingSectionError(section)
# return config.get(section, key, fallback=default)

if not config.has_section("validator"):
raise MissingSectionError("validator")

# Section "validator"
check_root_elements = config.get("validator", "check_root_elements", fallback=None)
# def get_metadata(config: configparser.ConfigParser, key) -> dict[str, t.Any]:
# """Retrieve the metadata section from the config

# :param config: the configuration object
# :return: a dictionary with the metadata section
# """
# return retrievekey(config, "metadata", key)


def validate_check_root_elements(config: dict) -> list[str]:
    """Validate and convert the ``validator.check_root_elements`` key.

    The raw string is split on semicolons, commas, and spaces (the
    module-level ``SPLIT`` pattern). Empty entries that arise from
    adjacent separators, for example ``"book, article"``, are dropped.

    :param config: the configuration as a nested dictionary
    :return: a list of root element names
    :raises MissingKeyError: if ``validator.check_root_elements`` is not set
    """
    check_root_elements = config.get("validator", {}).get("check_root_elements")
    if check_root_elements is None:
        raise MissingKeyError("validator.check_root_elements")

    # SPLIT matches exactly one separator character, so ", " produces an
    # empty string between the comma and the space; filter those out.
    return [item for item in SPLIT.split(check_root_elements) if item]


def validate_valid_languages(config: dict) -> list[str]:
    """Validate and convert the ``validator.valid_languages`` key.

    The raw string is split on semicolons, commas, and spaces (the
    module-level ``SPLIT`` pattern). Empty entries that arise from
    adjacent separators, for example ``"en-us, de-de"``, are dropped.

    :param config: the configuration as a nested dictionary
    :return: a list of valid languages
    :raises MissingKeyError: if ``validator.valid_languages`` is not set
    """
    valid_languages = config.get("validator", {}).get("valid_languages")
    if valid_languages is None:
        raise MissingKeyError("validator.valid_languages")

    # SPLIT matches exactly one separator character, so ", " produces an
    # empty string between the comma and the space; filter those out.
    return [item for item in SPLIT.split(valid_languages) if item]

# Section "metadata"
require_xmlid_on_revision = truefalse(
theconfig.get("metadata", {}).get("require_xmlid_on_revision", True)
)
theconfig.setdefault("metadata", {})["require_xmlid_on_revision"] = require_xmlid_on_revision

def validate_meta_title_length(config: dict) -> int:
    """Validate and convert the ``metadata.meta_title_length`` key.

    :param config: the configuration as a nested dictionary
    :return: the meta title length as an integer
    :raises MissingKeyError: if the key is absent (``int(None)`` fails
        with a :class:`TypeError`, which is translated)
    :raises ValueError: if the value cannot be parsed as an integer or
        is negative
    """
    try:
        # Only the conversion can raise TypeError (missing key -> None).
        meta_title_length = int(config.get("metadata", {}).get("meta_title_length"))
    except TypeError:
        # The TypeError itself carries no useful information for the user.
        raise MissingKeyError("metadata.meta_title_length") from None

    # NOTE: zero passes this check even though the message says "positive".
    if meta_title_length < 0:
        raise ValueError("meta_title_length should be a positive integer")
    return meta_title_length


def validate_meta_description_length(config: dict) -> int:
    """Validate and convert the ``metadata.meta_description_length`` key.

    :param config: the configuration as a nested dictionary
    :return: the meta description length as an integer
    :raises MissingKeyError: if the key is absent (``int(None)`` fails
        with a :class:`TypeError`, which is translated)
    :raises ValueError: if the value cannot be parsed as an integer or
        is negative
    """
    try:
        # Only the conversion can raise TypeError (missing key -> None).
        meta_description_length = int(
            config.get("metadata", {}).get("meta_description_length")
        )
    except TypeError:
        # The TypeError itself carries no useful information for the user.
        raise MissingKeyError("metadata.meta_description_length") from None

    # NOTE: zero passes this check even though the message says "positive".
    if meta_description_length < 0:
        raise ValueError("meta_description_length should be a positive integer")
    return meta_description_length

split = re.compile(r"[;,]") # no space!
valid_meta_series = split.split(theconfig.get("metadata", {}).get("valid_meta_series", ""))
theconfig.setdefault("metadata", {})["valid_meta_series"] = valid_meta_series

require_meta_series = truefalse(
theconfig.get("metadata", {}).get("require_meta_series", False)
)
theconfig.setdefault("metadata", {})["require_meta_series"] = require_meta_series
def validate_valid_meta_series(config: dict) -> list[str]:
    """Validate and convert the ``metadata.valid_meta_series`` key.

    Entries are separated by semicolons or commas only; spaces are NOT
    separators, so an entry may contain spaces. Surrounding whitespace is
    stripped from every entry, and entries that are empty after stripping
    are dropped. A missing key yields an empty list.

    :param config: the configuration as a nested dictionary
    :return: a list of valid meta series
    """
    raw = config.get("metadata", {}).get("valid_meta_series", "")
    # Strip first, then filter: otherwise a whitespace-only piece such as
    # the " " in "A, , B" would survive the filter and end up as "".
    stripped = (entry.strip() for entry in re.split(r"[;,]", raw))
    return [entry for entry in stripped if entry]


def validate_valid_meta_architecture(config: dict) -> list[str]:
    """Validate and convert the ``metadata.valid_meta_architecture`` key.

    Entries are separated by semicolons or commas. Surrounding whitespace
    is stripped from every entry, and entries that are empty after
    stripping are dropped. A missing key yields an empty list.

    :param config: the configuration as a nested dictionary
    :return: a list of valid meta architectures
    :raises MissingKeyError: if the stored value is not a string, so that
        splitting it fails with a :class:`TypeError`
    """
    raw = config.get("metadata", {}).get("valid_meta_architecture", "")
    try:
        pieces = re.split(r"[;,]", raw)
    except TypeError:
        raise MissingKeyError("metadata.valid_meta_architecture") from None

    # Strip first, then filter: otherwise a whitespace-only piece such as
    # the " " in "A, , B" would survive the filter and end up as "".
    stripped = (entry.strip() for entry in pieces)
    return [entry for entry in stripped if entry]


# categories
require_meta_category = truefalse(
theconfig.get("metadata", {}).get("require_meta_category", False)
)
theconfig.setdefault("metadata", {})["require_meta_category"] = require_meta_category
def validate_valid_meta_category(config: dict) -> list[str]:
    """Validate and convert the ``metadata.valid_meta_category`` key.

    Entries are separated by semicolons or commas. Surrounding whitespace
    is stripped from every entry, and entries that are empty after
    stripping are dropped. A missing key yields an empty list.

    :param config: the configuration as a nested dictionary
    :return: a list of valid meta categories
    :raises MissingKeyError: if the stored value is not a string, so that
        splitting it fails with a :class:`TypeError`
    """
    raw = config.get("metadata", {}).get("valid_meta_category", "")
    try:
        pieces = re.split(r"[;,]", raw)
    except TypeError:
        raise MissingKeyError("metadata.valid_meta_category") from None

    # Strip first, then filter: otherwise a whitespace-only piece such as
    # the " " in "D, , E" would survive the filter and end up as "".
    stripped = (entry.strip() for entry in pieces)
    return [entry for entry in stripped if entry]


def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any, t.Any]:
    """Check required sections and keys, converting values to Python types.

    Errors raised by the individual ``validate_*`` helpers propagate
    unchanged to the caller.

    :param config: the :class:`configparser.ConfigParser` object
    :return: a dict that contains converted keys into their
        respective datatypes
    :raises MissingSectionError: if the ``validator`` or ``metadata``
        section is absent
    """
    # TODO: This should be better used with pydantic
    for required_section in ("validator", "metadata"):
        if not config.has_section(required_section):
            raise MissingSectionError(required_section)

    theconfig = as_dict(config)

    # Section "validator"
    theconfig["validator"]["check_root_elements"] = validate_check_root_elements(theconfig)
    theconfig["validator"]["valid_languages"] = validate_valid_languages(theconfig)

    # Section "metadata": every converted value is written back into this dict.
    metadata = theconfig.setdefault("metadata", {})

    def flag(key: str, default: bool) -> bool:
        """Read a boolean-like option from the metadata section."""
        return truefalse(metadata.get(key, default))

    metadata["require_xmlid_on_revision"] = flag("require_xmlid_on_revision", True)

    # <meta name="title">
    metadata["meta_title_length"] = validate_meta_title_length(theconfig)

    # <meta name="description">
    metadata["require_meta_description"] = flag("require_meta_description", False)
    metadata["meta_description_length"] = validate_meta_description_length(theconfig)

    # <meta name="series">
    metadata["require_meta_series"] = flag("require_meta_series", False)
    metadata["valid_meta_series"] = validate_valid_meta_series(theconfig)

    # <meta name="architecture">
    metadata["require_meta_architecture"] = flag("require_meta_architecture", False)
    metadata["valid_meta_architecture"] = validate_valid_meta_architecture(theconfig)

    # <meta name="techpartner">
    metadata["require_meta_techpartner"] = flag("require_meta_techpartner", False)

    # <meta name="platform">
    metadata["require_meta_platform"] = flag("require_meta_platform", False)

    # <meta name="category">
    metadata["require_meta_category"] = flag("require_meta_category", False)
    metadata["valid_meta_category"] = validate_valid_meta_category(theconfig)

    # Store the configfiles; the attribute is attached to the parser
    # object by the caller, hence the dynamic lookup.
    theconfig["configfiles"] = getattr(config, "configfiles")
    return theconfig
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]):
"""
errors = []
basexmlfile = os.path.basename(xmlfile)
# log.debug("Config %s", config)
for checkfunc in get_all_check_functions(checks.__package__):
log.debug("Checking %r with %r",
basexmlfile,
Expand All @@ -44,6 +45,7 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]):

# Apply check function
checkfunc(tree, config)
# await asyncio.sleep(0.1)

except etree.XMLSyntaxError as e:
# log.fatal("Syntax error in %r: %s", xmlfile, e)
Expand Down
34 changes: 31 additions & 3 deletions python-scripts/metadatavalidator/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,42 @@
import sys
from configparser import ConfigParser
import os, os.path
import typing as t

from lxml import etree

import pytest

from metadatavalidator.config import as_dict


os.environ.setdefault("PYTHONPATH",
os.path.normpath(os.path.join(os.path.dirname(__file__), "..")))


@pytest.fixture
def xmlparser():
    """Provide an lxml XML parser created with ``encoding="UTF-8"``."""
    return etree.XMLParser(encoding="UTF-8")


@pytest.fixture(scope="function")
def config() -> ConfigParser:
    """Build a fresh ``ConfigParser`` holding a minimal valid configuration.

    The parser has a ``validator`` and a ``metadata`` section, and its
    ``configfiles`` attribute is set to ``None``.
    """
    parser = ConfigParser()
    parser.read_dict(
        {
            "validator": {
                "check_root_elements": "book article",
                "file_extension": ".xml",
                "valid_languages": "en-us de-de",
            },
            "metadata": {
                "revhistory": "0",
                "require_xmlid_on_revision": "true",
                "meta_title_length": "50",
                "meta_description_length": "150",
                "valid_meta_architecture": "A, B, C",
                "valid_meta_category": "D, E, F",
            },
        }
    )
    # ConfigParser has no such attribute of its own; it is attached here
    # so code that reads ``configfiles`` from the parser keeps working.
    setattr(parser, "configfiles", None)
    return parser


@pytest.fixture(scope="function")
def dict_config(config) -> dict[str, t.Any]:
    """Return the ``config`` fixture converted to a dictionary via ``as_dict``."""
    converted = as_dict(config)
    return converted
Loading

0 comments on commit a902366

Please sign in to comment.