Skip to content

Commit

Permalink
Issue #82 - Making validation errors more type-specific & adding some…
Browse files Browse the repository at this point in the history
… unit tests (#122)
  • Loading branch information
robons authored Aug 6, 2021
1 parent 06a69b2 commit 1e3fb73
Show file tree
Hide file tree
Showing 11 changed files with 617 additions and 66 deletions.
1 change: 1 addition & 0 deletions csvqb/csvqb/models/cube/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .cube import Cube
from .catalog import CatalogMetadataBase
from .csvqb import *
from .validationerrors import *
1 change: 1 addition & 0 deletions csvqb/csvqb/models/cube/csvqb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
from .components import *
from .catalog import CatalogMetadata
from ..cube import Cube
from .validationerrors import *

# Alias for the generic Cube parameterised with this package's CatalogMetadata type.
QbCube = Cube[CatalogMetadata]
163 changes: 163 additions & 0 deletions csvqb/csvqb/models/cube/csvqb/validationerrors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
"""
Qb-Cube Validation Errors
-------------------------
"""

from dataclasses import dataclass
from typing import Optional, Type, Union

from csvqb.models.cube.csvqb.components import (
QbObservationValue,
QbMultiUnits,
QbDataStructureDefinition,
)
from csvqb.models.validationerror import SpecificValidationError

# A component type is described either by a free-text label (str) or by a
# QbDataStructureDefinition class (the class object itself, not an instance).
ComponentTypeDescription = Union[str, Type[QbDataStructureDefinition]]


def _get_description_for_component(t: ComponentTypeDescription) -> str:
    """
    Return the text used to refer to a component type inside an error message.

    A string description is passed through unchanged; any other value is treated as a class and its `__name__`
    is returned.
    """
    return t if isinstance(t, str) else t.__name__


@dataclass
class OutputUriTemplateMissingError(SpecificValidationError):
    """
    Describes a component which is unable to infer its own output_uri_template, meaning the user has to supply an
    output_uri_template manually but has not done so.
    """

    csv_column_name: str
    component_type: ComponentTypeDescription

    def __post_init__(self):
        # The message is derived from the fields once the dataclass-generated __init__ has populated them.
        component_description = _get_description_for_component(self.component_type)
        self.message = (
            f"'{self.csv_column_name}' - a {component_description} "
            "must have an output_uri_template defined."
        )


@dataclass
class MaxNumComponentsExceededError(SpecificValidationError):
    """
    Describes the situation where there is an upper limit on how many components of a given type may be defined,
    and the user has defined more than that limit.
    """

    component_type: ComponentTypeDescription
    maximum_number: int
    actual_number: int
    additional_explanation: Optional[str] = None

    def __post_init__(self):
        component_description = _get_description_for_component(self.component_type)
        sentences = [
            f"Found {self.actual_number} of {component_description}s.",
            f"Expected a maximum of {self.maximum_number}.",
        ]
        # The optional explanation is appended as a final, space-separated sentence.
        if self.additional_explanation is not None:
            sentences.append(self.additional_explanation)
        self.message = " ".join(sentences)


@dataclass
class MinNumComponentsNotSatisfiedError(SpecificValidationError):
    """
    Describes the situation where a minimum number of components of a given type is required, and the user has
    defined fewer than that minimum.
    """

    component_type: ComponentTypeDescription
    minimum_number: int
    actual_number: int
    additional_explanation: Optional[str] = None

    def __post_init__(self):
        component_description = _get_description_for_component(self.component_type)
        # Work out the optional trailing explanation first so the message is assembled in a single expression.
        if self.additional_explanation is None:
            explanation_suffix = ""
        else:
            explanation_suffix = " " + self.additional_explanation
        self.message = (
            f"At least {self.minimum_number} {component_description}s must be defined. "
            f"Found {self.actual_number}.{explanation_suffix}"
        )


@dataclass
class WrongNumberComponentsError(SpecificValidationError):
    """
    Describes the situation where an exact number of components of a given type is required, and the user has
    defined a different number.
    """

    component_type: ComponentTypeDescription
    expected_number: int
    actual_number: int
    additional_explanation: Optional[str] = None

    def __post_init__(self):
        component_description = _get_description_for_component(self.component_type)
        found_sentence = f"Found {self.actual_number} {component_description}s."
        expected_sentence = f"Expected exactly {self.expected_number}."
        message = found_sentence + " " + expected_sentence
        # The optional explanation follows the two fixed sentences, separated by a single space.
        if self.additional_explanation is not None:
            message = message + " " + self.additional_explanation
        self.message = message


@dataclass
class NeitherDefinedError(SpecificValidationError):
    """
    Describes the situation where at least one of two kinds of component has to be defined, and the user has
    defined neither of them.
    """

    component_one: ComponentTypeDescription
    component_two: ComponentTypeDescription
    additional_explanation: Optional[str] = None

    def __post_init__(self):
        first_description = _get_description_for_component(self.component_one)
        second_description = _get_description_for_component(self.component_two)
        sentences = [
            f"Found neither {first_description} nor {second_description} defined.",
            "One of these must be defined.",
        ]
        # The optional explanation is appended as a final, space-separated sentence.
        if self.additional_explanation is not None:
            sentences.append(self.additional_explanation)
        self.message = " ".join(sentences)


@dataclass
class UnitsNotDefinedError(NeitherDefinedError):
    """
    Describes a dataset for which the user has not defined any units.
    """

    # The inherited fields are re-declared with defaults, so this error can be constructed without arguments.
    component_one: ComponentTypeDescription = QbObservationValue.__name__ + ".unit"
    component_two: ComponentTypeDescription = QbMultiUnits
    additional_explanation: Optional[str] = None


@dataclass
class IncompatibleComponentsError(SpecificValidationError):
    """
    Describes the situation where the user has defined two components which are incompatible with each other.
    """

    component_one: ComponentTypeDescription
    component_two: ComponentTypeDescription
    additional_explanation: Optional[str] = None

    def __post_init__(self):
        first_description = _get_description_for_component(self.component_one)
        second_description = _get_description_for_component(self.component_two)
        # Work out the optional trailing explanation first so the message is assembled in a single expression.
        if self.additional_explanation is None:
            explanation_suffix = ""
        else:
            explanation_suffix = " " + self.additional_explanation
        self.message = (
            f"Both {first_description} and {second_description} have been defined. "
            "These components cannot be used together."
            f"{explanation_suffix}"
        )


@dataclass
class BothUnitTypesDefinedError(IncompatibleComponentsError):
    """
    Describes the situation where the user has a units column and has also set `QbObservationValue.unit`.
    """

    # The inherited fields are re-declared with defaults, so this error can be constructed without arguments.
    component_one: ComponentTypeDescription = QbObservationValue.__name__ + ".unit"
    component_two: ComponentTypeDescription = QbMultiUnits
    additional_explanation: Optional[str] = None
25 changes: 10 additions & 15 deletions csvqb/csvqb/models/cube/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@
import pandas as pd

from csvqb.models.validationerror import ValidationError
from .validationerrors import (
DuplicateColumnTitleError,
ColumnNotFoundInDataError,
MissingColumnDefinitionError,
)
from .columns import CsvColumn
from csvqb.models.cube.catalog import CatalogMetadataBase
from csvqb.inputs import pandas_input_to_columnar
from .catalog import CatalogMetadataBase
from ..pydanticmodel import PydanticModel

TMetadata = TypeVar("TMetadata", bound=CatalogMetadataBase, covariant=True)
Expand All @@ -35,9 +39,7 @@ def _validate_columns(self) -> List[ValidationError]:
existing_col_titles: Set[str] = set()
for col in self.columns:
if col.csv_column_title in existing_col_titles:
errors.append(
ValidationError(f"Duplicate column title '{col.csv_column_title}'")
)
errors.append(DuplicateColumnTitleError(col.csv_column_title))
else:
existing_col_titles.add(col.csv_column_title)

Expand All @@ -46,22 +48,15 @@ def _validate_columns(self) -> List[ValidationError]:
if col.csv_column_title in self.data.columns:
maybe_column_data = self.data[col.csv_column_title]
else:
errors.append(
ValidationError(
f"Column '{col.csv_column_title}' not found in data provided."
)
)
errors.append(ColumnNotFoundInDataError(col.csv_column_title))

errors += col.validate_data(maybe_column_data)

if self.data is not None:
defined_column_titles = [c.csv_column_title for c in self.columns]
for column in list(self.data.columns):
column = str(column)
if column not in defined_column_titles:
errors.append(
ValidationError(
f"Column '{column}' does not have a mapping defined."
)
)
errors.append(MissingColumnDefinitionError(column))

return errors
45 changes: 45 additions & 0 deletions csvqb/csvqb/models/cube/validationerrors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
Cube Validation Errors
----------------------
"""
from dataclasses import dataclass

from csvqb.models.validationerror import SpecificValidationError


@dataclass
class DuplicateColumnTitleError(SpecificValidationError):
    """
    Tells the user that the same column title has been defined more than once.
    """

    csv_column_title: str

    def __post_init__(self):
        # The message is derived from the field once the dataclass-generated __init__ has populated it.
        duplicated_title = self.csv_column_title
        self.message = f"Duplicate column title '{duplicated_title}'"


@dataclass
class ColumnNotFoundInDataError(SpecificValidationError):
    """
    Tells the user that a column they have defined is not present in the data provided.
    """

    csv_column_title: str

    def __post_init__(self):
        # The message is derived from the field once the dataclass-generated __init__ has populated it.
        missing_title = self.csv_column_title
        self.message = f"Column '{missing_title}' not found in data provided."


@dataclass
class MissingColumnDefinitionError(SpecificValidationError):
    """
    Tells the user that their data contains a column for which no mapping has been specified.
    """

    csv_column_title: str

    def __post_init__(self):
        # The message is derived from the field once the dataclass-generated __init__ has populated it.
        unmapped_title = self.csv_column_title
        self.message = f"Column '{unmapped_title}' does not have a mapping defined."
3 changes: 3 additions & 0 deletions csvqb/csvqb/models/rdf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
Contains Models for mapping data to RDF which are unique to the `csvqb` package.
"""
16 changes: 12 additions & 4 deletions csvqb/csvqb/models/validationerror.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,19 @@
ValidationError
---------------
"""
from dataclasses import dataclass, field
from abc import ABC


@dataclass
class ValidationError:
def __init__(self, message: str):
self.message: str = message
"""Class representing an error validating a model."""

def __repr__(self) -> str:
return f"{self.__class__.__name__}({self.message})"
message: str


@dataclass
class SpecificValidationError(ValidationError, ABC):
    """
    Abstract base for ValidationErrors which carry structured, type-specific information, allowing calling code
    to interpret the error rather than only display its message.
    """

    # `message` is excluded from the generated __init__; it is not supplied by the caller when constructing
    # the error.
    message: str = field(init=False)
Loading

0 comments on commit 1e3fb73

Please sign in to comment.