Skip to content

Commit

Permalink
#695 Implement CodeListInspector queries (#725)
Browse files Browse the repository at this point in the history
  • Loading branch information
nimshi89 authored Feb 16, 2023
1 parent 9bea1bd commit 2361d5d
Show file tree
Hide file tree
Showing 26 changed files with 765 additions and 519 deletions.
4 changes: 2 additions & 2 deletions src/csvcubed/cli/inspect/inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from csvcubed.cli.inspect.metadataprinter import MetadataPrinter
from csvcubed.models.csvcubedexception import FailedToLoadRDFGraphException
from csvcubed.models.csvwtype import CSVWType
from csvcubed.utils.sparql_handler.code_list_state import CodeListState
from csvcubed.utils.sparql_handler.code_list_inspector import CodeListInspector
from csvcubed.utils.sparql_handler.csvw_state import CsvWState
from csvcubed.utils.sparql_handler.data_cube_state import DataCubeState
from csvcubed.utils.sparql_handler.sparql import path_to_file_uri_for_rdflib
Expand Down Expand Up @@ -88,7 +88,7 @@ def _generate_printables(
data_cube_state = DataCubeState(csvw_state)
metadata_printer = MetadataPrinter(data_cube_state)
else:
code_list_state = CodeListState(csvw_state)
code_list_state = CodeListInspector(csvw_state)
metadata_printer = MetadataPrinter(code_list_state)

type_info_printable: str = metadata_printer.type_info_printable
Expand Down
56 changes: 18 additions & 38 deletions src/csvcubed/cli/inspect/metadataprinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,9 @@
)
from csvcubed.models.sparqlresults import (
CatalogMetadataResult,
CodeListColsByDatasetUrlResult,
CodelistColumnResult,
CodelistsResult,
ColsWithSuppressOutputTrueResult,
ColumnDefinition,
DSDLabelURIResult,
PrimaryKeyColNamesByDatasetUrlResult,
QubeComponentsResult,
Expand All @@ -45,17 +44,14 @@
get_codelist_col_title_by_property_url,
get_codelist_col_title_from_col_name,
)
from csvcubed.utils.sparql_handler.code_list_state import CodeListState
from csvcubed.utils.sparql_handler.code_list_inspector import CodeListInspector
from csvcubed.utils.sparql_handler.data_cube_state import DataCubeState
from csvcubed.utils.sparql_handler.sparql import path_to_file_uri_for_rdflib
from csvcubed.utils.sparql_handler.sparqlquerymanager import (
select_codelist_cols_by_csv_url,
select_codelist_csv_url,
select_cols_where_suppress_output_is_true,
select_csvw_dsd_dataset_label_and_dsd_def_uri,
select_dsd_code_list_and_cols,
select_primary_key_col_names_by_csv_url,
select_qb_csv_url,
)
from csvcubed.utils.uri import looks_like_uri

Expand All @@ -66,7 +62,7 @@ class MetadataPrinter:
This class produces the printables necessary for producing outputs to the CLI.
"""

state: Union[DataCubeState, CodeListState]
state: Union[DataCubeState, CodeListInspector]

csvw_type_str: str = field(init=False)
primary_csv_url: str = field(init=False)
Expand All @@ -83,7 +79,7 @@ class MetadataPrinter:
result_dataset_value_counts: DatasetObservationsByMeasureUnitInfoResult = field(
init=False
)
result_code_list_cols: CodeListColsByDatasetUrlResult = field(init=False)
result_code_list_cols: List[ColumnDefinition] = field(init=False)
result_concepts_hierachy_info: CodelistHierarchyInfoResult = field(init=False)

@staticmethod
Expand All @@ -95,26 +91,23 @@ def get_csvw_type_str(csvw_type: CSVWType) -> str:
else:
raise InputNotSupportedException()

@staticmethod
def get_primary_csv_url(
csvw_metadata_rdf_graph: rdflib.ConjunctiveGraph,
csvw_type: CSVWType,
catalogue_data_set_uri: str,
) -> str:
def get_primary_csv_url(self) -> str:
"""Return the csv_url for the primary table in the graph."""

if csvw_type == CSVWType.QbDataSet:
return select_qb_csv_url(
csvw_metadata_rdf_graph, catalogue_data_set_uri
primary_metadata = self.state.csvw_state.get_primary_catalog_metadata()
if isinstance(self.state, DataCubeState):
return self.state.get_cube_identifiers_for_data_set(
primary_metadata.dataset_uri
).csv_url
elif isinstance(self.state, CodeListInspector):
return self.state.get_table_identifiers_for_concept_scheme(
primary_metadata.dataset_uri
).csv_url
elif csvw_type == CSVWType.CodeList:
return select_codelist_csv_url(csvw_metadata_rdf_graph).csv_url
else:
raise InputNotSupportedException()

@staticmethod
def get_parent_label_unique_id_col_titles(
columns: List[CodelistColumnResult], primary_key_col: str
columns: List[ColumnDefinition], primary_key_col: str
) -> Tuple[Optional[str], Optional[str], Optional[str]]:
parent_notation_col_title = get_codelist_col_title_by_property_url(
columns, CodelistPropertyUrl.SkosBroader
Expand All @@ -139,13 +132,7 @@ def generate_general_results(self):

self.csvw_type_str = self.get_csvw_type_str(csvw_type)
self.result_catalog_metadata = csvw_state.get_primary_catalog_metadata()
self.primary_csv_url = self.get_primary_csv_url(
csvw_state.rdf_graph,
csvw_type,
to_absolute_rdflib_file_path(
self.result_catalog_metadata.dataset_uri, csvw_state.csvw_json_path
),
)
self.primary_csv_url = self.get_primary_csv_url()
self.dataset = load_csv_to_dataframe(
csvw_state.csvw_json_path, Path(self.primary_csv_url)
)
Expand Down Expand Up @@ -204,8 +191,8 @@ def generate_codelist_results(self):
Member of :class:`./MetadataPrinter`.
"""
csvw_state = self.state.csvw_state
self.result_code_list_cols = select_codelist_cols_by_csv_url(
csvw_state.rdf_graph, self.primary_csv_url
self.result_code_list_cols = csvw_state.get_column_definitions_for_csv(
self.primary_csv_url
)
# Retrieving the primary key column names of the code list to identify the unique identifier
result_primary_key_col_names_by_csv_url: PrimaryKeyColNamesByDatasetUrlResult = select_primary_key_col_names_by_csv_url(
Expand All @@ -226,7 +213,7 @@ def generate_codelist_results(self):
label_col_title,
unique_identifier,
) = self.get_parent_label_unique_id_col_titles(
self.result_code_list_cols.columns, primary_key_col_names[0].value
self.result_code_list_cols, primary_key_col_names[0].value
)
self.result_concepts_hierachy_info = get_concepts_hierarchy_info(
self.dataset, parent_col_title, label_col_title, unique_identifier
Expand Down Expand Up @@ -318,10 +305,3 @@ def codelist_hierachy_info_printable(self) -> str:
:return: `str` - user-friendly string which will be output to CLI.
"""
return f"- The {self.csvw_type_str} has the following concepts information:{self.result_concepts_hierachy_info.output_str}"


def to_absolute_rdflib_file_path(path: str, parent_document_path: Path) -> str:
if looks_like_uri(path):
return path
else:
return urljoin(path_to_file_uri_for_rdflib(parent_document_path), path)
67 changes: 5 additions & 62 deletions src/csvcubed/models/sparqlresults.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,13 @@ def output_str(self) -> str:


@dataclass
class CsvUrlResult:
class CodeListTableIdentifers:
"""
Model to represent select csv url result.
Table identifiers to support mapping between csv_url and concept_scheme_url
"""

csv_url: str
concept_scheme_url: str


@dataclass
Expand Down Expand Up @@ -267,7 +268,7 @@ class ColumnDefinition:
"""CSV that this column is defined against."""
about_url: Optional[str]
data_type: Optional[str]
name: str
name: Optional[str]
property_url: Optional[str]
required: bool
suppress_output: bool
Expand Down Expand Up @@ -605,22 +606,6 @@ def map_csvw_table_schemas_file_dependencies_result(
return result


def map_csv_url_result(
sparql_result: ResultRow,
) -> CsvUrlResult:
"""
Maps sparql query result to `CsvUrlResult`
Member of :file:`./models/sparqlresults.py`
:return: `CsvUrlResult`
"""
result_dict = sparql_result.asdict()

result = CsvUrlResult(csv_url=str(result_dict["tableUrl"]))
return result


def map_units(sparql_results: List[ResultRow]) -> List[UnitResult]:
"""
Maps sparql query result to `UnitResult`
Expand All @@ -638,48 +623,6 @@ def map_row(row_result: Dict[str, Any]) -> UnitResult:
return [map_row(row.asdict()) for row in sparql_results]


def _map_codelist_column_sparql_result(
sparql_result: ResultRow,
) -> CodelistColumnResult:
"""
Maps sparql query result to `CodelistColumnResult`
Member of :file:`./models/sparqlresults.py`
:return: `CodelistColumnResult`
"""
result_dict = sparql_result.asdict()

result = CodelistColumnResult(
column_property_url=none_or_map(result_dict.get("columnPropertyUrl"), str),
column_value_url=none_or_map(result_dict.get("columnValueUrl"), str),
column_title=none_or_map(result_dict.get("columnTitle"), str),
column_name=none_or_map(result_dict.get("columnName"), str),
)
return result


def map_codelist_cols_by_csv_url_result(
sparql_results: List[ResultRow],
) -> CodeListColsByDatasetUrlResult:
"""
Maps sparql query result to `CodeListColsByDatasetUrlResult`
Member of :file:`./models/sparqlresults.py`
:return: `CodeListColsByDatasetUrlResult`
"""

columns = list(
map(
lambda result: _map_codelist_column_sparql_result(result),
sparql_results,
)
)
result = CodeListColsByDatasetUrlResult(columns=columns)
return result


def _map_primary_key_col_name_by_csv_url_result(
sparql_result: ResultRow,
) -> PrimaryKeyColNameByDatasetUrlResult:
Expand Down Expand Up @@ -782,7 +725,7 @@ def map_row(row_result: Dict[str, Any]) -> ColumnDefinition:
csv_url=str(row_result["csvUrl"]),
about_url=none_or_map(row_result.get("aboutUrl"), str),
data_type=none_or_map(row_result.get("dataType"), str),
name=str(row_result["name"]),
name=none_or_map(row_result.get("name"), str),
property_url=none_or_map(row_result.get("propertyUrl"), str),
required=bool(row_result["required"]),
suppress_output=bool(row_result["suppressOutput"]),
Expand Down
4 changes: 3 additions & 1 deletion src/csvcubed/utils/csvdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,9 @@ def _melt_pivoted_shape(
if csv_url is None:
raise ValueError("csv_url cannot be None.")

column_definitions = data_cube_state.get_column_definitions_for_csv(csv_url)
column_definitions = data_cube_state.csvw_state.get_column_definitions_for_csv(
csv_url
)

measure_components = filter_components_from_dsd(
qube_components,
Expand Down
2 changes: 1 addition & 1 deletion src/csvcubed/utils/dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def get_from_dict_ensure_exists(config: Dict[str, T], key: str) -> T:
"""
val = config.get(key)
if val is None:
raise Exception(f"Couldn't find value for key '{key}'")
raise KeyError(f"Couldn't find value for key '{key}'")
return val


Expand Down
18 changes: 9 additions & 9 deletions src/csvcubed/utils/skos/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
InvalidNumberOfRecordsException,
PrimaryKeyColumnTitleCannotBeNoneException,
)
from csvcubed.models.sparqlresults import CodelistColumnResult
from csvcubed.models.sparqlresults import CodelistColumnResult, ColumnDefinition


class CodelistPropertyUrl(Enum):
Expand All @@ -41,9 +41,9 @@ class CodelistPropertyUrl(Enum):


def get_codelist_col_title_by_property_url(
columns: List[CodelistColumnResult], property_url: CodelistPropertyUrl
columns: List[ColumnDefinition], property_url: CodelistPropertyUrl
) -> Optional[str]:
"""
"""
Returns dataset column title for the given property url.
Member of :class:`./codelist`.
Expand All @@ -52,7 +52,7 @@ def get_codelist_col_title_by_property_url(
"""

results = [
column for column in columns if column.column_property_url == property_url.value
column for column in columns if column.property_url == property_url.value
]

if len(results) != 1:
Expand All @@ -62,11 +62,11 @@ def get_codelist_col_title_by_property_url(
num_of_records=len(results),
)

return results[0].column_title
return results[0].title


def get_codelist_col_title_from_col_name(
columns: List[CodelistColumnResult], col_name: str
columns: List[ColumnDefinition], col_name: str
) -> str:
"""
Returns the column title for the column name.
Expand All @@ -76,7 +76,7 @@ def get_codelist_col_title_from_col_name(
:return: `str` - dataset column title.
"""

results = [column for column in columns if column.column_name == col_name]
results = [column for column in columns if column.name == col_name]

if len(results) != 1:
raise InvalidNumberOfRecordsException(
Expand All @@ -85,10 +85,10 @@ def get_codelist_col_title_from_col_name(
num_of_records=len(results),
)

if results[0].column_title is None:
if results[0].title is None:
raise PrimaryKeyColumnTitleCannotBeNoneException()

return results[0].column_title
return results[0].title


def build_concepts_hierarchy_tree(
Expand Down
Loading

0 comments on commit 2361d5d

Please sign in to comment.