diff --git a/src/dsp_tools/commands/excel2xml/excel2xml_lib.py b/src/dsp_tools/commands/excel2xml/excel2xml_lib.py
index 9b9d0deff..829026ab0 100644
--- a/src/dsp_tools/commands/excel2xml/excel2xml_lib.py
+++ b/src/dsp_tools/commands/excel2xml/excel2xml_lib.py
@@ -1,3 +1,4 @@
+import copy
 import dataclasses
 import datetime
 import difflib
@@ -1402,29 +1403,51 @@ def make_text_prop(
             # write the text into the tag, without validation
             value_.text = str(val.value)
         else:
-            escaped_text = _escape_reserved_chars(str(val.value))
-            # transform named entities (=character references) to numeric entities, e.g. &nbsp; -> &#160;
-            num_ent = numeric_entities(escaped_text)
-            pseudo_xml = f"<ignore-this>{num_ent}</ignore-this>"
             try:
-                parsed = etree.fromstring(pseudo_xml)
-                value_.text = parsed.text  # everything before the first child tag
-                value_.extend(list(parsed))  # all (nested) children of the pseudo-xml
-            except etree.XMLSyntaxError as err:
-                msg = (
-                    "The XML tags contained in a richtext property (encoding=xml) must be well-formed. "
-                    "The special characters <, > and & are only allowed to construct a tag. "
-                )
+                value_ = _add_richtext_to_etree_element(str(val.value), value_)
+            except BaseError as err:
                 if calling_resource:
-                    msg += f"The error occurred in resource {calling_resource}, property {name}"
-                msg += f"\nOriginal error message: {err.msg}"
-                msg += f"\nEventual line/column numbers are relative to this text: {pseudo_xml}"
-                raise BaseError(msg) from None
+                    err.message += f"\nThe error occurred in resource {calling_resource}, property {name}"
+                raise err from None
         prop_.append(value_)
 
     return prop_
 
 
+def _add_richtext_to_etree_element(richtext: str, element: etree._Element) -> etree._Element:
+    """
+    Parse a richtext string and attach its content to a copy of the given element.
+
+    Args:
+        richtext: text that may contain XML tags (encoding="xml")
+        element: element that should receive the text and the child tags; it is not modified
+
+    Returns:
+        a deep copy of the element, with the parsed text and children added
+
+    Raises:
+        BaseError: if the richtext is not well-formed XML
+    """
+    new_element = copy.deepcopy(element)
+    escaped_text = _escape_reserved_chars(richtext)
+    # transform named entities (=character references) to numeric entities, e.g. &nbsp; -> &#160;
+    num_ent = numeric_entities(escaped_text)
+    pseudo_xml = f"<ignore-this>{num_ent}</ignore-this>"
+    try:
+        parsed = etree.fromstring(pseudo_xml)
+    except etree.XMLSyntaxError as err:
+        msg = (
+            "The XML tags contained in a richtext property (encoding=xml) must be well-formed. "
+            "The special characters <, > and & are only allowed to construct a tag. "
+        )
+        msg += f"\nOriginal error message: {err.msg}"
+        msg += f"\nEventual line/column numbers are relative to this text: {pseudo_xml}"
+        raise BaseError(msg) from None
+    new_element.text = parsed.text  # everything before the first child tag
+    new_element.extend(list(parsed))  # all (nested) children of the pseudo-xml
+    return new_element
+
+
 def _escape_reserved_chars(text: str) -> str:
     """
     From richtext strings (encoding="xml"), escape the reserved characters <, > and &,
@@ -1834,7 +1857,7 @@ def make_video_segment(  # noqa: D417 (undocumented-param)
     Creates an empty `<video-segment>` element, with the attributes as specified by the arguments.
 
     Args:
-        The arguments correspond 1:1 to the attributes of the <video-segment> element.
+        The arguments correspond 1:1 to the attributes of the `<video-segment>` element.
 
     Returns:
         The video-segment element, without any children, but with the attributes
diff --git a/src/dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py b/src/dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py
index 1c7ae00c7..617448541 100644
--- a/src/dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py
+++ b/src/dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py
@@ -38,16 +38,13 @@ def __init__(self, node: etree._Element, valtype: str, default_ontology: str) ->
             XmlUploadError: If an upload fails
         """
         # get the property name which is in format namespace:propertyname, p.ex. rosetta:hasName
-        tmp_prop_name = node.attrib["name"].split(":")
-        if len(tmp_prop_name) > 1:
-            if tmp_prop_name[0]:
-                self.name = node.attrib["name"]
-            else:
-                # replace an empty namespace with the default ontology name
-                self.name = f"{default_ontology}:{tmp_prop_name[1]}"
+        if ":" not in node.attrib["name"]:
+            self.name = f"knora-api:{node.attrib['name']}"
         else:
-            self.name = f"knora-api:{tmp_prop_name[0]}"
-        listname = node.attrib.get("list")  # safe the list name if given (only for lists)
+            prefix, name = node.attrib["name"].split(":", 1)
+            # replace an empty namespace with the default ontology name
+            self.name = node.attrib["name"] if prefix else f"{default_ontology}:{name}"
+        listname = node.attrib.get("list")  # save the list name if given (only for lists)
         self.valtype = valtype
         self.values = []
 