Skip to content

Commit

Permalink
refactor: preparation for new <video-segment> (#1125)
Browse files Browse the repository at this point in the history
  • Loading branch information
jnussbaum authored Aug 20, 2024
1 parent 54ba206 commit 57dd646
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 26 deletions.
44 changes: 27 additions & 17 deletions src/dsp_tools/commands/excel2xml/excel2xml_lib.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import dataclasses
import datetime
import difflib
Expand Down Expand Up @@ -1402,29 +1403,38 @@ def make_text_prop(
# write the text into the tag, without validation
value_.text = str(val.value)
else:
escaped_text = _escape_reserved_chars(str(val.value))
# transform named entities (=character references) to numeric entities, e.g. &nbsp; -> &#160;
num_ent = numeric_entities(escaped_text)
pseudo_xml = f"<ignore-this>{num_ent}</ignore-this>"
try:
parsed = etree.fromstring(pseudo_xml)
value_.text = parsed.text # everything before the first child tag
value_.extend(list(parsed)) # all (nested) children of the pseudo-xml
except etree.XMLSyntaxError as err:
msg = (
"The XML tags contained in a richtext property (encoding=xml) must be well-formed. "
"The special characters <, > and & are only allowed to construct a tag. "
)
value_ = _add_richtext_to_etree_element(str(val.value), value_)
except BaseError as err:
if calling_resource:
msg += f"The error occurred in resource {calling_resource}, property {name}"
msg += f"\nOriginal error message: {err.msg}"
msg += f"\nEventual line/column numbers are relative to this text: {pseudo_xml}"
raise BaseError(msg) from None
err.message += f"The error occurred in resource {calling_resource}, property {name}"
raise err from None
prop_.append(value_)

return prop_


def _add_richtext_to_etree_element(richtext: str, element: etree._Element) -> etree._Element:
    """
    Return a copy of `element` that carries the given richtext as its content.

    The richtext is passed through `_escape_reserved_chars` and `numeric_entities`,
    wrapped in a throwaway `<ignore-this>` tag, and parsed as XML.
    The text and the child nodes of the parsed wrapper are then placed into a deep copy of `element`.
    The element that was passed in is left untouched.

    Args:
        richtext: the richtext string (encoding=xml) to insert
        element: the element whose deep copy receives the richtext

    Raises:
        BaseError: if the wrapped richtext cannot be parsed as XML

    Returns:
        the deep copy of `element`, with the richtext added to it
    """
    result = copy.deepcopy(element)
    # named entities (=character references) are turned into numeric entities, e.g. &nbsp; -> &#160;
    with_numeric_entities = numeric_entities(_escape_reserved_chars(richtext))
    pseudo_xml = f"<ignore-this>{with_numeric_entities}</ignore-this>"
    try:
        wrapper = etree.fromstring(pseudo_xml)
    except etree.XMLSyntaxError as err:
        # suppress the lxml traceback: the message below already contains the original error text
        raise BaseError(
            "The XML tags contained in a richtext property (encoding=xml) must be well-formed. "
            "The special characters <, > and & are only allowed to construct a tag. "
            f"\nOriginal error message: {err.msg}"
            f"\nEventual line/column numbers are relative to this text: {pseudo_xml}"
        ) from None
    # the wrapper's text is everything that precedes the first child tag
    result.text = wrapper.text
    # snapshot the children in a list before handing all (nested) children over to the copy
    result.extend(list(wrapper))
    return result


def _escape_reserved_chars(text: str) -> str:
"""
From richtext strings (encoding="xml"), escape the reserved characters <, > and &,
Expand Down Expand Up @@ -1834,7 +1844,7 @@ def make_video_segment( # noqa: D417 (undocumented-param)
Creates an empty `<video-segment>` element, with the attributes as specified by the arguments.
Args:
The arguments correspond 1:1 to the attributes of the <video-segment> element.
The arguments correspond 1:1 to the attributes of the `<video-segment>` element.
Returns:
The video-segment element, without any children, but with the attributes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,13 @@ def __init__(self, node: etree._Element, valtype: str, default_ontology: str) ->
XmlUploadError: If an upload fails
"""
# get the property name which is in format namespace:propertyname, p.ex. rosetta:hasName
tmp_prop_name = node.attrib["name"].split(":")
if len(tmp_prop_name) > 1:
if tmp_prop_name[0]:
self.name = node.attrib["name"]
else:
# replace an empty namespace with the default ontology name
self.name = f"{default_ontology}:{tmp_prop_name[1]}"
if ":" not in node.attrib["name"]:
self.name = f"knora-api:{node.attrib['name']}"
else:
self.name = f"knora-api:{tmp_prop_name[0]}"
listname = node.attrib.get("list") # safe the list name if given (only for lists)
prefix, name = node.attrib["name"].split(":")
# replace an empty namespace with the default ontology name
self.name = node.attrib["name"] if prefix else f"{default_ontology}:{name}"
listname = node.attrib.get("list") # save the list name if given (only for lists)
self.valtype = valtype
self.values = []

Expand Down

0 comments on commit 57dd646

Please sign in to comment.