Skip to content

Commit

Permalink
Last changes for optimisation and bugfixes + rewrite unitary tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
rigoudyg committed Nov 26, 2019
1 parent bd48ab5 commit bc67c57
Show file tree
Hide file tree
Showing 6 changed files with 307 additions and 596 deletions.
433 changes: 268 additions & 165 deletions tests_xml_writer.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion xml_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def create_xml_sub_element(xml_element, tag, attrib=OrderedDict(), text=None):


def create_xml_element_from_string(string):
return xml_writer.xml_parser(string)
return xml_writer.parse_xml_string_rewrite(string)


def create_string_from_xml_element(xml_element):
Expand Down
2 changes: 1 addition & 1 deletion xml_writer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from xml_writer.element import Element
from xml_writer.header import Header
from xml_writer.comment import Comment
from xml_writer.parser import xml_parser, xml_file_parser
from xml_writer.parser import xml_file_parser, parse_xml_string_rewrite
from xml_writer.utils import encode_if_needed, decode_if_needed

if __name__ == "__main__":
Expand Down
236 changes: 0 additions & 236 deletions xml_writer/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,38 +137,6 @@ def _dump_attrib(self, sort=False):
return self.dump_dict(deepcopy(self.attrib), sort=sort)


def _build_element(xml_string, verbose=False):
# Delete unused spaces
xml_string = xml_string.strip()
print_if_needed("<<<build_element: XML_STRING before>>>", len(xml_string), xml_string, verbose=verbose)
if len(xml_string) == 0:
raise Exception("The XML string should not be void.")
# Check if the element is a XML comment
xml_string, comment = _find_xml_comment(xml_string, verbose=verbose)
if comment is not None:
xml_string = xml_string.strip()
print_if_needed("<<<build_element: XML_STRING after comment>>>", len(xml_string), xml_string, verbose=verbose)
return xml_string, comment
else:
# Check if the element is a XML element made of one single part
xml_string, element = _find_one_part_element(xml_string, verbose=verbose)
if element is not None:
xml_string = xml_string.strip()
print_if_needed("<<<build_element: XML_STRING after element>>>", len(xml_string), xml_string,
verbose=verbose)
return xml_string, element
else:
# Check if the element is a XML element made of two parts
xml_string, element = _find_two_parts_element(xml_string, verbose=verbose)
if element is not None:
xml_string = xml_string.strip()
print_if_needed("<<<build_element: XML_STRING after element>>>", len(xml_string), xml_string,
verbose=verbose)
return xml_string, element
else:
raise Exception("Could not find what the element could be...")


# XML single part regexp
_xml_single_part_element_regexp = re.compile(r'^\s?(?P<all><\s?(?P<tag>\w+)\s?{}\s?/>)\s?'.format(
_generic_dict_regexp))
Expand All @@ -195,12 +163,7 @@ def _find_one_part_element(xml_string, verbose=False):
_generic_dict_regexp)
_xml_string_init_element_replace = r'^'+_xml_string_first_element_replace
_xml_init_two_parts_element_regexp = re.compile(_xml_string_init_element_replace.format(r"\w+\s?"))
_xml_string_content_element = r'(?P<content>{})'
_xml_string_end_element_replace = r'(?P<all_end>\s?(?P<end></\s?{}\s?>)\s?)'
_xml_string_two_parts_element_replace = _xml_string_init_element_replace + _xml_string_content_element + \
_xml_string_end_element_replace
_xml_string_pseudo_two_parts_element_replace = r"(?P<all>" + _xml_string_init_element_replace + r"\s?" + \
_xml_string_end_element_replace + r")"


def _find_two_parts_element_init(xml_string, verbose=False):
Expand Down Expand Up @@ -231,204 +194,5 @@ def _find_two_parts_element_end(xml_string, tag, verbose=False):
return xml_string, True


# def _find_matching_first_part_in_content(content, tag, verbose=False):
# finditer_matches_first_part_in_content = \
# re.compile(_xml_string_first_element_replace.format(tag)).finditer(content)
# find_positions_first_part_in_content = list()
# last_position = 0
# for match in finditer_matches_first_part_in_content:
# last_position = match.start()
# if content[last_position] == " ":
# last_position += 1
# find_positions_first_part_in_content.append(last_position)
# if verbose:
# print("<<<find_matching_first_part_in_content: rank match first part>>>",
# len(find_positions_first_part_in_content), find_positions_first_part_in_content)
# return find_positions_first_part_in_content
#
#
# def _find_matching_last_part_in_content(content, tag, verbose=False):
# finditer_matches_last_part_in_content = \
# re.compile(_xml_string_end_element_replace.format(tag)).finditer(content)
# find_positions_last_part_in_content = list()
# find_groups_last_part_in_content = list()
# last_position = 0
# for match in finditer_matches_last_part_in_content:
# # last_position = content.find(match.groupdict()["end"], last_position + 1)
# last_position = match.start()
# if content[last_position] == " ":
# last_position += 1
# find_positions_last_part_in_content.append(last_position)
# find_groups_last_part_in_content.append(match.groupdict()["end"])
# if verbose:
# print("<<<find_matching_last_part_in_content: rank match last part>>>",
# len(find_positions_last_part_in_content), find_positions_last_part_in_content)
# return find_positions_last_part_in_content, find_groups_last_part_in_content


def _find_matching_first_and_last_part_in_content(content, tag, verbose=False):
match_found = list()
for match in re.compile(_xml_string_first_element_replace.format(tag)).finditer(content):
last_position = match.start()
if match.group().startswith(" "):
last_position += 1
match_found.append((last_position, match.groupdict()["begin"], "match_first"))
for match in re.compile(_xml_string_end_element_replace.format(tag)).finditer(content):
last_position = match.start()
if match.group().startswith(" "):
last_position += 1
match_found.append((last_position, match.groupdict()["end"], "match_end"))
match_found = sorted(match_found, key=lambda match: match[0])
for match in match_found:
yield match


# def _find_real_content(content, match_first_part, match_last_part, groups_last_part, verbose=False):
# find_end = False
# if len(match_first_part) != 0 and len(match_last_part) != 0:
# # Case of nested beacons with same tag
# position_start = 0
# nb_positions_start = len(match_first_part)
# position_end = 0
# nb_positions_end = len(match_last_part)
# nb_nested = 0
# while position_start < nb_positions_start and position_end < nb_positions_end and not find_end:
# if verbose:
# print("<<<find_element: POSITIONS START/END NESTED before>>>", position_start, "/",
# nb_positions_start, position_end, "/", nb_positions_end, nb_nested)
# if match_last_part[position_end] < match_first_part[position_start]:
# if nb_nested > 0:
# position_end += 1
# nb_nested -= 1
# else:
# content = content[0:match_last_part[position_end]]
# find_end = True
# else:
# nb_nested += 1
# position_start += 1
# if verbose:
# print("<<<find_real_content: POSITIONS START/END NESTED after>>>", position_start, "/",
# nb_positions_start, position_end, "/", nb_positions_end, nb_nested)
#
# if not find_end and position_start == nb_positions_start and position_end == (nb_positions_end - nb_nested):
# # Case of nested and finished beacons with same tag
# find_end = True
# position_end += nb_nested
# nb_nested = 0
# if not find_end:
# raise Exception("There is a problem with the xml file... All opened beacon must be closed.")
# elif len(match_last_part) != 0 or len(match_first_part) != 0:
# raise Exception("There is a problem with the xml file... All opened beacon must be closed.")
# if find_end:
# if position_end > (nb_positions_end - 1):
# return content, None
# else:
# return content, groups_last_part[position_end]
# else:
# return content, None


def _find_real_content(content, tag, verbose=False):
nb_nested = 0
find_end = False
real_content = ""
for (match_pos, match_key, match_type) in _find_matching_first_and_last_part_in_content(content, tag,
verbose=verbose):
if match_type == "match_end":
if nb_nested > 0:
nb_nested -= 1
else:
find_end = True
real_content = content[0:match_pos]
break
elif match_type == "match_first":
nb_nested += 1
else:
raise ValueError("Unknown match type %s" % match_type)
if nb_nested > 0:
raise Exception("There is a problem with the xml file... All opened beacon must be closed.")
if find_end:
if len(real_content) + len(match_key) >= len(content):
return content, None
else:
return real_content, match_key
else:
return content, None


def _find_two_parts_element(xml_string, verbose=False):
xml_string = xml_string.strip()
# Match the first part of the two parts xml element
match_first_part = _xml_init_two_parts_element_regexp.match(xml_string)
if match_first_part:
# Get as many information as possible
tag = match_first_part.groupdict()["tag"].strip()
# Check if it is a pseudo two parts element
match_pseudo_two_strings = re.compile(_xml_string_pseudo_two_parts_element_replace.format(tag, tag)).match(xml_string)
if match_pseudo_two_strings:
match_group_dict = match_pseudo_two_strings.groupdict()
attrib = _build_dict_attrib(match_group_dict["attrib"])
element = Element(tag=tag, attrib=attrib)
string_to_remove = match_group_dict["all"]
xml_string = xml_string.replace(string_to_remove, "", 1)
return xml_string, element
# It is a real two parts element
else:
two_strings_regexp = _xml_string_two_parts_element_replace.format(tag, r".*", tag)
match_two_strings = re.compile(two_strings_regexp).match(xml_string)
if not match_two_strings:
raise Exception("Error - element should be a two parts element but seems not...")
else:
match_group_dict = match_two_strings.groupdict()
attrib = match_group_dict["attrib"]
attrib = _build_dict_attrib(attrib)
all_begin = match_group_dict["all_begin"]
all_end = match_group_dict["all_end"]
content = match_group_dict["content"]
# Find out if the content contains a subpart with the same tag
print_if_needed("<<<find_element: CONTENT before>>>", len(content), content, verbose=verbose)
# Find out where the content really stop
content, new_end = _find_real_content(content, tag, verbose=verbose)
if new_end is not None:
end = new_end
else:
end = all_end
print_if_needed("<<<find_element: CONTENT after>>>", len(content), content, verbose=verbose)
# Create string to remove
string_to_remove = all_begin + content + end
# Separate children from text
sub_xml_string, text = _find_text(content, verbose=verbose)
print_if_needed("<<<SUB_XML_STRING>>> Text found", text, verbose=verbose)
print_if_needed("<<<SUB_XML_STRING>>>", len(sub_xml_string), sub_xml_string, verbose=verbose)
if len(text) == 0:
text = None
# Create the element and its children
element = Element(tag=tag, text=text, attrib=attrib)
while len(sub_xml_string) > 0:
print_if_needed("<<<SUB_XML_STRING>>> Enter the loop...", len(sub_xml_string), sub_xml_string,
verbose=verbose)
new_sub_xml_string, text = _find_text(sub_xml_string, verbose=verbose)
if len(text) > 0:
print_if_needed("<<<SUB_XML_STRING>>> Text found:", text, verbose=verbose)
element.set_text(text)
new_sub_xml_string = new_sub_xml_string.strip()
new_sub_xml_string, subelement = _build_element(new_sub_xml_string, verbose=verbose)
if len(sub_xml_string) == len(new_sub_xml_string):
raise Exception("Stop: Infinite loop!!!!!!")
else:
sub_xml_string = new_sub_xml_string
sub_xml_string = sub_xml_string.strip()
if subelement is not None:
print_if_needed("<<<find sub_element>>>", subelement, verbose=verbose)
element.append(subelement)
xml_string = xml_string.replace(string_to_remove, "", 1)
print_if_needed("<<<XML_STRING end of treatment>>>", len(xml_string), xml_string, verbose=verbose)
print_if_needed("<<<XML_STRING string replaced>>>", len(string_to_remove), string_to_remove,
verbose=verbose)
return xml_string, element
else:
return xml_string, None


def is_xml_element(element):
return isinstance(element, (Beacon, Element, Comment, Header))
Loading

0 comments on commit bc67c57

Please sign in to comment.