Skip to content

Commit

Permalink
fix: poor performance with large and complex forms with many references
Browse files Browse the repository at this point in the history
- instance.py
  - gather the survey xpaths since _xpath now tracks the objects
- survey.py
  - when flattening the objects in _setup_xpath_dict, keep the object
    references instead of just the xpath, for future use
  - skip calling the heavily recursive share_same_repeat_parent if the
    target and context items have no common ancestor that is a repeat
  - incorporate "any_repeat" func body into is_parent_a_repeat since
    it is not re-used elsewhere and to avoid an extra lru_cache.
- survey_element.py
  - add method to iterate ancestors, to find common items for relative
    references, rather than iterating down from the survey/root
  - add method to find nearest common ancestor repeat (if any).
    Currently only used to discern objects with no such ancestor but
    could be developed further to replace "share_same_repeat_parent".
  • Loading branch information
lindsay-stevens committed Nov 14, 2024
1 parent 42bb2d6 commit 3a9d0bb
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 40 deletions.
2 changes: 1 addition & 1 deletion pyxform/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, survey_object, **kwargs):
# get xpaths
# - prep for xpaths.
self._survey.xml()
self._xpaths = self._survey._xpath.values()
self._xpaths = [x.get_xpath() for x in self._survey._xpath.values()]

# see "answers(self):" below for explanation of this dict
self._answers = {}
Expand Down
33 changes: 19 additions & 14 deletions pyxform/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,9 @@ def is_parent_a_repeat(survey, xpath):
if not parent_xpath:
return False

if survey.any_repeat(parent_xpath):
return parent_xpath
for item in survey.iter_descendants():
if item.type == constants.REPEAT and item.get_xpath() == parent_xpath:
return parent_xpath

return is_parent_a_repeat(survey, parent_xpath)

Expand All @@ -98,7 +99,7 @@ def share_same_repeat_parent(survey, xpath, context_xpath, reference_parent=Fals
parent.
For example,
xpath = /data/repeat_a/group_a/name
xpath = /data/repeat_a/group_a/name
context_xpath = /data/repeat_a/group_b/age
returns (2, '/group_a/name')'
Expand Down Expand Up @@ -995,13 +996,13 @@ def __unicode__(self):
return f"<pyxform.survey.Survey instance at {hex(id(self))}>"

def _setup_xpath_dictionary(self):
self._xpath = {} # pylint: disable=attribute-defined-outside-init
self._xpath: dict[str, SurveyElement | None] = {} # pylint: disable=attribute-defined-outside-init
for element in self.iter_descendants():
if isinstance(element, Question | Section):
if element.name in self._xpath:
self._xpath[element.name] = None
else:
self._xpath[element.name] = element.get_xpath()
self._xpath[element.name] = element

def _var_repl_function(
self, matchobj, context, use_current=False, reference_parent=False
Expand Down Expand Up @@ -1036,7 +1037,7 @@ def _in_secondary_instance_predicate() -> bool:
def _relative_path(ref_name: str, _use_current: bool) -> str | None:
"""Given name in ${name}, return relative xpath to ${name}."""
return_path = None
xpath = self._xpath[ref_name]
xpath = self._xpath[ref_name].get_xpath()
context_xpath = context.get_xpath()
# share same root i.e repeat_a from /data/repeat_a/...
if (
Expand All @@ -1045,13 +1046,17 @@ def _relative_path(ref_name: str, _use_current: bool) -> str | None:
):
# if context xpath and target xpath fall under the same
# repeat use relative xpath referencing.
steps, ref_path = share_same_repeat_parent(
self, xpath, context_xpath, reference_parent
)
if steps:
ref_path = ref_path if ref_path.endswith(ref_name) else f"/{name}"
prefix = " current()/" if _use_current else " "
return_path = prefix + "/".join([".."] * steps) + ref_path + " "
relation = context.has_common_repeat_parent(self._xpath[ref_name])
if relation[0] == "Unrelated":
return return_path
else:
steps, ref_path = share_same_repeat_parent(
self, xpath, context_xpath, reference_parent
)
if steps:
ref_path = ref_path if ref_path.endswith(ref_name) else f"/{name}"
prefix = " current()/" if _use_current else " "
return_path = prefix + "/".join([".."] * steps) + ref_path + " "

return return_path

Expand Down Expand Up @@ -1122,7 +1127,7 @@ def _is_return_relative_path() -> bool:
last_saved_prefix = (
"instance('" + LAST_SAVED_INSTANCE_NAME + "')" if last_saved else ""
)
return " " + last_saved_prefix + self._xpath[name] + " "
return " " + last_saved_prefix + self._xpath[name].get_xpath() + " "

def insert_xpaths(self, text, context, use_current=False, reference_parent=False):
"""
Expand Down
89 changes: 64 additions & 25 deletions pyxform/survey_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import json
import re
from collections import deque
from functools import lru_cache
from typing import TYPE_CHECKING, Any, ClassVar
from collections.abc import Generator
from typing import TYPE_CHECKING, Any, ClassVar, Optional

from pyxform import aliases as alias
from pyxform import constants as const
Expand Down Expand Up @@ -65,19 +65,8 @@ def _overlay(over, under):
return over if over else under


@lru_cache(maxsize=65536)
def any_repeat(survey_element: "SurveyElement", parent_xpath: str) -> bool:
"""Return True if there ia any repeat in `parent_xpath`."""
for item in survey_element.iter_descendants():
if item.get_xpath() == parent_xpath and item.type == const.REPEAT:
return True

return False


SURVEY_ELEMENT_LOCAL_KEYS = {
SURVEY_ELEMENT_LOCAL_KEYS = {
"_survey_element_default_type",
"_survey_element_repeats",
"_survey_element_xpath",
}

Expand Down Expand Up @@ -115,8 +104,7 @@ def __hash__(self):

def __setattr__(self, key, value):
if key == "parent":
# Object graph local changes invalidate these cached facts.
self._survey_element_repeats = {}
# If object graph position changes then invalidate cached.
self._survey_element_xpath = None
self[key] = value

Expand All @@ -125,7 +113,6 @@ def __init__(self, **kwargs):
"question_type_dict", QUESTION_TYPE_DICT
).get(kwargs.get("type"), {})
self._survey_element_xpath: str | None = None
self._survey_element_repeats: dict = {}
for key, default in self.FIELDS.items():
self[key] = kwargs.get(key, default())
self._link_children()
Expand Down Expand Up @@ -176,14 +163,65 @@ def iter_descendants(self):
for e in self.children:
yield from e.iter_descendants()

def any_repeat(self, parent_xpath: str) -> bool:
"""Return True if there ia any repeat in `parent_xpath`."""
current_value = self._dict_get("_survey_element_repeats")
if parent_xpath not in current_value:
new_value = any_repeat(survey_element=self, parent_xpath=parent_xpath)
current_value[parent_xpath] = new_value
return new_value
return current_value[parent_xpath]
def iter_ancestors(self) -> Generator[tuple["SurveyElement", int], None, None]:
"""Get each self.parent with their distance from self (starting at 1)."""
distance = 1
current = self.parent
while current is not None:
yield current, distance
current = current.parent
distance += 1

def has_common_repeat_parent(
self, other: "SurveyElement"
) -> tuple[str, int | None, Optional["SurveyElement"]]:
"""
Get the relation type, steps (generations), and the common ancestor.
"""
# Quick check for immediate relation.
if self.parent is other:
return "Parent (other)", 1, other
elif other.parent is self:
return "Parent (self)", 1, self

# Traversal tracking
self_ancestors = {}
other_ancestors = {}
self_current = self
other_current = other
self_distance = 0
other_distance = 0

# Traverse up both ancestor chains as far as necessary.
while self_current or other_current:
# Step up the self chain
if self_current:
self_distance += 1
self_current = self_current.parent
if self_current:
self_ancestors[self_current] = self_distance
if (
self_current.type == const.REPEAT
and self_current in other_ancestors
):
max_steps = max(self_distance, other_ancestors[self_current])
return "Common Ancestor Repeat", max_steps, self_current

# Step up the other chain
if other_current:
other_distance += 1
other_current = other_current.parent
if other_current:
other_ancestors[other_current] = other_distance
if (
other_current.type == const.REPEAT
and other_current in self_ancestors
):
max_steps = max(other_distance, self_ancestors[other_current])
return "Common Ancestor Repeat", max_steps, other_current

# No common ancestor found.
return "Unrelated", None, None

def get_lineage(self):
"""
Expand Down Expand Up @@ -244,6 +282,7 @@ def to_json_dict(self):
"parent",
"question_type_dictionary",
"_created",
"_xpath",
*SURVEY_ELEMENT_LOCAL_KEYS,
]
# Delete all keys that may cause a "Circular Reference"
Expand Down

0 comments on commit 3a9d0bb

Please sign in to comment.