Skip to content

Commit

Permalink
Improved bracket matching. Fixes also #9
Browse files Browse the repository at this point in the history
  • Loading branch information
tdegeus committed Oct 11, 2022
1 parent ad9fa8a commit 9f08c05
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 23 deletions.
86 changes: 73 additions & 13 deletions bibtexparser/customization.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,61 @@ def splitname(name, strict_mode=True):
return parts


def find_matching(
    text: str,
    opening: str,
    closing: str,
    ignore_escaped: bool = True,
) -> dict:
    r"""
    Find matching 'brackets'.

    The brackets are treated as literal strings (they are regex-escaped
    internally), so characters such as "(" or "[" are safe to pass.

    :param text: The string to consider.
    :param opening: The opening bracket (e.g. "(", "[", "{").
    :param closing: The closing bracket (e.g. ")", "]", "}").
    :param ignore_escaped: Ignore escaped bracket (e.g. "\(", "\[", "\{", "\)", "\]", "\}").
        A bracket counts as escaped when it is directly preceded by a backslash.
    :return: Dictionary with ``{index_opening: index_closing}``
    :raises IndexError: If a closing bracket has no opening bracket before it,
        or an opening bracket is never closed.
    """

    # Negative lookbehind: skip brackets directly preceded by a backslash.
    prefix = r"(?<!\\)" if ignore_escaped else ""
    open_pattern = prefix + re.escape(opening)
    close_pattern = prefix + re.escape(closing)

    # Collect (position, is_opening) events. An explicit flag is used instead
    # of negating the index, because index 0 cannot carry a sign.
    events = [(m.start(), True) for m in re.finditer(open_pattern, text)]
    events += [(m.start(), False) for m in re.finditer(close_pattern, text)]
    # Stable sort on position only: walk the brackets in text order.
    events.sort(key=lambda event: event[0])

    ret = {}
    stack = []  # positions of opening brackets that are still unclosed

    for position, is_opening in events:
        if is_opening:
            stack.append(position)
        elif not stack:
            raise IndexError(f"No opening {opening} for {closing} at: {position:d}")
        else:
            ret[stack.pop()] = position

    if stack:
        raise IndexError(f"No closing {closing} for {opening} at: {stack[-1]:d}")

    return ret


def getnames(names):
"""Convert people names as surname, firstnames
or surname, initials.
Expand All @@ -323,21 +378,26 @@ def getnames(names):
firsts = [i.strip() for i in namesplit[1].split()]
else:
if "{" in namestring and "}" in namestring:
try:
brackets = find_matching(namestring, "{", "}")
except IndexError:
tidynames.append(namestring)
continue
namesplit = []
opening = 0
start = 0
for i in range(len(namestring)):
print(namestring[i], i, opening)
if namestring[i] == "{":
opening += 1
elif namestring[i] == "}":
opening -= 1
if opening == 0:
if namestring[i] == " ":
namesplit.append(namestring[start:i])
start = i + 1
elif i == len(namestring) - 1:
namesplit.append(namestring[start:])
i = 0
while True:
if i in brackets:
i = brackets[i]
else:
i += 1
if i >= len(namestring):
break
if namestring[i] == " ":
namesplit.append(namestring[start:i])
start = i + 1
elif i == len(namestring) - 1:
namesplit.append(namestring[start:])
else:
namesplit = namestring.split()
last = namesplit.pop()
Expand Down
19 changes: 9 additions & 10 deletions bibtexparser/tests/test_customization.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@ def test_getnames(self):
'Jean la Tour',
'Jean le Tour',
'Mike ben Akar',
'A. {Delgado de Molina}',
r'M. Vign{\'e}',
'Tom {de Geus}',
'Foo B{\'a}r',
'{G{\'{e}}rard} {Ben Arous}',
r'{G{\'{e}}rard} {Ben Arous}',
'Incorrect {{name}',
#'Jean de la Tour',
#'Johannes Diderik van der Waals',
]
Expand All @@ -38,21 +41,17 @@ def test_getnames(self):
'la Tour, Jean',
'le Tour, Jean',
'ben Akar, Mike',
'{Delgado de Molina}, A.',
r'Vign{\'e}, M.',
'{de Geus}, Tom',
'B{\'a}r, Foo'
'{Ben Arous}, {G{\'{e}}rard}',
'B{\'a}r, Foo',
r'{Ben Arous}, {G{\'{e}}rard}',
'Incorrect {{name}',
#'de la Tour, Jean',
#'van der Waals, Johannes Diderik',
]
self.assertEqual(result, expected)

@unittest.skip('Bug #9')
def test_getnames_braces(self):
names = ['A. {Delgado de Molina}', r'M. Vign{\'e}']
result = getnames(names)
expected = ['Delgado de Molina, A.', 'Vigné, M.']
self.assertEqual(result, expected)

###########
# page_double_hyphen
###########
Expand Down

0 comments on commit 9f08c05

Please sign in to comment.