Skip to content

Commit

Permalink
fix get_doc_char_span and covering span (#78)
Browse files: browse the repository at this point in the history
  • Loading branch information
tamuhey authored Aug 15, 2020
1 parent 2890576 commit 2ac9db9
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 11 deletions.
6 changes: 3 additions & 3 deletions camphr/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ def token_from_char_pos(doc: Doc, i: int) -> Token:
return doc[bisect.bisect(token_idxs, i) - 1]


def _get_covering_span(doc: Doc, i: int, j: int) -> Span:
def _get_covering_span(doc: Doc, i: int, j: int, **kwargs) -> Span:
token_idxs = [t.idx for t in doc]
i = bisect.bisect(token_idxs, i) - 1
j = bisect.bisect_left(token_idxs, j)
return doc[i:j]
return Span(doc, i, j, **kwargs)


def destruct_token(doc: Doc, *char_pos: int) -> Doc:
Expand All @@ -84,7 +84,7 @@ def get_doc_char_span(
"""
span = doc.char_span(i, j, **kwargs)
if not span and covering:
span = _get_covering_span(doc, i, j)
span = _get_covering_span(doc, i, j, **kwargs)
if not span and destructive:
destruct_token(doc, i, j)
span = doc.char_span(i, j, **kwargs)
Expand Down
20 changes: 12 additions & 8 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,20 @@ def test_split_keepsep(text, sep, expected):


@pytest.mark.parametrize(
"tokens,i,j,destructive,covering,expected",
"tokens,i,j,destructive,covering,expected,label",
[
(["Foo", "bar", "baz"], 0, 2, True, False, "Fo"),
(["Foo", "bar", "baz"], 0, 5, True, False, "Foo b"),
(["Foo", "bar", "baz"], 0, 5, False, True, "Foo bar"),
(["Foo", "bar", "baz"], 0, 3, False, True, "Foo"),
(["Foo", "bar", "baz"], 1, 5, False, True, "Foo bar"),
(["Foo", "bar", "baz"], 0, 2, True, False, "Fo", None),
(["Foo", "bar", "baz"], 0, 5, True, False, "Foo b", None),
(["Foo", "bar", "baz"], 0, 5, False, True, "Foo bar", None),
(["Foo", "bar", "baz"], 0, 3, False, True, "Foo", None),
(["Foo", "bar", "baz"], 1, 5, False, True, "Foo bar", "LABEL"),
],
)
def test_get_doc_char_span(vocab, tokens, i, j, destructive, covering, expected):
def test_get_doc_char_span(vocab, tokens, i, j, destructive, covering, expected, label):
doc = Doc(vocab, tokens)
span = get_doc_char_span(doc, i, j, destructive=destructive, covering=covering)
span = get_doc_char_span(
doc, i, j, destructive=destructive, covering=covering, label=label or ""
)
assert span.text == expected
if label is not None:
assert span.label_ == label

0 comments on commit 2ac9db9

Please sign in to comment.