# Convenience methods

def preterminals(self):
    """
    Return the list of preterminals (i.e. lexical grammar-entities).

    A node counts as a preterminal when at least one of its daughters
    is a UdfTerminal. The subtree under this node is searched
    recursively, following the order of `self.daughters`, so results
    come out in left-to-right order.

    Returns:
        A list of nodes; empty if no terminal is found under this
        node.
    """
    nodes = []
    listed_self = False
    for dtr in self.daughters:
        if isinstance(dtr, UdfTerminal):
            # List this node only once, at the position of its first
            # terminal daughter; appending per terminal would produce
            # duplicates for a node with several terminal daughters.
            if not listed_self:
                nodes.append(self)
                listed_self = True
        else:
            nodes.extend(dtr.preterminals())
    return nodes


def terminals(self):
    """
    Return the list of terminals (i.e. lexical units).

    Every UdfTerminal daughter of this node is collected directly;
    any other daughter is searched recursively. Daughters are visited
    in the order of `self.daughters`, so results come out in
    left-to-right order.

    Returns:
        A list of UdfTerminal objects; empty if there are none under
        this node.
    """
    nodes = []
    for dtr in self.daughters:
        if isinstance(dtr, UdfTerminal):
            nodes.append(dtr)
        else:
            nodes.extend(dtr.terminals())
    return nodes
def test_preterminals(self):
    # Minimal derivation: two lexical nodes, each over a bare terminal.
    simple_udf = (
        '(root (1 some-thing -1 -1 -1'
        ' (2 a-thing -1 -1 -1 ("a"))'
        ' (3 b-thing -1 -1 -1 ("b"))))'
    )
    simple = D.from_string(simple_udf)
    simple_ids = [node.id for node in simple.preterminals()]
    assert simple_ids == [2, 3]

    # Derivation with entity types, scores, spans, and token lists;
    # only the lexical nodes (ids 2 and 5) should be reported.
    tokenized_udf = (
        '(root'
        ' (1 some-thing@some-type 0.4 0 5'
        ' (2 a-lex@a-type 0.8 0 1'
        ' ("a b"'
        ' 3 "token [ +FORM \\"a\\" ]"'
        ' 4 "token [ +FORM \\"b\\" ]"))'
        ' (5 b-lex@b-type 0.9 1 2'
        ' ("b"'
        ' 6 "token [ +FORM \\"b\\" ]"))))'
    )
    tokenized = D.from_string(tokenized_udf)
    tokenized_ids = [node.id for node in tokenized.preterminals()]
    assert tokenized_ids == [2, 5]


def test_terminals(self):
    # Minimal derivation: the terminal forms are the quoted strings.
    simple_udf = (
        '(root (1 some-thing -1 -1 -1'
        ' (2 a-thing -1 -1 -1 ("a"))'
        ' (3 b-thing -1 -1 -1 ("b"))))'
    )
    simple = D.from_string(simple_udf)
    simple_forms = [term.form for term in simple.terminals()]
    assert simple_forms == ['a', 'b']

    # A terminal carrying two tokens is still a single terminal whose
    # form is the full quoted string.
    tokenized_udf = (
        '(root'
        ' (1 some-thing@some-type 0.4 0 5'
        ' (2 a-lex@a-type 0.8 0 1'
        ' ("a b"'
        ' 3 "token [ +FORM \\"a\\" ]"'
        ' 4 "token [ +FORM \\"b\\" ]"))'
        ' (5 b-lex@b-type 0.9 1 2'
        ' ("b"'
        ' 6 "token [ +FORM \\"b\\" ]"))))'
    )
    tokenized = D.from_string(tokenized_udf)
    tokenized_forms = [term.form for term in tokenized.terminals()]
    assert tokenized_forms == ['a b', 'b']