Skip to content

Commit

Permalink
Add 'ace' codec for the 'convert' subcommand
Browse files Browse the repository at this point in the history
This change makes it easier to convert the MRS output of ACE without
having to massage the data stream first. It works with normal ACE
output and --tsdb-stdout output. For the former, it uses the SENT:
line, if available, to set the 'surface' attribute of the following
MRSs (until the next double-newline, indicating the end of the results
list).

Resolves #92
  • Loading branch information
goodmami committed Aug 22, 2018
1 parent 4b64321 commit 49868c5
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ changes are prefixed with "**BREAKING**"
* The `convert` command can take a `--predicate-modifiers` option which
attempts to rejoin disconnected EDS graphs that fit certain criteria
* Documentation for implementing an ACE preprocessor (#91)
* `ace` as a `--from` codec for the `convert` subcommand, which reads
SimpleMRS strings from ACE output (#92)

### Changed

Expand Down
36 changes: 36 additions & 0 deletions delphin/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from delphin.mrs.components import Lnk
from delphin import itsdb
from delphin.repp import REPP
from delphin.util import SExpr


def main():
Expand Down Expand Up @@ -45,6 +46,7 @@ def convert(args):
from delphin.extra import latex
codecs = {
'simplemrs': (simplemrs.loads, simplemrs.dumps),
'ace': (_read_ace_parse, None),
'mrx': (mrx.loads, mrx.dumps),
'dmrx': (dmrx.loads, dmrx.dumps),
'eds': (eds.loads, eds.dumps),
Expand Down Expand Up @@ -332,6 +334,40 @@ def do_trace(stream):

## Helper definitions

# read simplemrs from ACE output

def _read_ace_parse(s):
    """
    Yield MRS objects read from the text of ACE's output.

    Two line formats are recognized:

    * regular ACE output, where a ``SENT: `` line gives the input
      sentence and each following line starting with ``[`` holds a
      SimpleMRS string (anything after ``' ; '`` is discarded); the
      sentence is assigned to the ``surface`` attribute of each such
      MRS
    * ``--tsdb-stdout`` output, where lines starting with ``(`` hold
      S-expressions; every ``:mrs`` value found under a ``:results``
      entry is parsed as SimpleMRS (no surface string is assigned)

    Two consecutive blank lines are taken as the end of a results
    list, after which the remembered sentence is forgotten so it
    cannot leak onto MRSs that are not preceded by their own
    ``SENT: `` line. All other lines are ignored.

    Args:
        s: the complete ACE output as a single string
    Yields:
        the objects returned by ``simplemrs.loads(..., single=True)``
    """
    from delphin.mrs import simplemrs
    surface = None
    blank_run = 0  # number of consecutive blank lines just seen
    for line in s.splitlines():
        # str.splitlines() drops the line terminators, so a blank line
        # shows up as '' (never as '\n')
        if not line.strip():
            blank_run += 1
            if blank_run >= 2:
                surface = None
            continue
        # any non-blank line interrupts a run of blank lines
        blank_run = 0
        if line.startswith('SENT: '):
            surface = line[6:]
        # regular ACE output
        elif line.startswith('['):
            m = line.partition(' ; ')[0].strip()
            m = simplemrs.loads(m, single=True)
            m.surface = surface
            yield m
        # with --tsdb-stdout
        elif line.startswith('('):
            # a line may hold several S-expressions back to back; keep
            # parsing the remainder until nothing is left
            while line:
                expr = SExpr.parse(line)
                line = expr.remainder.lstrip()
                if len(expr.data) == 2 and expr.data[0] == ':results':
                    for result in expr.data[1]:
                        for key, val in result:
                            if key == ':mrs':
                                yield simplemrs.loads(val, single=True)

# simulate json codecs for MRS and DMRS

class _MRS_JSON(object):
Expand Down

0 comments on commit 49868c5

Please sign in to comment.