Skip to content

Commit

Permalink
Merge pull request #44 from josephbredahl/annotations
Browse files Browse the repository at this point in the history
Fix annotation and namespace issue (continued)
  • Loading branch information
jombr authored Mar 28, 2024
2 parents 813bc86 + 12073d9 commit 03dff10
Show file tree
Hide file tree
Showing 10 changed files with 212 additions and 49 deletions.
25 changes: 20 additions & 5 deletions rnc2rng/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def pprint(n, level=0):
'DATATYPES', 'DEFAULT_NS', 'DEFINE', 'DIV', 'DOCUMENTATION', 'ELEM',
'EMPTY', 'EXCEPT', 'GRAMMAR', 'GROUP', 'INTERLEAVE', 'LIST', 'LITERAL',
'MAYBE', 'MIXED', 'NAME', 'NOT_ALLOWED', 'NS', 'PARAM', 'PARENT', 'REF',
'ROOT', 'SEQ', 'SOME', 'TEXT',
'ROOT', 'SEQ', 'SOME', 'TEXT', 'LITERAL_TYPE'
]

for _node_type in NODE_TYPES:
Expand Down Expand Up @@ -392,7 +392,7 @@ def particle_some(s, p):
def particle_primary(s, p):
return p[0]

@pg.production('annotated-primary : LPAREN pattern RPAREN')
@pg.production('primary : LPAREN pattern RPAREN')
def annotated_primary_group(s, p):
return Node('GROUP', None, p[1])

Expand Down Expand Up @@ -427,7 +427,11 @@ def primary_literal(s, p): # from datatypeValue

@pg.production('primary : CNAME')
def primary_cname(s, p):
return Node('DATATAG', p[0].value.split(':', 1)[1])
return Node('DATATAG', p[0].value)

@pg.production('primary : CNAME strlit')
def primary_ctyped_string(s, p):
    """Build a LITERAL node for a string literal preceded by a CNAME token.

    The literal's text (``p[1].value``) becomes the node name; the CNAME
    text (``p[0].value``) is attached unchanged as a single LITERAL_TYPE
    child node.
    """
    literal_text = p[1].value
    datatype = Node('LITERAL_TYPE', p[0].value)
    return Node('LITERAL', literal_text, [datatype])

@pg.production('primary : CNAME LBRACE params RBRACE')
def primary_type_params(s, p):
Expand All @@ -439,12 +443,20 @@ def primary_string(s, p):

@pg.production('primary : STRING strlit')
def primary_typed_string(s, p):
return Node('DATATAG', 'string', [p[1].value])
return Node('LITERAL', p[1].value, [Node('LITERAL_TYPE', 'string')])

@pg.production('primary : STRING LBRACE params RBRACE')
def primary_string_parametrized(s, p):
    """Build a ``string`` DATATAG node whose value is the ``params`` result (``p[2]``)."""
    params = p[2]
    return Node('DATATAG', 'string', params)

@pg.production('primary : TOKEN')
def primary_token(s, p):
    """Build a DATATAG node for a bare ``token`` datatype.

    Named ``primary_token`` rather than ``primary_text`` so that it does not
    share a name with the other production handlers in this module; the
    handler is registered through the decorator, not looked up by name.
    """
    return Node('DATATAG', 'token')

@pg.production('primary : TOKEN strlit')
def primary_token_literal(s, p):
    """Build a LITERAL node for a string literal preceded by ``token``.

    No LITERAL_TYPE child is attached because ``token`` is the default
    type of a literal value.  Named ``primary_token_literal`` rather than
    ``primary_text`` so that it does not share a name with the other
    production handlers in this module; the handler is registered through
    the decorator, not looked up by name.
    """
    return Node('LITERAL', p[1].value)

@pg.production('primary : TEXT')
def primary_text(s, p):
    """Build the TEXT node (no name) for the ``primary : TEXT`` production."""
    text_node = Node('TEXT', None)
    return text_node
Expand Down Expand Up @@ -532,7 +544,10 @@ def name_class_group(s, p):
@pg.production('documentations : DOCUMENTATION documentations')
def documentations_multi(s, p):
cur = Node('DOCUMENTATION', None, []) if not p[1] else p[1][0]
cur.value.insert(0, p[0].value.lstrip('# ').rstrip('\r'))
content = p[0].value.lstrip('#').rstrip('\r') # strip all leading "#" ( left-recursion in documentationLineContent)
if content.startswith(' '):
content = content[1:] # strip *one* " ", but no more (now the production is readOfLine)
cur.value.insert(0, content)
return [cur]

@pg.production('documentations : ')
Expand Down
100 changes: 68 additions & 32 deletions rnc2rng/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
ANNO_ATTR, ANNOTATION, ANY, ASSIGN, ATTR, CHOICE, DATATAG, DATATYPES,
DEFAULT_NS, DEFINE, DIV, DOCUMENTATION, ELEM, EMPTY, EXCEPT, GRAMMAR,
GROUP, INTERLEAVE, LIST, LITERAL, MAYBE, MIXED, NAME, NOT_ALLOWED, NS,
PARAM, PARENT, REF, ROOT, SEQ, SOME, TEXT,
PARAM, PARENT, REF, ROOT, SEQ, SOME, TEXT, LITERAL_TYPE
)

import html

QUANTS = {SOME: 'oneOrMore', MAYBE: 'optional', ANY: 'zeroOrMore'}
TYPELIB_NS = 'http://www.w3.org/2001/XMLSchema-datatypes'
TYPELIBS = {
'xsd': 'http://www.w3.org/2001/XMLSchema-datatypes'
}
NAMESPACES = {
'a': 'http://relaxng.org/ns/compatibility/annotations/1.0',
'xml': 'http://www.w3.org/XML/1998/namespace',
Expand All @@ -25,15 +27,20 @@ def __init__(self, indent=None):

def reset(self):
self.buf = []
self.needs = {}
self.types = None
self.ns = {}
self.typelibs = {}
self.default = ''
self.level = 0

def write(self, s):
    """Append ``s`` to the output buffer, prefixed by the current indentation.

    The prefix is ``self.indent`` repeated ``self.level`` times.
    """
    prefix = self.indent * self.level
    self.buf.append(prefix + s)

def datatype_library(self, prefix):
    """Return the datatype library URI registered for ``prefix``.

    A prefix already present in ``self.typelibs`` is returned as-is.
    Otherwise the URI is taken from the module-level ``TYPELIBS`` table
    and recorded in ``self.typelibs`` before being returned.  A prefix
    known to neither table trips the assertion.
    """
    known = self.typelibs
    assert prefix in known or prefix in TYPELIBS, prefix
    if prefix in known:
        return known[prefix]
    uri = known[prefix] = TYPELIBS[prefix]
    return uri

def namespace(self, ns):
assert ns in self.ns or ns in NAMESPACES, ns
if ns not in self.ns:
Expand All @@ -43,11 +50,9 @@ def namespace(self, ns):
def toxml(self, node):

self.reset()
types = None
for n in node.value:
if n.type == DATATYPES:
types = n.value[0]
self.types = types
self.typelibs[n.name] = n.value[0]
elif n.type == DEFAULT_NS:
self.default = n.value[0]
if n.name is not None:
Expand All @@ -63,9 +68,10 @@ def toxml(self, node):
self.visit(node.value)
for ns, url in sorted(self.ns.items()):
prelude.append(' xmlns:%s="%s"' % (ns, url))
if types is not None or self.needs.get('types'):
url = types if types is not None else TYPELIB_NS
prelude.append(' datatypeLibrary="%s"' % url)

# if xsd:* ever referenced, print it at the grammar level
if 'xsd' in self.typelibs:
prelude.append(' datatypeLibrary="%s"' % self.typelibs['xsd'])

prelude[-1] = prelude[-1] + '>'
self.write('</grammar>')
Expand All @@ -78,6 +84,19 @@ def anno_attrs(self, nodes):
return ''
return ' ' + ' '.join('%s="%s"' % attr for attr in pairs)

def type_attrs(self, name):
    """Return the XML attribute string describing a datatype reference.

    ``name`` is either a bare built-in type (``string`` or ``token``) or a
    ``prefix:local`` name whose prefix is resolved through
    ``self.datatype_library``.  The result starts with a space and always
    contains ``type="..."``.  ``datatypeLibrary="..."`` is appended unless
    the library can be inherited from the grammar element.
    """
    if ':' in name:
        prefix, name = name.split(':', 1)
        ns = self.datatype_library(prefix)
    else:
        assert name in ('string', 'token')  # these are the only "built-in" datatypes
        ns = ""

    attrs = ' type="%s"' % name
    xsd_uri = TYPELIBS['xsd']
    # toxml() declares self.typelibs['xsd'] on the grammar element, and only
    # when that prefix is registered.  The attribute may therefore be left
    # out only if the library is the XSD one AND 'xsd' is (or now becomes)
    # bound to that same URI.  This covers an alias prefix pointing at the
    # XSD URI as well as a user-redefined 'xsd' prefix.
    if ns == xsd_uri and self.typelibs.setdefault('xsd', xsd_uri) == xsd_uri:
        return attrs
    # Write every other library explicitly; escape the URI because it ends
    # up inside a double-quoted XML attribute value.
    return attrs + ' datatypeLibrary="%s"' % html.escape(ns)

def visit(self, nodes, ctx=None, indent=True):
'''Visiting a list of nodes, writes out the XML content to the internal
line-based buffer. By default, adds one level of indentation to the
Expand All @@ -89,22 +108,22 @@ def visit(self, nodes, ctx=None, indent=True):

if not isinstance(x, parser.Node):
raise TypeError("Not a Node: " + repr(x))
elif x.type in set([ANNO_ATTR, DATATYPES, DEFAULT_NS, NS]):
elif x.type in set([ANNO_ATTR, LITERAL_TYPE, DATATYPES, DEFAULT_NS, NS]):
continue

attribs = self.anno_attrs(x.value)
if x.type == DEFINE:

op, attrib = x.value[0].name, ''
if op in set(['|=', '&=']):
modes = {'|': 'choice', '&': 'interleave'}
attrib = ' combine="%s"' % modes[op[0]]
for op in (x.name for x in x.value if x.type == 'ASSIGN'):
modes = {'|=': 'choice', '&=': 'interleave'}
if op in modes:
attribs = (' combine="%s"' % modes[op]) + attribs
break;

if x.name == 'start':
self.write('<start%s%s>' % (attrib, attribs))
self.write('<start%s>' % attribs)
else:
bits = x.name, attrib, attribs
self.write('<define name="%s"%s%s>' % bits)
bits = x.name, attribs
self.write('<define name="%s"%s>' % bits)

self.visit(x.value)
if x.name == 'start':
Expand Down Expand Up @@ -158,9 +177,20 @@ def visit(self, nodes, ctx=None, indent=True):
self.write('<name ns="%s">%s</name>' % (ns, name))
elif x.type in set([REF, PARENT]):
bits = x.type.lower(), x.name, attribs
self.write('<%s name="%s"%s/>' % bits)
if not x.value: # no parameters
self.write('<%s name="%s"%s/>' % bits)
else:
self.write('<%s name="%s"%s>' % bits)
self.visit(x.value)
self.write('</%s>' % x.type.lower())
elif x.type == LITERAL:
types = [n.name for n in x.value if isinstance(n, parser.Node) and n.type == LITERAL_TYPE]
if types:
assert len(types) == 1
attribs += self.type_attrs(types[0])

bits = attribs, html.escape(x.name)

self.write('<value%s>%s</value>' % bits)
self.visit(x.value, indent=False)
elif x.type == ANNOTATION:
Expand All @@ -178,6 +208,11 @@ def visit(self, nodes, ctx=None, indent=True):
tail = html.escape(''.join(literals)) + '</%s>' % x.name

bits = x.name, attribs, end, tail

if ':' in x.name:
parts = x.name.split(':', 1)
ns = self.namespace(parts[0])

self.write('<%s%s%s>%s' % bits)
if not rest:
continue
Expand All @@ -195,9 +230,12 @@ def visit(self, nodes, ctx=None, indent=True):
self.write('</%s>' % x.name)

elif x.type == DOCUMENTATION:
self.namespace('a')
fmt = '<a:documentation>%s</a:documentation>'
self.write(fmt % html.escape('\n'.join(x.value)))
xmlns_attr = ''
if self.namespace('a') != NAMESPACES['a']:
xmlns_attr = ' xmlns:a="%s"' % NAMESPACES['a'] # the user is already using namespace a: for something else

fmt = '<a:documentation%s>%s</a:documentation>'
self.write(fmt % (xmlns_attr, html.escape('\n'.join(x.value))))
elif x.type == GROUP:
if len(x.value) == 1 and x.value[0].type != SEQ:
self.visit(x.value, indent=False)
Expand All @@ -212,14 +250,10 @@ def visit(self, nodes, ctx=None, indent=True):
elif x.type == SEQ:
self.visit(x.value, indent=False)
elif x.type == DATATAG:
self.needs['types'] = True
if not x.value: # no parameters
self.write('<data type="%s"/>' % x.name)
self.write('<data%s/>' % self.type_attrs(x.name))
else:
name = x.name
if name not in ('string', 'token'):
name = x.name.split(':', 1)[1]
self.write('<data type="%s">' % name)
self.write('<data%s>' % self.type_attrs(x.name))
self.visit(x.value)
self.write('</data>')
elif x.type == PARAM:
Expand All @@ -234,11 +268,13 @@ def visit(self, nodes, ctx=None, indent=True):
self.visit(x.value, ctx=x.type)
self.write('</attribute>')
elif x.type == ROOT:
# Verify the included document has the same metadata
for n in x.value:
# Record included document's custom datatypes
if n.type == DATATYPES:
assert self.types == n.value[0]
elif n.type == DEFAULT_NS:
self.typelibs[n.name] = n.value[0]

# Verify the included document has the same metadata
if n.type == DEFAULT_NS:
assert self.default == n.value[0]
elif n.type == NS:
assert n.name in self.ns
Expand Down
16 changes: 15 additions & 1 deletion tests/annotations.rnc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
namespace x = "http://www.example.com"
namespace dc = "http://purl.org/dc/elements/1.1/"
namespace sch = "http://www.ascc.net/xml/schematron"
namespace a = "http://relaxng.org/ns/compatibility/annotations/1.0"

x:entity [ name="picture" systemId="picture.jpg" notation="jpg" ]
dc:title [ "Foo without contents & escaped" ]
Expand Down Expand Up @@ -39,5 +40,18 @@ div {
start = foo

## documentation for definition
## continues on the next line
## indented continuation on the next line
## # subheading with leading # (perhaps markdown-style head)
bar = element bar { empty }

baz = element baz {
## documentation for a group
(
foo,
## documentation for a ref
bar
)
}

## combining definition
baz |= empty
19 changes: 18 additions & 1 deletion tests/annotations.rng
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,27 @@
</start>
<define name="bar">
<a:documentation>documentation for definition
continues on the next line</a:documentation>
indented continuation on the next line
# subheading with leading # (perhaps markdown-style head)</a:documentation>
<element>
<name ns="">bar</name>
<empty/>
</element>
</define>
<define name="baz">
<element>
<name ns="">baz</name>
<group>
<a:documentation>documentation for a group</a:documentation>
<ref name="foo"/>
<ref name="bar">
<a:documentation>documentation for a ref</a:documentation>
</ref>
</group>
</element>
</define>
<define name="baz" combine="choice">
<a:documentation>combining definition</a:documentation>
<empty/>
</define>
</grammar>
13 changes: 12 additions & 1 deletion tests/datatypes.rnc
Original file line number Diff line number Diff line change
@@ -1,2 +1,13 @@
datatypes xsd = "http://www.w3.org/2001/XMLSchema-datatypes"
element height { xsd:double }
datatypes custom="uri:custom-datatype-library"

start = element token { token },
element token_abc { token "abc" },
element string { string },
element string_abc { string "abc" },
element xsd_string { xsd:string },
element xsd_string_abc { xsd:string "abc" },
element xsd_double { xsd:double },
element xsd_double_42 { xsd:double "42" },
element custom_foo { custom:foo },
element custom_foo_abc { custom:foo "abc" }
38 changes: 37 additions & 1 deletion tests/datatypes.rng
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,44 @@
datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
<start>
<element>
<name ns="">height</name>
<name ns="">token</name>
<data type="token" datatypeLibrary=""/>
</element>
<element>
<name ns="">token_abc</name>
<value>abc</value>
</element>
<element>
<name ns="">string</name>
<data type="string" datatypeLibrary=""/>
</element>
<element>
<name ns="">string_abc</name>
<value type="string" datatypeLibrary="">abc</value>
</element>
<element>
<name ns="">xsd_string</name>
<data type="string"/>
</element>
<element>
<name ns="">xsd_string_abc</name>
<value type="string">abc</value>
</element>
<element>
<name ns="">xsd_double</name>
<data type="double"/>
</element>
<element>
<name ns="">xsd_double_42</name>
<value type="double">42</value>
</element>
<element>
<name ns="">custom_foo</name>
<data type="foo" datatypeLibrary="uri:custom-datatype-library"/>
</element>
<element>
<name ns="">custom_foo_abc</name>
<value type="foo" datatypeLibrary="uri:custom-datatype-library">abc</value>
</element>
</start>
</grammar>
Loading

0 comments on commit 03dff10

Please sign in to comment.