From ebe37a90170f12ac4cbb4b4d31d06e261242eab6 Mon Sep 17 00:00:00 2001 From: invisig0th Date: Mon, 1 Feb 2021 16:55:19 -0500 Subject: [PATCH] Added model interfaces and msoffice mime forms (#2040) Added model interfaces and msoffice mime forms --- synapse/datamodel.py | 41 +++++++++++++++ synapse/models/files.py | 62 ++++++++++++++++++++++ synapse/tests/test_datamodel.py | 24 ++++++++- synapse/tests/test_model_files.py | 85 +++++++++++++++++++++++++++++++ 4 files changed, 211 insertions(+), 1 deletion(-) diff --git a/synapse/datamodel.py b/synapse/datamodel.py index 31519f2a2f..3ad440295a 100644 --- a/synapse/datamodel.py +++ b/synapse/datamodel.py @@ -254,6 +254,8 @@ def __init__(self, modl, name, info): self.type.form = self self.props = {} # name: Prop() + self.ifaces = {} # name: + self.refsout = None self.locked = False @@ -419,6 +421,7 @@ def __init__(self): self.types = {} # name: Type() self.forms = {} # name: Form() self.props = {} # (form,name): Prop() and full: Prop() + self.ifaces = {} # name: self.tagprops = {} # name: TagProp() self.formabbr = {} # name: [Form(), ... ] self.modeldefs = [] @@ -427,6 +430,8 @@ def __init__(self): self.propsbytype = collections.defaultdict(list) # name: Prop() self.arraysbytype = collections.defaultdict(list) + # TODO use this for -> foo:iface + self.formsbyiface = collections.defaultdict(list) self._type_pends = collections.defaultdict(list) self._modeldef = { @@ -560,6 +565,7 @@ def getModelDict(self): 'types': {}, 'forms': {}, 'tagprops': {}, + 'interfaces': {}, } for tobj in self.types.values(): @@ -626,6 +632,11 @@ def addDataModels(self, mods): typeinfo['custom'] = custom self.addType(typename, basename, typeopts, typeinfo) + # load all the interfaces... + for _, mdef in mods: + for name, info in mdef.get('interfaces', ()): + self.addIface(name, info) + # Load all the universal properties for _, mdef in mods: for univname, typedef, univinfo in mdef.get('univs', ()): @@ -687,6 +698,11 @@ def addForm(self, formname, forminfo, propdefs): propname, typedef, propinfo = propdef self._addFormProp(form, propname, typedef, propinfo) + # interfaces are listed in typeinfo for the form to + # maintain backward compatibility for populated models + for ifname in form.type.info.get('interfaces', ()): + self._addFormIface(form, ifname) + return form def delForm(self, formname): @@ -698,9 +714,16 @@ def delForm(self, formname): if isinstance(form.type, s_types.Array): self.arraysbytype[form.type.arraytype.name].remove(form) + for ifname in form.ifaces.keys(): + self.formsbyiface[ifname].remove(form) + self.forms.pop(formname, None) self.props.pop(formname, None) + def addIface(self, name, info): + # TODO should we add some meta-props here for queries? + self.ifaces[name] = info + def delType(self, typename): _type = self.types.get(typename) @@ -755,6 +778,24 @@ def _addFormProp(self, form, name, tdef, info): self.props[prop.full] = prop return prop + def _addFormIface(self, form, name): + + iface = self.ifaces.get(name) + + if iface is None: + mesg = f'Form {form.name} depends on non-existant interface: {name}' + raise s_exc.NoSuchName(mesg=mesg) + + for propname, typedef, propinfo in iface.get('props', ()): + self._addFormProp(form, propname, typedef, propinfo) + + # TODO use this to allow storm: +foo:iface + form.ifaces[name] = iface + self.formsbyiface[name].append(form) + + for ifname in iface.get('interfaces', ()): + self._addFormIface(form, ifname) + def delTagProp(self, name): return self.tagprops.pop(name) diff --git a/synapse/models/files.py b/synapse/models/files.py index d8b6fcc207..000a0ecb5d 100644 --- a/synapse/models/files.py +++ b/synapse/models/files.py @@ -183,6 +183,39 @@ def getModelDefs(self): 'ex': 'c:/windows/system32/calc.exe'}), ), + 'interfaces': ( + ('file:mime:meta', { + 'props': ( + ('file', ('file:bytes', {}), { + 'doc': 'The file that the mime info was parsed from.'}), + ('file:offs', ('int', {}), { + 'doc': 'The optional offset where the mime info was parsed from.'}), + ('file:data', ('data', {}), { + 'doc': 'A mime specific arbitrary data structure for non-indexed data.', + }), + ), + 'doc': 'Properties common to mime specific file metadata types.', + }), + ('file:mime:msoffice', { + 'props': ( + ('title', ('str', {}), { + 'doc': 'The title extracted from Microsoft Office metadata.'}), + ('author', ('str', {}), { + 'doc': 'The author extracted from Microsoft Office metadata.'}), + ('subject', ('str', {}), { + 'doc': 'The subject extracted from Microsoft Office metadata.'}), + ('application', ('str', {}), { + 'doc': 'The creating_application extracted from Microsoft Office metadata.'}), + ('created', ('time', {}), { + 'doc': 'The create_time extracted from Microsoft Office metadata.'}), + ('lastsaved', ('time', {}), { + 'doc': 'The last_saved_time extracted from Microsoft Office metadata.'}), + ), + 'doc': 'Properties common to various microsoft office file formats.', + 'interfaces': ('file:mime:meta',), + }), + ), + 'types': ( ('file:subfile', ('comp', {'fields': (('parent', 'file:bytes'), ('child', 'file:bytes'))}), { @@ -202,6 +235,26 @@ def getModelDefs(self): 'doc': 'Records one, of potentially multiple, mime types for a given file.', }), + ('file:mime:msdoc', ('guid', {}), { + 'doc': 'The GUID of a set of mime metadata for a Microsoft Word file.', + 'interfaces': ('file:mime:msoffice',), + }), + + ('file:mime:msxls', ('guid', {}), { + 'doc': 'The GUID of a set of mime metadata for a Microsoft Excel file.', + 'interfaces': ('file:mime:msoffice',), + }), + + ('file:mime:msppt', ('guid', {}), { + 'doc': 'The GUID of a set of mime metadata for a Microsoft Powerpoint file.', + 'interfaces': ('file:mime:msoffice',), + }), + + ('file:mime:rtf', ('guid', {}), { + 'doc': 'The GUID of a set of mime metadata for a .rtf file.', + 'interfaces': ('file:mime:meta',), + }), + ('file:mime:pe:section', ('comp', {'fields': ( ('file', 'file:bytes'), ('name', 'str'), @@ -313,6 +366,15 @@ def getModelDefs(self): }), )), + ('file:mime:msdoc', {}, ()), + ('file:mime:msxls', {}, ()), + ('file:mime:msppt', {}, ()), + + ('file:mime:rtf', {}, ( + ('guid', ('guid', {}), { + 'doc': 'The parsed GUID embedded in the .rtf file.'}), + )), + ('file:mime:pe:section', {}, ( ('file', ('file:bytes', {}), { 'ro': True, diff --git a/synapse/tests/test_datamodel.py b/synapse/tests/test_datamodel.py index 63402b13c1..3254dcb042 100644 --- a/synapse/tests/test_datamodel.py +++ b/synapse/tests/test_datamodel.py @@ -57,6 +57,24 @@ async def test_datmodel_formname(self): with self.raises(s_exc.BadFormDef): modl.addDataModels(mods) + async def test_datamodel_no_interface(self): + modl = s_datamodel.Model() + mods = ( + ('hehe', { + 'types': ( + ('test:derp', ('int', {}), { + 'interfaces': ('foo:bar',), + }), + ), + 'forms': ( + ('test:derp', {}, ()), + ), + }), + ) + + with self.raises(s_exc.NoSuchName): + modl.addDataModels(mods) + async def test_datamodel_dynamics(self): modl = s_datamodel.Model() @@ -88,8 +106,10 @@ async def test_datamodel_dynamics(self): with self.raises(s_exc.NoSuchUniv): modl.delUnivProp('newp') + modl.addIface('test:iface', {}) + modl.addType('bar', 'int', {}, {}) - modl.addType('foo:foo', 'int', {}, {}) + modl.addType('foo:foo', 'int', {}, {'interfaces': ('test:iface',)}) modl.addForm('foo:foo', {}, ()) modl.addFormProp('foo:foo', 'bar', ('bar', {}), {}) @@ -97,6 +117,8 @@ async def test_datamodel_dynamics(self): with self.raises(s_exc.CantDelType): modl.delType('bar') + modl.delForm('foo:foo') + async def test_datamodel_del_prop(self): modl = s_datamodel.Model() diff --git a/synapse/tests/test_model_files.py b/synapse/tests/test_model_files.py index 6a3ddffdb0..e557b77bdb 100644 --- a/synapse/tests/test_model_files.py +++ b/synapse/tests/test_model_files.py @@ -216,3 +216,88 @@ async def test_model_file_ismime(self): node = nodes[0] self.eq(node.ndef, ('file:ismime', (guid, 'text/plain'))) + + async def test_model_file_mime_msoffice(self): + + async with self.getTestCore() as core: + + fileguid = s_common.guid() + opts = {'vars': {'fileguid': f'guid:{fileguid}'}} + + def testmsoffice(n): + self.eq('lolz', n.get('title')) + self.eq('deep_value', n.get('author')) + self.eq('GME stonks', n.get('subject')) + self.eq('stonktrader3000', n.get('application')) + self.eq(1611100800000, n.get('created')) + self.eq(1611187200000, n.get('lastsaved')) + + self.eq(f'guid:{fileguid}', n.get('file')) + self.eq(0, n.get('file:offs')) + self.eq(('foo', 'bar'), n.get('file:data')) + + nodes = await core.nodes('''[ + file:mime:msdoc=* + :file=$fileguid + :file:offs=0 + :file:data=(foo, bar) + :title=lolz + :author=deep_value + :subject="GME stonks" + :application=stonktrader3000 + :created=20210120 + :lastsaved=20210121 + ]''', opts=opts) + self.len(1, nodes) + testmsoffice(nodes[0]) + + nodes = await core.nodes('''[ + file:mime:msxls=* + :file=$fileguid + :file:offs=0 + :file:data=(foo, bar) + :title=lolz + :author=deep_value + :subject="GME stonks" + :application=stonktrader3000 + :created=20210120 + :lastsaved=20210121 + ]''', opts=opts) + self.len(1, nodes) + testmsoffice(nodes[0]) + + nodes = await core.nodes('''[ + file:mime:msppt=* + :file=$fileguid + :file:offs=0 + :file:data=(foo, bar) + :title=lolz + :author=deep_value + :subject="GME stonks" + :application=stonktrader3000 + :created=20210120 + :lastsaved=20210121 + ]''', opts=opts) + self.len(1, nodes) + testmsoffice(nodes[0]) + + async def test_model_file_mime_rtf(self): + + async with self.getTestCore() as core: + + fileguid = s_common.guid() + opts = {'vars': {'fileguid': f'guid:{fileguid}'}} + + nodes = await core.nodes('''[ + file:mime:rtf=* + :file=$fileguid + :file:offs=0 + :file:data=(foo, bar) + :guid=aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + ]''', opts=opts) + + self.len(1, nodes) + self.eq(f'guid:{fileguid}', nodes[0].get('file')) + self.eq(0, nodes[0].get('file:offs')) + self.eq(('foo', 'bar'), nodes[0].get('file:data')) + self.eq('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', nodes[0].get('guid'))