From 45ec55ec9d3bf729fc3f76b13b2ab9089a6269ee Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 15 Aug 2022 14:12:28 -0700 Subject: [PATCH 01/27] Build ingest query --- cartography/client/__init__py | 0 cartography/client/core/tx.py | 27 +++ cartography/graph/cleanup_query.py | 81 +++++++++ cartography/graph/querybuilder.py | 261 ++++++++++++++++++----------- 4 files changed, 271 insertions(+), 98 deletions(-) delete mode 100644 cartography/client/__init__py create mode 100644 cartography/graph/cleanup_query.py diff --git a/cartography/client/__init__py b/cartography/client/__init__py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cartography/client/core/tx.py b/cartography/client/core/tx.py index a9c1e2572..179aa51dc 100644 --- a/cartography/client/core/tx.py +++ b/cartography/client/core/tx.py @@ -146,3 +146,30 @@ def read_single_dict_tx(tx: neo4j.Transaction, query: str, **kwargs) -> Dict[str result.consume() return value + + +def _write_list_of_dicts_tx( + tx: neo4j.Transaction, + query: str, + dict_list: List[Dict[Any, Any]], + update_tag: int, + **kwargs, +) -> None: + # TODO batch this to 10k items by default and make the batch size configurable + tx.run(query, DictList=dict_list, UpdateTag=update_tag, kwargs=kwargs) + + +def load_graph_data( + neo4j_session: neo4j.Session, + query: str, + dict_list: List[Dict[Any, Any]], + update_tag: int, + **kwargs, +) -> None: + neo4j_session.write_transaction( + _write_list_of_dicts_tx, + query, + dict_list, + update_tag, + kwargs, + ) diff --git a/cartography/graph/cleanup_query.py b/cartography/graph/cleanup_query.py new file mode 100644 index 000000000..b6c0253a2 --- /dev/null +++ b/cartography/graph/cleanup_query.py @@ -0,0 +1,81 @@ +from typing import List +from string import Template + + +def build_cleanup_queries( + node_label: str, + rel_label: str, + sub_resource_label: str, + sub_resource_value: str, + sub_resource_key: str = None, + cleanup_rel: bool = True, +) -> List[str]: + # 
Convention: we must always point from the sub resource out to the resource. + node_cleanup_query_template = Template( + """ + MATCH (n:$node_label)<-[:$rel_label]-(:$sub_resource_label{$sub_resource_key: {$sub_resource_value}}) + WHERE n.lastupdated <> {UPDATE_TAG} + WITH n + LIMIT {LIMIT_SIZE} + DETACH DELETE (n) + """ + ) + if not sub_resource_key: + sub_resource_key = 'id' + + node_cleanup_query = node_cleanup_query_template.safe_substitute( + node_label=node_label, + rel_label=rel_label, + sub_resource_key=sub_resource_key, + sub_resource_label=sub_resource_label, + sub_resource_value=sub_resource_value, + ) + + result = [node_cleanup_query] + if cleanup_rel: + rel_cleanup_query_template = Template( + """ + MATCH (:$node_label)<-[r:$rel_label]-(:$sub_resource_label{$sub_resource_key: {$sub_resource_value}}) + WHERE r.lastupdated <> {UPDATE_TAG} + WITH r LIMIT {LIMIT_SIZE} + DELETE r + """ + ) + rel_cleanup_query = rel_cleanup_query_template.safe_substitute( + node_label=node_label, + rel_label=rel_label, + sub_resource_label=sub_resource_label, + sub_resource_key=sub_resource_key, + sub_resource_value=sub_resource_value, + ) + result.append(rel_cleanup_query) + + return result + + +def build_remove_attribute_query( + attribute_name: str, + node_label: str, + rel_label: str, + sub_resource_label: str, + sub_resource_value: str, + sub_resource_key: str = None, +) -> str: + attribute_removal_template = Template( + """ + MATCH (n:$node_label)<-[:$rel_label]-(:$sub_resource_label{$sub_resource_key: $sub_resource_value}) + WHERE EXISTS (n.$attribute_name) + REMOVE n.$attribute_name + """ + ) + if not sub_resource_key: + sub_resource_key = 'id' + + return attribute_removal_template.safe_substitute( + node_label=node_label, + rel_label=rel_label, + sub_resource_label=sub_resource_label, + sub_resource_key=sub_resource_key, + sub_resource_value=sub_resource_value, + attribute_name=attribute_name, + ) \ No newline at end of file diff --git 
a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index f46f79ed4..1251c0d29 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -1,118 +1,183 @@ from string import Template -from typing import Dict +from typing import Dict, Optional +from typing import List +from enum import Enum +from enum import auto -def build_node_ingestion_query(node_label: str, node_property_map: Dict[str, str]) -> str: - """ - Generates Neo4j query string to write a list of dicts as nodes to the graph with the - given node_label, id_field, and other arbitrary fields as provided by field_list. The - resulting query looks like +class LinkDirection(Enum): + OUTWARD = auto() + INWARD = auto() - UNWIND {DictList} AS item - MERGE (i:`node_label`{id:item.`node_property_map['id']`}) - ON CREATE SET i.firstseen = timestamp() - SET i.lastupdated = {UpdateTag}, - ... ... - Note that `node_property_map` **must** have an `id` key defined. +class PropertyRef: + def __init__(self, name: str, static=False): + self.name = name + self.static = static - :param node_label: The label of the nodes to write, e.g. EC2Instance - :param node_property_map: A mapping of node property names to dict key names. - :return: A Neo4j query string using the UNWIND + MERGE pattern to write a list of nodes - in batch. This exposes 2 parameters: `{DictList}` accepts a list of dictionaries to - write as nodes to the graph, and `{UpdateTag}` is the standard cartography int update tag. 
- """ - if 'id' not in node_property_map or not node_property_map['id']: - raise ValueError('node_property_map must have key `id` set.') + def _parameterize_name(self) -> str: + # TODO in neo4j 4.x, we will want to change this to `${self.name}` instead + # of "{" + self.name "}" + return "{" + self.name + "}" - ingest_preamble_template = Template(""" - UNWIND {DictList} AS item - MERGE (i:$NodeLabel{id:item.$DictIdField}) - ON CREATE SET i.firstseen = timestamp() - SET i.lastupdated = {UpdateTag}""") - ingest_fields_template = Template(' i.$NodeProperty = item.$DictProperty') + def __repr__(self) -> str: + return f"item.{self.name}" if not self.static else self._parameterize_name() - ingest_preamble = ingest_preamble_template.safe_substitute( - NodeLabel=node_label, DictIdField=node_property_map['id'], - ) + +class CartographyLink: + def __init__( + self, + label: str, + key: str, + dict_field_ref: PropertyRef, + rel_label: str, + direction: LinkDirection = None, + rel_property_map: Dict[str, PropertyRef] = None, + ): + self.label = label + self.key = key + self.dict_field_ref = dict_field_ref + self.rel_label = rel_label + self.direction = LinkDirection.INWARD if not direction else direction + self.rel_property_map = rel_property_map + + +def _build_node_properties_statement( + node_property_map: Dict[str, PropertyRef], + node_extra_labels: List[str], +) -> Optional[str]: + ingest_fields_template = Template(' i.$node_property = $property_ref') + set_clause = 'i.lastupdated = {UpdateTag}' # If the node_property_map contains more than just `id`, generate a SET statement for the other fields. if len(node_property_map.keys()) > 1: - set_clause = ',\n'.join([ - ingest_fields_template.safe_substitute(NodeProperty=node_property, DictProperty=dict_property) - for node_property, dict_property in node_property_map.items() - if not node_property == 'id' # Make sure to exclude setting the `id` again. 
+ set_clause += ',\n' + ',\n'.join([ + ingest_fields_template.safe_substitute(node_property=node_property, property_ref=property_ref) + for node_property, property_ref in node_property_map.items() + if node_property != 'id' # Make sure to exclude setting the `id` again. ]) - ingest_query = ingest_preamble + ",\n" + set_clause - else: - ingest_query = ingest_preamble - return ingest_query + # Set extra labels on the node if specified + if node_extra_labels: + extra_labels = ':'.join([label for label in node_extra_labels]) + set_clause += f",\n i:{extra_labels}" + return set_clause -def build_relationship_ingestion_query( - node_label_a: str, search_property_a: str, dict_key_a: str, - node_label_b: str, search_property_b: str, dict_key_b: str, - rel_label: str, - rel_property_map: Dict[str, str] = None, -) -> str: - """ - Generates Neo4j query string that looks like - UNWIND {RelMappingList} AS item - MATCH (a:`node_label_a`{`search_property_a`:item.`dict_key_a`}) - MATCH (b:`node_label_b`{`search_property_b`:item.`dict_key_b}) - MERGE (a)-[r:`rel_label`]->(b) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = {UpdateTag}, - ... ... - - To summarize, for each dict in RelMappingList, we create the paths - `($NodeA)-[:$RELATIONSHIP_NAME]->($NodeB)`. - - :param node_label_a: The label of $NodeA. - :param search_property_a: the search key to used to search the graph to find node A. For - performance, this should be an indexed property. If your graph is large, querying on - non-indexed properties can cause your syncs to take **days** to run! - :param dict_key_a: The dict key on what value of `search_property_a` to search for. - :param node_label_b: The label of $NodeB. - :param search_property_b: the search key to used to search the graph to find node B. For - performance, this should be an indexed property. If your graph is large, querying on - non-indexed properties can cause your syncs to take **days** to run! 
- :param dict_key_b: The dict key on what value of `search_property_b` to search for. - :param rel_label: The $RELATIONSHIP_NAME from $NodeA to $NodeB. - :param rel_property_map: Optional mapping of relationship property names to set and their - corresponding keys on the input data dict. Note: relationships in Neo4j 3.5 cannot be indexed - so performing searches on them is slow. Reconsider your schema design if you expect to need - to run queries using relationship fields as search keys. - :return: Neo4j query string to draw relationships between $NodeA and $NodeB. This exposes 2 - parameters: `{RelMappingList}` accepts a list of dictionaries to write as relationships to the - graph, and `{UpdateTag}` is the standard cartography int update tag. - """ - ingest_preamble_template = Template(""" - UNWIND {RelMappingList} AS item - MATCH (a:$NodeLabelA{$SearchPropertyA:item.$DictKeyA}) - MATCH (b:$NodeLabelB{$SearchPropertyB:item.$DictKeyB}) - MERGE (a)-[r:$LabelR]->(b) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = {UpdateTag}""") - ingest_fields_template = Template(' r.$RelProperty = item.$DictProperty') - - ingest_preamble = ingest_preamble_template.safe_substitute( - NodeLabelA=node_label_a, - SearchPropertyA=search_property_a, - DictKeyA=dict_key_a, - NodeLabelB=node_label_b, - SearchPropertyB=search_property_b, - DictKeyB=dict_key_b, - LabelR=rel_label, - ) +def _build_rel_properties_statement(rel_var: str, rel_property_map: Dict[str, PropertyRef] = None) -> str: + set_clause = rel_var + '.lastupdated = {UpdateTag}' + ingest_fields_template = Template(' $rel_var.$rel_property = $property_ref') if rel_property_map: - set_clause = ',\n'.join([ - ingest_fields_template.safe_substitute(RelProperty=rel_property, DictProperty=dict_property) - for rel_property, dict_property in rel_property_map.items() + set_clause += ',\n' + ',\n'.join([ + ingest_fields_template.safe_substitute( + rel_var=rel_var, + rel_property=rel_property, + 
property_ref=property_ref, + ) + for rel_property, property_ref in rel_property_map.items() ]) - ingest_query = ingest_preamble + ",\n" + set_clause + return set_clause + + +def _build_attach_sub_resource_statement(sub_resource_link: CartographyLink) -> str: + """ + Attaches sub resource to node i. + """ + sub_resource_attach_template = Template(""" + WITH i, item + MATCH (j:$SubResourceLabel{$SubResourceKey: $SubResourceRef}) + $RelMergeClause + ON CREATE SET r.firstseen = timestamp() + SET + $set_rel_properties_statement + """) + + if sub_resource_link.direction == LinkDirection.INWARD: + rel_merge_template = Template("""MERGE (i)<-[r:$SubResourceRelLabel]-(j)""") else: - ingest_query = ingest_preamble + rel_merge_template = Template("""MERGE (i)-[r:$SubResourceRelLabel]->(j)""") + + rel_merge_clause = rel_merge_template.safe_substitute(SubResourceRelLabel=sub_resource_link.rel_label) + + attach_sub_resource_statement = sub_resource_attach_template.safe_substitute( + SubResourceLabel=sub_resource_link.label, + SubResourceKey=sub_resource_link.key, + SubResourceRef=sub_resource_link.dict_field_ref, + RelMergeClause=rel_merge_clause, + SubResourceRelLabel=sub_resource_link.rel_label, + set_rel_properties_statement=_build_rel_properties_statement('r', sub_resource_link.rel_property_map) + ) + return attach_sub_resource_statement + + +def _build_attach_additional_links_statement(additional_links: List[CartographyLink]) -> str: + """ + Attaches one or more CartographyLinks to node i. 
+ """ + additional_links_template = Template(""" + WITH i, item + MATCH ($node_var:$AddlLabel{$AddlKey: $AddlRef}) + $RelMerge + ON CREATE SET $rel_var.firstseen = timestamp() + SET + $set_rel_properties_statement + """ + ) + links = [] + for num, link in enumerate(additional_links): + node_var = f"n{num}" + rel_var = f"r{num}" + + if link.direction == LinkDirection.INWARD: + rel_merge_template = Template("""MERGE (i)-[$rel_var:$AddlRelLabel]->($node_var)""") + else: + rel_merge_template = Template("""MERGE (i)-[$rel_var:$AddlRelLabel]->($node_var)""") + + rel_merge = rel_merge_template.safe_substitute( + rel_var=rel_var, + AddlRelLabel=link.rel_label, + node_var=node_var, + ) + + additional_ref = additional_links_template.safe_substitute( + AddlLabel=link.label, + AddlKey=link.key, + AddlRef=link.dict_field_ref, + node_var=node_var, + rel_var=rel_var, + RelMerge=rel_merge, + set_rel_properties_statement=_build_rel_properties_statement(rel_var, link.rel_property_map) + ) + links.append(additional_ref) + + return '\n'.join(links) + + +def build_ingest_query( + node_label: str, + node_property_map: Dict[str, PropertyRef], + sub_resource_link: CartographyLink, + additional_links: List[CartographyLink] = None, + node_extra_labels: List[str] = None, +) -> str: + query_template = Template(""" + UNWIND {DictList} AS item + MERGE (i:$node_label{id: item.$dict_id_field}) + ON CREATE SET i.firstseen = timestamp() + SET + $set_node_properties_statement + $attach_sub_resource_statement + $attach_additional_links_statement + """) + if 'id' not in node_property_map or not node_property_map['id']: + raise ValueError('node_property_map must have key `id` set.') + + ingest_query = query_template.safe_substitute( + node_label=node_label, + dict_id_field=node_property_map['id'], + set_node_properties_statement=_build_node_properties_statement(node_property_map, node_extra_labels), + attach_sub_resource_statement=_build_attach_sub_resource_statement(sub_resource_link), + 
attach_additional_links_statement=_build_attach_additional_links_statement(additional_links), + ) return ingest_query From a641ad08371779ac54eff62df9636c710f8afb14 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 15 Aug 2022 14:13:07 -0700 Subject: [PATCH 02/27] Linter --- cartography/graph/cleanup_query.py | 12 ++++++------ cartography/graph/querybuilder.py | 18 ++++++++++-------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/cartography/graph/cleanup_query.py b/cartography/graph/cleanup_query.py index b6c0253a2..64ebdb998 100644 --- a/cartography/graph/cleanup_query.py +++ b/cartography/graph/cleanup_query.py @@ -1,5 +1,5 @@ -from typing import List from string import Template +from typing import List def build_cleanup_queries( @@ -16,9 +16,9 @@ def build_cleanup_queries( MATCH (n:$node_label)<-[:$rel_label]-(:$sub_resource_label{$sub_resource_key: {$sub_resource_value}}) WHERE n.lastupdated <> {UPDATE_TAG} WITH n - LIMIT {LIMIT_SIZE} + LIMIT {LIMIT_SIZE} DETACH DELETE (n) - """ + """, ) if not sub_resource_key: sub_resource_key = 'id' @@ -39,7 +39,7 @@ def build_cleanup_queries( WHERE r.lastupdated <> {UPDATE_TAG} WITH r LIMIT {LIMIT_SIZE} DELETE r - """ + """, ) rel_cleanup_query = rel_cleanup_query_template.safe_substitute( node_label=node_label, @@ -66,7 +66,7 @@ def build_remove_attribute_query( MATCH (n:$node_label)<-[:$rel_label]-(:$sub_resource_label{$sub_resource_key: $sub_resource_value}) WHERE EXISTS (n.$attribute_name) REMOVE n.$attribute_name - """ + """, ) if not sub_resource_key: sub_resource_key = 'id' @@ -78,4 +78,4 @@ def build_remove_attribute_query( sub_resource_key=sub_resource_key, sub_resource_value=sub_resource_value, attribute_name=attribute_name, - ) \ No newline at end of file + ) diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index 1251c0d29..cd50cdfa3 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -1,8 +1,9 @@ +from enum import auto 
+from enum import Enum from string import Template -from typing import Dict, Optional +from typing import Dict from typing import List -from enum import Enum -from enum import auto +from typing import Optional class LinkDirection(Enum): @@ -106,7 +107,7 @@ def _build_attach_sub_resource_statement(sub_resource_link: CartographyLink) -> SubResourceRef=sub_resource_link.dict_field_ref, RelMergeClause=rel_merge_clause, SubResourceRelLabel=sub_resource_link.rel_label, - set_rel_properties_statement=_build_rel_properties_statement('r', sub_resource_link.rel_property_map) + set_rel_properties_statement=_build_rel_properties_statement('r', sub_resource_link.rel_property_map), ) return attach_sub_resource_statement @@ -115,14 +116,15 @@ def _build_attach_additional_links_statement(additional_links: List[CartographyL """ Attaches one or more CartographyLinks to node i. """ - additional_links_template = Template(""" + additional_links_template = Template( + """ WITH i, item MATCH ($node_var:$AddlLabel{$AddlKey: $AddlRef}) $RelMerge ON CREATE SET $rel_var.firstseen = timestamp() SET $set_rel_properties_statement - """ + """, ) links = [] for num, link in enumerate(additional_links): @@ -147,7 +149,7 @@ def _build_attach_additional_links_statement(additional_links: List[CartographyL node_var=node_var, rel_var=rel_var, RelMerge=rel_merge, - set_rel_properties_statement=_build_rel_properties_statement(rel_var, link.rel_property_map) + set_rel_properties_statement=_build_rel_properties_statement(rel_var, link.rel_property_map), ) links.append(additional_ref) @@ -165,7 +167,7 @@ def build_ingest_query( UNWIND {DictList} AS item MERGE (i:$node_label{id: item.$dict_id_field}) ON CREATE SET i.firstseen = timestamp() - SET + SET $set_node_properties_statement $attach_sub_resource_statement $attach_additional_links_statement From 5b3c83c1b2193c89fb2f5c4e4fa9a01617bad948 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 15 Aug 2022 14:19:24 -0700 Subject: [PATCH 03/27] Save cleanup 
query for another PR --- cartography/graph/cleanup_query.py | 81 ------------------------------ 1 file changed, 81 deletions(-) delete mode 100644 cartography/graph/cleanup_query.py diff --git a/cartography/graph/cleanup_query.py b/cartography/graph/cleanup_query.py deleted file mode 100644 index 64ebdb998..000000000 --- a/cartography/graph/cleanup_query.py +++ /dev/null @@ -1,81 +0,0 @@ -from string import Template -from typing import List - - -def build_cleanup_queries( - node_label: str, - rel_label: str, - sub_resource_label: str, - sub_resource_value: str, - sub_resource_key: str = None, - cleanup_rel: bool = True, -) -> List[str]: - # Convention: we must always point from the sub resource out to the resource. - node_cleanup_query_template = Template( - """ - MATCH (n:$node_label)<-[:$rel_label]-(:$sub_resource_label{$sub_resource_key: {$sub_resource_value}}) - WHERE n.lastupdated <> {UPDATE_TAG} - WITH n - LIMIT {LIMIT_SIZE} - DETACH DELETE (n) - """, - ) - if not sub_resource_key: - sub_resource_key = 'id' - - node_cleanup_query = node_cleanup_query_template.safe_substitute( - node_label=node_label, - rel_label=rel_label, - sub_resource_key=sub_resource_key, - sub_resource_label=sub_resource_label, - sub_resource_value=sub_resource_value, - ) - - result = [node_cleanup_query] - if cleanup_rel: - rel_cleanup_query_template = Template( - """ - MATCH (:$node_label)<-[r:$rel_label]-(:$sub_resource_label{$sub_resource_key: {$sub_resource_value}}) - WHERE r.lastupdated <> {UPDATE_TAG} - WITH r LIMIT {LIMIT_SIZE} - DELETE r - """, - ) - rel_cleanup_query = rel_cleanup_query_template.safe_substitute( - node_label=node_label, - rel_label=rel_label, - sub_resource_label=sub_resource_label, - sub_resource_key=sub_resource_key, - sub_resource_value=sub_resource_value, - ) - result.append(rel_cleanup_query) - - return result - - -def build_remove_attribute_query( - attribute_name: str, - node_label: str, - rel_label: str, - sub_resource_label: str, - sub_resource_value: 
str, - sub_resource_key: str = None, -) -> str: - attribute_removal_template = Template( - """ - MATCH (n:$node_label)<-[:$rel_label]-(:$sub_resource_label{$sub_resource_key: $sub_resource_value}) - WHERE EXISTS (n.$attribute_name) - REMOVE n.$attribute_name - """, - ) - if not sub_resource_key: - sub_resource_key = 'id' - - return attribute_removal_template.safe_substitute( - node_label=node_label, - rel_label=rel_label, - sub_resource_label=sub_resource_label, - sub_resource_key=sub_resource_key, - sub_resource_value=sub_resource_value, - attribute_name=attribute_name, - ) From 71edc8b1cf1ca227c696209eeb6b0dfd3fb98f2a Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Tue, 22 Nov 2022 21:44:29 -0800 Subject: [PATCH 04/27] Implement schema --- cartography/client/core/tx.py | 10 +- cartography/graph/model.py | 152 ++++++++++++++++ cartography/graph/querybuilder.py | 167 ++++++++++-------- cartography/intel/aws/emr.py | 116 +++++++----- .../cartography/intel/aws/test_emr.py | 4 +- tests/unit/cartography/graph/helpers.py | 10 ++ .../cartography/graph/test_querybuilder.py | 90 ---------- .../graph/test_querybuilder_complex.py | 132 ++++++++++++++ .../graph/test_querybuilder_simple.py | 107 +++++++++++ 9 files changed, 570 insertions(+), 218 deletions(-) create mode 100644 cartography/graph/model.py create mode 100644 tests/unit/cartography/graph/helpers.py delete mode 100644 tests/unit/cartography/graph/test_querybuilder.py create mode 100644 tests/unit/cartography/graph/test_querybuilder_complex.py create mode 100644 tests/unit/cartography/graph/test_querybuilder_simple.py diff --git a/cartography/client/core/tx.py b/cartography/client/core/tx.py index 179aa51dc..430650ba3 100644 --- a/cartography/client/core/tx.py +++ b/cartography/client/core/tx.py @@ -151,25 +151,19 @@ def read_single_dict_tx(tx: neo4j.Transaction, query: str, **kwargs) -> Dict[str def _write_list_of_dicts_tx( tx: neo4j.Transaction, query: str, - dict_list: List[Dict[Any, Any]], - update_tag: int, 
**kwargs, ) -> None: # TODO batch this to 10k items by default and make the batch size configurable - tx.run(query, DictList=dict_list, UpdateTag=update_tag, kwargs=kwargs) + tx.run(query, kwargs) def load_graph_data( neo4j_session: neo4j.Session, query: str, - dict_list: List[Dict[Any, Any]], - update_tag: int, **kwargs, ) -> None: neo4j_session.write_transaction( _write_list_of_dicts_tx, query, - dict_list, - update_tag, - kwargs, + **kwargs, ) diff --git a/cartography/graph/model.py b/cartography/graph/model.py new file mode 100644 index 000000000..a02d53fbe --- /dev/null +++ b/cartography/graph/model.py @@ -0,0 +1,152 @@ +import abc +from dataclasses import dataclass +from dataclasses import field +from enum import auto +from enum import Enum +from typing import List +from typing import Optional + + +class LinkDirection(Enum): + OUTWARD = auto() + INWARD = auto() + + +class PropertyRef: + """ + We dynamically build Neo4j queries and allow module authors to define a schema for their + nodes and relationships. + + The end result is we write dicts to Neo4j. To define nodes and rels, we need a mechanism + to allow the schema to refer to properties on the data dict. + + A PropertyRef is how we reference properties on the data dict when dynamically creating + queries. + """ + + def __init__(self, name: str, static=False): + # The name of the property as seen on the data dict + self.name = name + # If true, the property is not defined on the data dict. Otherwise look for the property + # in the data dict. 
+ # TODO consider naming this something better + self.static = static + + def _parameterize_name(self) -> str: + return f"${self.name}" + + def __repr__(self) -> str: + return f"item.{self.name}" if not self.static else self._parameterize_name() + + +@dataclass +class CartographyNodeProperties(abc.ABC): + # Enforce that all subclasses will have an id and a lastupdated field + id: PropertyRef = field(init=False) + lastupdated: PropertyRef = field(init=False) + + def __post_init__(self): + if self.__class__ == CartographyNodeProperties: + raise TypeError("Cannot instantiate abstract class.") + + +@dataclass +class CartographyRelProperties(abc.ABC): + lastupdated: PropertyRef = field(init=False) + + +@dataclass +class CartographyRelSchema(abc.ABC): + @property + @abc.abstractmethod + def properties(self) -> CartographyRelProperties: + pass + + @property + @abc.abstractmethod + def target_node_label(self) -> str: + pass + + @property + @abc.abstractmethod + def target_node_key(self) -> str: + pass + + @property + @abc.abstractmethod + def rel_label(self) -> str: + pass + + @property + @abc.abstractmethod + def direction(self) -> LinkDirection: + pass + + @property + @abc.abstractmethod + # TODO name this something better maybe + def dict_field_ref(self) -> PropertyRef: + pass + + +@dataclass +class CartographyNodeSchema(abc.ABC): + _extra_labels: Optional[List[str]] = field(init=False, default=None) + _other_relationships: Optional[List[CartographyRelSchema]] = field(init=False, default=None) + + @property + @abc.abstractmethod + def label(self) -> str: + """ + :return: The primary str label of the node. + """ + pass + + @property + @abc.abstractmethod + def properties(self) -> CartographyNodeProperties: + """ + :return: The properties of the node. + """ + pass + + @property + def subresource_relationship(self) -> Optional[CartographyRelSchema]: + """ + Optional. + Allows subclasses to specify a subresource relationship for the given node. 
"Subresource" is a term we made up + best defined by examples: + - In the AWS module, the subresource is an AWSAccount + - In Azure, the subresource is a Subscription + - In GCP, the subresource is a GCPProject + - In Okta, the subresource is an OktaOrganization + ... and so on and so forth. + :return: + """ + return None + + @property + def other_relationships(self) -> Optional[List[CartographyRelSchema]]: + """ + Optional. + Allows subclasses to specify additional cartography relationships on the node. + :return: None of not overriden. Else return a list of CartographyRelSchema associated with the node. + """ + return self._other_relationships + + @other_relationships.setter + def other_relationships(self, other_rels: List[CartographyRelSchema]) -> None: + self._other_relationships = other_rels + + @property + def extra_labels(self) -> Optional[List[str]]: + """ + Optional. + Allows subclasses to specify extra labels on the node. + :return: None if not overriden. Else return a str list of the extra labels specified on the node. 
+ """ + return self._extra_labels + + @extra_labels.setter + def extra_labels(self, labels: List[str]) -> None: + self._extra_labels = labels diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index cd50cdfa3..5d7934f8d 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -1,58 +1,51 @@ -from enum import auto -from enum import Enum +import logging +from copy import copy +from dataclasses import asdict +from dataclasses import Field +from dataclasses import field from string import Template +from typing import Any from typing import Dict from typing import List from typing import Optional +from cartography.graph.model import CartographyNodeProperties +from cartography.graph.model import CartographyNodeSchema +from cartography.graph.model import CartographyRelSchema +from cartography.graph.model import LinkDirection +from cartography.graph.model import PropertyRef -class LinkDirection(Enum): - OUTWARD = auto() - INWARD = auto() +logger = logging.getLogger(__name__) -class PropertyRef: - def __init__(self, name: str, static=False): - self.name = name - self.static = static - def _parameterize_name(self) -> str: - # TODO in neo4j 4.x, we will want to change this to `${self.name}` instead - # of "{" + self.name "}" - return "{" + self.name + "}" - - def __repr__(self) -> str: - return f"item.{self.name}" if not self.static else self._parameterize_name() +def default_field(obj: Any) -> Field: + """ + Helper function from https://stackoverflow.com/questions/52063759/passing-default-list-argument-to-dataclasses. + We use this so that we can work around how dataclass fields disallow mutable objects by wrapping them in lambdas. + Put another way, writing `field(default_factory=lambda: ['Label1', 'Label2'])` is so much more work than writing + `default_field(['Label1', 'Label2']`. 
+ Note that if the Field is decorated with @property (like everything in our object model), then we will need to also + use this technique to correctly implement the setter: + https://florimond.dev/en/posts/2018/10/reconciling-dataclasses-and-properties-in-python/. -class CartographyLink: - def __init__( - self, - label: str, - key: str, - dict_field_ref: PropertyRef, - rel_label: str, - direction: LinkDirection = None, - rel_property_map: Dict[str, PropertyRef] = None, - ): - self.label = label - self.key = key - self.dict_field_ref = dict_field_ref - self.rel_label = rel_label - self.direction = LinkDirection.INWARD if not direction else direction - self.rel_property_map = rel_property_map + :param obj: The mutable default object (e.g. a List) that we want to set as a default for a dataclass field. + :return: A dataclass Field object. + """ + return field(default_factory=lambda: copy(obj)) def _build_node_properties_statement( node_property_map: Dict[str, PropertyRef], - node_extra_labels: List[str], + node_extra_labels: Optional[List[str]], ) -> Optional[str]: - ingest_fields_template = Template(' i.$node_property = $property_ref') - set_clause = 'i.lastupdated = {UpdateTag}' + ingest_fields_template = Template('i.$node_property = $property_ref') + set_clause = '' # If the node_property_map contains more than just `id`, generate a SET statement for the other fields. if len(node_property_map.keys()) > 1: - set_clause += ',\n' + ',\n'.join([ + set_clause += ',\n'.join([ ingest_fields_template.safe_substitute(node_property=node_property, property_ref=property_ref) for node_property, property_ref in node_property_map.items() if node_property != 'id' # Make sure to exclude setting the `id` again. 
@@ -65,12 +58,12 @@ def _build_node_properties_statement( return set_clause -def _build_rel_properties_statement(rel_var: str, rel_property_map: Dict[str, PropertyRef] = None) -> str: - set_clause = rel_var + '.lastupdated = {UpdateTag}' - ingest_fields_template = Template(' $rel_var.$rel_property = $property_ref') +def _build_rel_properties_statement(rel_var: str, rel_property_map: Optional[Dict[str, PropertyRef]] = None) -> str: + set_clause = '' + ingest_fields_template = Template('$rel_var.$rel_property = $property_ref') if rel_property_map: - set_clause += ',\n' + ',\n'.join([ + set_clause += ',\n'.join([ ingest_fields_template.safe_substitute( rel_var=rel_var, rel_property=rel_property, @@ -81,18 +74,23 @@ def _build_rel_properties_statement(rel_var: str, rel_property_map: Dict[str, Pr return set_clause -def _build_attach_sub_resource_statement(sub_resource_link: CartographyLink) -> str: +def _build_attach_sub_resource_statement(sub_resource_link: Optional[CartographyRelSchema]) -> str: """ Attaches sub resource to node i. 
""" - sub_resource_attach_template = Template(""" + if not sub_resource_link: + return '' + + sub_resource_attach_template = Template( + """ WITH i, item MATCH (j:$SubResourceLabel{$SubResourceKey: $SubResourceRef}) $RelMergeClause ON CREATE SET r.firstseen = timestamp() SET $set_rel_properties_statement - """) + """, + ) if sub_resource_link.direction == LinkDirection.INWARD: rel_merge_template = Template("""MERGE (i)<-[r:$SubResourceRelLabel]-(j)""") @@ -101,21 +99,27 @@ def _build_attach_sub_resource_statement(sub_resource_link: CartographyLink) -> rel_merge_clause = rel_merge_template.safe_substitute(SubResourceRelLabel=sub_resource_link.rel_label) + rel_props_as_dict: Dict[str, PropertyRef] = asdict(sub_resource_link.properties) + attach_sub_resource_statement = sub_resource_attach_template.safe_substitute( - SubResourceLabel=sub_resource_link.label, - SubResourceKey=sub_resource_link.key, + SubResourceLabel=sub_resource_link.target_node_label, + SubResourceKey=sub_resource_link.target_node_key, SubResourceRef=sub_resource_link.dict_field_ref, RelMergeClause=rel_merge_clause, SubResourceRelLabel=sub_resource_link.rel_label, - set_rel_properties_statement=_build_rel_properties_statement('r', sub_resource_link.rel_property_map), + set_rel_properties_statement=_build_rel_properties_statement('r', rel_props_as_dict), ) return attach_sub_resource_statement -def _build_attach_additional_links_statement(additional_links: List[CartographyLink]) -> str: +def _build_attach_additional_links_statement(additional_links: Optional[List[CartographyRelSchema]]) -> str: """ - Attaches one or more CartographyLinks to node i. + Attaches one or more CartographyRels to node i. 
""" + if not additional_links: + return '' + + # TODO - support matching on multiple properties additional_links_template = Template( """ WITH i, item @@ -124,7 +128,7 @@ def _build_attach_additional_links_statement(additional_links: List[CartographyL ON CREATE SET $rel_var.firstseen = timestamp() SET $set_rel_properties_statement - """, + """, ) links = [] for num, link in enumerate(additional_links): @@ -132,7 +136,7 @@ def _build_attach_additional_links_statement(additional_links: List[CartographyL rel_var = f"r{num}" if link.direction == LinkDirection.INWARD: - rel_merge_template = Template("""MERGE (i)-[$rel_var:$AddlRelLabel]->($node_var)""") + rel_merge_template = Template("""MERGE (i)<-[$rel_var:$AddlRelLabel]-($node_var)""") else: rel_merge_template = Template("""MERGE (i)-[$rel_var:$AddlRelLabel]->($node_var)""") @@ -142,44 +146,55 @@ def _build_attach_additional_links_statement(additional_links: List[CartographyL node_var=node_var, ) + # Give a helpful error message when forgetting to put `()` when instantiating a CartographyRelSchema, as this + # somehow isn't caught by IDEs like PyCharm. + try: + rel_props_as_dict: Dict[str, PropertyRef] = asdict(link.properties) + except TypeError as e: + if e.args and e.args[0] and e.args == 'asdict() should be called on dataclass instances': + logger.error( + f'TypeError thrown when trying to draw relation "{link.rel_label}" to a "{link.target_node_label}" ' + f'node. Please make sure that you did not forget to write `()` when specifying `properties` in the' + f'dataclass. 
' + f'For example, do `properties: RelProp = RelProp()`; NOT `properties: RelProp = RelProp`.', + ) + raise + additional_ref = additional_links_template.safe_substitute( - AddlLabel=link.label, - AddlKey=link.key, + AddlLabel=link.target_node_label, + AddlKey=link.target_node_key, AddlRef=link.dict_field_ref, node_var=node_var, rel_var=rel_var, RelMerge=rel_merge, - set_rel_properties_statement=_build_rel_properties_statement(rel_var, link.rel_property_map), + set_rel_properties_statement=_build_rel_properties_statement(rel_var, rel_props_as_dict), ) links.append(additional_ref) return '\n'.join(links) -def build_ingest_query( - node_label: str, - node_property_map: Dict[str, PropertyRef], - sub_resource_link: CartographyLink, - additional_links: List[CartographyLink] = None, - node_extra_labels: List[str] = None, -) -> str: - query_template = Template(""" - UNWIND {DictList} AS item - MERGE (i:$node_label{id: item.$dict_id_field}) - ON CREATE SET i.firstseen = timestamp() - SET - $set_node_properties_statement - $attach_sub_resource_statement - $attach_additional_links_statement - """) - if 'id' not in node_property_map or not node_property_map['id']: - raise ValueError('node_property_map must have key `id` set.') +def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: + query_template = Template( + """ + UNWIND $DictList AS item + MERGE (i:$node_label{id: $dict_id_field}) + ON CREATE SET i.firstseen = timestamp() + SET + $set_node_properties_statement + $attach_sub_resource_statement + $attach_additional_links_statement + """, + ) + + node_props: CartographyNodeProperties = node_schema.properties + node_props_as_dict: Dict[str, PropertyRef] = asdict(node_props) ingest_query = query_template.safe_substitute( - node_label=node_label, - dict_id_field=node_property_map['id'], - set_node_properties_statement=_build_node_properties_statement(node_property_map, node_extra_labels), - 
attach_sub_resource_statement=_build_attach_sub_resource_statement(sub_resource_link), - attach_additional_links_statement=_build_attach_additional_links_statement(additional_links), + node_label=node_schema.label, + dict_id_field=node_props.id, + set_node_properties_statement=_build_node_properties_statement(node_props_as_dict, node_schema.extra_labels), + attach_sub_resource_statement=_build_attach_sub_resource_statement(node_schema.subresource_relationship), + attach_additional_links_statement=_build_attach_additional_links_statement(node_schema.other_relationships), ) return ingest_query diff --git a/cartography/intel/aws/emr.py b/cartography/intel/aws/emr.py index 27a900d72..7e0d151d3 100644 --- a/cartography/intel/aws/emr.py +++ b/cartography/intel/aws/emr.py @@ -1,5 +1,6 @@ import logging import time +from dataclasses import dataclass from typing import Dict from typing import List @@ -7,6 +8,14 @@ import botocore.exceptions import neo4j +from cartography.client.core.tx import load_graph_data +from cartography.graph.model import CartographyNodeProperties +from cartography.graph.model import CartographyNodeSchema +from cartography.graph.model import CartographyRelProperties +from cartography.graph.model import CartographyRelSchema +from cartography.graph.model import LinkDirection +from cartography.graph.model import PropertyRef +from cartography.graph.querybuilder import build_ingestion_query from cartography.intel.aws.ec2.util import get_botocore_config from cartography.util import aws_handle_regions from cartography.util import run_cleanup_job @@ -48,53 +57,76 @@ def get_emr_describe_cluster(boto3_session: boto3.session.Session, region: str, return cluster_details +@dataclass +class EMRClusterNodeProperties(CartographyNodeProperties): + arn: PropertyRef = PropertyRef('ClusterArn') + auto_terminate: PropertyRef = PropertyRef('AutoTerminate') + autoscaling_role: PropertyRef = PropertyRef('AutoScalingRole') + custom_ami_id: PropertyRef = 
PropertyRef('CustomAmiId') + firstseen: PropertyRef = PropertyRef('firstseen') + id: PropertyRef = PropertyRef('Id') + instance_collection_type: PropertyRef = PropertyRef('InstanceCollectionType') + lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + log_encryption_kms_key_id: PropertyRef = PropertyRef('LogEncryptionKmsKeyId') + log_uri: PropertyRef = PropertyRef('LogUri') + master_public_dns_name: PropertyRef = PropertyRef('MasterPublicDnsName') + name: PropertyRef = PropertyRef('Name') + outpost_arn: PropertyRef = PropertyRef('OutpostArn') + region: PropertyRef = PropertyRef('Region', static=True) + release_label: PropertyRef = PropertyRef('ReleaseLabel') + repo_upgrade_on_boot: PropertyRef = PropertyRef('RepoUpgradeOnBoot') + requested_ami_version: PropertyRef = PropertyRef('RequestedAmiVersion') + running_ami_version: PropertyRef = PropertyRef('RunningAmiVersion') + scale_down_behavior: PropertyRef = PropertyRef('ScaleDownBehavior') + security_configuration: PropertyRef = PropertyRef('SecurityConfiguration') + servicerole: PropertyRef = PropertyRef('ServiceRole') + termination_protected: PropertyRef = PropertyRef('TerminationProtected') + visible_to_all_users: PropertyRef = PropertyRef('VisibleToAllUsers') + + +@dataclass +class EMRClusterToAwsAccountRelProperties(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + + +@dataclass +# (:EMRCluster)<-[:RESOURCE]-(:AWSAccount) +class EMRClusterToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_key: str = 'id' + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: EMRClusterToAwsAccountRelProperties = EMRClusterToAwsAccountRelProperties() + dict_field_ref: PropertyRef = PropertyRef('AccountId', static=True) + + +@dataclass +class EMRClusterSchema(CartographyNodeSchema): + label: str = 'EMRCluster' + properties: EMRClusterNodeProperties = EMRClusterNodeProperties() + 
subresource_relationship: CartographyRelSchema = EMRClusterToAWSAccount() + + @timeit def load_emr_clusters( - neo4j_session: neo4j.Session, cluster_data: List[Dict], region: str, current_aws_account_id: str, - aws_update_tag: int, + neo4j_session: neo4j.Session, + cluster_data: List[Dict], + region: str, + current_aws_account_id: str, + aws_update_tag: int, ) -> None: - query = """ - UNWIND {Clusters} as emr_cluster - MERGE (cluster:EMRCluster{id: emr_cluster.Name}) - ON CREATE SET cluster.firstseen = timestamp(), - cluster.arn = emr_cluster.ClusterArn, - cluster.id = emr_cluster.Id, - cluster.region = {Region} - SET cluster.name = emr_cluster.Name, - cluster.instance_collection_type = emr_cluster.InstanceCollectionType, - cluster.log_encryption_kms_key_id = emr_cluster.LogEncryptionKmsKeyId, - cluster.requested_ami_version = emr_cluster.RequestedAmiVersion, - cluster.running_ami_version = emr_cluster.RunningAmiVersion, - cluster.release_label = emr_cluster.ReleaseLabel, - cluster.auto_terminate = emr_cluster.AutoTerminate, - cluster.termination_protected = emr_cluster.TerminationProtected, - cluster.visible_to_all_users = emr_cluster.VisibleToAllUsers, - cluster.master_public_dns_name = emr_cluster.MasterPublicDnsName, - cluster.security_configuration = emr_cluster.SecurityConfiguration, - cluster.autoscaling_role = emr_cluster.AutoScalingRole, - cluster.scale_down_behavior = emr_cluster.ScaleDownBehavior, - cluster.custom_ami_id = emr_cluster.CustomAmiId, - cluster.repo_upgrade_on_boot = emr_cluster.RepoUpgradeOnBoot, - cluster.outpost_arn = emr_cluster.OutpostArn, - cluster.log_uri = emr_cluster.LogUri, - cluster.servicerole = emr_cluster.ServiceRole, - cluster.lastupdated = {aws_update_tag} - WITH cluster - - MATCH (owner:AWSAccount{id: {AWS_ACCOUNT_ID}}) - MERGE (owner)-[r:RESOURCE]->(cluster) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = {aws_update_tag} - """ - logger.info("Loading EMR %d clusters for region '%s' into graph.", 
len(cluster_data), region) - neo4j_session.run( - query, - Clusters=cluster_data, + + ingestion_query = build_ingestion_query(EMRClusterSchema()) + + load_graph_data( + neo4j_session, + ingestion_query, + DictList=cluster_data, + lastupdated=aws_update_tag, Region=region, - aws_update_tag=aws_update_tag, - AWS_ACCOUNT_ID=current_aws_account_id, - ).consume() + AccountId=current_aws_account_id, + ) @timeit diff --git a/tests/integration/cartography/intel/aws/test_emr.py b/tests/integration/cartography/intel/aws/test_emr.py index 35b6ca7da..9df265976 100644 --- a/tests/integration/cartography/intel/aws/test_emr.py +++ b/tests/integration/cartography/intel/aws/test_emr.py @@ -35,9 +35,9 @@ def test_load_emr_clusters_relationships(neo4j_session): # Create Test AWSAccount neo4j_session.run( """ - MERGE (aws:AWSAccount{id: {aws_account_id}}) + MERGE (aws:AWSAccount{id: $aws_account_id}) ON CREATE SET aws.firstseen = timestamp() - SET aws.lastupdated = {aws_update_tag} + SET aws.lastupdated = $aws_update_tag """, aws_account_id=TEST_ACCOUNT_ID, aws_update_tag=TEST_UPDATE_TAG, diff --git a/tests/unit/cartography/graph/helpers.py b/tests/unit/cartography/graph/helpers.py new file mode 100644 index 000000000..3dbb4c11f --- /dev/null +++ b/tests/unit/cartography/graph/helpers.py @@ -0,0 +1,10 @@ +def remove_leading_whitespace_and_empty_lines(text: str) -> str: + """ + Helper function for tests. + On the given text string, remove all leading whitespace on each line and remove blank lines, + :param text: Text string + :return: The text string but with no leading whitespace and no blank lines. + """ + # We call lstrip() twice on the same line. This is inefficient but ok for small unit tests. + # Please change it if you want to. 
+ return '\n'.join([line.lstrip() for line in text.split('\n') if line.lstrip() != '']) diff --git a/tests/unit/cartography/graph/test_querybuilder.py b/tests/unit/cartography/graph/test_querybuilder.py deleted file mode 100644 index fdcb1546e..000000000 --- a/tests/unit/cartography/graph/test_querybuilder.py +++ /dev/null @@ -1,90 +0,0 @@ -from cartography.graph.querybuilder import build_node_ingestion_query -from cartography.graph.querybuilder import build_relationship_ingestion_query - - -def test_build_node_ingestion_query(): - query = build_node_ingestion_query( - 'EC2Instance', - { - 'id': 'Arn', - 'arn': 'Arn', - 'publicdnsname': 'PublicDnsName', - 'privateipaddress': 'PrivateIpAddress', - 'publicipaddress': 'PublicIpAddress', - 'imageid': 'ImageId', - 'instancetype': 'InstanceType', - 'monitoringstate': 'MonitoringState', - 'state': 'State', - 'launchtime': 'LaunchTime', - 'launchtimeunix': 'LaunchTimeUnix', - 'region': 'Region', - 'iaminstanceprofile': 'IamInstanceProfile', - }, - ) - assert query == """ - UNWIND {DictList} AS item - MERGE (i:EC2Instance{id:item.Arn}) - ON CREATE SET i.firstseen = timestamp() - SET i.lastupdated = {UpdateTag}, - i.arn = item.Arn, - i.publicdnsname = item.PublicDnsName, - i.privateipaddress = item.PrivateIpAddress, - i.publicipaddress = item.PublicIpAddress, - i.imageid = item.ImageId, - i.instancetype = item.InstanceType, - i.monitoringstate = item.MonitoringState, - i.state = item.State, - i.launchtime = item.LaunchTime, - i.launchtimeunix = item.LaunchTimeUnix, - i.region = item.Region, - i.iaminstanceprofile = item.IamInstanceProfile""" - - -def test_build_node_ingestion_query_only_id(): - query = build_node_ingestion_query( - 'SomeNodeWithOnlyAnId', - { - 'id': 'IdOnTheDictObject', - }, - ) - assert query == """ - UNWIND {DictList} AS item - MERGE (i:SomeNodeWithOnlyAnId{id:item.IdOnTheDictObject}) - ON CREATE SET i.firstseen = timestamp() - SET i.lastupdated = {UpdateTag}""" - - -def 
test_build_relationship_ingestion_query(): - query = build_relationship_ingestion_query( - 'AWSAccount', 'id', 'Id', - 'EC2Instance', 'instanceid', 'InstanceId', - 'RESOURCE', - ) - assert query == """ - UNWIND {RelMappingList} AS item - MATCH (a:AWSAccount{id:item.Id}) - MATCH (b:EC2Instance{instanceid:item.InstanceId}) - MERGE (a)-[r:RESOURCE]->(b) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = {UpdateTag}""" - - -def test_build_relationship_with_attributes_query(): - query = build_relationship_ingestion_query( - 'Service', 'name', 'Name', - 'GoLibrary', 'id', 'Id', - 'REQUIRES', - { - 'libraryspecifier': 'LibrarySpecifier', - 'someotherrelfield': 'SomeOtherRelField', - }, - ) - assert query == """ - UNWIND {RelMappingList} AS item - MATCH (a:Service{name:item.Name}) - MATCH (b:GoLibrary{id:item.Id}) - MERGE (a)-[r:REQUIRES]->(b) - ON CREATE SET r.firstseen = timestamp() - SET r.lastupdated = {UpdateTag}, - r.libraryspecifier = item.LibrarySpecifier, - r.someotherrelfield = item.SomeOtherRelField""" diff --git a/tests/unit/cartography/graph/test_querybuilder_complex.py b/tests/unit/cartography/graph/test_querybuilder_complex.py new file mode 100644 index 000000000..1541a23de --- /dev/null +++ b/tests/unit/cartography/graph/test_querybuilder_complex.py @@ -0,0 +1,132 @@ +from dataclasses import dataclass +from dataclasses import Field +from typing import Any +from typing import List +from typing import Optional + +from cartography.graph.model import CartographyNodeProperties +from cartography.graph.model import CartographyNodeSchema +from cartography.graph.model import CartographyRelProperties +from cartography.graph.model import CartographyRelSchema +from cartography.graph.model import LinkDirection +from cartography.graph.model import PropertyRef +from cartography.graph.querybuilder import build_ingestion_query +from cartography.graph.querybuilder import default_field +from tests.unit.cartography.graph.helpers import 
remove_leading_whitespace_and_empty_lines +from tests.unit.cartography.graph.test_querybuilder_simple import SimpleNodeProperties + + +@dataclass +class InterestingAssetProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('Id') + lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + property1: PropertyRef = PropertyRef('property1') + property2: PropertyRef = PropertyRef('property2') + + +@dataclass +class InterestingAssetToSubResourceRelProps(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + another_rel_field: PropertyRef = PropertyRef('AnotherField') + yet_another_rel_field: PropertyRef = PropertyRef("YetAnotherRelField") + + +# (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) +@dataclass +class InterestingAssetToSubResourceRel(CartographyRelSchema): + target_node_label: str = 'SubResource' + target_node_key: str = 'id' + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RELATIONSHIP_LABEL" + properties: InterestingAssetToSubResourceRelProps = InterestingAssetToSubResourceRelProps() + dict_field_ref: PropertyRef = PropertyRef('subresource_id', static=True) + + +@dataclass +class InterestingAssetToHelloAssetRelProps(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + + +# (:InterestingAsset)-[:ASSOCIATED_WITH]->(:HelloAsset) +@dataclass +class InterestingAssetToHelloAssetRel(CartographyRelSchema): + target_node_label: str = 'HelloAsset' + target_node_key: str = 'id' + direction: LinkDirection = LinkDirection.OUTWARD + rel_label: str = "ASSOCIATED_WITH" + properties: InterestingAssetToHelloAssetRelProps = InterestingAssetToHelloAssetRelProps() + dict_field_ref: PropertyRef = PropertyRef('hello_asset_id') + + +@dataclass +class InterestingAssetToWorldAssetRelProps(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + + +# (:InterestingAsset)<-[:CONNECTED]-(:WorldAsset) 
+@dataclass +class InterestingAssetToWorldAssetRel(CartographyRelSchema): + target_node_label: str = 'WorldAsset' + target_node_key: str = 'id' + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "CONNECTED" + properties: InterestingAssetToWorldAssetRelProps = InterestingAssetToWorldAssetRelProps() + dict_field_ref: PropertyRef = PropertyRef('world_asset_id') + + +@dataclass +class InterestingAssetSchema(CartographyNodeSchema): + extra_labels: Optional[List[str]] = default_field(['AnotherNodeLabel', 'YetAnotherNodeLabel']) + label: str = 'InterestingNode' + properties: SimpleNodeProperties = SimpleNodeProperties() + subresource_relationship: InterestingAssetToSubResourceRel = InterestingAssetToSubResourceRel() + other_relationships: Field[Any] = default_field( + [ + InterestingAssetToHelloAssetRel(), + InterestingAssetToWorldAssetRel(), + ], + ) + + +def test_build_ingestion_query_complex(): + # Act + query = build_ingestion_query(InterestingAssetSchema()) + + expected = """ + UNWIND $DictList AS item + MERGE (i:InterestingNode{id: item.Id}) + ON CREATE SET i.firstseen = timestamp() + SET + i.lastupdated = $lastupdated, + i.property1 = item.property1, + i.property2 = item.property2, + i:AnotherNodeLabel:YetAnotherNodeLabel + + WITH i, item + MATCH (j:SubResource{id: $subresource_id}) + MERGE (i)<-[r:RELATIONSHIP_LABEL]-(j) + ON CREATE SET r.firstseen = timestamp() + SET + r.lastupdated = $lastupdated, + r.another_rel_field = item.AnotherField, + r.yet_another_rel_field = item.YetAnotherRelField + + WITH i, item + MATCH (n0:HelloAsset{id: item.hello_asset_id}) + MERGE (i)-[r0:ASSOCIATED_WITH]->(n0) + ON CREATE SET r0.firstseen = timestamp() + SET + r0.lastupdated = $lastupdated + + WITH i, item + MATCH (n1:WorldAsset{id: item.world_asset_id}) + MERGE (i)<-[r1:CONNECTED]-(n1) + ON CREATE SET r1.firstseen = timestamp() + SET + r1.lastupdated = $lastupdated + """ + + # Assert: compare query outputs while ignoring leading whitespace. 
+ actual_query = remove_leading_whitespace_and_empty_lines(query) + expected_query = remove_leading_whitespace_and_empty_lines(expected) + assert actual_query == expected_query diff --git a/tests/unit/cartography/graph/test_querybuilder_simple.py b/tests/unit/cartography/graph/test_querybuilder_simple.py new file mode 100644 index 000000000..e72711cb9 --- /dev/null +++ b/tests/unit/cartography/graph/test_querybuilder_simple.py @@ -0,0 +1,107 @@ +""" +Test cases + +- Single node label +- Multiple node labels +- Node properties x +- Relationship properties x +- Additional links +""" +from dataclasses import dataclass + +from cartography.graph.model import CartographyNodeProperties +from cartography.graph.model import CartographyNodeSchema +from cartography.graph.model import CartographyRelProperties +from cartography.graph.model import CartographyRelSchema +from cartography.graph.model import LinkDirection +from cartography.graph.model import PropertyRef +from cartography.graph.querybuilder import build_ingestion_query +from tests.unit.cartography.graph.helpers import remove_leading_whitespace_and_empty_lines + + +@dataclass +class SimpleNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('Id') + lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + property1: PropertyRef = PropertyRef('property1') + property2: PropertyRef = PropertyRef('property2') + + +@dataclass +class SimpleNodeToSubResourceRelProps(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + + +@dataclass +# (:EMRCluster)<-[:RESOURCE]-(:AWSAccount) +class SimpleNodeToSubResourceRel(CartographyRelSchema): + target_node_label: str = 'SubResource' + target_node_key: str = 'id' + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RELATIONSHIP_LABEL" + properties: SimpleNodeToSubResourceRelProps = SimpleNodeToSubResourceRelProps() + dict_field_ref: PropertyRef = PropertyRef('subresource_id', static=True) + + 
+@dataclass +class SimpleNodeSchema(CartographyNodeSchema): + label: str = 'SimpleNode' + properties: SimpleNodeProperties = SimpleNodeProperties() + + +@dataclass +class SimpleNodeWithSubResourceSchema(CartographyNodeSchema): + """ + Same as SimpleNodeSchema but with a subresource relationship now. + """ + label: str = 'SimpleNode' + properties: SimpleNodeProperties = SimpleNodeProperties() + subresource_relationship: CartographyRelSchema = SimpleNodeToSubResourceRel() + + +def test_simplenode_sanity_checks(): + """ + Test creating a simple node schema with no relationships + """ + schema: SimpleNodeSchema = SimpleNodeSchema() + # Assert that the unimplemented, non-abstract properties have None values. + assert schema.extra_labels is None + assert schema.subresource_relationship is None + assert schema.other_relationships is None + + +def test_simplenode_with_subresource_sanity_checks(): + """ + Test creating a simple node schema with a subresource relationship + """ + schema: SimpleNodeWithSubResourceSchema = SimpleNodeWithSubResourceSchema() + # Assert that the unimplemented, non-abstract properties have None values. + assert schema.extra_labels is None + assert schema.other_relationships is None + + +def test_build_ingestion_query_with_subresource(): + # Act + query = build_ingestion_query(SimpleNodeWithSubResourceSchema()) + + expected = """ + UNWIND $DictList AS item + MERGE (i:SimpleNode{id: item.Id}) + ON CREATE SET i.firstseen = timestamp() + SET + i.lastupdated = $lastupdated, + i.property1 = item.property1, + i.property2 = item.property2 + + WITH i, item + MATCH (j:SubResource{id: $subresource_id}) + MERGE (i)<-[r:RELATIONSHIP_LABEL]-(j) + ON CREATE SET r.firstseen = timestamp() + SET + r.lastupdated = $lastupdated + """ + + # Assert: compare query outputs while ignoring leading whitespace. 
+ actual_query = remove_leading_whitespace_and_empty_lines(query) + expected_query = remove_leading_whitespace_and_empty_lines(expected) + assert actual_query == expected_query From 519cce97f817e2fa171c8a7f351f1531a782306a Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Tue, 22 Nov 2022 22:06:10 -0800 Subject: [PATCH 05/27] bump mypy to 0.981 for python/mypy#13398 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7a5a2c21f..449a7b9bc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,7 +39,7 @@ repos: - id: reorder-python-imports args: [--py3-plus] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.950 + rev: v0.981 hooks: - id: mypy additional_dependencies: From b90014630654c4d0fbcad4359dbbfde28bfb61bb Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Tue, 22 Nov 2022 22:06:27 -0800 Subject: [PATCH 06/27] linter --- cartography/graph/querybuilder.py | 3 +-- tests/unit/cartography/graph/test_querybuilder_complex.py | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index 5d7934f8d..70ea3a833 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -1,7 +1,6 @@ import logging from copy import copy from dataclasses import asdict -from dataclasses import Field from dataclasses import field from string import Template from typing import Any @@ -19,7 +18,7 @@ logger = logging.getLogger(__name__) -def default_field(obj: Any) -> Field: +def default_field(obj: Any): """ Helper function from https://stackoverflow.com/questions/52063759/passing-default-list-argument-to-dataclasses. We use this so that we can work around how dataclass fields disallow mutable objects by wrapping them in lambdas. 
diff --git a/tests/unit/cartography/graph/test_querybuilder_complex.py b/tests/unit/cartography/graph/test_querybuilder_complex.py index 1541a23de..9642d43a9 100644 --- a/tests/unit/cartography/graph/test_querybuilder_complex.py +++ b/tests/unit/cartography/graph/test_querybuilder_complex.py @@ -1,6 +1,4 @@ from dataclasses import dataclass -from dataclasses import Field -from typing import Any from typing import List from typing import Optional @@ -80,7 +78,7 @@ class InterestingAssetSchema(CartographyNodeSchema): label: str = 'InterestingNode' properties: SimpleNodeProperties = SimpleNodeProperties() subresource_relationship: InterestingAssetToSubResourceRel = InterestingAssetToSubResourceRel() - other_relationships: Field[Any] = default_field( + other_relationships: Optional[List[CartographyRelSchema]] = default_field( [ InterestingAssetToHelloAssetRel(), InterestingAssetToWorldAssetRel(), From 05973fb5e5389c16d4205834cc516f756ed6eb6c Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Wed, 23 Nov 2022 09:23:52 -0800 Subject: [PATCH 07/27] make load_graph_data interface make more sense --- cartography/client/core/tx.py | 48 +++++++++++++++++++++++++++++++++-- cartography/intel/aws/emr.py | 2 +- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/cartography/client/core/tx.py b/cartography/client/core/tx.py index 430650ba3..ed72ab0be 100644 --- a/cartography/client/core/tx.py +++ b/cartography/client/core/tx.py @@ -148,11 +148,40 @@ def read_single_dict_tx(tx: neo4j.Transaction, query: str, **kwargs) -> Dict[str return value -def _write_list_of_dicts_tx( +def write_list_of_dicts_tx( tx: neo4j.Transaction, query: str, **kwargs, ) -> None: + """ + Writes a list of dicts to Neo4j. This is called by passing it as a function param to neo4j.write_transaction(). + + Example usage: + dict_list: List[Dict[Any, Any]] = [{...}, ...] + + neo4j_driver = neo4j.driver(... args ...) + neo4j_session = neo4j_driver.Session(... args ...) 
+ + neo4j_session.write_transaction( + write_list_of_dicts_tx, + ''' + UNWIND $DictList as data + MERGE (a:SomeNode{id: data.id}) + SET + a.other_field = $other_field, + a.yet_another_kwarg_field = $yet_another_kwarg_field + ... + ''', + DictList=dict_list, + other_field='some extra value', + yet_another_kwarg_field=1234 + ) + + :param tx: The neo4j transaction + :param query: The Neo4j write query that you want to run + :param kwargs: Keyword args to be supplied to the Neo4j query + :return: None + """ # TODO batch this to 10k items by default and make the batch size configurable tx.run(query, kwargs) @@ -160,10 +189,25 @@ def _write_list_of_dicts_tx( def load_graph_data( neo4j_session: neo4j.Session, query: str, + dict_list: List[Dict[str, Any]], **kwargs, ) -> None: + """ + Writes data to the graph. + :param neo4j_session: The Neo4j session + :param query: The Neo4j write query to run. This must follow the UNWIND + MERGE pattern. For example: + UNWIND $DictList as item + MERGE (a:SomeNode{id: item.id}) + SET + a.field1 = item.field1, + ... + :param dict_list: The data to load to the graph represented as a list of dicts. + :param kwargs: Keyword args to be supplied to the Neo4j query. MUST have 'DictList' as a key. 
+ :return: None + """ neo4j_session.write_transaction( - _write_list_of_dicts_tx, + write_list_of_dicts_tx, query, + DictList=dict_list, **kwargs, ) diff --git a/cartography/intel/aws/emr.py b/cartography/intel/aws/emr.py index 7e0d151d3..ec0b0b08d 100644 --- a/cartography/intel/aws/emr.py +++ b/cartography/intel/aws/emr.py @@ -122,7 +122,7 @@ def load_emr_clusters( load_graph_data( neo4j_session, ingestion_query, - DictList=cluster_data, + cluster_data, lastupdated=aws_update_tag, Region=region, AccountId=current_aws_account_id, From b6f3faffdfc8444c8fec36a05e819016af54825b Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Wed, 23 Nov 2022 09:25:09 -0800 Subject: [PATCH 08/27] fix comment --- cartography/client/core/tx.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cartography/client/core/tx.py b/cartography/client/core/tx.py index ed72ab0be..8265bdfcf 100644 --- a/cartography/client/core/tx.py +++ b/cartography/client/core/tx.py @@ -177,9 +177,9 @@ def write_list_of_dicts_tx( yet_another_kwarg_field=1234 ) - :param tx: The neo4j transaction - :param query: The Neo4j write query that you want to run - :param kwargs: Keyword args to be supplied to the Neo4j query + :param tx: The neo4j transaction. + :param query: The Neo4j write query that you want to run. + :param kwargs: Keyword args to be supplied to the Neo4j query. :return: None """ # TODO batch this to 10k items by default and make the batch size configurable @@ -202,7 +202,7 @@ def load_graph_data( a.field1 = item.field1, ... :param dict_list: The data to load to the graph represented as a list of dicts. - :param kwargs: Keyword args to be supplied to the Neo4j query. MUST have 'DictList' as a key. + :param kwargs: Keyword args to be supplied to the Neo4j query. 
:return: None """ neo4j_session.write_transaction( From aafa38d8e4e58255e9f00438cc3f5ca6ec0faee1 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Wed, 23 Nov 2022 22:50:47 -0800 Subject: [PATCH 09/27] Docs and some better names --- cartography/graph/model.py | 144 +++++++++++++++--- cartography/graph/querybuilder.py | 98 ++++++++++-- cartography/intel/aws/emr.py | 4 +- .../graph/test_querybuilder_complex.py | 25 ++- .../graph/test_querybuilder_simple.py | 15 +- 5 files changed, 240 insertions(+), 46 deletions(-) diff --git a/cartography/graph/model.py b/cartography/graph/model.py index a02d53fbe..642ba537d 100644 --- a/cartography/graph/model.py +++ b/cartography/graph/model.py @@ -8,27 +8,51 @@ class LinkDirection(Enum): - OUTWARD = auto() + """ + If a CartographyNodeSchema has relationships, then it will have one or more CartographyRelSchemas. + + Each CartographyRelSchema has a LinkDirection that determines whether the relationship points toward the original + node ("INWARD") or away from the original node ("OUTWARD"). + + For example the following code creates the path `(:EMRCluster)<-[:RESOURCE]-(:AWSAccount)`: + + class EMRCluster(CartographyNodeSchema): + label: str = "EMRCluster" + sub_resource_relationship: CartographyRelSchema = EMRClusterToAWSAccount() + # ... + + class EMRClusterToAWSAccount(CartographyRelSchema): + target_node_label: str = "AWSAccount" + rel_label: str = "RESOURCE" + direction: LinkDirection = LinkDirection.INWARD + # ... + + If `EMRClusterToAWSAccount.direction` was LinkDirection.OUTWARD, then the directionality of the relationship would + be `(:EMRCluster)-[:RESOURCE]->(:AWSAccount)` instead. + """ INWARD = auto() + OUTWARD = auto() class PropertyRef: """ - We dynamically build Neo4j queries and allow module authors to define a schema for their - nodes and relationships. + We dynamically build Neo4j queries and allow module authors to define a schema for their nodes and relationships. - The end result is we write dicts to Neo4j. 
To define nodes and rels, we need a mechanism - to allow the schema to refer to properties on the data dict. + The end result is we write dicts to Neo4j. To define nodes and rels, we need a mechanism to allow the schema to + refer to properties on the data dict. - A PropertyRef is how we reference properties on the data dict when dynamically creating - queries. + A PropertyRef is how we reference properties on the data dict when dynamically creating queries. """ def __init__(self, name: str, static=False): - # The name of the property as seen on the data dict + """ + :param name: The name of the property as seen on the data dict + :param static: If true, the property is not defined on the data dict, and we expect to find the property in the + kwargs. + If False, looks for the property in the data dict. + Defaults to False. + """ self.name = name - # If true, the property is not defined on the data dict. Otherwise look for the property - # in the data dict. # TODO consider naming this something better self.static = static @@ -41,56 +65,142 @@ def __repr__(self) -> str: @dataclass class CartographyNodeProperties(abc.ABC): - # Enforce that all subclasses will have an id and a lastupdated field + """ + Abstract base dataclass that represents the properties on a CartographyNodeSchema. This is intended to enforce that + all subclasses will have an id and a lastupdated field defined on their resulting nodes. + """ id: PropertyRef = field(init=False) lastupdated: PropertyRef = field(init=False) def __post_init__(self): + """ + Designed to prevent direct instantiation. This workaround is needed since this is both an abstract class and a + dataclass. + """ if self.__class__ == CartographyNodeProperties: raise TypeError("Cannot instantiate abstract class.") @dataclass class CartographyRelProperties(abc.ABC): + """ + Abstract class that represents the properties on a CartographyRelSchema. 
This is intended to enforce that all + subclasses will have a lastupdated field defined on their resulting relationships. + """ lastupdated: PropertyRef = field(init=False) + def __post_init__(self): + """ + Designed to prevent direct instantiation. This workaround is needed since this is both an abstract class and a + dataclass. + """ + if self.__class__ == CartographyRelProperties: + raise TypeError("Cannot instantiate abstract class.") + @dataclass class CartographyRelSchema(abc.ABC): + """ + Abstract base dataclass that represents a cartography relationship. + + A CartographyNodeSchema is composed of a CartographyRelSchema. The CartographyRelSchema contains properties that + make it possible to connect the CartographyNodeSchema to other existing nodes in the graph. + + As example usage, this code: + + class EMRCluster(CartographyNodeSchema): + label: str = "EMRCluster" + properties: EMRClusterNodeProperties = EMRClusterNodeProperties() + sub_resource_relationship: CartographyRelSchema = EMRClusterToAWSAccount() + + class EMRClusterToAWSAccount(CartographyRelSchema): + target_node_label: str = 'AWSAccount' + target_node_key: str = 'id' + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RESOURCE" + properties: EMRClusterToAwsAccountRelProperties = EMRClusterToAwsAccountRelProperties() + target_node_key_property_ref: PropertyRef = PropertyRef('AccountId', static=True) + + + generates a Neo4j query that looks like this: + + UNWIND $DictList AS item + MERGE (i:EMRCluster{id: <... Expand the EMRClusterNodeProperties here ...>}) + ON CREATE SET i.firstseen = timestamp() + SET + // ... Expand EMRClusterNodeProperties here ... + + WITH i, item + MATCH (j:AWSAccount{id: $AccountId}) + MERGE (i)<-[r:RESOURCE]-(j) + ON CREATE SET r.firstseen = timestamp() + SET + // ... Expand EMRClusterToAwsAccountRelProperties here ... 
+ """ @property @abc.abstractmethod def properties(self) -> CartographyRelProperties: + """ + :return: The properties of the relationship. + """ pass @property @abc.abstractmethod def target_node_label(self) -> str: + """ + :return: The target node label that this relationship will connect to. + """ pass @property @abc.abstractmethod def target_node_key(self) -> str: + """ + :return: The attribute on the target node used to uniquely identify what node to connect to. + """ pass @property @abc.abstractmethod - def rel_label(self) -> str: + def target_node_key_property_ref(self) -> PropertyRef: + """ + :return: The value of the target node attribute used to uniquely identify what node to connect to. + This is given as a PropertyRef. + """ pass @property @abc.abstractmethod - def direction(self) -> LinkDirection: + def rel_label(self) -> str: + """ + :return: The str label of the relationship. + """ pass @property @abc.abstractmethod - # TODO name this something better maybe - def dict_field_ref(self) -> PropertyRef: + def direction(self) -> LinkDirection: + """ + :return: The LinkDirection of the query. Please see the `LinkDirection` docs for a detailed explanation. + """ pass @dataclass class CartographyNodeSchema(abc.ABC): + """ + Abstract base dataclass that represents a graph node in cartography. This is used to dynamically generate graph + ingestion queries. + + A CartographyNodeSchema is composed of: + + - CartographyNodeProperties: contains the properties on the node and where to find their values with PropertyRef + objects. + - [Optional] A CartographyRelSchema pointing to the node's sub-resource (see the docstring on + `sub_resource_relationship` for details). + - [Optional] One or more other CartographyRelSchemas to other nodes.
+ """ _extra_labels: Optional[List[str]] = field(init=False, default=None) _other_relationships: Optional[List[CartographyRelSchema]] = field(init=False, default=None) @@ -111,10 +221,10 @@ def properties(self) -> CartographyNodeProperties: pass @property - def subresource_relationship(self) -> Optional[CartographyRelSchema]: + def sub_resource_relationship(self) -> Optional[CartographyRelSchema]: """ Optional. - Allows subclasses to specify a subresource relationship for the given node. "Subresource" is a term we made up + Allows subclasses to specify a subresource relationship for the given node. "Sub resource" is a term we made up best defined by examples: - In the AWS module, the subresource is an AWSAccount - In Azure, the subresource is a Subscription diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index 70ea3a833..1f5222d0e 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -37,8 +37,31 @@ def default_field(obj: Any): def _build_node_properties_statement( node_property_map: Dict[str, PropertyRef], - node_extra_labels: Optional[List[str]], + node_extra_labels: Optional[List[str]] = None, ) -> Optional[str]: + """ + Given a node property map (key = graph node attribute names as str, value = PropertyRef telling whether the + associated value is located on the data dict or from a kwarg variable), generate a Neo4j SET clause. 
+ + In this code example: + + node_property_map: Dict[str, PropertyRef] = { + 'id': PropertyRef("Id"), + 'node_prop_1': PropertyRef("Prop1"), + 'node_prop_2': PropertyRef("Prop2", static=True), + } + set_clause: str = _build_node_properties_statement(node_property_map) + + The returned set_clause will be: + + i.id = item.Id, + i.node_prop_1 = item.Prop1, + i.node_prop_2 = $Prop2 + + :param node_property_map: Mapping of node attribute names as str to PropertyRef objects + :param node_extra_labels: Optional list of extra labels to set on the node as str + :return: The resulting Neo4j SET clause to set the given attributes on the node + """ ingest_fields_template = Template('i.$node_property = $property_ref') set_clause = '' @@ -58,6 +81,27 @@ def _build_node_properties_statement( def _build_rel_properties_statement(rel_var: str, rel_property_map: Optional[Dict[str, PropertyRef]] = None) -> str: + """ + Given a relationship property map (key = relationship attribute names as str, value = PropertyRef telling whether + the associated value is located on the data dict or from a kwarg variable), generate a Neo4j SET clause. 
+ + In this code example: + + rel_property_map: Dict[str, PropertyRef] = { + 'rel_prop_1': PropertyRef("Prop1"), + 'rel_prop_2': PropertyRef("Prop2", static=True), + } + set_clause: str = _build_rel_properties_statement('r', rel_property_map) + + The returned set_clause will be: + + r.rel_prop_1 = item.Prop1, + r.rel_prop_2 = $Prop2 + + :param rel_var: The variable name to use for the relationship in the Neo4j query + :param rel_property_map: Mapping of relationship attribute names as str to PropertyRef objects + :return: The resulting Neo4j SET clause to set the given attributes on the relationship + """ set_clause = '' ingest_fields_template = Template('$rel_var.$rel_property = $property_ref') @@ -73,9 +117,18 @@ def _build_rel_properties_statement(rel_var: str, rel_property_map: Optional[Dic return set_clause -def _build_attach_sub_resource_statement(sub_resource_link: Optional[CartographyRelSchema]) -> str: +def _build_attach_sub_resource_statement(sub_resource_link: Optional[CartographyRelSchema] = None) -> str: """ - Attaches sub resource to node i. + Generates a Neo4j statement to attach a sub resource to a node. A 'sub resource' is a term we made up to describe + billing units of a given resource. For example, + - In AWS, the sub resource is an AWSAccount. + - In Azure, the sub resource is a Subscription. + - In GCP, the sub resource is a GCPProject. + - etc. + This is a private function not meant to be called outside of build_ingestion_query(). + :param sub_resource_link: Optional: The CartographyRelSchema object connecting previous node(s) to the sub resource. + :return: a Neo4j clause that connects previous node(s) to a sub resource, taking into account the labels, attribute + keys, and directionality. If sub_resource_link is None, return an empty string.
""" if not sub_resource_link: return '' @@ -103,7 +156,7 @@ def _build_attach_sub_resource_statement(sub_resource_link: Optional[Cartography attach_sub_resource_statement = sub_resource_attach_template.safe_substitute( SubResourceLabel=sub_resource_link.target_node_label, SubResourceKey=sub_resource_link.target_node_key, - SubResourceRef=sub_resource_link.dict_field_ref, + SubResourceRef=sub_resource_link.target_node_key_property_ref, RelMergeClause=rel_merge_clause, SubResourceRelLabel=sub_resource_link.rel_label, set_rel_properties_statement=_build_rel_properties_statement('r', rel_props_as_dict), @@ -111,11 +164,19 @@ def _build_attach_sub_resource_statement(sub_resource_link: Optional[Cartography return attach_sub_resource_statement -def _build_attach_additional_links_statement(additional_links: Optional[List[CartographyRelSchema]]) -> str: +def _build_attach_additional_links_statement( + additional_relationships: Optional[List[CartographyRelSchema]] = None, +) -> str: """ - Attaches one or more CartographyRels to node i. + Generates a Neo4j statement to attaches one or more CartographyRelSchemas to node(s) previously mentioned in the + query. + This is a private function not meant to be called outside of build_ingestion_query(). + :param additional_relationships: Optional list of CartographyRelSchema describing what other relationships should + be created from the previous node(s) in this query. + :return: A Neo4j clause that connects previous node(s) to the given additional_links., taking into account the + labels, attribute keys, and directionality. If additional_relationships is None, return an empty string. 
""" - if not additional_links: + if not additional_relationships: return '' # TODO - support matching on multiple properties @@ -130,7 +191,7 @@ def _build_attach_additional_links_statement(additional_links: Optional[List[Car """, ) links = [] - for num, link in enumerate(additional_links): + for num, link in enumerate(additional_relationships): node_var = f"n{num}" rel_var = f"r{num}" @@ -146,7 +207,7 @@ def _build_attach_additional_links_statement(additional_links: Optional[List[Car ) # Give a helpful error message when forgetting to put `()` when instantiating a CartographyRelSchema, as this - # somehow isn't caught by IDEs like PyCharm. + # isn't always caught by IDEs like PyCharm. try: rel_props_as_dict: Dict[str, PropertyRef] = asdict(link.properties) except TypeError as e: @@ -162,7 +223,7 @@ def _build_attach_additional_links_statement(additional_links: Optional[List[Car additional_ref = additional_links_template.safe_substitute( AddlLabel=link.target_node_label, AddlKey=link.target_node_key, - AddlRef=link.dict_field_ref, + AddlRef=link.target_node_key_property_ref, node_var=node_var, rel_var=rel_var, RelMerge=rel_merge, @@ -174,6 +235,17 @@ def _build_attach_additional_links_statement(additional_links: Optional[List[Car def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: + """ + Generates a Neo4j query from the given CartographyNodeSchema to ingest the specified nodes and relationships so that + cartography module authors don't need to handwrite their own queries. This involves processing all attached + CartographyRelSchema objects. + :param node_schema: The CartographyNodeSchema object to build a Neo4j query from. + :return: An optimized Neo4j query that can be used to ingest nodes and relationships. + Important notes: + - The query assumes that a list of dicts will be passed to it through parameter $DictList. + - The query sets firstseen attributes on all of the nodes and relationships that it creates. 
+ - The resulting query uses the UNWIND + MERGE pattern for batching and speed. + """ query_template = Template( """ UNWIND $DictList AS item @@ -181,8 +253,8 @@ def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: ON CREATE SET i.firstseen = timestamp() SET $set_node_properties_statement - $attach_sub_resource_statement - $attach_additional_links_statement + $attach_sub_resource_statement + $attach_additional_links_statement """, ) @@ -193,7 +265,7 @@ def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: node_label=node_schema.label, dict_id_field=node_props.id, set_node_properties_statement=_build_node_properties_statement(node_props_as_dict, node_schema.extra_labels), - attach_sub_resource_statement=_build_attach_sub_resource_statement(node_schema.subresource_relationship), + attach_sub_resource_statement=_build_attach_sub_resource_statement(node_schema.sub_resource_relationship), attach_additional_links_statement=_build_attach_additional_links_statement(node_schema.other_relationships), ) return ingest_query diff --git a/cartography/intel/aws/emr.py b/cartography/intel/aws/emr.py index ec0b0b08d..775c59673 100644 --- a/cartography/intel/aws/emr.py +++ b/cartography/intel/aws/emr.py @@ -94,17 +94,17 @@ class EMRClusterToAwsAccountRelProperties(CartographyRelProperties): class EMRClusterToAWSAccount(CartographyRelSchema): target_node_label: str = 'AWSAccount' target_node_key: str = 'id' + target_node_key_property_ref: PropertyRef = PropertyRef('AccountId', static=True) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "RESOURCE" properties: EMRClusterToAwsAccountRelProperties = EMRClusterToAwsAccountRelProperties() - dict_field_ref: PropertyRef = PropertyRef('AccountId', static=True) @dataclass class EMRClusterSchema(CartographyNodeSchema): label: str = 'EMRCluster' properties: EMRClusterNodeProperties = EMRClusterNodeProperties() - subresource_relationship: CartographyRelSchema = EMRClusterToAWSAccount() + 
sub_resource_relationship: CartographyRelSchema = EMRClusterToAWSAccount() @timeit diff --git a/tests/unit/cartography/graph/test_querybuilder_complex.py b/tests/unit/cartography/graph/test_querybuilder_complex.py index 9642d43a9..f7e4cfa68 100644 --- a/tests/unit/cartography/graph/test_querybuilder_complex.py +++ b/tests/unit/cartography/graph/test_querybuilder_complex.py @@ -29,15 +29,18 @@ class InterestingAssetToSubResourceRelProps(CartographyRelProperties): yet_another_rel_field: PropertyRef = PropertyRef("YetAnotherRelField") -# (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) @dataclass class InterestingAssetToSubResourceRel(CartographyRelSchema): + """ + Define a sub-resource relationship + (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) + """ target_node_label: str = 'SubResource' target_node_key: str = 'id' + target_node_key_property_ref: PropertyRef = PropertyRef('sub_resource_id', static=True) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "RELATIONSHIP_LABEL" properties: InterestingAssetToSubResourceRelProps = InterestingAssetToSubResourceRelProps() - dict_field_ref: PropertyRef = PropertyRef('subresource_id', static=True) @dataclass @@ -45,15 +48,18 @@ class InterestingAssetToHelloAssetRelProps(CartographyRelProperties): lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) -# (:InterestingAsset)-[:ASSOCIATED_WITH]->(:HelloAsset) @dataclass class InterestingAssetToHelloAssetRel(CartographyRelSchema): + """ + Define an additional relationship + (:InterestingAsset)-[:ASSOCIATED_WITH]->(:HelloAsset) + """ target_node_label: str = 'HelloAsset' target_node_key: str = 'id' + target_node_key_property_ref: PropertyRef = PropertyRef('hello_asset_id') direction: LinkDirection = LinkDirection.OUTWARD rel_label: str = "ASSOCIATED_WITH" properties: InterestingAssetToHelloAssetRelProps = InterestingAssetToHelloAssetRelProps() - dict_field_ref: PropertyRef = PropertyRef('hello_asset_id') @dataclass @@ -61,15 +67,18 
@@ class InterestingAssetToWorldAssetRelProps(CartographyRelProperties): lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) -# (:InterestingAsset)<-[:CONNECTED]-(:WorldAsset) @dataclass class InterestingAssetToWorldAssetRel(CartographyRelSchema): + """ + Define yet another relationship. + (:InterestingAsset)<-[:CONNECTED]-(:WorldAsset) + """ target_node_label: str = 'WorldAsset' target_node_key: str = 'id' + target_node_key_property_ref: PropertyRef = PropertyRef('world_asset_id') direction: LinkDirection = LinkDirection.INWARD rel_label: str = "CONNECTED" properties: InterestingAssetToWorldAssetRelProps = InterestingAssetToWorldAssetRelProps() - dict_field_ref: PropertyRef = PropertyRef('world_asset_id') @dataclass @@ -77,7 +86,7 @@ class InterestingAssetSchema(CartographyNodeSchema): extra_labels: Optional[List[str]] = default_field(['AnotherNodeLabel', 'YetAnotherNodeLabel']) label: str = 'InterestingNode' properties: SimpleNodeProperties = SimpleNodeProperties() - subresource_relationship: InterestingAssetToSubResourceRel = InterestingAssetToSubResourceRel() + sub_resource_relationship: InterestingAssetToSubResourceRel = InterestingAssetToSubResourceRel() other_relationships: Optional[List[CartographyRelSchema]] = default_field( [ InterestingAssetToHelloAssetRel(), @@ -101,7 +110,7 @@ def test_build_ingestion_query_complex(): i:AnotherNodeLabel:YetAnotherNodeLabel WITH i, item - MATCH (j:SubResource{id: $subresource_id}) + MATCH (j:SubResource{id: $sub_resource_id}) MERGE (i)<-[r:RELATIONSHIP_LABEL]-(j) ON CREATE SET r.firstseen = timestamp() SET diff --git a/tests/unit/cartography/graph/test_querybuilder_simple.py b/tests/unit/cartography/graph/test_querybuilder_simple.py index e72711cb9..31ba4bb0d 100644 --- a/tests/unit/cartography/graph/test_querybuilder_simple.py +++ b/tests/unit/cartography/graph/test_querybuilder_simple.py @@ -33,14 +33,17 @@ class SimpleNodeToSubResourceRelProps(CartographyRelProperties): @dataclass -# 
(:EMRCluster)<-[:RESOURCE]-(:AWSAccount) class SimpleNodeToSubResourceRel(CartographyRelSchema): + """ + Define a sub resource rel: + (:EMRCluster)<-[:RESOURCE]-(:AWSAccount) + """ target_node_label: str = 'SubResource' target_node_key: str = 'id' + target_node_key_property_ref: PropertyRef = PropertyRef('sub_resource_id', static=True) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "RELATIONSHIP_LABEL" properties: SimpleNodeToSubResourceRelProps = SimpleNodeToSubResourceRelProps() - dict_field_ref: PropertyRef = PropertyRef('subresource_id', static=True) @dataclass @@ -52,11 +55,11 @@ class SimpleNodeSchema(CartographyNodeSchema): @dataclass class SimpleNodeWithSubResourceSchema(CartographyNodeSchema): """ - Same as SimpleNodeSchema but with a subresource relationship now. + Same as SimpleNodeSchema but with a sub-resource relationship now. """ label: str = 'SimpleNode' properties: SimpleNodeProperties = SimpleNodeProperties() - subresource_relationship: CartographyRelSchema = SimpleNodeToSubResourceRel() + sub_resource_relationship: CartographyRelSchema = SimpleNodeToSubResourceRel() def test_simplenode_sanity_checks(): @@ -66,7 +69,7 @@ def test_simplenode_sanity_checks(): schema: SimpleNodeSchema = SimpleNodeSchema() # Assert that the unimplemented, non-abstract properties have None values. 
assert schema.extra_labels is None - assert schema.subresource_relationship is None + assert schema.sub_resource_relationship is None assert schema.other_relationships is None @@ -94,7 +97,7 @@ def test_build_ingestion_query_with_subresource(): i.property2 = item.property2 WITH i, item - MATCH (j:SubResource{id: $subresource_id}) + MATCH (j:SubResource{id: $sub_resource_id}) MERGE (i)<-[r:RELATIONSHIP_LABEL]-(j) ON CREATE SET r.firstseen = timestamp() SET From 1293b2e57c141308a33d3847ecabd60d9280298f Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Wed, 23 Nov 2022 23:00:28 -0800 Subject: [PATCH 10/27] add a todo --- cartography/intel/aws/emr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cartography/intel/aws/emr.py b/cartography/intel/aws/emr.py index 775c59673..caa5e64de 100644 --- a/cartography/intel/aws/emr.py +++ b/cartography/intel/aws/emr.py @@ -57,9 +57,10 @@ def get_emr_describe_cluster(boto3_session: boto3.session.Session, region: str, return cluster_details +# TODO - how to autogenerate CREATE INDEX script from this? 
@dataclass class EMRClusterNodeProperties(CartographyNodeProperties): - arn: PropertyRef = PropertyRef('ClusterArn') + arn: PropertyRef = PropertyRef('ClusterArn', indexed=True) auto_terminate: PropertyRef = PropertyRef('AutoTerminate') autoscaling_role: PropertyRef = PropertyRef('AutoScalingRole') custom_ami_id: PropertyRef = PropertyRef('CustomAmiId') From b810d75ceb5a975aa42b2cfcb6c1d44fa89b262a Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Sun, 27 Nov 2022 22:43:55 -0800 Subject: [PATCH 11/27] Doc updates, rename some fields --- cartography/client/core/tx.py | 33 ++++----- cartography/graph/model.py | 74 ++++++------------- cartography/graph/querybuilder.py | 53 +++++++------ cartography/intel/aws/emr.py | 11 ++- .../graph/test_querybuilder_complex.py | 12 +-- .../graph/test_querybuilder_simple.py | 45 +++++------ 6 files changed, 93 insertions(+), 135 deletions(-) diff --git a/cartography/client/core/tx.py b/cartography/client/core/tx.py index 8265bdfcf..da643907b 100644 --- a/cartography/client/core/tx.py +++ b/cartography/client/core/tx.py @@ -7,6 +7,8 @@ import neo4j +from cartography.util import batch + def read_list_of_values_tx(tx: neo4j.Transaction, query: str, **kwargs) -> List[Union[str, int]]: """ @@ -154,9 +156,10 @@ def write_list_of_dicts_tx( **kwargs, ) -> None: """ - Writes a list of dicts to Neo4j. This is called by passing it as a function param to neo4j.write_transaction(). + Writes a list of dicts to Neo4j. Example usage: + import neo4j dict_list: List[Dict[Any, Any]] = [{...}, ...] neo4j_driver = neo4j.driver(... args ...) @@ -177,12 +180,11 @@ def write_list_of_dicts_tx( yet_another_kwarg_field=1234 ) - :param tx: The neo4j transaction. - :param query: The Neo4j write query that you want to run. + :param tx: The neo4j write transaction. + :param query: The Neo4j write query to run. :param kwargs: Keyword args to be supplied to the Neo4j query. 
:return: None """ - # TODO batch this to 10k items by default and make the batch size configurable tx.run(query, kwargs) @@ -195,19 +197,16 @@ def load_graph_data( """ Writes data to the graph. :param neo4j_session: The Neo4j session - :param query: The Neo4j write query to run. This must follow the UNWIND + MERGE pattern. For example: - UNWIND $DictList as item - MERGE (a:SomeNode{id: item.id}) - SET - a.field1 = item.field1, - ... + :param query: The Neo4j write query to run. This query is not meant to be handwritten, rather it should be generated + with cartography.graph.querybuilder.build_ingestion_query(). :param dict_list: The data to load to the graph represented as a list of dicts. - :param kwargs: Keyword args to be supplied to the Neo4j query. + :param kwargs: Allows additional keyword args to be supplied to the Neo4j query. :return: None """ - neo4j_session.write_transaction( - write_list_of_dicts_tx, - query, - DictList=dict_list, - **kwargs, - ) + for data_batch in batch(dict_list, size=10000): + neo4j_session.write_transaction( + write_list_of_dicts_tx, + query, + DictList=data_batch, + **kwargs, + ) diff --git a/cartography/graph/model.py b/cartography/graph/model.py index 642ba537d..d44381d31 100644 --- a/cartography/graph/model.py +++ b/cartography/graph/model.py @@ -9,8 +9,6 @@ class LinkDirection(Enum): """ - If a CartographyNodeSchema has relationships, then it will have one or more CartographyRelSchemas. - Each CartographyRelSchema has a LinkDirection that determines whether the relationship points toward the original node ("INWARD") or away from the original node ("OUTWARD"). @@ -36,31 +34,30 @@ class EMRClusterToAWSAccount(CartographyRelSchema): class PropertyRef: """ - We dynamically build Neo4j queries and allow module authors to define a schema for their nodes and relationships. - - The end result is we write dicts to Neo4j. To define nodes and rels, we need a mechanism to allow the schema to - refer to properties on the data dict. 
+ PropertyRefs represent properties on cartography nodes and relationships. - A PropertyRef is how we reference properties on the data dict when dynamically creating queries. + cartography takes lists of Python dicts and loads them to Neo4j. PropertyRefs allow our dynamically generated Neo4j + ingestion queries to set values for a given node or relationship property from (A) a field on the dict being + processed (PropertyRef.set_in_kwargs=False, default), or (B) from a single variable provided by a keyword argument + (PropertyRef.set_in_kwargs=True). """ - def __init__(self, name: str, static=False): + def __init__(self, name: str, set_in_kwargs=False): """ - :param name: The name of the property as seen on the data dict - :param static: If true, the property is not defined on the data dict, and we expect to find the property in the - kwargs. + :param name: The name of the property + :param set_in_kwargs: Optional. If True, the property is not defined on the data dict, and we expect to find the + property in the kwargs. If False, looks for the property in the data dict. Defaults to False. """ self.name = name - # TODO consider naming this something better - self.static = static + self.set_in_kwargs = set_in_kwargs def _parameterize_name(self) -> str: return f"${self.name}" def __repr__(self) -> str: - return f"item.{self.name}" if not self.static else self._parameterize_name() + return f"item.{self.name}" if not self.set_in_kwargs else self._parameterize_name() @dataclass @@ -103,39 +100,8 @@ class CartographyRelSchema(abc.ABC): """ Abstract base dataclass that represents a cartography relationship. - A CartographyNodeSchema is composed of a CartographyRelSchema. The CartographyRelSchema contains properties that - make it possible to connect the CartographyNodeSchema to other existing nodes in the graph.
- - As example usage, this code: - - class EMRCluster(CartographyNodeSchema): - label: str = "EMRCluster" - properties: EMRClusterNodeProperties = EMRClusterNodeProperties() - sub_resource_relationship: CartographyRelSchema = EMRClusterToAWSAccount() - - class EMRClusterToAWSAccount(CartographyRelSchema): - target_node_label: str = 'AWSAccount' - target_node_key: str = 'id' - direction: LinkDirection = LinkDirection.INWARD - rel_label: str = "RESOURCE" - properties: EMRClusterToAwsAccountRelProperties = EMRClusterToAwsAccountRelProperties() - target_node_key_property_ref: PropertyRef = PropertyRef('AccountId', static=True) - - - generates a Neo4j query that looks like this: - - UNWIND $DictList AS item - MERGE (i:EMRCluster{id: <... Expand the EMRClusterNodeProperties here ...>}) - ON CREATE SET i.firstseen = timestamp() - SET - // ... Expand EMRClusterNodeProperties here ... - - WITH i, item - MATCH (j:AWSAccount{id: $AccountId}) - MERGE (i)<-[r:RESOURCE]-(j) - ON CREATE SET r.firstseen = timestamp() - SET - // ... Expand EMRClusterToAwsAccountRelProperties here ... + The CartographyRelSchema contains properties that make it possible to connect the CartographyNodeSchema to other + existing nodes in the graph. """ @property @abc.abstractmethod @@ -157,7 +123,7 @@ def target_node_label(self) -> str: @abc.abstractmethod def target_node_key(self) -> str: """ - :return: The attribute on the target node used to uniquely identify what node to connect to. + :return: The attribute name on the target_node_label used to uniquely identify what node to connect to. """ pass @@ -165,8 +131,8 @@ def target_node_key(self) -> str: @abc.abstractmethod def target_node_key_property_ref(self) -> PropertyRef: """ - :return: The value of the target node attribute used to uniquely identify what node to connect to. - This is given as a PropertyRef. + :return: The value of the target_node_key used to uniquely identify what node to connect to. This is given as a + PropertyRef. 
""" pass @@ -246,17 +212,23 @@ def other_relationships(self) -> Optional[List[CartographyRelSchema]]: @other_relationships.setter def other_relationships(self, other_rels: List[CartographyRelSchema]) -> None: + """ + Boilerplate setter function used to keep typehints happy. + """ self._other_relationships = other_rels @property def extra_labels(self) -> Optional[List[str]]: """ Optional. - Allows subclasses to specify extra labels on the node. + Allows specifying extra labels on the node. :return: None if not overriden. Else return a str list of the extra labels specified on the node. """ return self._extra_labels @extra_labels.setter def extra_labels(self, labels: List[str]) -> None: + """ + Boilerplate setter function used to keep typehints happy. + """ self._extra_labels = labels diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index 1f5222d0e..afe40dd76 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -21,12 +21,14 @@ def default_field(obj: Any): """ Helper function from https://stackoverflow.com/questions/52063759/passing-default-list-argument-to-dataclasses. - We use this so that we can work around how dataclass fields disallow mutable objects by wrapping them in lambdas. - Put another way, writing `field(default_factory=lambda: ['Label1', 'Label2'])` is so much more work than writing - `default_field(['Label1', 'Label2']`. + We use this so that we can work around how dataclass field default values disallow mutable objects (like Lists) by + wrapping them in lambdas. - Note that if the Field is decorated with @property (like everything in our object model), then we will need to also - use this technique to correctly implement the setter: + Put another way, writing `field(default_factory=lambda: ['Label1', 'Label2'])` is so much + more work than writing `default_field(['Label1', 'Label2']`. 
+ + Note that if the Field is decorated with @property (like everything in our object model), then the dataclass needs + to also use this technique to keep typehints happy: https://florimond.dev/en/posts/2018/10/reconciling-dataclasses-and-properties-in-python/. :param obj: The mutable default object (e.g. a List) that we want to set as a default for a dataclass field. @@ -38,21 +40,20 @@ def default_field(obj: Any): def _build_node_properties_statement( node_property_map: Dict[str, PropertyRef], node_extra_labels: Optional[List[str]] = None, -) -> Optional[str]: +) -> str: """ - Given a node property map (key = graph node attribute names as str, value = PropertyRef telling whether the - associated value is located on the data dict or from a kwarg variable), generate a Neo4j SET clause. + Generate a Neo4j clause that sets node properties using the given mapping of attribute names to PropertyRefs. - In this code example: + As seen in this example, node_property_map: Dict[str, PropertyRef] = { 'id': PropertyRef("Id"), 'node_prop_1': PropertyRef("Prop1"), - 'node_prop_2': PropertyRef("Prop2", static=True), + 'node_prop_2': PropertyRef("Prop2", set_in_kwargs=True), } set_clause: str = _build_node_properties_statement(node_property_map) - The returned set_clause will be: + the returned set_clause will be: i.id = item.Id, i.node_prop_1 = item.Prop1, @@ -63,15 +64,12 @@ def _build_node_properties_statement( :return: The resulting Neo4j SET clause to set the given attributes on the node """ ingest_fields_template = Template('i.$node_property = $property_ref') - set_clause = '' - # If the node_property_map contains more than just `id`, generate a SET statement for the other fields. 
- if len(node_property_map.keys()) > 1: - set_clause += ',\n'.join([ - ingest_fields_template.safe_substitute(node_property=node_property, property_ref=property_ref) - for node_property, property_ref in node_property_map.items() - if node_property != 'id' # Make sure to exclude setting the `id` again. - ]) + set_clause = ',\n'.join([ + ingest_fields_template.safe_substitute(node_property=node_property, property_ref=property_ref) + for node_property, property_ref in node_property_map.items() + if node_property != 'id' # The `MERGE` clause will have already set `id`; let's not set it again. + ]) # Set extra labels on the node if specified if node_extra_labels: @@ -82,8 +80,8 @@ def _build_node_properties_statement( def _build_rel_properties_statement(rel_var: str, rel_property_map: Optional[Dict[str, PropertyRef]] = None) -> str: """ - Given a relationship property map (key = relationship attribute names as str, value = PropertyRef telling whether - the associated value is located on the data dict or from a kwarg variable), generate a Neo4j SET clause. + Generate a Neo4j clause that sets relationship properties using the given mapping of attribute names to + PropertyRefs. In this code example: @@ -93,7 +91,7 @@ def _build_rel_properties_statement(rel_var: str, rel_property_map: Optional[Dic } set_clause: str = _build_rel_properties_statement('r', rel_property_map) - The returned set_clause will be: + the returned set_clause will be: r.rel_prop_1 = item.Prop1, r.rel_prop_2 = $Prop2 @@ -168,7 +166,7 @@ def _build_attach_additional_links_statement( additional_relationships: Optional[List[CartographyRelSchema]] = None, ) -> str: """ - Generates a Neo4j statement to attaches one or more CartographyRelSchemas to node(s) previously mentioned in the + Generates a Neo4j statement to attach one or more CartographyRelSchemas to node(s) previously mentioned in the query. This is a private function not meant to be called outside of build_ingestion_query(). 
:param additional_relationships: Optional list of CartographyRelSchema describing what other relationships should @@ -237,14 +235,15 @@ def _build_attach_additional_links_statement( def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: """ Generates a Neo4j query from the given CartographyNodeSchema to ingest the specified nodes and relationships so that - cartography module authors don't need to handwrite their own queries. This involves processing all attached - CartographyRelSchema objects. + cartography module authors don't need to handwrite their own queries. :param node_schema: The CartographyNodeSchema object to build a Neo4j query from. :return: An optimized Neo4j query that can be used to ingest nodes and relationships. Important notes: + - The resulting query uses the UNWIND + MERGE pattern (see + https://neo4j.com/docs/cypher-manual/current/clauses/unwind/#unwind-creating-nodes-from-a-list-parameter) to batch + load the data for speed. - The query assumes that a list of dicts will be passed to it through parameter $DictList. - - The query sets firstseen attributes on all of the nodes and relationships that it creates. - - The resulting query uses the UNWIND + MERGE pattern for batching and speed. + - The query sets `firstseen` attributes on all the nodes and relationships that it creates. """ query_template = Template( """ diff --git a/cartography/intel/aws/emr.py b/cartography/intel/aws/emr.py index caa5e64de..5787dd978 100644 --- a/cartography/intel/aws/emr.py +++ b/cartography/intel/aws/emr.py @@ -57,23 +57,22 @@ def get_emr_describe_cluster(boto3_session: boto3.session.Session, region: str, return cluster_details -# TODO - how to autogenerate CREATE INDEX script from this? 
@dataclass class EMRClusterNodeProperties(CartographyNodeProperties): - arn: PropertyRef = PropertyRef('ClusterArn', indexed=True) + arn: PropertyRef = PropertyRef('ClusterArn') auto_terminate: PropertyRef = PropertyRef('AutoTerminate') autoscaling_role: PropertyRef = PropertyRef('AutoScalingRole') custom_ami_id: PropertyRef = PropertyRef('CustomAmiId') firstseen: PropertyRef = PropertyRef('firstseen') id: PropertyRef = PropertyRef('Id') instance_collection_type: PropertyRef = PropertyRef('InstanceCollectionType') - lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) log_encryption_kms_key_id: PropertyRef = PropertyRef('LogEncryptionKmsKeyId') log_uri: PropertyRef = PropertyRef('LogUri') master_public_dns_name: PropertyRef = PropertyRef('MasterPublicDnsName') name: PropertyRef = PropertyRef('Name') outpost_arn: PropertyRef = PropertyRef('OutpostArn') - region: PropertyRef = PropertyRef('Region', static=True) + region: PropertyRef = PropertyRef('Region', set_in_kwargs=True) release_label: PropertyRef = PropertyRef('ReleaseLabel') repo_upgrade_on_boot: PropertyRef = PropertyRef('RepoUpgradeOnBoot') requested_ami_version: PropertyRef = PropertyRef('RequestedAmiVersion') @@ -87,7 +86,7 @@ class EMRClusterNodeProperties(CartographyNodeProperties): @dataclass class EMRClusterToAwsAccountRelProperties(CartographyRelProperties): - lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) @dataclass @@ -95,7 +94,7 @@ class EMRClusterToAwsAccountRelProperties(CartographyRelProperties): class EMRClusterToAWSAccount(CartographyRelSchema): target_node_label: str = 'AWSAccount' target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('AccountId', static=True) + target_node_key_property_ref: PropertyRef = PropertyRef('AccountId', set_in_kwargs=True) direction: LinkDirection = 
LinkDirection.INWARD rel_label: str = "RESOURCE" properties: EMRClusterToAwsAccountRelProperties = EMRClusterToAwsAccountRelProperties() diff --git a/tests/unit/cartography/graph/test_querybuilder_complex.py b/tests/unit/cartography/graph/test_querybuilder_complex.py index f7e4cfa68..ce4223ebf 100644 --- a/tests/unit/cartography/graph/test_querybuilder_complex.py +++ b/tests/unit/cartography/graph/test_querybuilder_complex.py @@ -17,14 +17,14 @@ @dataclass class InterestingAssetProperties(CartographyNodeProperties): id: PropertyRef = PropertyRef('Id') - lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) property1: PropertyRef = PropertyRef('property1') property2: PropertyRef = PropertyRef('property2') @dataclass class InterestingAssetToSubResourceRelProps(CartographyRelProperties): - lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) another_rel_field: PropertyRef = PropertyRef('AnotherField') yet_another_rel_field: PropertyRef = PropertyRef("YetAnotherRelField") @@ -32,12 +32,12 @@ class InterestingAssetToSubResourceRelProps(CartographyRelProperties): @dataclass class InterestingAssetToSubResourceRel(CartographyRelSchema): """ - Define a sub-resource relationship + Define a sub resource relationship (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) """ target_node_label: str = 'SubResource' target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('sub_resource_id', static=True) + target_node_key_property_ref: PropertyRef = PropertyRef('sub_resource_id', set_in_kwargs=True) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "RELATIONSHIP_LABEL" properties: InterestingAssetToSubResourceRelProps = InterestingAssetToSubResourceRelProps() @@ -45,7 +45,7 @@ class InterestingAssetToSubResourceRel(CartographyRelSchema): @dataclass class 
InterestingAssetToHelloAssetRelProps(CartographyRelProperties): - lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) @dataclass @@ -64,7 +64,7 @@ class InterestingAssetToHelloAssetRel(CartographyRelSchema): @dataclass class InterestingAssetToWorldAssetRelProps(CartographyRelProperties): - lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) @dataclass diff --git a/tests/unit/cartography/graph/test_querybuilder_simple.py b/tests/unit/cartography/graph/test_querybuilder_simple.py index 31ba4bb0d..cb2bbc885 100644 --- a/tests/unit/cartography/graph/test_querybuilder_simple.py +++ b/tests/unit/cartography/graph/test_querybuilder_simple.py @@ -1,12 +1,3 @@ -""" -Test cases - -- Single node label -- Multiple node labels -- Node properties x -- Relationship properties x -- Additional links -""" from dataclasses import dataclass from cartography.graph.model import CartographyNodeProperties @@ -19,44 +10,39 @@ from tests.unit.cartography.graph.helpers import remove_leading_whitespace_and_empty_lines +# Test defining a simple node with no relationships. 
@dataclass class SimpleNodeProperties(CartographyNodeProperties): id: PropertyRef = PropertyRef('Id') - lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) property1: PropertyRef = PropertyRef('property1') property2: PropertyRef = PropertyRef('property2') +@dataclass +class SimpleNodeSchema(CartographyNodeSchema): + label: str = 'SimpleNode' + properties: SimpleNodeProperties = SimpleNodeProperties() + + +# Test defining a simple node with a sub resource rel: (:SimpleNode)<-[:RESOURCE]-(:SubResource) @dataclass class SimpleNodeToSubResourceRelProps(CartographyRelProperties): - lastupdated: PropertyRef = PropertyRef('lastupdated', static=True) + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) @dataclass class SimpleNodeToSubResourceRel(CartographyRelSchema): - """ - Define a sub resource rel: - (:EMRCluster)<-[:RESOURCE]-(:AWSAccount) - """ target_node_label: str = 'SubResource' target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('sub_resource_id', static=True) + target_node_key_property_ref: PropertyRef = PropertyRef('sub_resource_id', set_in_kwargs=True) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "RELATIONSHIP_LABEL" properties: SimpleNodeToSubResourceRelProps = SimpleNodeToSubResourceRelProps() -@dataclass -class SimpleNodeSchema(CartographyNodeSchema): - label: str = 'SimpleNode' - properties: SimpleNodeProperties = SimpleNodeProperties() - - @dataclass class SimpleNodeWithSubResourceSchema(CartographyNodeSchema): - """ - Same as SimpleNodeSchema but with a sub-resource relationship now. 
- """ label: str = 'SimpleNode' properties: SimpleNodeProperties = SimpleNodeProperties() sub_resource_relationship: CartographyRelSchema = SimpleNodeToSubResourceRel() @@ -64,7 +50,7 @@ class SimpleNodeWithSubResourceSchema(CartographyNodeSchema): def test_simplenode_sanity_checks(): """ - Test creating a simple node schema with no relationships + Test creating a simple node schema with no relationships. """ schema: SimpleNodeSchema = SimpleNodeSchema() # Assert that the unimplemented, non-abstract properties have None values. @@ -75,7 +61,7 @@ def test_simplenode_sanity_checks(): def test_simplenode_with_subresource_sanity_checks(): """ - Test creating a simple node schema with a subresource relationship + Test creating a simple node schema with no relationships and ensure that the optional attributes are indeed None. """ schema: SimpleNodeWithSubResourceSchema = SimpleNodeWithSubResourceSchema() # Assert that the unimplemented, non-abstract properties have None values. @@ -83,7 +69,10 @@ def test_simplenode_with_subresource_sanity_checks(): assert schema.other_relationships is None -def test_build_ingestion_query_with_subresource(): +def test_build_ingestion_query_with_sub_resource(): + """ + Test creating a simple node schema with a sub resource relationship. 
+ """ # Act query = build_ingestion_query(SimpleNodeWithSubResourceSchema()) From be30b3a252cd18db660f7f6f063a3410201c3a16 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 28 Nov 2022 10:23:04 -0800 Subject: [PATCH 12/27] Fix pre-commit --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 449a7b9bc..6fdeb7bb4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ repos: - id: debug-statements - id: end-of-file-fixer - id: trailing-whitespace -- repo: https://gitlab.com/pycqa/flake8 +- repo: https://github.com/pycqa/flake8 rev: 3.9.2 hooks: - id: flake8 From 8021520dc0dbaeefadac84f045368177fa2d5a6d Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Fri, 9 Dec 2022 14:53:04 -0800 Subject: [PATCH 13/27] Code commment suggestions Co-authored-by: Ramon Petgrave <32398091+ramonpetgrave64@users.noreply.github.com> --- cartography/graph/model.py | 9 +++++++-- cartography/graph/querybuilder.py | 7 ++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cartography/graph/model.py b/cartography/graph/model.py index d44381d31..ad4f92174 100644 --- a/cartography/graph/model.py +++ b/cartography/graph/model.py @@ -38,8 +38,8 @@ class PropertyRef: cartography takes lists of Python dicts and loads them to Neo4j. PropertyRefs allow our dynamically generated Neo4j ingestion queries to set values for a given node or relationship property from (A) a field on the dict being - processed (PropertyRef.override=False, default), or (B) from a single variable provided by a keyword argument - (PropertyRef.override=True). + processed (PropertyRef. set_in_kwargs =False, default), or (B) from a single variable provided by a keyword argument + (PropertyRef. set_in_kwargs =True). 
""" def __init__(self, name: str, set_in_kwargs=False): @@ -57,6 +57,11 @@ def _parameterize_name(self) -> str: return f"${self.name}" def __repr__(self) -> str: + """ + By default, the querybuilder will render an UNWIND query so that + the value for this property will come from the dict being processed. + If set_in_kwargs is True, then the value will instead come from kwargs. + """ return f"item.{self.name}" if not self.set_in_kwargs else self._parameterize_name() diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index afe40dd76..7d85a7111 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -53,12 +53,13 @@ def _build_node_properties_statement( } set_clause: str = _build_node_properties_statement(node_property_map) - the returned set_clause will be: - + the returned set_clause will be + ``` i.id = item.Id, i.node_prop_1 = item.Prop1, i.node_prop_2 = $Prop2 - + ``` + where `i` is a reference to the Neo4j node. :param node_property_map: Mapping of node attribute names as str to PropertyRef objects :param node_extra_labels: Optional list of extra labels to set on the node as str :return: The resulting Neo4j SET clause to set the given attributes on the node From 5bcb9158ac88d20af060416d72b265a0bb4f8773 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Fri, 9 Dec 2022 15:25:34 -0800 Subject: [PATCH 14/27] Stackoverflow comment for clarity) --- cartography/graph/model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cartography/graph/model.py b/cartography/graph/model.py index ad4f92174..8d8615ef7 100644 --- a/cartography/graph/model.py +++ b/cartography/graph/model.py @@ -78,6 +78,7 @@ def __post_init__(self): """ Designed to prevent direct instantiation. This workaround is needed since this is both an abstract class and a dataclass. + See https://stackoverflow.com/q/60590442. 
""" if self.__class__ == CartographyNodeProperties: raise TypeError("Cannot instantiate abstract class.") From 52a1d29c6844272c798064a0721f52d1cf174c9c Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Sun, 11 Dec 2022 22:42:52 -0800 Subject: [PATCH 15/27] Support ingesting only parts of a schema without breaking the others --- cartography/graph/querybuilder.py | 42 ++++- tests/data/graph/__init__.py | 0 tests/data/graph/querybuilder/__init__.py | 0 tests/data/graph/querybuilder/sample.py | 22 +++ tests/data/graph/querybuilder/sample_model.py | 94 ++++++++++++ .../integration/cartography/graph/__init__.py | 0 .../cartography/graph/test_querybuilder.py | 125 +++++++++++++++ .../graph/test_querybuilder_complex.py | 145 ++++-------------- .../graph/test_querybuilder_simple.py | 13 +- 9 files changed, 313 insertions(+), 128 deletions(-) create mode 100644 tests/data/graph/__init__.py create mode 100644 tests/data/graph/querybuilder/__init__.py create mode 100644 tests/data/graph/querybuilder/sample.py create mode 100644 tests/data/graph/querybuilder/sample_model.py create mode 100644 tests/integration/cartography/graph/__init__.py create mode 100644 tests/integration/cartography/graph/test_querybuilder.py diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index 7d85a7111..e54717db4 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -134,8 +134,7 @@ def _build_attach_sub_resource_statement(sub_resource_link: Optional[Cartography sub_resource_attach_template = Template( """ - WITH i, item - MATCH (j:$SubResourceLabel{$SubResourceKey: $SubResourceRef}) + OPTIONAL MATCH (j:$SubResourceLabel{$SubResourceKey: $SubResourceRef}) $RelMergeClause ON CREATE SET r.firstseen = timestamp() SET @@ -182,7 +181,8 @@ def _build_attach_additional_links_statement( additional_links_template = Template( """ WITH i, item - MATCH ($node_var:$AddlLabel{$AddlKey: $AddlRef}) + OPTIONAL MATCH ($node_var:$AddlLabel{$AddlKey: 
$AddlRef}) + WITH i, item, $node_var WHERE $node_var IS NOT NULL $RelMerge ON CREATE SET $rel_var.firstseen = timestamp() SET @@ -230,7 +230,32 @@ def _build_attach_additional_links_statement( ) links.append(additional_ref) - return '\n'.join(links) + return 'UNION'.join(links) + + +def _build_attach_relationships_statement( + sub_resource_relationship: Optional[CartographyRelSchema], + other_relationships: Optional[List[CartographyRelSchema]], +) -> str: + attach_sub_resource_statement = _build_attach_sub_resource_statement(sub_resource_relationship) + attach_additional_links_statement = _build_attach_additional_links_statement(other_relationships) + + statements = [] + statements += [attach_sub_resource_statement] if attach_sub_resource_statement else [] + statements += [attach_additional_links_statement] if attach_additional_links_statement else [] + + attach_relationships_statement = 'UNION'.join(stmt for stmt in statements) + + query_template = Template( + """ + WITH i, item + CALL { + WITH i, item + $attach_relationships_statement + } + """ + ) + return query_template.safe_substitute(attach_relationships_statement=attach_relationships_statement) def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: @@ -253,8 +278,7 @@ def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: ON CREATE SET i.firstseen = timestamp() SET $set_node_properties_statement - $attach_sub_resource_statement - $attach_additional_links_statement + $attach_relationships_statement """, ) @@ -265,7 +289,9 @@ def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: node_label=node_schema.label, dict_id_field=node_props.id, set_node_properties_statement=_build_node_properties_statement(node_props_as_dict, node_schema.extra_labels), - attach_sub_resource_statement=_build_attach_sub_resource_statement(node_schema.sub_resource_relationship), - attach_additional_links_statement=_build_attach_additional_links_statement(node_schema.other_relationships), + 
attach_relationships_statement=_build_attach_relationships_statement( + node_schema.sub_resource_relationship, + node_schema.other_relationships, + ) ) return ingest_query diff --git a/tests/data/graph/__init__.py b/tests/data/graph/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/graph/querybuilder/__init__.py b/tests/data/graph/querybuilder/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/graph/querybuilder/sample.py b/tests/data/graph/querybuilder/sample.py new file mode 100644 index 000000000..80c5e8840 --- /dev/null +++ b/tests/data/graph/querybuilder/sample.py @@ -0,0 +1,22 @@ +MERGE_SUB_RESOURCE_QUERY = """ +MERGE (s:SubResource{id: "sub-resource-id"}) +ON CREATE SET s.lastupdated = 1 +""" + +MERGE_WORLD_ASSET_QUERY = """ +MERGE (w:WorldAsset{id: "the-worldasset-id-1"}) +ON CREATE SET w.lastupdated = 1 +""" + + +# This dataset shows an InterestingNode attached to a WorldAsset but no other relationships. +INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS = [ + { + 'Id': 'interesting-node-id', + 'property1': 'b', + 'property2': 'c', + 'AnotherField': 'd', + 'YetAnotherRelField': 'e', + 'world_asset_id': 'the-worldasset-id-1' + } +] \ No newline at end of file diff --git a/tests/data/graph/querybuilder/sample_model.py b/tests/data/graph/querybuilder/sample_model.py new file mode 100644 index 000000000..29e936064 --- /dev/null +++ b/tests/data/graph/querybuilder/sample_model.py @@ -0,0 +1,94 @@ +from dataclasses import dataclass +from typing import List +from typing import Optional + +from cartography.graph.model import CartographyNodeProperties +from cartography.graph.model import CartographyNodeSchema +from cartography.graph.model import CartographyRelProperties +from cartography.graph.model import CartographyRelSchema +from cartography.graph.model import LinkDirection +from cartography.graph.model import PropertyRef + +from cartography.graph.querybuilder import default_field +from 
tests.unit.cartography.graph.test_querybuilder_simple import SimpleNodeProperties + + +@dataclass +class InterestingAssetProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('Id') + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + property1: PropertyRef = PropertyRef('property1') + property2: PropertyRef = PropertyRef('property2') + + +@dataclass +class InterestingAssetToSubResourceRelProps(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + another_rel_field: PropertyRef = PropertyRef('AnotherField') + yet_another_rel_field: PropertyRef = PropertyRef("YetAnotherRelField") + + +@dataclass +class InterestingAssetToSubResourceRel(CartographyRelSchema): + """ + Define a sub resource relationship + (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) + """ + target_node_label: str = 'SubResource' + target_node_key: str = 'id' + target_node_key_property_ref: PropertyRef = PropertyRef('sub_resource_id', set_in_kwargs=True) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RELATIONSHIP_LABEL" + properties: InterestingAssetToSubResourceRelProps = InterestingAssetToSubResourceRelProps() + + +@dataclass +class InterestingAssetToHelloAssetRelProps(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass +class InterestingAssetToHelloAssetRel(CartographyRelSchema): + """ + Define an additional relationship + (:InterestingAsset)-[:ASSOCIATED_WITH]->(:HelloAsset) + """ + target_node_label: str = 'HelloAsset' + target_node_key: str = 'id' + target_node_key_property_ref: PropertyRef = PropertyRef('hello_asset_id') + direction: LinkDirection = LinkDirection.OUTWARD + rel_label: str = "ASSOCIATED_WITH" + properties: InterestingAssetToHelloAssetRelProps = InterestingAssetToHelloAssetRelProps() + + +@dataclass +class InterestingAssetToWorldAssetRelProps(CartographyRelProperties): + lastupdated: 
PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass +class InterestingAssetToWorldAssetRel(CartographyRelSchema): + """ + Define yet another relationship. + (:InterestingAsset)<-[:CONNECTED]-(:WorldAsset) + """ + target_node_label: str = 'WorldAsset' + target_node_key: str = 'id' + target_node_key_property_ref: PropertyRef = PropertyRef('world_asset_id') + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "CONNECTED" + properties: InterestingAssetToWorldAssetRelProps = InterestingAssetToWorldAssetRelProps() + + +@dataclass +class InterestingAssetSchema(CartographyNodeSchema): + extra_labels: Optional[List[str]] = default_field(['AnotherNodeLabel', 'YetAnotherNodeLabel']) + label: str = 'InterestingNode' + properties: SimpleNodeProperties = SimpleNodeProperties() + sub_resource_relationship: InterestingAssetToSubResourceRel = InterestingAssetToSubResourceRel() + other_relationships: Optional[List[CartographyRelSchema]] = default_field( + [ + InterestingAssetToHelloAssetRel(), + InterestingAssetToWorldAssetRel(), + ], + ) diff --git a/tests/integration/cartography/graph/__init__.py b/tests/integration/cartography/graph/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/integration/cartography/graph/test_querybuilder.py b/tests/integration/cartography/graph/test_querybuilder.py new file mode 100644 index 000000000..b834527a7 --- /dev/null +++ b/tests/integration/cartography/graph/test_querybuilder.py @@ -0,0 +1,125 @@ +from cartography.client.core.tx import load_graph_data +from cartography.graph.querybuilder import build_ingestion_query +from tests.data.graph.querybuilder.sample import MERGE_SUB_RESOURCE_QUERY, MERGE_WORLD_ASSET_QUERY, INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS +from tests.data.graph.querybuilder.sample_model import InterestingAssetSchema + + +def test_load_graph_data_subset_of_relationships_1(neo4j_session): + """ + Test load_graph_data() if a schema defines multiple 
relationships but only a subset of them are present in our data. + + In this test case, the following relationships are possible: + (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) + (:InterestingAsset)-[:ASSOCIATED_WITH]->(:HelloAsset) + (:InterestingAsset)<-[:CONNECTED]-(:WorldAsset) + but our test data does not include a HelloAsset. + """ + # Arrange: add (:SubResource{id:sub-resource-id}) and (:WorldAsset{id: world-asset-id}) to the test graph + neo4j_session.run(MERGE_SUB_RESOURCE_QUERY) + neo4j_session.run(MERGE_WORLD_ASSET_QUERY) + + # Act + query = build_ingestion_query(InterestingAssetSchema()) + load_graph_data(neo4j_session, query, INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, lastupdated=1, sub_resource_id='sub-resource-id') + + # Assert that the InterestingNode to SubResource relationship exists + expected = { + ('interesting-node-id', 'sub-resource-id'), + } + result = neo4j_session.run( + """ + MATCH (n1:InterestingNode)<-[:RELATIONSHIP_LABEL]-(n2:SubResource) RETURN n1.id, n2.id; + """, + ) + actual = { + (r['n1.id'], r['n2.id']) for r in result + } + assert actual == expected + + # Assert that the InterestingNode to HelloAsset relationship does NOT exist + expected = { + ('interesting-node-id', None), + } + result = neo4j_session.run( + """ + MATCH (n1:InterestingNode) + OPTIONAL MATCH (n1)<-[:ASSOCIATED_WITH]-(n2:HelloAsset) + RETURN n1.id, n2.id; + """, + ) + actual = { + (r['n1.id'], r['n2.id']) for r in result + } + assert actual == expected + + # Assert that the InterestingNode to WorldAsset relationship exists + expected = { + ('interesting-node-id', 'the-worldasset-id-1'), + } + result = neo4j_session.run( + """ + MATCH (n1:InterestingNode)<-[:CONNECTED]-(n2:WorldAsset) RETURN n1.id, n2.id; + """, + ) + actual = { + (r['n1.id'], r['n2.id']) for r in result + } + assert actual == expected + + +def test_load_graph_data_subset_of_relationships_only_sub_resource(neo4j_session): + """ + In this test case, our test data only includes the 
sub resource relationship + """ + # Arrange: add (:SubResource{id:sub-resource-id}) and (:WorldAsset{id: world-asset-id}) to the test graph + neo4j_session.run(MERGE_SUB_RESOURCE_QUERY) + + # Act + query = build_ingestion_query(InterestingAssetSchema()) + load_graph_data(neo4j_session, query, INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, lastupdated=1, sub_resource_id='sub-resource-id') + + # Assert that the InterestingNode to SubResource relationship exists + expected = { + ('interesting-node-id', 'sub-resource-id'), + } + result = neo4j_session.run( + """ + MATCH (n1:InterestingNode)<-[:RELATIONSHIP_LABEL]-(n2:SubResource) RETURN n1.id, n2.id; + """, + ) + actual = { + (r['n1.id'], r['n2.id']) for r in result + } + assert actual == expected + + # Assert that the InterestingNode to HelloAsset relationship does NOT exist + expected = { + ('interesting-node-id', None), + } + result = neo4j_session.run( + """ + MATCH (n1:InterestingNode) + OPTIONAL MATCH (n1)<-[:ASSOCIATED_WITH]-(n2:HelloAsset) + RETURN n1.id, n2.id; + """, + ) + actual = { + (r['n1.id'], r['n2.id']) for r in result + } + assert actual == expected + + # Assert that the InterestingNode to WorldAsset relationship does NOT exist + expected = { + ('interesting-node-id', None), + } + result = neo4j_session.run( + """ + MATCH (n1:InterestingNode) + OPTIONAL MATCH (n1)<-[:CONNECTED]-(n2:WorldAsset) + RETURN n1.id, n2.id; + """, + ) + actual = { + (r['n1.id'], r['n2.id']) for r in result + } + assert actual == expected diff --git a/tests/unit/cartography/graph/test_querybuilder_complex.py b/tests/unit/cartography/graph/test_querybuilder_complex.py index ce4223ebf..f7f29d06f 100644 --- a/tests/unit/cartography/graph/test_querybuilder_complex.py +++ b/tests/unit/cartography/graph/test_querybuilder_complex.py @@ -1,98 +1,6 @@ -from dataclasses import dataclass -from typing import List -from typing import Optional - -from cartography.graph.model import CartographyNodeProperties -from cartography.graph.model 
import CartographyNodeSchema -from cartography.graph.model import CartographyRelProperties -from cartography.graph.model import CartographyRelSchema -from cartography.graph.model import LinkDirection -from cartography.graph.model import PropertyRef from cartography.graph.querybuilder import build_ingestion_query -from cartography.graph.querybuilder import default_field +from tests.data.graph.querybuilder.sample_model import InterestingAssetSchema from tests.unit.cartography.graph.helpers import remove_leading_whitespace_and_empty_lines -from tests.unit.cartography.graph.test_querybuilder_simple import SimpleNodeProperties - - -@dataclass -class InterestingAssetProperties(CartographyNodeProperties): - id: PropertyRef = PropertyRef('Id') - lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) - property1: PropertyRef = PropertyRef('property1') - property2: PropertyRef = PropertyRef('property2') - - -@dataclass -class InterestingAssetToSubResourceRelProps(CartographyRelProperties): - lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) - another_rel_field: PropertyRef = PropertyRef('AnotherField') - yet_another_rel_field: PropertyRef = PropertyRef("YetAnotherRelField") - - -@dataclass -class InterestingAssetToSubResourceRel(CartographyRelSchema): - """ - Define a sub resource relationship - (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) - """ - target_node_label: str = 'SubResource' - target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('sub_resource_id', set_in_kwargs=True) - direction: LinkDirection = LinkDirection.INWARD - rel_label: str = "RELATIONSHIP_LABEL" - properties: InterestingAssetToSubResourceRelProps = InterestingAssetToSubResourceRelProps() - - -@dataclass -class InterestingAssetToHelloAssetRelProps(CartographyRelProperties): - lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) - - -@dataclass -class 
InterestingAssetToHelloAssetRel(CartographyRelSchema): - """ - Define an additional relationship - (:InterestingAsset)-[:ASSOCIATED_WITH]->(:HelloAsset) - """ - target_node_label: str = 'HelloAsset' - target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('hello_asset_id') - direction: LinkDirection = LinkDirection.OUTWARD - rel_label: str = "ASSOCIATED_WITH" - properties: InterestingAssetToHelloAssetRelProps = InterestingAssetToHelloAssetRelProps() - - -@dataclass -class InterestingAssetToWorldAssetRelProps(CartographyRelProperties): - lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) - - -@dataclass -class InterestingAssetToWorldAssetRel(CartographyRelSchema): - """ - Define yet another relationship. - (:InterestingAsset)<-[:CONNECTED]-(:WorldAsset) - """ - target_node_label: str = 'WorldAsset' - target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('world_asset_id') - direction: LinkDirection = LinkDirection.INWARD - rel_label: str = "CONNECTED" - properties: InterestingAssetToWorldAssetRelProps = InterestingAssetToWorldAssetRelProps() - - -@dataclass -class InterestingAssetSchema(CartographyNodeSchema): - extra_labels: Optional[List[str]] = default_field(['AnotherNodeLabel', 'YetAnotherNodeLabel']) - label: str = 'InterestingNode' - properties: SimpleNodeProperties = SimpleNodeProperties() - sub_resource_relationship: InterestingAssetToSubResourceRel = InterestingAssetToSubResourceRel() - other_relationships: Optional[List[CartographyRelSchema]] = default_field( - [ - InterestingAssetToHelloAssetRel(), - InterestingAssetToWorldAssetRel(), - ], - ) def test_build_ingestion_query_complex(): @@ -108,29 +16,36 @@ def test_build_ingestion_query_complex(): i.property1 = item.property1, i.property2 = item.property2, i:AnotherNodeLabel:YetAnotherNodeLabel - + WITH i, item - MATCH (j:SubResource{id: $sub_resource_id}) - MERGE (i)<-[r:RELATIONSHIP_LABEL]-(j) - ON CREATE SET r.firstseen 
= timestamp() - SET - r.lastupdated = $lastupdated, - r.another_rel_field = item.AnotherField, - r.yet_another_rel_field = item.YetAnotherRelField - - WITH i, item - MATCH (n0:HelloAsset{id: item.hello_asset_id}) - MERGE (i)-[r0:ASSOCIATED_WITH]->(n0) - ON CREATE SET r0.firstseen = timestamp() - SET - r0.lastupdated = $lastupdated - - WITH i, item - MATCH (n1:WorldAsset{id: item.world_asset_id}) - MERGE (i)<-[r1:CONNECTED]-(n1) - ON CREATE SET r1.firstseen = timestamp() - SET - r1.lastupdated = $lastupdated + CALL { + WITH i, item + OPTIONAL MATCH (j:SubResource{id: $sub_resource_id}) + MERGE (i)<-[r:RELATIONSHIP_LABEL]-(j) + ON CREATE SET r.firstseen = timestamp() + SET + r.lastupdated = $lastupdated, + r.another_rel_field = item.AnotherField, + r.yet_another_rel_field = item.YetAnotherRelField + + UNION + WITH i, item + OPTIONAL MATCH (n0:HelloAsset{id: item.hello_asset_id}) + WITH i, item, n0 WHERE n0 IS NOT NULL + MERGE (i)-[r0:ASSOCIATED_WITH]->(n0) + ON CREATE SET r0.firstseen = timestamp() + SET + r0.lastupdated = $lastupdated + + UNION + WITH i, item + OPTIONAL MATCH (n1:WorldAsset{id: item.world_asset_id}) + WITH i, item, n1 WHERE n1 IS NOT NULL + MERGE (i)<-[r1:CONNECTED]-(n1) + ON CREATE SET r1.firstseen = timestamp() + SET + r1.lastupdated = $lastupdated + } """ # Assert: compare query outputs while ignoring leading whitespace. 
diff --git a/tests/unit/cartography/graph/test_querybuilder_simple.py b/tests/unit/cartography/graph/test_querybuilder_simple.py index cb2bbc885..da5cf846b 100644 --- a/tests/unit/cartography/graph/test_querybuilder_simple.py +++ b/tests/unit/cartography/graph/test_querybuilder_simple.py @@ -86,11 +86,14 @@ def test_build_ingestion_query_with_sub_resource(): i.property2 = item.property2 WITH i, item - MATCH (j:SubResource{id: $sub_resource_id}) - MERGE (i)<-[r:RELATIONSHIP_LABEL]-(j) - ON CREATE SET r.firstseen = timestamp() - SET - r.lastupdated = $lastupdated + CALL { + WITH i, item + OPTIONAL MATCH (j:SubResource{id: $sub_resource_id}) + MERGE (i)<-[r:RELATIONSHIP_LABEL]-(j) + ON CREATE SET r.firstseen = timestamp() + SET + r.lastupdated = $lastupdated + } """ # Assert: compare query outputs while ignoring leading whitespace. From b5875a97aaf0504c64e67b6ec55273ac99c32f3b Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Sun, 11 Dec 2022 22:48:27 -0800 Subject: [PATCH 16/27] Doc comment --- cartography/graph/querybuilder.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index e54717db4..8246a454c 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -237,6 +237,15 @@ def _build_attach_relationships_statement( sub_resource_relationship: Optional[CartographyRelSchema], other_relationships: Optional[List[CartographyRelSchema]], ) -> str: + """ + Use Neo4j subqueries to attach sub resource and/or other relationships. + Subqueries allow the query to continue to run even if we only have data for some but not all of the + relationships defined by a schema. 
+ For example, if an EC2Instance has attachments to NetworkInterfaces and AWSAccounts but our data + only includes EC2Instance to AWSAccount information, structuring the ingestion query with sub- + queries allows us to build a query that will ignore the null relationships and build the ones that + exist. + """ attach_sub_resource_statement = _build_attach_sub_resource_statement(sub_resource_relationship) attach_additional_links_statement = _build_attach_additional_links_statement(other_relationships) From fa40b0952ed2593bc59966be882c141e487908ee Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 12 Dec 2022 08:30:28 -0800 Subject: [PATCH 17/27] Linter --- cartography/graph/querybuilder.py | 4 ++-- tests/data/graph/querybuilder/sample.py | 6 +++--- tests/data/graph/querybuilder/sample_model.py | 1 - .../cartography/graph/test_querybuilder.py | 20 ++++++++++++++++--- .../graph/test_querybuilder_complex.py | 6 +++--- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index 8246a454c..b425ca1ee 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -262,7 +262,7 @@ def _build_attach_relationships_statement( WITH i, item $attach_relationships_statement } - """ + """, ) return query_template.safe_substitute(attach_relationships_statement=attach_relationships_statement) @@ -301,6 +301,6 @@ def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: attach_relationships_statement=_build_attach_relationships_statement( node_schema.sub_resource_relationship, node_schema.other_relationships, - ) + ), ) return ingest_query diff --git a/tests/data/graph/querybuilder/sample.py b/tests/data/graph/querybuilder/sample.py index 80c5e8840..03b922c4a 100644 --- a/tests/data/graph/querybuilder/sample.py +++ b/tests/data/graph/querybuilder/sample.py @@ -17,6 +17,6 @@ 'property2': 'c', 'AnotherField': 'd', 'YetAnotherRelField': 'e', - 'world_asset_id': 
'the-worldasset-id-1' - } -] \ No newline at end of file + 'world_asset_id': 'the-worldasset-id-1', + }, +] diff --git a/tests/data/graph/querybuilder/sample_model.py b/tests/data/graph/querybuilder/sample_model.py index 29e936064..c47fadfb5 100644 --- a/tests/data/graph/querybuilder/sample_model.py +++ b/tests/data/graph/querybuilder/sample_model.py @@ -8,7 +8,6 @@ from cartography.graph.model import CartographyRelSchema from cartography.graph.model import LinkDirection from cartography.graph.model import PropertyRef - from cartography.graph.querybuilder import default_field from tests.unit.cartography.graph.test_querybuilder_simple import SimpleNodeProperties diff --git a/tests/integration/cartography/graph/test_querybuilder.py b/tests/integration/cartography/graph/test_querybuilder.py index b834527a7..010f263de 100644 --- a/tests/integration/cartography/graph/test_querybuilder.py +++ b/tests/integration/cartography/graph/test_querybuilder.py @@ -1,6 +1,8 @@ from cartography.client.core.tx import load_graph_data from cartography.graph.querybuilder import build_ingestion_query -from tests.data.graph.querybuilder.sample import MERGE_SUB_RESOURCE_QUERY, MERGE_WORLD_ASSET_QUERY, INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS +from tests.data.graph.querybuilder.sample import INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS +from tests.data.graph.querybuilder.sample import MERGE_SUB_RESOURCE_QUERY +from tests.data.graph.querybuilder.sample import MERGE_WORLD_ASSET_QUERY from tests.data.graph.querybuilder.sample_model import InterestingAssetSchema @@ -20,7 +22,13 @@ def test_load_graph_data_subset_of_relationships_1(neo4j_session): # Act query = build_ingestion_query(InterestingAssetSchema()) - load_graph_data(neo4j_session, query, INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, lastupdated=1, sub_resource_id='sub-resource-id') + load_graph_data( + neo4j_session, + query, + INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, + lastupdated=1, + sub_resource_id='sub-resource-id', + ) # 
Assert that the InterestingNode to SubResource relationship exists expected = { @@ -76,7 +84,13 @@ def test_load_graph_data_subset_of_relationships_only_sub_resource(neo4j_session # Act query = build_ingestion_query(InterestingAssetSchema()) - load_graph_data(neo4j_session, query, INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, lastupdated=1, sub_resource_id='sub-resource-id') + load_graph_data( + neo4j_session, + query, + INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, + lastupdated=1, + sub_resource_id='sub-resource-id', + ) # Assert that the InterestingNode to SubResource relationship exists expected = { diff --git a/tests/unit/cartography/graph/test_querybuilder_complex.py b/tests/unit/cartography/graph/test_querybuilder_complex.py index f7f29d06f..06248b5ec 100644 --- a/tests/unit/cartography/graph/test_querybuilder_complex.py +++ b/tests/unit/cartography/graph/test_querybuilder_complex.py @@ -16,7 +16,7 @@ def test_build_ingestion_query_complex(): i.property1 = item.property1, i.property2 = item.property2, i:AnotherNodeLabel:YetAnotherNodeLabel - + WITH i, item CALL { WITH i, item @@ -27,7 +27,7 @@ def test_build_ingestion_query_complex(): r.lastupdated = $lastupdated, r.another_rel_field = item.AnotherField, r.yet_another_rel_field = item.YetAnotherRelField - + UNION WITH i, item OPTIONAL MATCH (n0:HelloAsset{id: item.hello_asset_id}) @@ -36,7 +36,7 @@ def test_build_ingestion_query_complex(): ON CREATE SET r0.firstseen = timestamp() SET r0.lastupdated = $lastupdated - + UNION WITH i, item OPTIONAL MATCH (n1:WorldAsset{id: item.world_asset_id}) From 80b6ff5046f8832835ad9d08c89671a646559de1 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 12 Dec 2022 16:21:38 -0800 Subject: [PATCH 18/27] Support matching on one or more properties --- cartography/graph/model.py | 23 +++++---- cartography/graph/querybuilder.py | 26 +++++++--- cartography/intel/aws/emr.py | 6 +-- .../querybuilder/sample_data/__init__.py | 0 .../sample_data/multiple_attr_match.py | 37 
++++++++++++++ .../partial_relationships.py} | 0 .../querybuilder/sample_models/__init__.py | 0 .../interesting_asset.py} | 16 +++--- .../sample_models/multiple_attr_match.py | 51 +++++++++++++++++++ .../querybuilder/sample_models/simple_node.py | 49 ++++++++++++++++++ ...st_querybuilder_match_on_multiple_attrs.py | 41 +++++++++++++++ ...er.py => test_querybuilder_rel_subsets.py} | 37 +++++++------- .../graph/test_querybuilder_complex.py | 5 +- .../graph/test_querybuilder_simple.py | 49 ++---------------- 14 files changed, 246 insertions(+), 94 deletions(-) create mode 100644 tests/data/graph/querybuilder/sample_data/__init__.py create mode 100644 tests/data/graph/querybuilder/sample_data/multiple_attr_match.py rename tests/data/graph/querybuilder/{sample.py => sample_data/partial_relationships.py} (100%) create mode 100644 tests/data/graph/querybuilder/sample_models/__init__.py rename tests/data/graph/querybuilder/{sample_model.py => sample_models/interesting_asset.py} (86%) create mode 100644 tests/data/graph/querybuilder/sample_models/multiple_attr_match.py create mode 100644 tests/data/graph/querybuilder/sample_models/simple_node.py create mode 100644 tests/integration/cartography/graph/test_querybuilder_match_on_multiple_attrs.py rename tests/integration/cartography/graph/{test_querybuilder.py => test_querybuilder_rel_subsets.py} (68%) diff --git a/cartography/graph/model.py b/cartography/graph/model.py index 8d8615ef7..6d3501244 100644 --- a/cartography/graph/model.py +++ b/cartography/graph/model.py @@ -3,6 +3,7 @@ from dataclasses import field from enum import auto from enum import Enum +from typing import Dict from typing import List from typing import Optional @@ -109,6 +110,8 @@ class CartographyRelSchema(abc.ABC): The CartographyRelSchema contains properties that make it possible to connect the CartographyNodeSchema to other existing nodes in the graph. 
""" + _target_node_key_refs: Dict[str, PropertyRef] = field(init=False) + @property @abc.abstractmethod def properties(self) -> CartographyRelProperties: @@ -126,21 +129,21 @@ def target_node_label(self) -> str: pass @property - @abc.abstractmethod - def target_node_key(self) -> str: + def target_node_key_refs(self) -> Dict[str, PropertyRef]: """ - :return: The attribute name on the target_node_label used to uniquely identify what node to connect to. + :return: A dict mapping + From: one or more attribute names on the target_node_label used to uniquely identify what node to connect to. + To: The value of the target_node_key used to uniquely identify what node to connect to. This is given as a + PropertyRef. """ - pass + return self._target_node_key_refs - @property - @abc.abstractmethod - def target_node_key_property_ref(self) -> PropertyRef: + @target_node_key_refs.setter + def target_node_key_refs(self, key_refs: Dict[str, PropertyRef]) -> None: """ - :return: The value of the target_node_key used to uniquely identify what node to connect to. This is given as a - PropertyRef. + Boilerplate setter function used to keep typehints happy. """ - pass + self._target_node_key_refs = key_refs @property @abc.abstractmethod diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index b425ca1ee..2ad7d9c2f 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -116,6 +116,20 @@ def _build_rel_properties_statement(rel_var: str, rel_property_map: Optional[Dic return set_clause +def _build_match_clause(key_refs: Dict[str, PropertyRef]) -> str: + """ + Generate a Neo4j match statement on one or more keys and values for a given node. + """ + if not key_refs: + raise ValueError( + "Failed to create match clause because key_refs is Falsy. 
Please make sure that the `target_node_key_refs` " + "field on all subclasses of CartographyRelSchema are properly defined.", + ) + + match = Template("$Key: $PropRef") + return ', '.join(match.safe_substitute(Key=key, PropRef=prop_ref) for key, prop_ref in key_refs.items()) + + def _build_attach_sub_resource_statement(sub_resource_link: Optional[CartographyRelSchema] = None) -> str: """ Generates a Neo4j statement to attach a sub resource to a node. A 'sub resource' is a term we made up to describe @@ -134,7 +148,8 @@ def _build_attach_sub_resource_statement(sub_resource_link: Optional[Cartography sub_resource_attach_template = Template( """ - OPTIONAL MATCH (j:$SubResourceLabel{$SubResourceKey: $SubResourceRef}) + OPTIONAL MATCH (j:$SubResourceLabel{$MatchClause}) + WITH i, item, j WHERE j IS NOT NULL $RelMergeClause ON CREATE SET r.firstseen = timestamp() SET @@ -153,8 +168,7 @@ def _build_attach_sub_resource_statement(sub_resource_link: Optional[Cartography attach_sub_resource_statement = sub_resource_attach_template.safe_substitute( SubResourceLabel=sub_resource_link.target_node_label, - SubResourceKey=sub_resource_link.target_node_key, - SubResourceRef=sub_resource_link.target_node_key_property_ref, + MatchClause=_build_match_clause(sub_resource_link.target_node_key_refs), RelMergeClause=rel_merge_clause, SubResourceRelLabel=sub_resource_link.rel_label, set_rel_properties_statement=_build_rel_properties_statement('r', rel_props_as_dict), @@ -177,11 +191,10 @@ def _build_attach_additional_links_statement( if not additional_relationships: return '' - # TODO - support matching on multiple properties additional_links_template = Template( """ WITH i, item - OPTIONAL MATCH ($node_var:$AddlLabel{$AddlKey: $AddlRef}) + OPTIONAL MATCH ($node_var:$AddlLabel{$MatchClause}) WITH i, item, $node_var WHERE $node_var IS NOT NULL $RelMerge ON CREATE SET $rel_var.firstseen = timestamp() @@ -221,8 +234,7 @@ def _build_attach_additional_links_statement( additional_ref = 
additional_links_template.safe_substitute( AddlLabel=link.target_node_label, - AddlKey=link.target_node_key, - AddlRef=link.target_node_key_property_ref, + MatchClause=_build_match_clause(link.target_node_key_refs), node_var=node_var, rel_var=rel_var, RelMerge=rel_merge, diff --git a/cartography/intel/aws/emr.py b/cartography/intel/aws/emr.py index 5787dd978..a90f4b7e0 100644 --- a/cartography/intel/aws/emr.py +++ b/cartography/intel/aws/emr.py @@ -16,6 +16,7 @@ from cartography.graph.model import LinkDirection from cartography.graph.model import PropertyRef from cartography.graph.querybuilder import build_ingestion_query +from cartography.graph.querybuilder import default_field from cartography.intel.aws.ec2.util import get_botocore_config from cartography.util import aws_handle_regions from cartography.util import run_cleanup_job @@ -93,8 +94,7 @@ class EMRClusterToAwsAccountRelProperties(CartographyRelProperties): # (:EMRCluster)<-[:RESOURCE]-(:AWSAccount) class EMRClusterToAWSAccount(CartographyRelSchema): target_node_label: str = 'AWSAccount' - target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('AccountId', set_in_kwargs=True) + target_node_key_refs: Dict[str, PropertyRef] = default_field({'id': PropertyRef('AccountId', set_in_kwargs=True)}) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "RESOURCE" properties: EMRClusterToAwsAccountRelProperties = EMRClusterToAwsAccountRelProperties() @@ -104,7 +104,7 @@ class EMRClusterToAWSAccount(CartographyRelSchema): class EMRClusterSchema(CartographyNodeSchema): label: str = 'EMRCluster' properties: EMRClusterNodeProperties = EMRClusterNodeProperties() - sub_resource_relationship: CartographyRelSchema = EMRClusterToAWSAccount() + sub_resource_relationship: EMRClusterToAWSAccount = EMRClusterToAWSAccount() @timeit diff --git a/tests/data/graph/querybuilder/sample_data/__init__.py b/tests/data/graph/querybuilder/sample_data/__init__.py new file mode 100644 index 
000000000..e69de29bb diff --git a/tests/data/graph/querybuilder/sample_data/multiple_attr_match.py b/tests/data/graph/querybuilder/sample_data/multiple_attr_match.py new file mode 100644 index 000000000..1d137369e --- /dev/null +++ b/tests/data/graph/querybuilder/sample_data/multiple_attr_match.py @@ -0,0 +1,37 @@ +MERGE_PERSONS = """ +MERGE (s1:Person{id: 1, first_name: "Homer", last_name: "Simpson", lastupdated: 1}) +MERGE (s2:Person{id: 2, first_name: "Marge", last_name: "Simpson", lastupdated: 1}) +MERGE (s3:Person{id: 3, first_name: "Bart", last_name: "Simpson", lastupdated: 1}) +MERGE (s4:Person{id: 4, first_name: "Lisa", last_name: "Simpson", lastupdated: 1}) +MERGE (s5:Person{id: 5, first_name: "Maggie", last_name: "Simpson", lastupdated: 1}) +""" + + +# This is intended to test matching on more than one attribute. +# Lisa has 1 computer, Homer has 2, everyone else has no computers. +TEST_COMPUTERS = [ + { + 'Id': 1234, + 'RAM_GB': 16, + 'NumCores': 4, + 'name': 'macbook-air', + 'LastName': 'Simpson', + 'FirstName': "Lisa", + }, + { + 'Id': 9876, + 'RAM_GB': 128, + 'NumCores': 32, + 'name': 'server-in-the-closet', + 'LastName': 'Simpson', + 'FirstName': "Homer", + }, + { + 'Id': 1337, + 'RAM_GB': 2048, + 'NumCores': 1024, + 'name': 'beefy-box', + 'LastName': 'Simpson', + 'FirstName': "Homer", + }, +] diff --git a/tests/data/graph/querybuilder/sample.py b/tests/data/graph/querybuilder/sample_data/partial_relationships.py similarity index 100% rename from tests/data/graph/querybuilder/sample.py rename to tests/data/graph/querybuilder/sample_data/partial_relationships.py diff --git a/tests/data/graph/querybuilder/sample_models/__init__.py b/tests/data/graph/querybuilder/sample_models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/data/graph/querybuilder/sample_model.py b/tests/data/graph/querybuilder/sample_models/interesting_asset.py similarity index 86% rename from tests/data/graph/querybuilder/sample_model.py rename to 
tests/data/graph/querybuilder/sample_models/interesting_asset.py index c47fadfb5..d85fa17fb 100644 --- a/tests/data/graph/querybuilder/sample_model.py +++ b/tests/data/graph/querybuilder/sample_models/interesting_asset.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import Dict from typing import List from typing import Optional @@ -9,7 +10,7 @@ from cartography.graph.model import LinkDirection from cartography.graph.model import PropertyRef from cartography.graph.querybuilder import default_field -from tests.unit.cartography.graph.test_querybuilder_simple import SimpleNodeProperties +from tests.data.graph.querybuilder.sample_models.simple_node import SimpleNodeProperties @dataclass @@ -34,8 +35,9 @@ class InterestingAssetToSubResourceRel(CartographyRelSchema): (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) """ target_node_label: str = 'SubResource' - target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('sub_resource_id', set_in_kwargs=True) + target_node_key_refs: Dict[str, PropertyRef] = default_field( + {'id': PropertyRef('sub_resource_id', set_in_kwargs=True)}, + ) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "RELATIONSHIP_LABEL" properties: InterestingAssetToSubResourceRelProps = InterestingAssetToSubResourceRelProps() @@ -53,8 +55,7 @@ class InterestingAssetToHelloAssetRel(CartographyRelSchema): (:InterestingAsset)-[:ASSOCIATED_WITH]->(:HelloAsset) """ target_node_label: str = 'HelloAsset' - target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('hello_asset_id') + target_node_key_refs: Dict[str, PropertyRef] = default_field({'id': PropertyRef('hello_asset_id')}) direction: LinkDirection = LinkDirection.OUTWARD rel_label: str = "ASSOCIATED_WITH" properties: InterestingAssetToHelloAssetRelProps = InterestingAssetToHelloAssetRelProps() @@ -72,8 +73,7 @@ class InterestingAssetToWorldAssetRel(CartographyRelSchema): 
(:InterestingAsset)<-[:CONNECTED]-(:WorldAsset) """ target_node_label: str = 'WorldAsset' - target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('world_asset_id') + target_node_key_refs: Dict[str, PropertyRef] = default_field({'id': PropertyRef('world_asset_id')}) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "CONNECTED" properties: InterestingAssetToWorldAssetRelProps = InterestingAssetToWorldAssetRelProps() @@ -82,7 +82,7 @@ class InterestingAssetToWorldAssetRel(CartographyRelSchema): @dataclass class InterestingAssetSchema(CartographyNodeSchema): extra_labels: Optional[List[str]] = default_field(['AnotherNodeLabel', 'YetAnotherNodeLabel']) - label: str = 'InterestingNode' + label: str = 'InterestingAsset' properties: SimpleNodeProperties = SimpleNodeProperties() sub_resource_relationship: InterestingAssetToSubResourceRel = InterestingAssetToSubResourceRel() other_relationships: Optional[List[CartographyRelSchema]] = default_field( diff --git a/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py b/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py new file mode 100644 index 000000000..c24d1d1e5 --- /dev/null +++ b/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py @@ -0,0 +1,51 @@ +from dataclasses import dataclass +from typing import Dict +from typing import List +from typing import Optional + +from cartography.graph.model import CartographyNodeProperties +from cartography.graph.model import CartographyNodeSchema +from cartography.graph.model import CartographyRelProperties +from cartography.graph.model import CartographyRelSchema +from cartography.graph.model import LinkDirection +from cartography.graph.model import PropertyRef +from cartography.graph.querybuilder import default_field + + +@dataclass +class TestComputerToPersonRelProps(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass +class 
TestComputerToPersonRel(CartographyRelSchema): + """ + (:TestComputer)<-[:OWNS]-(:Person) + """ + target_node_label: str = 'Person' + target_node_key_refs: Dict[str, PropertyRef] = default_field( + { + 'first_name': PropertyRef('FirstName'), + 'last_name': PropertyRef('LastName'), + }, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "OWNS" + properties: TestComputerToPersonRelProps = TestComputerToPersonRelProps() + + +# Test defining a simple node with no relationships. +@dataclass +class TestComputerProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('Id') + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + ram_gb: PropertyRef = PropertyRef('RAM_GB') + num_cores: PropertyRef = PropertyRef('NumCores') + name: PropertyRef = PropertyRef('name') + + +@dataclass +class TestComputer(CartographyNodeSchema): + label: str = 'TestComputer' + properties: TestComputerProperties = TestComputerProperties() + other_relationships: Optional[List[CartographyRelSchema]] = default_field([TestComputerToPersonRel()]) diff --git a/tests/data/graph/querybuilder/sample_models/simple_node.py b/tests/data/graph/querybuilder/sample_models/simple_node.py new file mode 100644 index 000000000..648be95fe --- /dev/null +++ b/tests/data/graph/querybuilder/sample_models/simple_node.py @@ -0,0 +1,49 @@ +from dataclasses import dataclass +from typing import Dict + +from cartography.graph.model import CartographyNodeProperties +from cartography.graph.model import CartographyNodeSchema +from cartography.graph.model import CartographyRelProperties +from cartography.graph.model import CartographyRelSchema +from cartography.graph.model import LinkDirection +from cartography.graph.model import PropertyRef +from cartography.graph.querybuilder import default_field + + +# Test defining a simple node with no relationships. 
+@dataclass +class SimpleNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('Id') + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + property1: PropertyRef = PropertyRef('property1') + property2: PropertyRef = PropertyRef('property2') + + +@dataclass +class SimpleNodeSchema(CartographyNodeSchema): + label: str = 'SimpleNode' + properties: SimpleNodeProperties = SimpleNodeProperties() + + +# Test defining a simple node with a sub resource rel: (:SimpleNode)<-[:RESOURCE]-(:SubResource) +@dataclass +class SimpleNodeToSubResourceRelProps(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass +class SimpleNodeToSubResourceRel(CartographyRelSchema): + target_node_label: str = 'SubResource' + target_node_key_refs: Dict[str, PropertyRef] = default_field( + {'id': PropertyRef('sub_resource_id', set_in_kwargs=True)}, + ) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "RELATIONSHIP_LABEL" + properties: SimpleNodeToSubResourceRelProps = SimpleNodeToSubResourceRelProps() + + +@dataclass +class SimpleNodeWithSubResourceSchema(CartographyNodeSchema): + label: str = 'SimpleNode' + properties: SimpleNodeProperties = SimpleNodeProperties() + sub_resource_relationship: CartographyRelSchema = SimpleNodeToSubResourceRel() diff --git a/tests/integration/cartography/graph/test_querybuilder_match_on_multiple_attrs.py b/tests/integration/cartography/graph/test_querybuilder_match_on_multiple_attrs.py new file mode 100644 index 000000000..92641f477 --- /dev/null +++ b/tests/integration/cartography/graph/test_querybuilder_match_on_multiple_attrs.py @@ -0,0 +1,41 @@ +from cartography.client.core.tx import load_graph_data +from cartography.graph.querybuilder import build_ingestion_query +from tests.data.graph.querybuilder.sample_data.multiple_attr_match import MERGE_PERSONS +from tests.data.graph.querybuilder.sample_data.multiple_attr_match import 
TEST_COMPUTERS +from tests.data.graph.querybuilder.sample_models.multiple_attr_match import TestComputer + + +def test_load_graph_data_subset_of_relationships(neo4j_session): + """ + Test load_graph_data() if we have a relationship that matches on more than one attribute. + + In this test case, Persons can OWN TestComputers, and this assignment is made based on both first_name and + last_name. + """ + # Arrange: add (:SubResource{id:sub-resource-id}) and (:WorldAsset{id: world-asset-id}) to the test graph + neo4j_session.run(MERGE_PERSONS) + + # Act + query = build_ingestion_query(TestComputer()) + load_graph_data( + neo4j_session, + query, + TEST_COMPUTERS, + lastupdated=1, + ) + + # Assert that Homer has 2 computers and Lisa has 1 computer + expected = { + ('server-in-the-closet', 'Homer'), + ('beefy-box', 'Homer'), + ('macbook-air', 'Lisa'), + } + result = neo4j_session.run( + """ + MATCH (n1:TestComputer)<-[:OWNS]-(n2:Person) RETURN n1.name, n2.first_name; + """, + ) + actual = { + (r['n1.name'], r['n2.first_name']) for r in result + } + assert actual == expected diff --git a/tests/integration/cartography/graph/test_querybuilder.py b/tests/integration/cartography/graph/test_querybuilder_rel_subsets.py similarity index 68% rename from tests/integration/cartography/graph/test_querybuilder.py rename to tests/integration/cartography/graph/test_querybuilder_rel_subsets.py index 010f263de..9f824b1b3 100644 --- a/tests/integration/cartography/graph/test_querybuilder.py +++ b/tests/integration/cartography/graph/test_querybuilder_rel_subsets.py @@ -1,12 +1,12 @@ from cartography.client.core.tx import load_graph_data from cartography.graph.querybuilder import build_ingestion_query -from tests.data.graph.querybuilder.sample import INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS -from tests.data.graph.querybuilder.sample import MERGE_SUB_RESOURCE_QUERY -from tests.data.graph.querybuilder.sample import MERGE_WORLD_ASSET_QUERY -from tests.data.graph.querybuilder.sample_model 
import InterestingAssetSchema +from tests.data.graph.querybuilder.sample_data.partial_relationships import INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS +from tests.data.graph.querybuilder.sample_data.partial_relationships import MERGE_SUB_RESOURCE_QUERY +from tests.data.graph.querybuilder.sample_data.partial_relationships import MERGE_WORLD_ASSET_QUERY +from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetSchema -def test_load_graph_data_subset_of_relationships_1(neo4j_session): +def test_load_graph_data_subset_of_relationships(neo4j_session): """ Test load_graph_data() if a schema defines multiple relationships but only a subset of them are present in our data. @@ -30,13 +30,13 @@ def test_load_graph_data_subset_of_relationships_1(neo4j_session): sub_resource_id='sub-resource-id', ) - # Assert that the InterestingNode to SubResource relationship exists + # Assert that the InterestingAsset to SubResource relationship exists expected = { ('interesting-node-id', 'sub-resource-id'), } result = neo4j_session.run( """ - MATCH (n1:InterestingNode)<-[:RELATIONSHIP_LABEL]-(n2:SubResource) RETURN n1.id, n2.id; + MATCH (n1:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(n2:SubResource) RETURN n1.id, n2.id; """, ) actual = { @@ -44,13 +44,13 @@ def test_load_graph_data_subset_of_relationships_1(neo4j_session): } assert actual == expected - # Assert that the InterestingNode to HelloAsset relationship does NOT exist + # Assert that the InterestingAsset to HelloAsset relationship does NOT exist expected = { ('interesting-node-id', None), } result = neo4j_session.run( """ - MATCH (n1:InterestingNode) + MATCH (n1:InterestingAsset) OPTIONAL MATCH (n1)<-[:ASSOCIATED_WITH]-(n2:HelloAsset) RETURN n1.id, n2.id; """, @@ -60,13 +60,13 @@ def test_load_graph_data_subset_of_relationships_1(neo4j_session): } assert actual == expected - # Assert that the InterestingNode to WorldAsset relationship exists + # Assert that the InterestingAsset to WorldAsset 
relationship exists expected = { ('interesting-node-id', 'the-worldasset-id-1'), } result = neo4j_session.run( """ - MATCH (n1:InterestingNode)<-[:CONNECTED]-(n2:WorldAsset) RETURN n1.id, n2.id; + MATCH (n1:InterestingAsset)<-[:CONNECTED]-(n2:WorldAsset) RETURN n1.id, n2.id; """, ) actual = { @@ -79,7 +79,8 @@ def test_load_graph_data_subset_of_relationships_only_sub_resource(neo4j_session """ In this test case, our test data only includes the sub resource relationship """ - # Arrange: add (:SubResource{id:sub-resource-id}) and (:WorldAsset{id: world-asset-id}) to the test graph + # Arrange: add (:SubResource{id:sub-resource-id}) + neo4j_session.run("MATCH (n) DETACH DELETE n;") neo4j_session.run(MERGE_SUB_RESOURCE_QUERY) # Act @@ -92,13 +93,13 @@ def test_load_graph_data_subset_of_relationships_only_sub_resource(neo4j_session sub_resource_id='sub-resource-id', ) - # Assert that the InterestingNode to SubResource relationship exists + # Assert that the InterestingAsset to SubResource relationship exists expected = { ('interesting-node-id', 'sub-resource-id'), } result = neo4j_session.run( """ - MATCH (n1:InterestingNode)<-[:RELATIONSHIP_LABEL]-(n2:SubResource) RETURN n1.id, n2.id; + MATCH (n1:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(n2:SubResource) RETURN n1.id, n2.id; """, ) actual = { @@ -106,13 +107,13 @@ def test_load_graph_data_subset_of_relationships_only_sub_resource(neo4j_session } assert actual == expected - # Assert that the InterestingNode to HelloAsset relationship does NOT exist + # Assert that the InterestingAsset to HelloAsset relationship does NOT exist expected = { ('interesting-node-id', None), } result = neo4j_session.run( """ - MATCH (n1:InterestingNode) + MATCH (n1:InterestingAsset) OPTIONAL MATCH (n1)<-[:ASSOCIATED_WITH]-(n2:HelloAsset) RETURN n1.id, n2.id; """, @@ -122,13 +123,13 @@ def test_load_graph_data_subset_of_relationships_only_sub_resource(neo4j_session } assert actual == expected - # Assert that the InterestingNode to WorldAsset 
relationship does NOT exist + # Assert that the InterestingAsset to WorldAsset relationship does NOT exist expected = { ('interesting-node-id', None), } result = neo4j_session.run( """ - MATCH (n1:InterestingNode) + MATCH (n1:InterestingAsset) OPTIONAL MATCH (n1)<-[:CONNECTED]-(n2:WorldAsset) RETURN n1.id, n2.id; """, diff --git a/tests/unit/cartography/graph/test_querybuilder_complex.py b/tests/unit/cartography/graph/test_querybuilder_complex.py index 06248b5ec..acdafe083 100644 --- a/tests/unit/cartography/graph/test_querybuilder_complex.py +++ b/tests/unit/cartography/graph/test_querybuilder_complex.py @@ -1,5 +1,5 @@ from cartography.graph.querybuilder import build_ingestion_query -from tests.data.graph.querybuilder.sample_model import InterestingAssetSchema +from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetSchema from tests.unit.cartography.graph.helpers import remove_leading_whitespace_and_empty_lines @@ -9,7 +9,7 @@ def test_build_ingestion_query_complex(): expected = """ UNWIND $DictList AS item - MERGE (i:InterestingNode{id: item.Id}) + MERGE (i:InterestingAsset{id: item.Id}) ON CREATE SET i.firstseen = timestamp() SET i.lastupdated = $lastupdated, @@ -21,6 +21,7 @@ def test_build_ingestion_query_complex(): CALL { WITH i, item OPTIONAL MATCH (j:SubResource{id: $sub_resource_id}) + WITH i, item, j WHERE j IS NOT NULL MERGE (i)<-[r:RELATIONSHIP_LABEL]-(j) ON CREATE SET r.firstseen = timestamp() SET diff --git a/tests/unit/cartography/graph/test_querybuilder_simple.py b/tests/unit/cartography/graph/test_querybuilder_simple.py index da5cf846b..c989b3727 100644 --- a/tests/unit/cartography/graph/test_querybuilder_simple.py +++ b/tests/unit/cartography/graph/test_querybuilder_simple.py @@ -1,53 +1,9 @@ -from dataclasses import dataclass - -from cartography.graph.model import CartographyNodeProperties -from cartography.graph.model import CartographyNodeSchema -from cartography.graph.model import CartographyRelProperties 
-from cartography.graph.model import CartographyRelSchema -from cartography.graph.model import LinkDirection -from cartography.graph.model import PropertyRef from cartography.graph.querybuilder import build_ingestion_query +from tests.data.graph.querybuilder.sample_models.simple_node import SimpleNodeSchema +from tests.data.graph.querybuilder.sample_models.simple_node import SimpleNodeWithSubResourceSchema from tests.unit.cartography.graph.helpers import remove_leading_whitespace_and_empty_lines -# Test defining a simple node with no relationships. -@dataclass -class SimpleNodeProperties(CartographyNodeProperties): - id: PropertyRef = PropertyRef('Id') - lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) - property1: PropertyRef = PropertyRef('property1') - property2: PropertyRef = PropertyRef('property2') - - -@dataclass -class SimpleNodeSchema(CartographyNodeSchema): - label: str = 'SimpleNode' - properties: SimpleNodeProperties = SimpleNodeProperties() - - -# Test defining a simple node with a sub resource rel: (:SimpleNode)<-[:RESOURCE]-(:SubResource) -@dataclass -class SimpleNodeToSubResourceRelProps(CartographyRelProperties): - lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) - - -@dataclass -class SimpleNodeToSubResourceRel(CartographyRelSchema): - target_node_label: str = 'SubResource' - target_node_key: str = 'id' - target_node_key_property_ref: PropertyRef = PropertyRef('sub_resource_id', set_in_kwargs=True) - direction: LinkDirection = LinkDirection.INWARD - rel_label: str = "RELATIONSHIP_LABEL" - properties: SimpleNodeToSubResourceRelProps = SimpleNodeToSubResourceRelProps() - - -@dataclass -class SimpleNodeWithSubResourceSchema(CartographyNodeSchema): - label: str = 'SimpleNode' - properties: SimpleNodeProperties = SimpleNodeProperties() - sub_resource_relationship: CartographyRelSchema = SimpleNodeToSubResourceRel() - - def test_simplenode_sanity_checks(): """ Test creating a simple node schema with no 
relationships. @@ -89,6 +45,7 @@ def test_build_ingestion_query_with_sub_resource(): CALL { WITH i, item OPTIONAL MATCH (j:SubResource{id: $sub_resource_id}) + WITH i, item, j WHERE j IS NOT NULL MERGE (i)<-[r:RELATIONSHIP_LABEL]-(j) ON CREATE SET r.firstseen = timestamp() SET From 346ad50ccfbc01386540cbe5440879cf0f08ecb0 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Mon, 12 Dec 2022 16:25:54 -0800 Subject: [PATCH 19/27] Correctly name test --- .../graph/test_querybuilder_match_on_multiple_attrs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/cartography/graph/test_querybuilder_match_on_multiple_attrs.py b/tests/integration/cartography/graph/test_querybuilder_match_on_multiple_attrs.py index 92641f477..04da06855 100644 --- a/tests/integration/cartography/graph/test_querybuilder_match_on_multiple_attrs.py +++ b/tests/integration/cartography/graph/test_querybuilder_match_on_multiple_attrs.py @@ -5,7 +5,7 @@ from tests.data.graph.querybuilder.sample_models.multiple_attr_match import TestComputer -def test_load_graph_data_subset_of_relationships(neo4j_session): +def test_load_graph_data_match_on_multiple_attrs(neo4j_session): """ Test load_graph_data() if we have a relationship that matches on more than one attribute. 
From 2e65877f9426b5c18d08e4455174d27cb75180f1 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Tue, 13 Dec 2022 16:41:02 -0800 Subject: [PATCH 20/27] Change key_refs to TargetNodeMatcher to enforce it as a mandatory field --- cartography/graph/model.py | 20 +++++++++---------- cartography/graph/querybuilder.py | 13 ++++++------ cartography/intel/aws/emr.py | 5 ++++- .../sample_models/interesting_asset.py | 8 ++++---- .../sample_models/multiple_attr_match.py | 4 ++-- .../querybuilder/sample_models/simple_node.py | 5 ++--- 6 files changed, 28 insertions(+), 27 deletions(-) diff --git a/cartography/graph/model.py b/cartography/graph/model.py index 6d3501244..7ad34ded4 100644 --- a/cartography/graph/model.py +++ b/cartography/graph/model.py @@ -102,6 +102,11 @@ def __post_init__(self): raise TypeError("Cannot instantiate abstract class.") +@dataclass +class TargetNodeMatcher: + key_refs: Dict[str, PropertyRef] + + @dataclass class CartographyRelSchema(abc.ABC): """ @@ -110,8 +115,6 @@ class CartographyRelSchema(abc.ABC): The CartographyRelSchema contains properties that make it possible to connect the CartographyNodeSchema to other existing nodes in the graph. """ - _target_node_key_refs: Dict[str, PropertyRef] = field(init=False) - @property @abc.abstractmethod def properties(self) -> CartographyRelProperties: @@ -129,21 +132,16 @@ def target_node_label(self) -> str: pass @property - def target_node_key_refs(self) -> Dict[str, PropertyRef]: + @abc.abstractmethod + def target_node_matcher(self) -> TargetNodeMatcher: """ :return: A dict mapping + TODO update this docstring From: one or more attribute names on the target_node_label used to uniquely identify what node to connect to. To: The value of the target_node_key used to uniquely identify what node to connect to. This is given as a PropertyRef. 
""" - return self._target_node_key_refs - - @target_node_key_refs.setter - def target_node_key_refs(self, key_refs: Dict[str, PropertyRef]) -> None: - """ - Boilerplate setter function used to keep typehints happy. - """ - self._target_node_key_refs = key_refs + pass @property @abc.abstractmethod diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index 2ad7d9c2f..a476acf38 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -13,6 +13,7 @@ from cartography.graph.model import CartographyRelSchema from cartography.graph.model import LinkDirection from cartography.graph.model import PropertyRef +from cartography.graph.model import TargetNodeMatcher logger = logging.getLogger(__name__) @@ -116,18 +117,18 @@ def _build_rel_properties_statement(rel_var: str, rel_property_map: Optional[Dic return set_clause -def _build_match_clause(key_refs: Dict[str, PropertyRef]) -> str: +def _build_match_clause(matcher: TargetNodeMatcher) -> str: """ Generate a Neo4j match statement on one or more keys and values for a given node. """ - if not key_refs: + if not matcher.key_refs: raise ValueError( - "Failed to create match clause because key_refs is Falsy. Please make sure that the `target_node_key_refs` " + "Failed to create match clause because key_refs is Falsy. 
Please make sure that the `target_node_matcher` " "field on all subclasses of CartographyRelSchema are properly defined.", ) match = Template("$Key: $PropRef") - return ', '.join(match.safe_substitute(Key=key, PropRef=prop_ref) for key, prop_ref in key_refs.items()) + return ', '.join(match.safe_substitute(Key=key, PropRef=prop_ref) for key, prop_ref in matcher.key_refs.items()) def _build_attach_sub_resource_statement(sub_resource_link: Optional[CartographyRelSchema] = None) -> str: @@ -168,7 +169,7 @@ def _build_attach_sub_resource_statement(sub_resource_link: Optional[Cartography attach_sub_resource_statement = sub_resource_attach_template.safe_substitute( SubResourceLabel=sub_resource_link.target_node_label, - MatchClause=_build_match_clause(sub_resource_link.target_node_key_refs), + MatchClause=_build_match_clause(sub_resource_link.target_node_matcher), RelMergeClause=rel_merge_clause, SubResourceRelLabel=sub_resource_link.rel_label, set_rel_properties_statement=_build_rel_properties_statement('r', rel_props_as_dict), @@ -234,7 +235,7 @@ def _build_attach_additional_links_statement( additional_ref = additional_links_template.safe_substitute( AddlLabel=link.target_node_label, - MatchClause=_build_match_clause(link.target_node_key_refs), + MatchClause=_build_match_clause(link.target_node_matcher), node_var=node_var, rel_var=rel_var, RelMerge=rel_merge, diff --git a/cartography/intel/aws/emr.py b/cartography/intel/aws/emr.py index a90f4b7e0..1bc5e8d9d 100644 --- a/cartography/intel/aws/emr.py +++ b/cartography/intel/aws/emr.py @@ -15,6 +15,7 @@ from cartography.graph.model import CartographyRelSchema from cartography.graph.model import LinkDirection from cartography.graph.model import PropertyRef +from cartography.graph.model import TargetNodeMatcher from cartography.graph.querybuilder import build_ingestion_query from cartography.graph.querybuilder import default_field from cartography.intel.aws.ec2.util import get_botocore_config @@ -94,7 +95,9 @@ class 
EMRClusterToAwsAccountRelProperties(CartographyRelProperties): # (:EMRCluster)<-[:RESOURCE]-(:AWSAccount) class EMRClusterToAWSAccount(CartographyRelSchema): target_node_label: str = 'AWSAccount' - target_node_key_refs: Dict[str, PropertyRef] = default_field({'id': PropertyRef('AccountId', set_in_kwargs=True)}) + target_node_matcher: TargetNodeMatcher = TargetNodeMatcher( + {'id': PropertyRef('AccountId', set_in_kwargs=True)}, + ) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "RESOURCE" properties: EMRClusterToAwsAccountRelProperties = EMRClusterToAwsAccountRelProperties() diff --git a/tests/data/graph/querybuilder/sample_models/interesting_asset.py b/tests/data/graph/querybuilder/sample_models/interesting_asset.py index d85fa17fb..55515573f 100644 --- a/tests/data/graph/querybuilder/sample_models/interesting_asset.py +++ b/tests/data/graph/querybuilder/sample_models/interesting_asset.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Dict from typing import List from typing import Optional @@ -9,6 +8,7 @@ from cartography.graph.model import CartographyRelSchema from cartography.graph.model import LinkDirection from cartography.graph.model import PropertyRef +from cartography.graph.model import TargetNodeMatcher from cartography.graph.querybuilder import default_field from tests.data.graph.querybuilder.sample_models.simple_node import SimpleNodeProperties @@ -35,7 +35,7 @@ class InterestingAssetToSubResourceRel(CartographyRelSchema): (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) """ target_node_label: str = 'SubResource' - target_node_key_refs: Dict[str, PropertyRef] = default_field( + target_node_matcher: TargetNodeMatcher = TargetNodeMatcher( {'id': PropertyRef('sub_resource_id', set_in_kwargs=True)}, ) direction: LinkDirection = LinkDirection.INWARD @@ -55,7 +55,7 @@ class InterestingAssetToHelloAssetRel(CartographyRelSchema): (:InterestingAsset)-[:ASSOCIATED_WITH]->(:HelloAsset) """ target_node_label: str = 
'HelloAsset' - target_node_key_refs: Dict[str, PropertyRef] = default_field({'id': PropertyRef('hello_asset_id')}) + target_node_matcher: TargetNodeMatcher = TargetNodeMatcher({'id': PropertyRef('hello_asset_id')}) direction: LinkDirection = LinkDirection.OUTWARD rel_label: str = "ASSOCIATED_WITH" properties: InterestingAssetToHelloAssetRelProps = InterestingAssetToHelloAssetRelProps() @@ -73,7 +73,7 @@ class InterestingAssetToWorldAssetRel(CartographyRelSchema): (:InterestingAsset)<-[:CONNECTED]-(:WorldAsset) """ target_node_label: str = 'WorldAsset' - target_node_key_refs: Dict[str, PropertyRef] = default_field({'id': PropertyRef('world_asset_id')}) + target_node_matcher: TargetNodeMatcher = TargetNodeMatcher({'id': PropertyRef('world_asset_id')}) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "CONNECTED" properties: InterestingAssetToWorldAssetRelProps = InterestingAssetToWorldAssetRelProps() diff --git a/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py b/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py index c24d1d1e5..342d8e3ce 100644 --- a/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py +++ b/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Dict from typing import List from typing import Optional @@ -9,6 +8,7 @@ from cartography.graph.model import CartographyRelSchema from cartography.graph.model import LinkDirection from cartography.graph.model import PropertyRef +from cartography.graph.model import TargetNodeMatcher from cartography.graph.querybuilder import default_field @@ -23,7 +23,7 @@ class TestComputerToPersonRel(CartographyRelSchema): (:TestComputer)<-[:OWNS]-(:Person) """ target_node_label: str = 'Person' - target_node_key_refs: Dict[str, PropertyRef] = default_field( + target_node_matcher: TargetNodeMatcher = TargetNodeMatcher( { 'first_name': PropertyRef('FirstName'), 'last_name': 
PropertyRef('LastName'), diff --git a/tests/data/graph/querybuilder/sample_models/simple_node.py b/tests/data/graph/querybuilder/sample_models/simple_node.py index 648be95fe..5ec7d3aec 100644 --- a/tests/data/graph/querybuilder/sample_models/simple_node.py +++ b/tests/data/graph/querybuilder/sample_models/simple_node.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import Dict from cartography.graph.model import CartographyNodeProperties from cartography.graph.model import CartographyNodeSchema @@ -7,7 +6,7 @@ from cartography.graph.model import CartographyRelSchema from cartography.graph.model import LinkDirection from cartography.graph.model import PropertyRef -from cartography.graph.querybuilder import default_field +from cartography.graph.model import TargetNodeMatcher # Test defining a simple node with no relationships. @@ -34,7 +33,7 @@ class SimpleNodeToSubResourceRelProps(CartographyRelProperties): @dataclass class SimpleNodeToSubResourceRel(CartographyRelSchema): target_node_label: str = 'SubResource' - target_node_key_refs: Dict[str, PropertyRef] = default_field( + target_node_matcher: TargetNodeMatcher = TargetNodeMatcher( {'id': PropertyRef('sub_resource_id', set_in_kwargs=True)}, ) direction: LinkDirection = LinkDirection.INWARD From cb233c30c1d3bd5ffa1a4b1a234c1246f96b45b3 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Tue, 13 Dec 2022 17:34:55 -0800 Subject: [PATCH 21/27] Remove use of hacky default_field() --- cartography/graph/model.py | 73 +++++++++---------- cartography/graph/querybuilder.py | 50 ++++--------- cartography/intel/aws/emr.py | 1 - .../sample_models/interesting_asset.py | 8 +- .../sample_models/multiple_attr_match.py | 5 +- .../graph/test_querybuilder_simple.py | 4 +- 6 files changed, 56 insertions(+), 85 deletions(-) diff --git a/cartography/graph/model.py b/cartography/graph/model.py index 7ad34ded4..b680c23df 100644 --- a/cartography/graph/model.py +++ b/cartography/graph/model.py @@ -39,8 +39,8 @@ class 
PropertyRef: cartography takes lists of Python dicts and loads them to Neo4j. PropertyRefs allow our dynamically generated Neo4j ingestion queries to set values for a given node or relationship property from (A) a field on the dict being - processed (PropertyRef. set_in_kwargs =False, default), or (B) from a single variable provided by a keyword argument - (PropertyRef. set_in_kwargs =True). + processed (PropertyRef.set_in_kwargs=False, default), or (B) from a single variable provided by a keyword argument + (PropertyRef.set_in_kwargs=True). """ def __init__(self, name: str, set_in_kwargs=False): @@ -102,8 +102,16 @@ def __post_init__(self): raise TypeError("Cannot instantiate abstract class.") -@dataclass +@dataclass(frozen=True) class TargetNodeMatcher: + """ + Dataclass used to encapsulate the following mapping: + Keys: one or more attribute names on the target_node_label used to uniquely identify what node to connect to. + Values: The value of the target_node_key used to uniquely identify what node to connect to. This is given as a + PropertyRef. + This is needed because we need to include this in the CartographyRelSchema dataclass but dicts are mutable, so we + need to do this wrapping. + """ key_refs: Dict[str, PropertyRef] @@ -135,11 +143,7 @@ def target_node_label(self) -> str: @abc.abstractmethod def target_node_matcher(self) -> TargetNodeMatcher: """ - :return: A dict mapping - TODO update this docstring - From: one or more attribute names on the target_node_label used to uniquely identify what node to connect to. - To: The value of the target_node_key used to uniquely identify what node to connect to. This is given as a - PropertyRef. + :return: A TargetNodeMatcher object used to find what node(s) to attach the relationship to. """ pass @@ -160,23 +164,30 @@ def direction(self) -> LinkDirection: pass +@dataclass(frozen=True) +class OtherRelationships: + """ + Encapsulates a list of CartographyRelSchema. 
This is used to ensure dataclass immutability when composed as part of + a CartographyNodeSchema object. + """ + rels: List[CartographyRelSchema] + + +@dataclass(frozen=True) +class ExtraNodeLabels: + """ + Encapsulates a list of str representing additional labels for the CartographyNodeSchema that this is composed on. + This wrapping is used to ensure dataclass immutability for the CartographyNodeSchema. + """ + labels: List[str] + + @dataclass class CartographyNodeSchema(abc.ABC): """ Abstract base dataclass that represents a graph node in cartography. This is used to dynamically generate graph ingestion queries. - - A CartographyNodeSchema is composed of: - - - CartographyNodeProperties: contains the properties on the node and where to find their values with PropertyRef - objects. - - [Optional] A CartographyRelSchema pointing to the node's sub-resource (see the docstring on - `sub_resource_relationship` for details. - - [Optional] One or more other CartographyRelSchemas to other nodes. """ - _extra_labels: Optional[List[str]] = field(init=False, default=None) - _other_relationships: Optional[List[CartographyRelSchema]] = field(init=False, default=None) - @property @abc.abstractmethod def label(self) -> str: @@ -209,33 +220,19 @@ def sub_resource_relationship(self) -> Optional[CartographyRelSchema]: return None @property - def other_relationships(self) -> Optional[List[CartographyRelSchema]]: + def other_relationships(self) -> Optional[OtherRelationships]: """ Optional. Allows subclasses to specify additional cartography relationships on the node. - :return: None of not overriden. Else return a list of CartographyRelSchema associated with the node. - """ - return self._other_relationships - - @other_relationships.setter - def other_relationships(self, other_rels: List[CartographyRelSchema]) -> None: - """ - Boilerplate setter function used to keep typehints happy. + :return: None if not overriden. Else return an OtherRelationships object. 
""" - self._other_relationships = other_rels + return None @property - def extra_labels(self) -> Optional[List[str]]: + def extra_node_labels(self) -> Optional[ExtraNodeLabels]: """ Optional. Allows specifying extra labels on the node. :return: None if not overriden. Else return a str list of the extra labels specified on the node. """ - return self._extra_labels - - @extra_labels.setter - def extra_labels(self, labels: List[str]) -> None: - """ - Boilerplate setter function used to keep typehints happy. - """ - self._extra_labels = labels + return None diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index a476acf38..a34495c80 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -1,17 +1,15 @@ import logging -from copy import copy from dataclasses import asdict -from dataclasses import field from string import Template -from typing import Any from typing import Dict -from typing import List from typing import Optional from cartography.graph.model import CartographyNodeProperties from cartography.graph.model import CartographyNodeSchema from cartography.graph.model import CartographyRelSchema +from cartography.graph.model import ExtraNodeLabels from cartography.graph.model import LinkDirection +from cartography.graph.model import OtherRelationships from cartography.graph.model import PropertyRef from cartography.graph.model import TargetNodeMatcher @@ -19,28 +17,9 @@ logger = logging.getLogger(__name__) -def default_field(obj: Any): - """ - Helper function from https://stackoverflow.com/questions/52063759/passing-default-list-argument-to-dataclasses. - We use this so that we can work around how dataclass field default values disallow mutable objects (like Lists) by - wrapping them in lambdas. - - Put another way, writing `field(default_factory=lambda: ['Label1', 'Label2'])` is so much - more work than writing `default_field(['Label1', 'Label2']`. 
- - Note that if the Field is decorated with @property (like everything in our object model), then the dataclass needs - to also use this technique to keep typehints happy: - https://florimond.dev/en/posts/2018/10/reconciling-dataclasses-and-properties-in-python/. - - :param obj: The mutable default object (e.g. a List) that we want to set as a default for a dataclass field. - :return: A dataclass Field object. - """ - return field(default_factory=lambda: copy(obj)) - - def _build_node_properties_statement( node_property_map: Dict[str, PropertyRef], - node_extra_labels: Optional[List[str]] = None, + extra_node_labels: Optional[ExtraNodeLabels] = None, ) -> str: """ Generate a Neo4j clause that sets node properties using the given mapping of attribute names to PropertyRefs. @@ -62,7 +41,7 @@ def _build_node_properties_statement( ``` where `i` is a reference to the Neo4j node. :param node_property_map: Mapping of node attribute names as str to PropertyRef objects - :param node_extra_labels: Optional list of extra labels to set on the node as str + :param extra_node_labels: Optional ExtraNodeLabels object to set on the node as string :return: The resulting Neo4j SET clause to set the given attributes on the node """ ingest_fields_template = Template('i.$node_property = $property_ref') @@ -74,8 +53,8 @@ def _build_node_properties_statement( ]) # Set extra labels on the node if specified - if node_extra_labels: - extra_labels = ':'.join([label for label in node_extra_labels]) + if extra_node_labels: + extra_labels = ':'.join([label for label in extra_node_labels.labels]) set_clause += f",\n i:{extra_labels}" return set_clause @@ -121,12 +100,6 @@ def _build_match_clause(matcher: TargetNodeMatcher) -> str: """ Generate a Neo4j match statement on one or more keys and values for a given node. """ - if not matcher.key_refs: - raise ValueError( - "Failed to create match clause because key_refs is Falsy. 
Please make sure that the `target_node_matcher` " - "field on all subclasses of CartographyRelSchema are properly defined.", - ) - match = Template("$Key: $PropRef") return ', '.join(match.safe_substitute(Key=key, PropRef=prop_ref) for key, prop_ref in matcher.key_refs.items()) @@ -178,7 +151,7 @@ def _build_attach_sub_resource_statement(sub_resource_link: Optional[Cartography def _build_attach_additional_links_statement( - additional_relationships: Optional[List[CartographyRelSchema]] = None, + additional_relationships: Optional[OtherRelationships] = None, ) -> str: """ Generates a Neo4j statement to attach one or more CartographyRelSchemas to node(s) previously mentioned in the @@ -204,7 +177,7 @@ def _build_attach_additional_links_statement( """, ) links = [] - for num, link in enumerate(additional_relationships): + for num, link in enumerate(additional_relationships.rels): node_var = f"n{num}" rel_var = f"r{num}" @@ -248,7 +221,7 @@ def _build_attach_additional_links_statement( def _build_attach_relationships_statement( sub_resource_relationship: Optional[CartographyRelSchema], - other_relationships: Optional[List[CartographyRelSchema]], + other_relationships: Optional[OtherRelationships], ) -> str: """ Use Neo4j subqueries to attach sub resource and/or other relationships. 
@@ -310,7 +283,10 @@ def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: ingest_query = query_template.safe_substitute( node_label=node_schema.label, dict_id_field=node_props.id, - set_node_properties_statement=_build_node_properties_statement(node_props_as_dict, node_schema.extra_labels), + set_node_properties_statement=_build_node_properties_statement( + node_props_as_dict, + node_schema.extra_node_labels, + ), attach_relationships_statement=_build_attach_relationships_statement( node_schema.sub_resource_relationship, node_schema.other_relationships, diff --git a/cartography/intel/aws/emr.py b/cartography/intel/aws/emr.py index 1bc5e8d9d..8941b39ac 100644 --- a/cartography/intel/aws/emr.py +++ b/cartography/intel/aws/emr.py @@ -17,7 +17,6 @@ from cartography.graph.model import PropertyRef from cartography.graph.model import TargetNodeMatcher from cartography.graph.querybuilder import build_ingestion_query -from cartography.graph.querybuilder import default_field from cartography.intel.aws.ec2.util import get_botocore_config from cartography.util import aws_handle_regions from cartography.util import run_cleanup_job diff --git a/tests/data/graph/querybuilder/sample_models/interesting_asset.py b/tests/data/graph/querybuilder/sample_models/interesting_asset.py index 55515573f..e9a4b033b 100644 --- a/tests/data/graph/querybuilder/sample_models/interesting_asset.py +++ b/tests/data/graph/querybuilder/sample_models/interesting_asset.py @@ -1,15 +1,15 @@ from dataclasses import dataclass -from typing import List from typing import Optional from cartography.graph.model import CartographyNodeProperties from cartography.graph.model import CartographyNodeSchema from cartography.graph.model import CartographyRelProperties from cartography.graph.model import CartographyRelSchema +from cartography.graph.model import ExtraNodeLabels from cartography.graph.model import LinkDirection +from cartography.graph.model import OtherRelationships from 
cartography.graph.model import PropertyRef from cartography.graph.model import TargetNodeMatcher -from cartography.graph.querybuilder import default_field from tests.data.graph.querybuilder.sample_models.simple_node import SimpleNodeProperties @@ -81,11 +81,11 @@ class InterestingAssetToWorldAssetRel(CartographyRelSchema): @dataclass class InterestingAssetSchema(CartographyNodeSchema): - extra_labels: Optional[List[str]] = default_field(['AnotherNodeLabel', 'YetAnotherNodeLabel']) + extra_node_labels: Optional[ExtraNodeLabels] = ExtraNodeLabels(['AnotherNodeLabel', 'YetAnotherNodeLabel']) label: str = 'InterestingAsset' properties: SimpleNodeProperties = SimpleNodeProperties() sub_resource_relationship: InterestingAssetToSubResourceRel = InterestingAssetToSubResourceRel() - other_relationships: Optional[List[CartographyRelSchema]] = default_field( + other_relationships: Optional[OtherRelationships] = OtherRelationships( [ InterestingAssetToHelloAssetRel(), InterestingAssetToWorldAssetRel(), diff --git a/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py b/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py index 342d8e3ce..db5ca5831 100644 --- a/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py +++ b/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from typing import List from typing import Optional from cartography.graph.model import CartographyNodeProperties @@ -7,9 +6,9 @@ from cartography.graph.model import CartographyRelProperties from cartography.graph.model import CartographyRelSchema from cartography.graph.model import LinkDirection +from cartography.graph.model import OtherRelationships from cartography.graph.model import PropertyRef from cartography.graph.model import TargetNodeMatcher -from cartography.graph.querybuilder import default_field @dataclass @@ -48,4 +47,4 @@ class TestComputerProperties(CartographyNodeProperties): class 
TestComputer(CartographyNodeSchema): label: str = 'TestComputer' properties: TestComputerProperties = TestComputerProperties() - other_relationships: Optional[List[CartographyRelSchema]] = default_field([TestComputerToPersonRel()]) + other_relationships: Optional[OtherRelationships] = OtherRelationships([TestComputerToPersonRel()]) diff --git a/tests/unit/cartography/graph/test_querybuilder_simple.py b/tests/unit/cartography/graph/test_querybuilder_simple.py index c989b3727..2e05fdb85 100644 --- a/tests/unit/cartography/graph/test_querybuilder_simple.py +++ b/tests/unit/cartography/graph/test_querybuilder_simple.py @@ -10,7 +10,7 @@ def test_simplenode_sanity_checks(): """ schema: SimpleNodeSchema = SimpleNodeSchema() # Assert that the unimplemented, non-abstract properties have None values. - assert schema.extra_labels is None + assert schema.extra_node_labels is None assert schema.sub_resource_relationship is None assert schema.other_relationships is None @@ -21,7 +21,7 @@ def test_simplenode_with_subresource_sanity_checks(): """ schema: SimpleNodeWithSubResourceSchema = SimpleNodeWithSubResourceSchema() # Assert that the unimplemented, non-abstract properties have None values. 
- assert schema.extra_labels is None + assert schema.extra_node_labels is None assert schema.other_relationships is None From 3be472b570799fdab362d6fc42bb6cd6046fe819 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Wed, 14 Dec 2022 00:15:10 -0800 Subject: [PATCH 22/27] Support subset of schema relationships for query generation, test multiple node labels --- cartography/graph/model.py | 21 +++- cartography/graph/querybuilder.py | 62 +++++++++- cartography/intel/aws/emr.py | 16 ++- .../sample_models/interesting_asset.py | 23 ++-- .../sample_models/multiple_attr_match.py | 11 +- .../querybuilder/sample_models/simple_node.py | 13 ++- ...st_querybuilder_labels_and_var_num_rels.py | 109 ++++++++++++++++++ 7 files changed, 217 insertions(+), 38 deletions(-) create mode 100644 tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py diff --git a/cartography/graph/model.py b/cartography/graph/model.py index b680c23df..84746523b 100644 --- a/cartography/graph/model.py +++ b/cartography/graph/model.py @@ -1,6 +1,7 @@ import abc from dataclasses import dataclass from dataclasses import field +from dataclasses import make_dataclass from enum import auto from enum import Enum from typing import Dict @@ -66,7 +67,7 @@ def __repr__(self) -> str: return f"item.{self.name}" if not self.set_in_kwargs else self._parameterize_name() -@dataclass +@dataclass(frozen=True) class CartographyNodeProperties(abc.ABC): """ Abstract base dataclass that represents the properties on a CartographyNodeSchema. This is intended to enforce that @@ -85,7 +86,7 @@ def __post_init__(self): raise TypeError("Cannot instantiate abstract class.") -@dataclass +@dataclass(frozen=True) class CartographyRelProperties(abc.ABC): """ Abstract class that represents the properties on a CartographyRelSchema. 
This is intended to enforce that all @@ -112,10 +113,11 @@ class TargetNodeMatcher: This is needed because we need to include this in the CartographyRelSchema dataclass but dicts are mutable, so we need to do this wrapping. """ - key_refs: Dict[str, PropertyRef] + pass + # key_refs: Dict[str, PropertyRef] -@dataclass +@dataclass(frozen=True) class CartographyRelSchema(abc.ABC): """ Abstract base dataclass that represents a cartography relationship. @@ -182,7 +184,7 @@ class ExtraNodeLabels: labels: List[str] -@dataclass +@dataclass(frozen=True) class CartographyNodeSchema(abc.ABC): """ Abstract base dataclass that represents a graph node in cartography. This is used to dynamically generate graph @@ -236,3 +238,12 @@ def extra_node_labels(self) -> Optional[ExtraNodeLabels]: :return: None if not overriden. Else return a str list of the extra labels specified on the node. """ return None + + +def make_target_node_matcher(key_ref_dict: Dict[str, PropertyRef]) -> TargetNodeMatcher: + fields = [(key, PropertyRef, field(default=prop_ref)) for key, prop_ref in key_ref_dict.items()] + return make_dataclass( + TargetNodeMatcher.__name__, + fields, + frozen=True, + )() diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index a34495c80..f0775add6 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -3,6 +3,8 @@ from string import Template from typing import Dict from typing import Optional +from typing import Set +from typing import Tuple from cartography.graph.model import CartographyNodeProperties from cartography.graph.model import CartographyNodeSchema @@ -101,7 +103,8 @@ def _build_match_clause(matcher: TargetNodeMatcher) -> str: Generate a Neo4j match statement on one or more keys and values for a given node. 
""" match = Template("$Key: $PropRef") - return ', '.join(match.safe_substitute(Key=key, PropRef=prop_ref) for key, prop_ref in matcher.key_refs.items()) + matcher_asdict = asdict(matcher) + return ', '.join(match.safe_substitute(Key=key, PropRef=prop_ref) for key, prop_ref in matcher_asdict.items()) def _build_attach_sub_resource_statement(sub_resource_link: Optional[CartographyRelSchema] = None) -> str: @@ -232,6 +235,9 @@ def _build_attach_relationships_statement( queries allows us to build a query that will ignore the null relationships and build the ones that exist. """ + if not sub_resource_relationship and not other_relationships: + return "" + attach_sub_resource_statement = _build_attach_sub_resource_statement(sub_resource_relationship) attach_additional_links_statement = _build_attach_additional_links_statement(other_relationships) @@ -253,11 +259,52 @@ def _build_attach_relationships_statement( return query_template.safe_substitute(attach_relationships_statement=attach_relationships_statement) -def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: +def _filter_selected_relationships( + node_schema: CartographyNodeSchema, + selected_relationships: Set[CartographyRelSchema], +) -> Tuple[Optional[CartographyRelSchema], Optional[OtherRelationships]]: + # Empty set means no relationships are selected + if selected_relationships == set(): + return None, None + + all_rels_on_node = {node_schema.sub_resource_relationship} + if node_schema.other_relationships: + for rel in node_schema.other_relationships.rels: + all_rels_on_node.add(rel) + + # Ensure that the selected_relationships are actually present on the node_schema. + for selected_rel in selected_relationships: + if selected_rel not in all_rels_on_node: + raise ValueError( + f"build_ingestion_query() failed: CartographyRelSchema {selected_rel.__class__.__name__} is not " + f"defined on CartographyNodeSchema type {node_schema.__class__.__name__}. 
Please verify the " + f"value of `selected_relationships` passed to `build_ingestion_query()`.", + ) + + sub_resource_rel = node_schema.sub_resource_relationship + if sub_resource_rel not in selected_relationships: + sub_resource_rel = None + + # By this point, everything in selected_relationships is validated to be present in node_schema + filtered_other_rels = OtherRelationships([rel for rel in selected_relationships if rel != sub_resource_rel]) + + return sub_resource_rel, filtered_other_rels + + +def build_ingestion_query( + node_schema: CartographyNodeSchema, + selected_relationships: Optional[Set[CartographyRelSchema]] = None, +) -> str: """ Generates a Neo4j query from the given CartographyNodeSchema to ingest the specified nodes and relationships so that cartography module authors don't need to handwrite their own queries. :param node_schema: The CartographyNodeSchema object to build a Neo4j query from. + :param selected_relationships: If specified, generates a query that attaches only the relationships in this optional + set of CartographyRelSchema. The RelSchema specified here _must_ be preset in node_schema.sub_resource_relationship + or node_schema.other_relationships. + If None (default), then we create a query using all RelSchema in node_schema.sub_resource_relationship + + node_schema.other_relationships. + If equal to the empty set (set()), we create a query with no relationships at all. :return: An optimized Neo4j query that can be used to ingest nodes and relationships. 
Important notes: - The resulting query uses the UNWIND + MERGE pattern (see @@ -280,6 +327,12 @@ def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: node_props: CartographyNodeProperties = node_schema.properties node_props_as_dict: Dict[str, PropertyRef] = asdict(node_props) + # Handle selected relationships + sub_resource_rel: Optional[CartographyRelSchema] = node_schema.sub_resource_relationship + other_rels: Optional[OtherRelationships] = node_schema.other_relationships + if selected_relationships or selected_relationships == set(): + sub_resource_rel, other_rels = _filter_selected_relationships(node_schema, selected_relationships) + ingest_query = query_template.safe_substitute( node_label=node_schema.label, dict_id_field=node_props.id, @@ -287,9 +340,6 @@ def build_ingestion_query(node_schema: CartographyNodeSchema) -> str: node_props_as_dict, node_schema.extra_node_labels, ), - attach_relationships_statement=_build_attach_relationships_statement( - node_schema.sub_resource_relationship, - node_schema.other_relationships, - ), + attach_relationships_statement=_build_attach_relationships_statement(sub_resource_rel, other_rels), ) return ingest_query diff --git a/cartography/intel/aws/emr.py b/cartography/intel/aws/emr.py index 8941b39ac..1aac6dfb4 100644 --- a/cartography/intel/aws/emr.py +++ b/cartography/intel/aws/emr.py @@ -14,6 +14,7 @@ from cartography.graph.model import CartographyRelProperties from cartography.graph.model import CartographyRelSchema from cartography.graph.model import LinkDirection +from cartography.graph.model import make_target_node_matcher from cartography.graph.model import PropertyRef from cartography.graph.model import TargetNodeMatcher from cartography.graph.querybuilder import build_ingestion_query @@ -58,7 +59,7 @@ def get_emr_describe_cluster(boto3_session: boto3.session.Session, region: str, return cluster_details -@dataclass +@dataclass(frozen=True) class EMRClusterNodeProperties(CartographyNodeProperties): 
arn: PropertyRef = PropertyRef('ClusterArn') auto_terminate: PropertyRef = PropertyRef('AutoTerminate') @@ -85,16 +86,21 @@ class EMRClusterNodeProperties(CartographyNodeProperties): visible_to_all_users: PropertyRef = PropertyRef('VisibleToAllUsers') -@dataclass +@dataclass(frozen=True) class EMRClusterToAwsAccountRelProperties(CartographyRelProperties): lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) -@dataclass +@dataclass(frozen=True) # (:EMRCluster)<-[:RESOURCE]-(:AWSAccount) class EMRClusterToAWSAccount(CartographyRelSchema): target_node_label: str = 'AWSAccount' - target_node_matcher: TargetNodeMatcher = TargetNodeMatcher( + # target_node_matcher: TargetNodeMatcher = make_dataclass( + # 'TargetNodeMatcher', + # fields=[('id', PropertyRef, field(default=PropertyRef('AccountId', set_in_kwargs=True)))], + # frozen=True, + # )() + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( {'id': PropertyRef('AccountId', set_in_kwargs=True)}, ) direction: LinkDirection = LinkDirection.INWARD @@ -102,7 +108,7 @@ class EMRClusterToAWSAccount(CartographyRelSchema): properties: EMRClusterToAwsAccountRelProperties = EMRClusterToAwsAccountRelProperties() -@dataclass +@dataclass(frozen=True) class EMRClusterSchema(CartographyNodeSchema): label: str = 'EMRCluster' properties: EMRClusterNodeProperties = EMRClusterNodeProperties() diff --git a/tests/data/graph/querybuilder/sample_models/interesting_asset.py b/tests/data/graph/querybuilder/sample_models/interesting_asset.py index e9a4b033b..93b31a4d9 100644 --- a/tests/data/graph/querybuilder/sample_models/interesting_asset.py +++ b/tests/data/graph/querybuilder/sample_models/interesting_asset.py @@ -7,13 +7,14 @@ from cartography.graph.model import CartographyRelSchema from cartography.graph.model import ExtraNodeLabels from cartography.graph.model import LinkDirection +from cartography.graph.model import make_target_node_matcher from cartography.graph.model import OtherRelationships from 
cartography.graph.model import PropertyRef from cartography.graph.model import TargetNodeMatcher from tests.data.graph.querybuilder.sample_models.simple_node import SimpleNodeProperties -@dataclass +@dataclass(frozen=True) class InterestingAssetProperties(CartographyNodeProperties): id: PropertyRef = PropertyRef('Id') lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) @@ -21,21 +22,21 @@ class InterestingAssetProperties(CartographyNodeProperties): property2: PropertyRef = PropertyRef('property2') -@dataclass +@dataclass(frozen=True) class InterestingAssetToSubResourceRelProps(CartographyRelProperties): lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) another_rel_field: PropertyRef = PropertyRef('AnotherField') yet_another_rel_field: PropertyRef = PropertyRef("YetAnotherRelField") -@dataclass +@dataclass(frozen=True) class InterestingAssetToSubResourceRel(CartographyRelSchema): """ Define a sub resource relationship (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) """ target_node_label: str = 'SubResource' - target_node_matcher: TargetNodeMatcher = TargetNodeMatcher( + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( {'id': PropertyRef('sub_resource_id', set_in_kwargs=True)}, ) direction: LinkDirection = LinkDirection.INWARD @@ -43,43 +44,43 @@ class InterestingAssetToSubResourceRel(CartographyRelSchema): properties: InterestingAssetToSubResourceRelProps = InterestingAssetToSubResourceRelProps() -@dataclass +@dataclass(frozen=True) class InterestingAssetToHelloAssetRelProps(CartographyRelProperties): lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) -@dataclass +@dataclass(frozen=True) class InterestingAssetToHelloAssetRel(CartographyRelSchema): """ Define an additional relationship (:InterestingAsset)-[:ASSOCIATED_WITH]->(:HelloAsset) """ target_node_label: str = 'HelloAsset' - target_node_matcher: TargetNodeMatcher = TargetNodeMatcher({'id': 
PropertyRef('hello_asset_id')}) + target_node_matcher: TargetNodeMatcher = make_target_node_matcher({'id': PropertyRef('hello_asset_id')}) direction: LinkDirection = LinkDirection.OUTWARD rel_label: str = "ASSOCIATED_WITH" properties: InterestingAssetToHelloAssetRelProps = InterestingAssetToHelloAssetRelProps() -@dataclass +@dataclass(frozen=True) class InterestingAssetToWorldAssetRelProps(CartographyRelProperties): lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) -@dataclass +@dataclass(frozen=True) class InterestingAssetToWorldAssetRel(CartographyRelSchema): """ Define yet another relationship. (:InterestingAsset)<-[:CONNECTED]-(:WorldAsset) """ target_node_label: str = 'WorldAsset' - target_node_matcher: TargetNodeMatcher = TargetNodeMatcher({'id': PropertyRef('world_asset_id')}) + target_node_matcher: TargetNodeMatcher = make_target_node_matcher({'id': PropertyRef('world_asset_id')}) direction: LinkDirection = LinkDirection.INWARD rel_label: str = "CONNECTED" properties: InterestingAssetToWorldAssetRelProps = InterestingAssetToWorldAssetRelProps() -@dataclass +@dataclass(frozen=True) class InterestingAssetSchema(CartographyNodeSchema): extra_node_labels: Optional[ExtraNodeLabels] = ExtraNodeLabels(['AnotherNodeLabel', 'YetAnotherNodeLabel']) label: str = 'InterestingAsset' diff --git a/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py b/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py index db5ca5831..6c9bb65ad 100644 --- a/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py +++ b/tests/data/graph/querybuilder/sample_models/multiple_attr_match.py @@ -6,23 +6,24 @@ from cartography.graph.model import CartographyRelProperties from cartography.graph.model import CartographyRelSchema from cartography.graph.model import LinkDirection +from cartography.graph.model import make_target_node_matcher from cartography.graph.model import OtherRelationships from cartography.graph.model import 
PropertyRef from cartography.graph.model import TargetNodeMatcher -@dataclass +@dataclass(frozen=True) class TestComputerToPersonRelProps(CartographyRelProperties): lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) -@dataclass +@dataclass(frozen=True) class TestComputerToPersonRel(CartographyRelSchema): """ (:TestComputer)<-[:OWNS]-(:Person) """ target_node_label: str = 'Person' - target_node_matcher: TargetNodeMatcher = TargetNodeMatcher( + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( { 'first_name': PropertyRef('FirstName'), 'last_name': PropertyRef('LastName'), @@ -34,7 +35,7 @@ class TestComputerToPersonRel(CartographyRelSchema): # Test defining a simple node with no relationships. -@dataclass +@dataclass(frozen=True) class TestComputerProperties(CartographyNodeProperties): id: PropertyRef = PropertyRef('Id') lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) @@ -43,7 +44,7 @@ class TestComputerProperties(CartographyNodeProperties): name: PropertyRef = PropertyRef('name') -@dataclass +@dataclass(frozen=True) class TestComputer(CartographyNodeSchema): label: str = 'TestComputer' properties: TestComputerProperties = TestComputerProperties() diff --git a/tests/data/graph/querybuilder/sample_models/simple_node.py b/tests/data/graph/querybuilder/sample_models/simple_node.py index 5ec7d3aec..d5b3cd2f1 100644 --- a/tests/data/graph/querybuilder/sample_models/simple_node.py +++ b/tests/data/graph/querybuilder/sample_models/simple_node.py @@ -5,12 +5,13 @@ from cartography.graph.model import CartographyRelProperties from cartography.graph.model import CartographyRelSchema from cartography.graph.model import LinkDirection +from cartography.graph.model import make_target_node_matcher from cartography.graph.model import PropertyRef from cartography.graph.model import TargetNodeMatcher # Test defining a simple node with no relationships. 
-@dataclass +@dataclass(frozen=True) class SimpleNodeProperties(CartographyNodeProperties): id: PropertyRef = PropertyRef('Id') lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) @@ -18,22 +19,22 @@ class SimpleNodeProperties(CartographyNodeProperties): property2: PropertyRef = PropertyRef('property2') -@dataclass +@dataclass(frozen=True) class SimpleNodeSchema(CartographyNodeSchema): label: str = 'SimpleNode' properties: SimpleNodeProperties = SimpleNodeProperties() # Test defining a simple node with a sub resource rel: (:SimpleNode)<-[:RESOURCE]-(:SubResource) -@dataclass +@dataclass(frozen=True) class SimpleNodeToSubResourceRelProps(CartographyRelProperties): lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) -@dataclass +@dataclass(frozen=True) class SimpleNodeToSubResourceRel(CartographyRelSchema): target_node_label: str = 'SubResource' - target_node_matcher: TargetNodeMatcher = TargetNodeMatcher( + target_node_matcher: TargetNodeMatcher = make_target_node_matcher( {'id': PropertyRef('sub_resource_id', set_in_kwargs=True)}, ) direction: LinkDirection = LinkDirection.INWARD @@ -41,7 +42,7 @@ class SimpleNodeToSubResourceRel(CartographyRelSchema): properties: SimpleNodeToSubResourceRelProps = SimpleNodeToSubResourceRelProps() -@dataclass +@dataclass(frozen=True) class SimpleNodeWithSubResourceSchema(CartographyNodeSchema): label: str = 'SimpleNode' properties: SimpleNodeProperties = SimpleNodeProperties() diff --git a/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py b/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py new file mode 100644 index 000000000..1c22ca76f --- /dev/null +++ b/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py @@ -0,0 +1,109 @@ +from cartography.client.core.tx import load_graph_data +from cartography.graph.querybuilder import build_ingestion_query +from 
tests.data.graph.querybuilder.sample_data.partial_relationships import INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS +from tests.data.graph.querybuilder.sample_data.partial_relationships import MERGE_SUB_RESOURCE_QUERY +from tests.data.graph.querybuilder.sample_data.partial_relationships import MERGE_WORLD_ASSET_QUERY +from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetSchema +from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetToSubResourceRel +from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetToWorldAssetRel + + +def test_load_graph_extra_node_labels_and_no_relationships(neo4j_session): + """ + Test that multiple labels defined on a CartographyNodeSchema are properly recorded to the graph. + """ + # Act + query = build_ingestion_query(InterestingAssetSchema(), set()) + load_graph_data( + neo4j_session, + query, + INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, + lastupdated=1, + ) + + # Assert that the labels exist + expected = { + 'AnotherNodeLabel', + 'InterestingAsset', + 'YetAnotherNodeLabel', + } + result = neo4j_session.run( + """ + MATCH (n1:InterestingAsset) RETURN labels(n1) AS labels; + """, + ) + actual = {label for label in result.data()[0]['labels']} + assert actual == expected + + +def test_load_graph_data_with_sub_rel_selected(neo4j_session): + """ + TODO comments + """ + # Arrange: add (:SubResource{id:sub-resource-id}) and (:WorldAsset{id: world-asset-id}) to the test graph + neo4j_session.run(MERGE_SUB_RESOURCE_QUERY) + neo4j_session.run(MERGE_WORLD_ASSET_QUERY) + + # Act + query = build_ingestion_query( + InterestingAssetSchema(), selected_relationships={ + InterestingAssetToSubResourceRel(), + }, + ) + load_graph_data( + neo4j_session, + query, + INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, + lastupdated=1, + sub_resource_id='sub-resource-id', + ) + + # Assert that the InterestingAsset to SubResource relationship exists + expected = 
{ + ('interesting-node-id', 'sub-resource-id'), + } + result = neo4j_session.run( + """ + MATCH (n1:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(n2:SubResource) RETURN n1.id, n2.id; + """, + ) + actual = { + (r['n1.id'], r['n2.id']) for r in result + } + assert actual == expected + + +def test_load_graph_data_with_worldasset_rel_selected(neo4j_session): + """ + TODO comments + """ + # Arrange: add (:SubResource{id:sub-resource-id}) and (:WorldAsset{id: world-asset-id}) to the test graph + neo4j_session.run(MERGE_SUB_RESOURCE_QUERY) + neo4j_session.run(MERGE_WORLD_ASSET_QUERY) + + # Act + query = build_ingestion_query( + InterestingAssetSchema(), selected_relationships={ + InterestingAssetToWorldAssetRel(), + }, + ) + load_graph_data( + neo4j_session, + query, + INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, + lastupdated=1, + ) + + # Assert that the InterestingAsset to SubResource relationship exists + expected = { + ('interesting-node-id', 'the-worldasset-id-1'), + } + result = neo4j_session.run( + """ + MATCH (n1:InterestingAsset)<-[:CONNECTED]-(n2:WorldAsset) RETURN n1.id, n2.id; + """, + ) + actual = { + (r['n1.id'], r['n2.id']) for r in result + } + assert actual == expected From 5dd60a2436bc4bf75b80c459cf7f43fc35c4397b Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Wed, 14 Dec 2022 09:51:04 -0800 Subject: [PATCH 23/27] Docstrings --- cartography/graph/model.py | 14 +++++++------- cartography/graph/querybuilder.py | 14 ++++++++++++-- .../test_querybuilder_labels_and_var_num_rels.py | 3 ++- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/cartography/graph/model.py b/cartography/graph/model.py index 84746523b..01ac13b5d 100644 --- a/cartography/graph/model.py +++ b/cartography/graph/model.py @@ -226,7 +226,7 @@ def other_relationships(self) -> Optional[OtherRelationships]: """ Optional. Allows subclasses to specify additional cartography relationships on the node. - :return: None if not overriden. Else return an OtherRelationships object. 
+ :return: None if not overriden. Else return the node's OtherRelationships. """ return None @@ -235,15 +235,15 @@ def extra_node_labels(self) -> Optional[ExtraNodeLabels]: """ Optional. Allows specifying extra labels on the node. - :return: None if not overriden. Else return a str list of the extra labels specified on the node. + :return: None if not overriden. Else return the ExtraNodeLabels specified on the node. """ return None def make_target_node_matcher(key_ref_dict: Dict[str, PropertyRef]) -> TargetNodeMatcher: + """ + :param key_ref_dict: A Dict mapping keys present on the node to PropertyRef objects. + :return: A TargetNodeMatcher used for CartographyRelSchema to match with other nodes. + """ fields = [(key, PropertyRef, field(default=prop_ref)) for key, prop_ref in key_ref_dict.items()] - return make_dataclass( - TargetNodeMatcher.__name__, - fields, - frozen=True, - )() + return make_dataclass(TargetNodeMatcher.__name__, fields, frozen=True)() diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index f0775add6..bcf0c4b46 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -101,6 +101,8 @@ def _build_rel_properties_statement(rel_var: str, rel_property_map: Optional[Dic def _build_match_clause(matcher: TargetNodeMatcher) -> str: """ Generate a Neo4j match statement on one or more keys and values for a given node. + :param matcher: A TargetNodeMatcher object + :return: a Neo4j match clause """ match = Template("$Key: $PropRef") matcher_asdict = asdict(matcher) @@ -263,10 +265,18 @@ def _filter_selected_relationships( node_schema: CartographyNodeSchema, selected_relationships: Set[CartographyRelSchema], ) -> Tuple[Optional[CartographyRelSchema], Optional[OtherRelationships]]: - # Empty set means no relationships are selected + """ + Ensures that selected relationships specified to build_ingestion_query() are actually present on the node_schema. 
+ :param node_schema: The node schema object to filter relationships against + :param selected_relationships: The set of relationships to check if they exist in the node schema. If empty set, + this means that no relationships have been selected. None is not an accepted value here. + :return: a tuple of the (sub resource relationship, OtherRelationships that have not been filtered out). + """ + # The empty set means no relationships are selected if selected_relationships == set(): return None, None + # Collect the node's sub resource rel and OtherRelationships together in one set for easy comparison all_rels_on_node = {node_schema.sub_resource_relationship} if node_schema.other_relationships: for rel in node_schema.other_relationships.rels: @@ -300,7 +310,7 @@ def build_ingestion_query( cartography module authors don't need to handwrite their own queries. :param node_schema: The CartographyNodeSchema object to build a Neo4j query from. :param selected_relationships: If specified, generates a query that attaches only the relationships in this optional - set of CartographyRelSchema. The RelSchema specified here _must_ be preset in node_schema.sub_resource_relationship + set of CartographyRelSchema. The RelSchema specified here _must_ be present in node_schema.sub_resource_relationship or node_schema.other_relationships. If None (default), then we create a query using all RelSchema in node_schema.sub_resource_relationship + node_schema.other_relationships. 
diff --git a/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py b/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py index 1c22ca76f..cf6fe8560 100644 --- a/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py +++ b/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py @@ -83,7 +83,8 @@ def test_load_graph_data_with_worldasset_rel_selected(neo4j_session): # Act query = build_ingestion_query( - InterestingAssetSchema(), selected_relationships={ + InterestingAssetSchema(), + selected_relationships={ InterestingAssetToWorldAssetRel(), }, ) From f6fea7595cec4a504f542df4ce08094cf7ae9653 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Wed, 14 Dec 2022 21:14:17 -0800 Subject: [PATCH 24/27] Comments in tests --- .../sample_data/helloworld_relationships.py | 42 ++++++ .../sample_data/partial_relationships.py | 22 ---- ...st_querybuilder_labels_and_var_num_rels.py | 122 +++++++++++++----- .../graph/test_querybuilder_rel_subsets.py | 10 +- 4 files changed, 137 insertions(+), 59 deletions(-) create mode 100644 tests/data/graph/querybuilder/sample_data/helloworld_relationships.py delete mode 100644 tests/data/graph/querybuilder/sample_data/partial_relationships.py diff --git a/tests/data/graph/querybuilder/sample_data/helloworld_relationships.py b/tests/data/graph/querybuilder/sample_data/helloworld_relationships.py new file mode 100644 index 000000000..4eaf902e8 --- /dev/null +++ b/tests/data/graph/querybuilder/sample_data/helloworld_relationships.py @@ -0,0 +1,42 @@ +MERGE_SUB_RESOURCE_QUERY = """ +MERGE (s:SubResource{id: "sub-resource-id"}) +ON CREATE SET s.lastupdated = 1 +""" + + +MERGE_HELLO_ASSET_QUERY = """ +MERGE (h:HelloAsset{id: "the-helloasset-id-1"}) +ON CREATE SET h.lastupdated = 1 +""" + + +MERGE_WORLD_ASSET_QUERY = """ +MERGE (w:WorldAsset{id: "the-worldasset-id-1"}) +ON CREATE SET w.lastupdated = 1 +""" + + +# This dataset shows an 
InterestingNode attached to a WorldAsset but no other relationships. +INTERESTING_NODE_WITH_PARTIAL_RELS = [ + { + 'Id': 'interesting-node-id', + 'property1': 'b', + 'property2': 'c', + 'AnotherField': 'd', + 'YetAnotherRelField': 'e', + 'world_asset_id': 'the-worldasset-id-1', + }, +] + +# This dataset shows an InterestingNode attached to a HelloAsset and a WorldAsset. +INTERESTING_NODE_WITH_ALL_RELS = [ + { + 'Id': 'interesting-node-id', + 'property1': 'b', + 'property2': 'c', + 'AnotherField': 'd', + 'YetAnotherRelField': 'e', + 'world_asset_id': 'the-worldasset-id-1', + 'hello_asset_id': 'the-helloasset_id-1', + }, +] diff --git a/tests/data/graph/querybuilder/sample_data/partial_relationships.py b/tests/data/graph/querybuilder/sample_data/partial_relationships.py deleted file mode 100644 index 03b922c4a..000000000 --- a/tests/data/graph/querybuilder/sample_data/partial_relationships.py +++ /dev/null @@ -1,22 +0,0 @@ -MERGE_SUB_RESOURCE_QUERY = """ -MERGE (s:SubResource{id: "sub-resource-id"}) -ON CREATE SET s.lastupdated = 1 -""" - -MERGE_WORLD_ASSET_QUERY = """ -MERGE (w:WorldAsset{id: "the-worldasset-id-1"}) -ON CREATE SET w.lastupdated = 1 -""" - - -# This dataset shows an InterestingNode attached to a WorldAsset but no other relationships. 
-INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS = [ - { - 'Id': 'interesting-node-id', - 'property1': 'b', - 'property2': 'c', - 'AnotherField': 'd', - 'YetAnotherRelField': 'e', - 'world_asset_id': 'the-worldasset-id-1', - }, -] diff --git a/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py b/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py index cf6fe8560..aa9d3c359 100644 --- a/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py +++ b/tests/integration/cartography/graph/test_querybuilder_labels_and_var_num_rels.py @@ -1,32 +1,35 @@ from cartography.client.core.tx import load_graph_data from cartography.graph.querybuilder import build_ingestion_query -from tests.data.graph.querybuilder.sample_data.partial_relationships import INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS -from tests.data.graph.querybuilder.sample_data.partial_relationships import MERGE_SUB_RESOURCE_QUERY -from tests.data.graph.querybuilder.sample_data.partial_relationships import MERGE_WORLD_ASSET_QUERY +from tests.data.graph.querybuilder.sample_data.helloworld_relationships import INTERESTING_NODE_WITH_ALL_RELS +from tests.data.graph.querybuilder.sample_data.helloworld_relationships import MERGE_HELLO_ASSET_QUERY +from tests.data.graph.querybuilder.sample_data.helloworld_relationships import MERGE_SUB_RESOURCE_QUERY +from tests.data.graph.querybuilder.sample_data.helloworld_relationships import MERGE_WORLD_ASSET_QUERY from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetSchema from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetToSubResourceRel from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetToWorldAssetRel -def test_load_graph_extra_node_labels_and_no_relationships(neo4j_session): +def test_load_graph_data_extra_node_labels_and_no_relationships(neo4j_session): """ - Test that multiple labels 
defined on a CartographyNodeSchema are properly recorded to the graph. + Test that + - multiple labels defined on a CartographyNodeSchema are properly recorded to the graph. + - we are able to generate a query that includes no relationships in build_ingestion_query()'s + `selected_relationships` parameter. """ - # Act - query = build_ingestion_query(InterestingAssetSchema(), set()) + # Act: specify the empty set as selected_relationships to build_ingestion_query(). + query = build_ingestion_query(InterestingAssetSchema(), selected_relationships=set()) + + # Act: call `load_graph_data()` without specifying `sub_resource` or any other kwargs that were present on + # InterestingAsset's attached RelSchema. load_graph_data( neo4j_session, query, - INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, + INTERESTING_NODE_WITH_ALL_RELS, lastupdated=1, ) # Assert that the labels exist - expected = { - 'AnotherNodeLabel', - 'InterestingAsset', - 'YetAnotherNodeLabel', - } + expected = {'AnotherNodeLabel', 'InterestingAsset', 'YetAnotherNodeLabel'} result = neo4j_session.run( """ MATCH (n1:InterestingAsset) RETURN labels(n1) AS labels; @@ -38,73 +41,128 @@ def test_load_graph_extra_node_labels_and_no_relationships(neo4j_session): def test_load_graph_data_with_sub_rel_selected(neo4j_session): """ - TODO comments + Test generating and running a query that includes only InterestingAssetSchema.sub_resource_relationship in + build_ingestion_query()'s selected_relationships parameter. 
""" # Arrange: add (:SubResource{id:sub-resource-id}) and (:WorldAsset{id: world-asset-id}) to the test graph neo4j_session.run(MERGE_SUB_RESOURCE_QUERY) + neo4j_session.run(MERGE_HELLO_ASSET_QUERY) neo4j_session.run(MERGE_WORLD_ASSET_QUERY) # Act query = build_ingestion_query( - InterestingAssetSchema(), selected_relationships={ + InterestingAssetSchema(), + selected_relationships={ InterestingAssetToSubResourceRel(), }, ) load_graph_data( neo4j_session, query, - INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, + INTERESTING_NODE_WITH_ALL_RELS, lastupdated=1, sub_resource_id='sub-resource-id', ) - # Assert that the InterestingAsset to SubResource relationship exists - expected = { - ('interesting-node-id', 'sub-resource-id'), - } + # Assert that the InterestingAsset to SubResource relationship exists. + expected = {('interesting-node-id', 'sub-resource-id')} result = neo4j_session.run( """ MATCH (n1:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(n2:SubResource) RETURN n1.id, n2.id; """, ) - actual = { - (r['n1.id'], r['n2.id']) for r in result - } + actual = {(r['n1.id'], r['n2.id']) for r in result} assert actual == expected def test_load_graph_data_with_worldasset_rel_selected(neo4j_session): """ - TODO comments + Test generating and running a query that specifies only 1 of 2 of the rels in + InterestingAssetSchema.other_relationships to build_ingestion_query()'s selected_relationships parameter. 
+ """ + # Arrange: add (:SubResource{id:sub-resource-id}) and (:WorldAsset{id: world-asset-id}) to the test graph + neo4j_session.run(MERGE_SUB_RESOURCE_QUERY) + neo4j_session.run(MERGE_HELLO_ASSET_QUERY) + neo4j_session.run(MERGE_WORLD_ASSET_QUERY) + + # Act + query = build_ingestion_query( + InterestingAssetSchema(), + selected_relationships={ + InterestingAssetToWorldAssetRel(), + }, + ) + load_graph_data( + neo4j_session, + query, + INTERESTING_NODE_WITH_ALL_RELS, + lastupdated=1, + ) + + # Assert that the InterestingAsset to WorldAsset relationship exists + expected = {('interesting-node-id', 'the-worldasset-id-1')} + result = neo4j_session.run( + """ + MATCH (n1:InterestingAsset)<-[:CONNECTED]-(n2:WorldAsset) RETURN n1.id, n2.id; + """, + ) + actual = {(r['n1.id'], r['n2.id']) for r in result} + assert actual == expected + + +def test_load_graph_data_with_sub_resource_and_worldasset_rel_selected(neo4j_session): + """ + Test generating and running a query that includes InterestingAssetSchema.sub_resource_relationship + only 1 of 2 of + the rels in InterestingAssetSchema.other_relationships to build_ingestion_query()'s selected_relationships + parameter. 
""" # Arrange: add (:SubResource{id:sub-resource-id}) and (:WorldAsset{id: world-asset-id}) to the test graph neo4j_session.run(MERGE_SUB_RESOURCE_QUERY) + neo4j_session.run(MERGE_HELLO_ASSET_QUERY) neo4j_session.run(MERGE_WORLD_ASSET_QUERY) # Act query = build_ingestion_query( InterestingAssetSchema(), selected_relationships={ + InterestingAssetToSubResourceRel(), InterestingAssetToWorldAssetRel(), }, ) load_graph_data( neo4j_session, query, - INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, + INTERESTING_NODE_WITH_ALL_RELS, lastupdated=1, + sub_resource_id='sub-resource-id', ) - # Assert that the InterestingAsset to SubResource relationship exists - expected = { - ('interesting-node-id', 'the-worldasset-id-1'), - } + # Assert that the InterestingAsset to WorldAsset relationship exists + expected = {('interesting-node-id', 'the-worldasset-id-1')} result = neo4j_session.run( """ MATCH (n1:InterestingAsset)<-[:CONNECTED]-(n2:WorldAsset) RETURN n1.id, n2.id; """, ) - actual = { - (r['n1.id'], r['n2.id']) for r in result - } + actual = {(r['n1.id'], r['n2.id']) for r in result} + assert actual == expected + + # Assert that the InterestingAsset to SubResource relationship exists. + expected = {('interesting-node-id', 'sub-resource-id')} + result = neo4j_session.run( + """ + MATCH (n1:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(n2:SubResource) RETURN n1.id, n2.id; + """, + ) + actual = {(r['n1.id'], r['n2.id']) for r in result} + assert actual == expected + + # Assert that the InterestingAsset to Hello relationships does NOT exist. 
+ expected = set() + result = neo4j_session.run( + """ + MATCH (n1:InterestingAsset)-[:ASSOCIATED_WITH]->(n2:HelloAsset) RETURN n1.id, n2.id; + """, + ) + actual = {(r['n1.id'], r['n2.id']) for r in result} assert actual == expected diff --git a/tests/integration/cartography/graph/test_querybuilder_rel_subsets.py b/tests/integration/cartography/graph/test_querybuilder_rel_subsets.py index 9f824b1b3..5fa3b1ddc 100644 --- a/tests/integration/cartography/graph/test_querybuilder_rel_subsets.py +++ b/tests/integration/cartography/graph/test_querybuilder_rel_subsets.py @@ -1,8 +1,8 @@ from cartography.client.core.tx import load_graph_data from cartography.graph.querybuilder import build_ingestion_query -from tests.data.graph.querybuilder.sample_data.partial_relationships import INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS -from tests.data.graph.querybuilder.sample_data.partial_relationships import MERGE_SUB_RESOURCE_QUERY -from tests.data.graph.querybuilder.sample_data.partial_relationships import MERGE_WORLD_ASSET_QUERY +from tests.data.graph.querybuilder.sample_data.helloworld_relationships import INTERESTING_NODE_WITH_PARTIAL_RELS +from tests.data.graph.querybuilder.sample_data.helloworld_relationships import MERGE_SUB_RESOURCE_QUERY +from tests.data.graph.querybuilder.sample_data.helloworld_relationships import MERGE_WORLD_ASSET_QUERY from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetSchema @@ -25,7 +25,7 @@ def test_load_graph_data_subset_of_relationships(neo4j_session): load_graph_data( neo4j_session, query, - INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, + INTERESTING_NODE_WITH_PARTIAL_RELS, lastupdated=1, sub_resource_id='sub-resource-id', ) @@ -88,7 +88,7 @@ def test_load_graph_data_subset_of_relationships_only_sub_resource(neo4j_session load_graph_data( neo4j_session, query, - INTERESTING_NODE_WITH_PARTIAL_RELATIONSHIPS, + INTERESTING_NODE_WITH_PARTIAL_RELS, lastupdated=1, sub_resource_id='sub-resource-id', ) From 
fea03084434c76343e0a4cbc94783a12ff811cd6 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Wed, 14 Dec 2022 21:59:19 -0800 Subject: [PATCH 25/27] Better comments --- cartography/graph/model.py | 34 +++++++++++-------- cartography/graph/querybuilder.py | 7 ++-- cartography/intel/aws/emr.py | 5 --- .../graph/test_querybuilder_rel_subsets.py | 7 ++-- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/cartography/graph/model.py b/cartography/graph/model.py index 01ac13b5d..c222cf1c8 100644 --- a/cartography/graph/model.py +++ b/cartography/graph/model.py @@ -40,7 +40,7 @@ class PropertyRef: cartography takes lists of Python dicts and loads them to Neo4j. PropertyRefs allow our dynamically generated Neo4j ingestion queries to set values for a given node or relationship property from (A) a field on the dict being - processed (PropertyRef.set_in_kwargs=False, default), or (B) from a single variable provided by a keyword argument + processed (PropertyRef.set_in_kwargs=False; default), or (B) from a single variable provided by a keyword argument (PropertyRef.set_in_kwargs=True). """ @@ -60,9 +60,16 @@ def _parameterize_name(self) -> str: def __repr__(self) -> str: """ - By default, the querybuilder will render an UNWIND query so that - the value for this property will come from the dict being processed. - If set_in_kwargs is True, then the value will instead come from kwargs. + `querybuilder.build_ingestion_query()`, generates a Neo4j batched ingestion query of the form + `UNWIND $DictList AS item [...]`. + + If set_in_kwargs is False (default), we instruct the querybuilder to get the value for this given property from + the dict being processed. To do this, this function returns "item.". This is used for loading + in lists of nodes. + + On the other hand if set_in_kwargs is True, then the value will instead come from kwargs passed to + querybuilder.build_ingestion_query(). 
This is used for things like applying the same update tag to all nodes of + a given run. """ return f"item.{self.name}" if not self.set_in_kwargs else self._parameterize_name() @@ -70,16 +77,16 @@ def __repr__(self) -> str: @dataclass(frozen=True) class CartographyNodeProperties(abc.ABC): """ - Abstract base dataclass that represents the properties on a CartographyNodeSchema. This is intended to enforce that - all subclasses will have an id and a lastupdated field defined on their resulting nodes. + Abstract base dataclass that represents the properties on a CartographyNodeSchema. This class is abstract so that we + can enforce that all subclasses have an id and a lastupdated field. """ id: PropertyRef = field(init=False) lastupdated: PropertyRef = field(init=False) def __post_init__(self): """ - Designed to prevent direct instantiation. This workaround is needed since this is both an abstract class and a - dataclass. + Designed to prevent direct instantiation. This workaround is needed since this is a dataclass and an abstract + class without an abstract method defined. See https://stackoverflow.com/q/60590442. """ if self.__class__ == CartographyNodeProperties: @@ -96,8 +103,8 @@ class CartographyRelProperties(abc.ABC): def __post_init__(self): """ - Designed to prevent direct instantiation. This workaround is needed since this is both an abstract class and a - dataclass. + Designed to prevent direct instantiation. This workaround is needed since this is a dataclass and an abstract + class without an abstract method defined. """ if self.__class__ == CartographyRelProperties: raise TypeError("Cannot instantiate abstract class.") @@ -110,11 +117,10 @@ class TargetNodeMatcher: Keys: one or more attribute names on the target_node_label used to uniquely identify what node to connect to. Values: The value of the target_node_key used to uniquely identify what node to connect to. This is given as a PropertyRef. 
- This is needed because we need to include this in the CartographyRelSchema dataclass but dicts are mutable, so we - need to do this wrapping. + This is used to ensure dataclass immutability when composed as part of a CartographyNodeSchema object. + See `make_target_node_matcher()`. """ pass - # key_refs: Dict[str, PropertyRef] @dataclass(frozen=True) @@ -153,7 +159,7 @@ def target_node_matcher(self) -> TargetNodeMatcher: @abc.abstractmethod def rel_label(self) -> str: """ - :return: The str label of the relationship. + :return: The string label of the relationship. """ pass diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index bcf0c4b46..7cf65bad0 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -312,9 +312,9 @@ def build_ingestion_query( :param selected_relationships: If specified, generates a query that attaches only the relationships in this optional set of CartographyRelSchema. The RelSchema specified here _must_ be present in node_schema.sub_resource_relationship or node_schema.other_relationships. - If None (default), then we create a query using all RelSchema in node_schema.sub_resource_relationship + - node_schema.other_relationships. - If equal to the empty set (set()), we create a query with no relationships at all. + If selected_relationships is None (default), then we create a query using all RelSchema specified in + node_schema.sub_resource_relationship + node_schema.other_relationships. + If selected_relationships is the empty set, we create a query with no relationship attachments at all. :return: An optimized Neo4j query that can be used to ingest nodes and relationships. Important notes: - The resulting query uses the UNWIND + MERGE pattern (see @@ -322,6 +322,7 @@ def build_ingestion_query( load the data for speed. - The query assumes that a list of dicts will be passed to it through parameter $DictList. 
- The query sets `firstseen` attributes on all the nodes and relationships that it creates. + - The query is intended to be supplied as input to cartography.client.core.tx.load_graph_data(). """ query_template = Template( """ diff --git a/cartography/intel/aws/emr.py b/cartography/intel/aws/emr.py index 1aac6dfb4..4220686bc 100644 --- a/cartography/intel/aws/emr.py +++ b/cartography/intel/aws/emr.py @@ -95,11 +95,6 @@ class EMRClusterToAwsAccountRelProperties(CartographyRelProperties): # (:EMRCluster)<-[:RESOURCE]-(:AWSAccount) class EMRClusterToAWSAccount(CartographyRelSchema): target_node_label: str = 'AWSAccount' - # target_node_matcher: TargetNodeMatcher = make_dataclass( - # 'TargetNodeMatcher', - # fields=[('id', PropertyRef, field(default=PropertyRef('AccountId', set_in_kwargs=True)))], - # frozen=True, - # )() target_node_matcher: TargetNodeMatcher = make_target_node_matcher( {'id': PropertyRef('AccountId', set_in_kwargs=True)}, ) diff --git a/tests/integration/cartography/graph/test_querybuilder_rel_subsets.py b/tests/integration/cartography/graph/test_querybuilder_rel_subsets.py index 5fa3b1ddc..d3dbdd663 100644 --- a/tests/integration/cartography/graph/test_querybuilder_rel_subsets.py +++ b/tests/integration/cartography/graph/test_querybuilder_rel_subsets.py @@ -8,7 +8,8 @@ def test_load_graph_data_subset_of_relationships(neo4j_session): """ - Test load_graph_data() if a schema defines multiple relationships but only a subset of them are present in our data. + Test load_graph_data() if a schema defines multiple relationships but only a subset of them are possible to create + given our data.
In this test case, the following relationships are possible: (:InterestingAsset)<-[:RELATIONSHIP_LABEL]-(:SubResource) @@ -51,7 +52,7 @@ def test_load_graph_data_subset_of_relationships(neo4j_session): result = neo4j_session.run( """ MATCH (n1:InterestingAsset) - OPTIONAL MATCH (n1)<-[:ASSOCIATED_WITH]-(n2:HelloAsset) + OPTIONAL MATCH (n1)--(n2:HelloAsset) RETURN n1.id, n2.id; """, ) @@ -114,7 +115,7 @@ def test_load_graph_data_subset_of_relationships_only_sub_resource(neo4j_session result = neo4j_session.run( """ MATCH (n1:InterestingAsset) - OPTIONAL MATCH (n1)<-[:ASSOCIATED_WITH]-(n2:HelloAsset) + OPTIONAL MATCH (n1)--(n2:HelloAsset) RETURN n1.id, n2.id; """, ) From 7b3a2687ef4e913a22c577a0e9396dc52ee7ddd1 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Wed, 14 Dec 2022 23:04:53 -0800 Subject: [PATCH 26/27] Test for exception conditions --- cartography/graph/querybuilder.py | 52 +++++++----- setup.cfg | 4 + .../querybuilder/sample_models/simple_node.py | 2 +- ...st_querybuilder_build_attach_links_excs.py | 80 +++++++++++++++++++ .../test_querybuilder_filter_selected_rels.py | 17 ++++ 5 files changed, 132 insertions(+), 23 deletions(-) create mode 100644 tests/unit/cartography/graph/test_querybuilder_build_attach_links_excs.py create mode 100644 tests/unit/cartography/graph/test_querybuilder_filter_selected_rels.py diff --git a/cartography/graph/querybuilder.py b/cartography/graph/querybuilder.py index 7cf65bad0..0e8032832 100644 --- a/cartography/graph/querybuilder.py +++ b/cartography/graph/querybuilder.py @@ -109,6 +109,25 @@ def _build_match_clause(matcher: TargetNodeMatcher) -> str: return ', '.join(match.safe_substitute(Key=key, PropRef=prop_ref) for key, prop_ref in matcher_asdict.items()) +def _asdict_with_validate_relprops(link: CartographyRelSchema) -> Dict[str, PropertyRef]: + """ + Give a helpful error message when forgetting to put `()` when instantiating a CartographyRelSchema, as this + isn't always caught by IDEs. 
+ """ + try: + rel_props_as_dict: Dict[str, PropertyRef] = asdict(link.properties) + except TypeError as e: + if e.args and e.args[0] and e.args == 'asdict() should be called on dataclass instances': + logger.error( + f'TypeError thrown when trying to draw relation "{link.rel_label}" to a "{link.target_node_label}" ' + f'node. Please make sure that you did not forget to write `()` when specifying `properties` in the' + f'dataclass. ' + f'For example, do `properties: RelProp = RelProp()`; NOT `properties: RelProp = RelProp`.', + ) + raise + return rel_props_as_dict + + def _build_attach_sub_resource_statement(sub_resource_link: Optional[CartographyRelSchema] = None) -> str: """ Generates a Neo4j statement to attach a sub resource to a node. A 'sub resource' is a term we made up to describe @@ -143,7 +162,7 @@ def _build_attach_sub_resource_statement(sub_resource_link: Optional[Cartography rel_merge_clause = rel_merge_template.safe_substitute(SubResourceRelLabel=sub_resource_link.rel_label) - rel_props_as_dict: Dict[str, PropertyRef] = asdict(sub_resource_link.properties) + rel_props_as_dict: Dict[str, PropertyRef] = _asdict_with_validate_relprops(sub_resource_link) attach_sub_resource_statement = sub_resource_attach_template.safe_substitute( SubResourceLabel=sub_resource_link.target_node_label, @@ -197,19 +216,7 @@ def _build_attach_additional_links_statement( node_var=node_var, ) - # Give a helpful error message when forgetting to put `()` when instantiating a CartographyRelSchema, as this - # isn't always caught by IDEs like PyCharm. - try: - rel_props_as_dict: Dict[str, PropertyRef] = asdict(link.properties) - except TypeError as e: - if e.args and e.args[0] and e.args == 'asdict() should be called on dataclass instances': - logger.error( - f'TypeError thrown when trying to draw relation "{link.rel_label}" to a "{link.target_node_label}" ' - f'node. Please make sure that you did not forget to write `()` when specifying `properties` in the' - f'dataclass. 
' - f'For example, do `properties: RelProp = RelProp()`; NOT `properties: RelProp = RelProp`.', - ) - raise + rel_props_as_dict = _asdict_with_validate_relprops(link) additional_ref = additional_links_template.safe_substitute( AddlLabel=link.target_node_label, @@ -230,12 +237,11 @@ def _build_attach_relationships_statement( ) -> str: """ Use Neo4j subqueries to attach sub resource and/or other relationships. - Subqueries allow the query to continue to run even if we only have data for some but not all of the - relationships defined by a schema. - For example, if an EC2Instance has attachments to NetworkInterfaces and AWSAccounts but our data - only includes EC2Instance to AWSAccount information, structuring the ingestion query with sub- - queries allows us to build a query that will ignore the null relationships and build the ones that - exist. + Subqueries allow the query to continue to run even if we only have data for some but not all the relationships + defined by a schema. + For example, if an EC2Instance has attachments to NetworkInterfaces and AWSAccounts, but our data only includes + EC2Instance to AWSAccount information, structuring the ingestion query with subqueries allows us to build a query + that will ignore the null relationships and continue to MERGE the ones that exist. """ if not sub_resource_relationship and not other_relationships: return "" @@ -266,11 +272,13 @@ def _filter_selected_relationships( selected_relationships: Set[CartographyRelSchema], ) -> Tuple[Optional[CartographyRelSchema], Optional[OtherRelationships]]: """ - Ensures that selected relationships specified to build_ingestion_query() are actually present on the node_schema. + Ensures that selected relationships specified to build_ingestion_query() are actually present on + node_schema.sub_resource_relationship and node_schema.other_relationships. 
:param node_schema: The node schema object to filter relationships against :param selected_relationships: The set of relationships to check if they exist in the node schema. If empty set, this means that no relationships have been selected. None is not an accepted value here. - :return: a tuple of the (sub resource relationship, OtherRelationships that have not been filtered out). + :return: a tuple of the (sub resource rel [if present in selected_relationships], an OtherRelationships object + containing all values of node_schema.other_relationships that are present in selected_relationships) """ # The empty set means no relationships are selected if selected_relationships == set(): diff --git a/setup.cfg b/setup.cfg index b364e22b4..312318941 100644 --- a/setup.cfg +++ b/setup.cfg @@ -63,5 +63,9 @@ ignore_errors = true disallow_untyped_defs = false allow_redefinition = true +# Intentional TypeErrors are here because we are testing if the code gives a helpful error message to the module author. 
+[mypy-tests.unit.cartography.graph.test_querybuilder_build_attach_links_excs] +ignore_errors = true + [coverage:report] fail_under = 30 diff --git a/tests/data/graph/querybuilder/sample_models/simple_node.py b/tests/data/graph/querybuilder/sample_models/simple_node.py index d5b3cd2f1..7c8a12278 100644 --- a/tests/data/graph/querybuilder/sample_models/simple_node.py +++ b/tests/data/graph/querybuilder/sample_models/simple_node.py @@ -46,4 +46,4 @@ class SimpleNodeToSubResourceRel(CartographyRelSchema): class SimpleNodeWithSubResourceSchema(CartographyNodeSchema): label: str = 'SimpleNode' properties: SimpleNodeProperties = SimpleNodeProperties() - sub_resource_relationship: CartographyRelSchema = SimpleNodeToSubResourceRel() + sub_resource_relationship: SimpleNodeToSubResourceRel = SimpleNodeToSubResourceRel() diff --git a/tests/unit/cartography/graph/test_querybuilder_build_attach_links_excs.py b/tests/unit/cartography/graph/test_querybuilder_build_attach_links_excs.py new file mode 100644 index 000000000..165acb71a --- /dev/null +++ b/tests/unit/cartography/graph/test_querybuilder_build_attach_links_excs.py @@ -0,0 +1,80 @@ +# Test defining a simple node with a sub resource rel: (:SimpleNode)<-[:RESOURCE]-(:SubResource) +from dataclasses import dataclass + +from pytest import raises + +from cartography.graph.model import CartographyNodeProperties +from cartography.graph.model import CartographyNodeSchema +from cartography.graph.model import CartographyRelProperties +from cartography.graph.model import CartographyRelSchema +from cartography.graph.model import LinkDirection +from cartography.graph.model import make_target_node_matcher +from cartography.graph.model import OtherRelationships +from cartography.graph.model import PropertyRef +from cartography.graph.model import TargetNodeMatcher +from cartography.graph.querybuilder import _build_attach_additional_links_statement +from cartography.graph.querybuilder import _build_attach_sub_resource_statement + + 
+@dataclass(frozen=True) +class MyNodeToBillingUnitRelProps(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class MyNodeToBillingUnitRel(CartographyRelSchema): + target_node_label: str = 'BillingUnit' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher({'id': PropertyRef('billing_unit_id')}) + direction: LinkDirection = LinkDirection.OUTWARD + rel_label: str = "BILLING_UNIT" + # This is intentionally missing "()" at the end. This will raise an exception! + properties: MyNodeToBillingUnitRelProps = MyNodeToBillingUnitRelProps + + +@dataclass(frozen=True) +class MyNodeToOtherNodeRelProps(CartographyRelProperties): + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class MyNodeToOtherNodeRel(CartographyRelSchema): + target_node_label: str = 'OtherNode' + target_node_matcher: TargetNodeMatcher = make_target_node_matcher({'id': PropertyRef('other_node_id')}) + direction: LinkDirection = LinkDirection.INWARD + rel_label: str = "REL_LABEL_GOES_HERE" + # This is intentionally missing "()" at the end. This will raise an exception! + properties: MyNodeToOtherNodeRelProps = MyNodeToOtherNodeRelProps + + +@dataclass(frozen=True) +class MyNodeProperties(CartographyNodeProperties): + id: PropertyRef = PropertyRef('Id') + lastupdated: PropertyRef = PropertyRef('lastupdated', set_in_kwargs=True) + + +@dataclass(frozen=True) +class MyNodeSchema(CartographyNodeSchema): + label: str = 'MyNode' + properties: MyNodeProperties = MyNodeProperties() + sub_resource_relationship: CartographyRelSchema = MyNodeToBillingUnitRel() + other_relationships: OtherRelationships = OtherRelationships([MyNodeToOtherNodeRel()]) + + +def test_build_attach_addl_links_raises_typeerror(): + """ + _build_attach_additional_links_statement calls asdict() on each rel in node_schema.other_relationships. 
If the + module author forgot to put `()` at the end of each RelSchema, Python will treat it as a "type" and not a + dataclass, so asdict() will fail with a typeerror. + This test ensures that we raise a helpful error message for this situation, because IDEs don't always catch this + mistake. + """ + with raises(TypeError): + _ = _build_attach_additional_links_statement(MyNodeSchema().other_relationships) + + +def test_build_attach_sub_resource_stmt_raises_typeerror(): + """ + Same test logic as test_build_attach_addl_links_raises_typeerror above but for _build_attach_sub_resource_statement. + """ + with raises(TypeError): + _ = _build_attach_sub_resource_statement(MyNodeSchema().sub_resource_relationship) diff --git a/tests/unit/cartography/graph/test_querybuilder_filter_selected_rels.py b/tests/unit/cartography/graph/test_querybuilder_filter_selected_rels.py new file mode 100644 index 000000000..a76d497af --- /dev/null +++ b/tests/unit/cartography/graph/test_querybuilder_filter_selected_rels.py @@ -0,0 +1,17 @@ +from pytest import raises + +from cartography.graph.querybuilder import _filter_selected_relationships +from tests.data.graph.querybuilder.sample_models.interesting_asset import InterestingAssetToSubResourceRel +from tests.data.graph.querybuilder.sample_models.simple_node import SimpleNodeSchema + + +def test_filter_selected_rels_raises_value_err(): + """ + Specify a RelSchema that is not present on a given NodeSchema -> expect exception + """ + # Act and assert + with raises(ValueError): + _, _ = _filter_selected_relationships( + SimpleNodeSchema(), + selected_relationships={InterestingAssetToSubResourceRel()}, + ) From ab0af7b448dcb772c7be497bcb510ce8513c9069 Mon Sep 17 00:00:00 2001 From: Alex Chantavy Date: Thu, 15 Dec 2022 22:25:20 -0800 Subject: [PATCH 27/27] Remove irrelevant comment --- .../graph/test_querybuilder_build_attach_links_excs.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/tests/unit/cartography/graph/test_querybuilder_build_attach_links_excs.py b/tests/unit/cartography/graph/test_querybuilder_build_attach_links_excs.py index 165acb71a..24c1e12ca 100644 --- a/tests/unit/cartography/graph/test_querybuilder_build_attach_links_excs.py +++ b/tests/unit/cartography/graph/test_querybuilder_build_attach_links_excs.py @@ -1,4 +1,3 @@ -# Test defining a simple node with a sub resource rel: (:SimpleNode)<-[:RESOURCE]-(:SubResource) from dataclasses import dataclass from pytest import raises