close #537 by creating an importer for the new format (#538)
* close #537 by creating an importer for the new format

* import from export format

* web handler for importing CREs and standards

* move the in-memory graph to its own module, add methods for path retrieval and hierarchy retrieval to the database module

* remove irrelevant methods from export format, add a check for duplicate links in add_link (see the first sketch after this list)

* add support for exporting in the new export format

* test for getting cre hierarchy

* add tests for import/export of new format

* add support for new format in main

* web main changes to support new import/export formats

* fix db CRE hierarchy retrieval, detect non-existent CREs

* rm deprecated recursive retrieval tests from CREs, CREs are only 1 level deep

* add test and fix for single cre retrieval

* make write_csv pick up all possible keys from all dicts as a header (see the second sketch after this list), add support for the new export format to main

* make the memory graph self-initialize, move functionality of get_standard_by_db_id to get_nodes

* update the CRE exporting method to support the new export format no longer being an enum
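
The duplicate-link check mentioned in the add_link bullet can be sketched in a few lines. This is a minimal illustration of the idea, not the project's Node_collection code; the Link shape and the list-backed store are hypothetical:

    from dataclasses import dataclass, field
    from typing import List


    @dataclass(frozen=True)
    class Link:
        source_id: str
        target_id: str
        ltype: str


    @dataclass
    class Collection:
        links: List[Link] = field(default_factory=list)

        def add_link(self, link: Link) -> bool:
            # Refuse links that already exist so repeated imports stay idempotent.
            if link in self.links:
                return False
            self.links.append(link)
            return True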
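The write_csv bullet describes collecting the header from every row rather than only the first. A self-contained sketch of that approach, assuming rows arrive as a list of dicts (this write_csv is illustrative, not the repository's function):

    import csv
    from typing import Any, Dict, List


    def write_csv(rows: List[Dict[str, Any]], path: str) -> None:
        # Take the union of keys across all dicts, preserving first-seen order,
        # so columns introduced by later rows are not silently dropped.
        fieldnames: List[str] = []
        for row in rows:
            for key in row:
                if key not in fieldnames:
                    fieldnames.append(key)
        with open(path, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames, restval="")
            writer.writeheader()
            writer.writerows(rows)  # restval fills cells a given row lacks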
northdpole committed Aug 14, 2024
1 parent 9620177 commit 95ace6b
Showing 18 changed files with 1,567 additions and 1,625 deletions.
44 changes: 22 additions & 22 deletions application/cmd/cre_main.py
@@ -93,7 +93,7 @@ def register_node(node: defs.Node, collection: db.Node_collection) -> db.Node:
             register_node(node=link.document, collection=collection)
 
         elif type(link.document).__name__ == defs.CRE.__name__:
-            # dbcre = register_cre(link.document, collection) # CREs are idempotent
+            # dbcre,_ = register_cre(link.document, collection) # CREs are idempotent
             c = collection.get_CREs(name=link.document.name)[0]
             dbcre = db.dbCREfromCRE(c)
             collection.add_link(dbcre, linked_node, type=link.ltype)
@@ -109,14 +109,19 @@ def register_node(node: defs.Node, collection: db.Node_collection) -> db.Node:
     return linked_node
 
 
-def register_cre(cre: defs.CRE, collection: db.Node_collection) -> db.CRE:
+def register_cre(cre: defs.CRE, collection: db.Node_collection) -> Tuple[db.CRE, bool]:
+    existing = False
+    if collection.get_CREs(name=cre.id):
+        existing = True
+
     dbcre: db.CRE = collection.add_cre(cre)
     for link in cre.links:
         if type(link.document) == defs.CRE:
+            logger.info(f"{link.document.id} {link.ltype} {cre.id}")
+            lower_cre, _ = register_cre(link.document, collection)
             collection.add_internal_link(
                 higher=dbcre,
-                lower=register_cre(link.document, collection),
+                lower=lower_cre,
                 type=link.ltype,
             )
         else:
@@ -125,7 +130,7 @@ def register_cre(cre: defs.CRE, collection: db.Node_collection) -> db.CRE:
                 node=register_node(node=link.document, collection=collection),
                 type=link.ltype,
             )
-    return dbcre
+    return dbcre, existing
 
 
 def parse_file(
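
With register_cre now returning Tuple[db.CRE, bool], callers can tell whether the CRE was freshly created or already present. A hypothetical usage fragment, assuming cre, collection, and logger are in scope as in the surrounding module:

    dbcre, existing = register_cre(cre, collection)
    if existing:
        logger.info(f"CRE {cre.id} was already registered; its links were refreshed")
    else:
        logger.info(f"registered new CRE {cre.id}")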
@@ -209,7 +214,7 @@ def parse_file(
 
 def register_standard(
     standard_entries: List[defs.Standard],
-    collection: db.Node_collection,
+    collection: db.Node_collection = None,
     generate_embeddings=True,
     calculate_gap_analysis=True,
     db_connection_str: str = "",
@@ -218,15 +223,17 @@
         generate_embeddings = False
 
     if not standard_entries:
-        logger.warning("register_standard() calleed with no standard_entries")
+        logger.warning("register_standard() called with no standard_entries")
         return
-    if not collection:
+
+    if collection is None:
         collection = db_connect(path=db_connection_str)
+
     conn = redis.connect()
     ph = prompt_client.PromptHandler(database=collection)
     importing_name = standard_entries[0].name
     standard_hash = gap_analysis.make_resources_key([importing_name])
-    if conn.get(standard_hash):
+    if calculate_gap_analysis and conn.get(standard_hash):
         logger.info(
             f"Standard importing job with info-hash {standard_hash} has already returned, skipping"
         )
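
The switch from "if not collection:" to "if collection is None:" is the usual truthiness fix: a valid but empty collection is falsy, so the old check could trigger a spurious reconnect. A minimal demonstration of the difference:

    collection = []              # a valid but empty container
    print(not collection)        # True  -> the old check would wrongly reconnect
    print(collection is None)    # False -> the new check keeps the passed-in object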
@@ -248,11 +255,12 @@
         generate_embeddings = False
     if generate_embeddings and importing_name:
         ph.generate_embeddings_for(importing_name)
-    populate_neo4j_db(collection)
-    # calculate gap analysis
-    jobs = []
-    pending_stadards = collection.standards()
+
     if calculate_gap_analysis and not os.environ.get("CRE_NO_CALCULATE_GAP_ANALYSIS"):
+        # calculate gap analysis
+        populate_neo4j_db(collection)
+        jobs = []
+        pending_stadards = collection.standards()
         for standard_name in pending_stadards:
             if standard_name == importing_name:
                 continue
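
Gap analysis now runs only when the caller opts in and the operator has not vetoed it via the environment. A standalone sketch of the gate, mirroring the names in the diff while eliding the job logic:

    import os


    def should_calculate_gap_analysis(calculate_gap_analysis: bool = True) -> bool:
        # Both must hold: caller opt-in and no CRE_NO_CALCULATE_GAP_ANALYSIS veto.
        return calculate_gap_analysis and not os.environ.get(
            "CRE_NO_CALCULATE_GAP_ANALYSIS"
        )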
@@ -297,13 +305,7 @@ def parse_standards_from_spreadsheeet(
 ) -> None:
     """given a yaml with standards, build a list of standards in the db"""
     collection = db_connect(cache_location)
-    if "CRE:name" in cre_file[0].keys():
-        collection = collection.with_graph()
-        documents = spreadsheet_parsers.parse_export_format(cre_file)
-        register_cre(documents, collection)
-        pass
-
-    elif any(key.startswith("CRE hierarchy") for key in cre_file[0].keys()):
+    if any(key.startswith("CRE hierarchy") for key in cre_file[0].keys()):
         conn = redis.connect()
         collection = collection.with_graph()
         redis.empty_queues(conn)
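
The removed branch handled the export-format layout keyed on "CRE:name" headers; after this change the function dispatches only the "CRE hierarchy" layout. Detection comes down to inspecting the first parsed row's headers, roughly as below (the sample row is hypothetical):

    first_row = {"CRE hierarchy 1": "123-456", "Standard foo": "some section"}


    def is_hierarchy_format(row: dict) -> bool:
        # Format detection keys off the spreadsheet's column headers.
        return any(key.startswith("CRE hierarchy") for key in row)


    print(is_hierarchy_format(first_row))  # True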
@@ -659,9 +661,7 @@ def create_spreadsheet(
 ) -> Any:
     """Reads cre docs exported from a standards_collection.export()
     dumps each doc into a workbook"""
-    flat_dicts = sheet_utils.prepare_spreadsheet(
-        collection=collection, docs=exported_documents
-    )
+    flat_dicts = sheet_utils.prepare_spreadsheet(docs=exported_documents)
     return sheet_utils.write_spreadsheet(
         title=title, docs=flat_dicts, emails=share_with
     )
