Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix the api part of #531 by providing an api call to export all CREs … #532

Merged
merged 2 commits into from
Jul 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion application/defs/cre_defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class ExportFormat(
Enum
): # TODO: this can likely be replaced with a method that iterates over an object's vars and formats headers to
# <doctype>:<name>:<varname>
separator = ":"
separator = "|"
section = "section"
subsection = "subsection"
hyperlink = "hyperlink"
Expand Down
2 changes: 1 addition & 1 deletion application/frontend/src/pages/Explorer/explorer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndica
import { TYPE_CONTAINS, TYPE_LINKED_TO } from '../../const';
import { useDataStore } from '../../providers/DataProvider';
import { LinkedTreeDocument, TreeDocument } from '../../types';
import { LinkedStandards } from './LinkedStandards';
import { getDocumentDisplayName } from '../../utils';
import { getInternalUrl } from '../../utils/document';
import { LinkedStandards } from './LinkedStandards';

export const Explorer = () => {
const { dataLoading, dataTree } = useDataStore();
Expand Down
59 changes: 58 additions & 1 deletion application/tests/spreadsheet_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
from application import create_app, sqla # type: ignore
from application.database import db
from application.defs import cre_defs as defs
from application.utils.spreadsheet import prepare_spreadsheet
from application.utils.spreadsheet import (
prepare_spreadsheet,
generate_mapping_template_file,
)


class TestDB(unittest.TestCase):
Expand Down Expand Up @@ -530,6 +533,60 @@ def test_prepare_spreadsheet_simple(self) -> None:

self.assertCountEqual(result, expected)

def test_generate_mapping_template_file(self) -> None:
"""
Given: a CRE structure with 4 depth levels and 2 root cres
prepare a staggered csv accordingly
"""
# empty string means temporary db
collection = db.Node_collection().with_graph()
roots = []
for j in range(2):
root = defs.CRE(description=f"root{j}", name=f"root{j}", id=f"123-30{j}")
db_root = collection.add_cre(root)
roots.append(root)
previous_db = db_root
previous_cre = root

for i in range(4):
c = defs.CRE(
description=f"CREdesc{j}-{i}",
name=f"CREname{j}-{i}",
id=f"123-4{j}{i}",
)
dbcre = collection.add_cre(c)
collection.add_internal_link(
higher=previous_db, lower=dbcre, type=defs.LinkTypes.Contains
)
previous_cre.add_link(
defs.Link(document=c, ltype=defs.LinkTypes.Contains)
)
previous_cre = c
previous_db = dbcre
csv = generate_mapping_template_file(database=collection, docs=roots)
self.assertEqual(
csv,
[
{
"CRE 0": "",
"CRE 1": "",
"CRE 2": "",
"CRE 3": "",
"CRE 4": "",
},
{"CRE 0": "123-300|root0"},
{"CRE 1": "123-400|CREname0-0"},
{"CRE 2": "123-401|CREname0-1"},
{"CRE 3": "123-402|CREname0-2"},
{"CRE 4": "123-403|CREname0-3"},
{"CRE 0": "123-301|root1"},
{"CRE 1": "123-410|CREname1-0"},
{"CRE 2": "123-411|CREname1-1"},
{"CRE 3": "123-412|CREname1-2"},
{"CRE 4": "123-413|CREname1-3"},
],
)


if __name__ == "__main__":
unittest.main()
65 changes: 61 additions & 4 deletions application/tests/web_main_test.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
import io
import csv
import random
import string
import re
import json
import unittest
from unittest.mock import patch

import redis
import rq
import os

from application import create_app, sqla # type: ignore
from application.database import db
from application.defs import cre_defs as defs
from application.defs import osib_defs
from application.web import web_main
from application.utils.gap_analysis import GAP_ANALYSIS_TIMEOUT

import os
from application.utils import spreadsheet


class MockJob:
Expand Down Expand Up @@ -886,3 +885,61 @@ def test_all_cres(self, db_mock) -> None:
{"data": expected, "page": 1, "total_pages": 1},
json.loads(response.data),
)

def test_get_cre_csv(self) -> None:
# empty string means temporary db
collection = db.Node_collection().with_graph()
roots = []
for j in range(2):
root = defs.CRE(description=f"root{j}", name=f"root{j}", id=f"123-30{j}")
db_root = collection.add_cre(root)
roots.append(root)
previous_db = db_root
previous_cre = root

for i in range(4):
c = defs.CRE(
description=f"CREdesc{i}-{j}",
name=f"CREname{i}-{j}",
id=f"123-4{j}{i}",
)
dbcre = collection.add_cre(c)
collection.add_internal_link(
higher=previous_db, lower=dbcre, type=defs.LinkTypes.Contains
)
previous_cre.add_link(
defs.Link(document=c, ltype=defs.LinkTypes.Contains)
)
previous_cre = c
previous_db = dbcre

with self.app.test_client() as client:
response = client.get(
"/rest/v1/cre_csv",
headers={"Content-Type": "application/json"},
)
self.assertEqual(200, response.status_code)
expected_out = [
{
"CRE 0": "",
"CRE 1": "",
"CRE 2": "",
"CRE 3": "",
"CRE 4": "",
},
{"CRE 0": "123-300|root0"},
{"CRE 1": "123-400|CREname0-0"},
{"CRE 2": "123-401|CREname0-1"},
{"CRE 3": "123-402|CREname0-2"},
{"CRE 4": "123-403|CREname0-3"},
{"CRE 0": "123-301|root1"},
{"CRE 1": "123-410|CREname1-0"},
{"CRE 2": "123-411|CREname1-1"},
{"CRE 3": "123-412|CREname1-2"},
{"CRE 4": "123-413|CREname1-3"},
]
data = spreadsheet.write_csv(expected_out)
self.assertEqual(
data.getvalue(),
response.data.decode(),
)
62 changes: 61 additions & 1 deletion application/utils/spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import io
import logging
from copy import deepcopy
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Set
import os
import gspread
import yaml
Expand Down Expand Up @@ -237,3 +237,63 @@ def write_spreadsheet(title: str, docs: List[Dict[str, Any]], emails: List[str])
for email in emails:
sh.share(email, perm_type="user", role="writer")
return "https://docs.google.com/spreadsheets/d/%s" % sh.id


def generate_mapping_template_file(
database: db.Node_collection, docs: List[defs.CRE]
) -> str:
maxOffset = 0
related = set()

def add_offset_cre(
cre: defs.CRE, database: db.Node_collection, offset: int, visited_cres: Set
) -> List[Dict[str, str]]:
nonlocal maxOffset, related
maxOffset = max(maxOffset, offset)
rows = []

rows.append(
{f"CRE {offset}": f"{cre.id}{defs.ExportFormat.separator.value}{cre.name}"}
)
visited_cres.add(cre.id)
dbcre = database.get_CREs(external_id=cre.id)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this name get_CREs is unclear about what you're getting and so is dbcre. You're looking for linked CREs, right? Rename?

if not dbcre:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is not dbcre true if dbcre is empty or if there has been an error in get_CREs?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dbcre should be None in case of error

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does not None resolve to true?

raise ValueError(f"CRE with id {cre.id} not found in the database")
cre = dbcre[0]
for link in cre.links:
if (
link.document.doctype == defs.Credoctypes.CRE
and link.document.id not in visited_cres
):
if link.ltype == defs.LinkTypes.Contains:
rows.extend(
add_offset_cre(
cre=link.document,
database=database,
offset=offset + 1,
visited_cres=visited_cres,
)
)
elif link.ltype == defs.LinkTypes.Related:
related.add(link.document.id)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently this code is doing nothing with related CRES, but we need to have them in the template at some point, for the template to be usable for changing the catalog. We have two options: add them as id|name list in one extra column, or add them as rows, just like we're adding the children (contains) as rows. I think the latter is easier to read but more confusing. So I vote for one column for the related CRES. This would require building up a dictionary of the related id|name pairs, and then adding them to rows, together with the CRE in a dictionary of two (or one if there is no related CRES.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently this code is doing nothing with related CRES, but we need to have them in the template at some point, for the template to be usable for changing the catalog.

I agree

We have two options: add them as id|name list in one extra column, or add them as rows, just like we're adding the children (contains) as rows

We could have a column named "Related CRE" sure,
then we can add the CRE listed in this column with a related type link,
During importing tehn we have the hierarchy on the left, then one column with related CREs, then an arbitrary number of standard entries. Do we link each standard to both the normal cre and the related one? This seems a bit confusing for people.

Since we want to be capturing more complex relationships in a 2 dimensional matrix I think we should consider a more verbose and less context-based format.

e.g.

  • perhaps there should be 2 formats, 1 for mapping standards and one for links between cres
  • alternatively perhaps everything should be considered a standard, CREs-included, we forget the hierarchy and just support a link between only 2 standards for each CSV, these 2 standards are CRE and any standard, for this we need to explicitly include the relationship type in a column.

In any case, this is beyond the scope of this particular pull request, let's think about this a bit more, let me know when you are free for a brainstorming session

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The mapping template specifies with which CRE to link standard entries. The related CREs will not get the standard entries aside. I see your point. I guess it can be an option that is off by default to include the related CREs column in the template. So yes: two formats. and indeed not for this PR.

return rows

visited_cres = set()
csv: List[Dict[str, str]] = []

for cre in docs:
csv.extend(
add_offset_cre(
cre=cre, database=database, offset=0, visited_cres=visited_cres
)
)
result = [{f"CRE {offset}": "" for offset in range(0, maxOffset + 1)}]
result.extend(csv)

orphaned_documents = [doc for doc in related if doc not in visited_cres]
if len(orphaned_documents):
raise ValueError(
"found CREs with only related links not provided in the root_cre list, unless you are really sure for this use case, this is a bug"
)

return result
26 changes: 26 additions & 0 deletions application/web/web_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import logging
import os
import io
import pathlib
import urllib.parse
from typing import Any
Expand All @@ -29,6 +30,7 @@
send_from_directory,
url_for,
session,
send_file,
)
from google.oauth2 import id_token
from google_auth_oauthlib.flow import Flow
Expand Down Expand Up @@ -684,6 +686,30 @@ def all_cres() -> Any:
abort(404)


@app.route("/rest/v1/cre_csv", methods=["GET"])
def get_cre_csv() -> Any:
database = db.Node_collection()
root_cres = database.get_root_cres()
if root_cres:
docs = sheet_utils.generate_mapping_template_file(
database=database, docs=root_cres
)
csvVal = write_csv(docs=docs).getvalue().encode("utf-8")

# Creating the byteIO object from the StringIO Object
mem = io.BytesIO()
mem.write(csvVal)
mem.seek(0)

return send_file(
mem,
as_attachment=True,
download_name="CRE-Catalogue.csv",
mimetype="text/csv",
)
abort(404)


# @app.route("/rest/v1/all_nodes", methods=["GET"])
# def all_nodes() -> Any:
# database = db.Node_collection()
Expand Down
Loading