From ecd9883a8cdc74bd110fcfea3d32e6c4614502d3 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Fri, 14 Jun 2024 21:49:22 +0200
Subject: [PATCH] Add supplement_biosphere_edges

---
 bw_simapro_csv/blocks/process.py | 29 +++++++++++++++--
 bw_simapro_csv/constants.py      | 13 ++++++++
 bw_simapro_csv/csv_reader.py     |  2 +-
 bw_simapro_csv/main.py           |  3 +-
 tests/conftest.py                |  2 +-
 tests/test_csv_reader.py         |  5 +--
 tests/test_process.py            | 53 ++++++++++++++++++++++++++++++++
 tests/test_utils.py              | 23 +++++++-------
 8 files changed, 109 insertions(+), 21 deletions(-)
 create mode 100644 bw_simapro_csv/constants.py
 create mode 100644 tests/test_process.py

diff --git a/bw_simapro_csv/blocks/process.py b/bw_simapro_csv/blocks/process.py
index 3a6f148..4a07072 100644
--- a/bw_simapro_csv/blocks/process.py
+++ b/bw_simapro_csv/blocks/process.py
@@ -1,5 +1,6 @@
 from bw2parameters import Interpreter, ParameterSet
 
+from ..constants import CONTEXT_MAPPING, MAGIC
 from ..parameters import (
     FormulaSubstitutor,
     add_prefix_to_uppercase_input_parameters,
@@ -10,7 +11,7 @@
 from ..utils import asboolean, asdate, get_key_multiline_values, jump_to_nonempty
 from .base import SimaProCSVBlock
 from .calculated_parameters import DatasetCalculatedParameters
-from .generic_biosphere import GenericUncertainBiosphere
+from .generic_biosphere import GenericBiosphere, GenericUncertainBiosphere
 from .parameters import DatasetInputParameters
 from .products import Products
 from .technosphere_edges import TechnosphereEdges
@@ -93,7 +94,7 @@ def pull_metadata_pair(self, block: list[list], header: dict) -> (str, str):
             self.index += 2
         else:
             value = (
-                " ⧺ ".join([elem for elem in block[self.index + 1][1] if elem])
+                MAGIC.join([elem for elem in block[self.index + 1][1] if elem])
                 if block[self.index + 1][1]
                 else ""
             )
@@ -157,3 +158,27 @@ def resolve_local_parameters(self, global_params: dict, substitutes: dict) -> No
                         distribution(decimal_separator=self.header["decimal_separator"], **obj)
                     )
                     clean_simapro_uncertainty_fields(obj)
+
+    def supplement_biosphere_edges(self, blocks: list[SimaProCSVBlock]) -> None:
+        """Copy CAS numbers and comments from metadata flows onto same-named biosphere edges.
+
+        Only `GenericBiosphere` items in `blocks` are read. `CONTEXT_MAPPING` translates each
+        one's category into a key of `self.blocks`; categories or flows without a match are
+        skipped. An existing edge comment is kept and the new one appended after `MAGIC`.
+        """
+        for metadata in (obj for obj in blocks if isinstance(obj, GenericBiosphere)):
+            try:
+                section = self.blocks[CONTEXT_MAPPING[metadata.category]]
+            except KeyError:
+                continue
+            lookup = {flow["name"]: flow for flow in metadata.parsed}
+            for edge in section.parsed:
+                try:
+                    source = lookup[edge["name"]]
+                except KeyError:
+                    continue
+                if cas := source.get("cas_number"):
+                    edge["cas_number"] = cas
+                if note := source.get("comment"):
+                    previous = edge.get("comment")
+                    edge["comment"] = previous + MAGIC + note if previous else note
diff --git a/bw_simapro_csv/constants.py b/bw_simapro_csv/constants.py
new file mode 100644
index 0000000..dcac481
--- /dev/null
+++ b/bw_simapro_csv/constants.py
@@ -0,0 +1,13 @@
+# Map from the context terms used in LCIA and metadata to the terms used in unit processes
+CONTEXT_MAPPING = {
+    "Non material emissions": "Non material emissions",
+    "Airborne emissions": "Emissions to air",
+    "Waterborne emissions": "Emissions to water",
+    "Raw materials": "Resources",
+    "Final waste flows": "Final waste flows",
+    "Emissions to soil": "Emissions to soil",
+    "Social issues": "Social issues",
+    "Economic issues": "Economic issues",
+}
+
+MAGIC = " ⧺ "  # Separator inserted between text fragments that are merged into one string
diff --git a/bw_simapro_csv/csv_reader.py b/bw_simapro_csv/csv_reader.py
index acb057f..ec23282 100644
--- a/bw_simapro_csv/csv_reader.py
+++ b/bw_simapro_csv/csv_reader.py
@@ -1,7 +1,7 @@
 import itertools
+import re
 from collections.abc import Iterator
 from typing import List
-import re
 
 import ftfy
 
diff --git a/bw_simapro_csv/main.py b/bw_simapro_csv/main.py
index b813fb8..907fa0a 100644
--- a/bw_simapro_csv/main.py
+++ b/bw_simapro_csv/main.py
@@ -30,6 +30,7 @@
     SystemDescription,
     Units,
 )
+from .csv_reader import BeKindRewind
 from .errors import IndeterminateBlockEnd
 from .header import parse_header
 from .parameters import (
@@ -40,7 +41,6 @@
     substitute_in_formulas,
 )
 from .units import normalize_units
-from .csv_reader import BeKindRewind
 
 
 def dummy(data, *args):
@@ -279,3 +279,4 @@ def resolve_parameters(self) -> None:
 
         for block in filter(lambda b: isinstance(b, Process), self):
             block.resolve_local_parameters(global_params=global_params, substitutes=substitutes)
+            block.supplement_biosphere_edges(blocks=self.blocks)
diff --git a/tests/conftest.py b/tests/conftest.py
index 18b7021..ea8af20 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,8 +1,8 @@
 """Fixtures for bw_simapro_csv"""
 
 from pathlib import Path
-import platformdirs
 
+import platformdirs
 import pytest
 
 FIXTURES_DIR = Path(__file__).parent / "fixtures"
diff --git a/tests/test_csv_reader.py b/tests/test_csv_reader.py
index f23a9ba..f5288ac 100644
--- a/tests/test_csv_reader.py
+++ b/tests/test_csv_reader.py
@@ -1,9 +1,6 @@
 import pytest
 
-from bw_simapro_csv.csv_reader import (
-    BeKindRewind,
-    clean,
-)
+from bw_simapro_csv.csv_reader import BeKindRewind, clean
 
 
 def test_rewindable_generator():
diff --git a/tests/test_process.py b/tests/test_process.py
new file mode 100644
index 0000000..552820a
--- /dev/null
+++ b/tests/test_process.py
@@ -0,0 +1,53 @@
+from bw_simapro_csv.blocks import GenericBiosphere, Process
+
+
+def test_supplement_biosphere_edges():
+    """Check that metadata CAS numbers and comments are merged into process edges.
+
+    The metadata block uses the "Airborne emissions" label, the process "Emissions to air".
+    """
+
+    # Stubs with no-op constructors; the attributes the method reads are assigned by hand
+    class EdgeBlock:
+        """Attribute bag standing in for a block of biosphere edges."""
+
+    class BareProcess(Process):
+        def __init__(self):
+            """Overridden to do nothing."""
+
+    class BareMetadata(GenericBiosphere):
+        def __init__(self):
+            """Overridden to do nothing."""
+
+    # Process side, labelled with the unit process term
+    edges = EdgeBlock()
+    edges.category = "Emissions to air"
+    edges.parsed = [
+        {"name": "first", "data": "yes please"},
+        {"name": "second", "unit": "something", "comment": "already here"},
+        {"name": "third", "lonely": True},
+    ]
+
+    process = BareProcess()
+    process.blocks = {edges.category: edges}
+
+    # Metadata side, labelled with the term that maps to "Emissions to air"
+    metadata = BareMetadata()
+    metadata.category = "Airborne emissions"
+    metadata.parsed = [
+        {"name": "first", "cas_number": True},
+        {"name": "second", "comment": "this"},
+        {"name": "third", "comment": "hi mom"},
+    ]
+
+    # Only the metadata block is passed in; the edges are reached via `process.blocks`
+    process.supplement_biosphere_edges([metadata])
+
+    assert process.blocks["Emissions to air"].parsed == [
+        # CAS number added, other keys untouched
+        {"name": "first", "data": "yes please", "cas_number": True},
+        # Metadata comment appended to the existing one
+        {"name": "second", "unit": "something", "comment": "already here ⧺ this"},
+        # Comment created where the edge had none
+        {"name": "third", "lonely": True, "comment": "hi mom"},
+    ]
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 0e90ef9..5b6a880 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -125,19 +125,18 @@ def test_get_key_multilines_value_stop_on_empty_block():
 
 
 def test_get_numbers_re():
-    assert get_numbers_re(",").match('1,11657894165076E-9')
-    assert get_numbers_re(";").match('1;11657894165076E-9')
-    assert get_numbers_re(".").match('1.11657894165076E-9')
+    assert get_numbers_re(",").match("1,11657894165076E-9")
+    assert get_numbers_re(";").match("1;11657894165076E-9")
+    assert get_numbers_re(".").match("1.11657894165076E-9")
 
-    assert get_numbers_re(",").match('1,11657894165076e-9')
-    assert get_numbers_re(";").match('1;11657894165076e-9')
-    assert get_numbers_re(".").match('1.11657894165076e-9')
+    assert get_numbers_re(",").match("1,11657894165076e-9")
+    assert get_numbers_re(";").match("1;11657894165076e-9")
+    assert get_numbers_re(".").match("1.11657894165076e-9")
 
-    assert get_numbers_re(",").match('1,11657894165076e9')
-    assert get_numbers_re(";").match('1;11657894165076e9')
-    assert get_numbers_re(".").match('1.11657894165076e9')
+    assert get_numbers_re(",").match("1,11657894165076e9")
+    assert get_numbers_re(";").match("1;11657894165076e9")
+    assert get_numbers_re(".").match("1.11657894165076e9")
 
-    assert get_numbers_re(",").match(' \t1,11657894165076E-9\n')
-
-    assert not get_numbers_re(",").match('e1234')
+    assert get_numbers_re(",").match(" \t1,11657894165076E-9\n")
 
+    assert not get_numbers_re(",").match("e1234")