From ecd9883a8cdc74bd110fcfea3d32e6c4614502d3 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Fri, 14 Jun 2024 21:49:22 +0200 Subject: [PATCH] Add supplement_biosphere_edges --- bw_simapro_csv/blocks/process.py | 29 +++++++++++++++-- bw_simapro_csv/constants.py | 13 ++++++++ bw_simapro_csv/csv_reader.py | 2 +- bw_simapro_csv/main.py | 3 +- tests/conftest.py | 2 +- tests/test_csv_reader.py | 5 +-- tests/test_process.py | 53 ++++++++++++++++++++++++++++++++ tests/test_utils.py | 23 +++++++------- 8 files changed, 109 insertions(+), 21 deletions(-) create mode 100644 bw_simapro_csv/constants.py create mode 100644 tests/test_process.py diff --git a/bw_simapro_csv/blocks/process.py b/bw_simapro_csv/blocks/process.py index 3a6f148..4a07072 100644 --- a/bw_simapro_csv/blocks/process.py +++ b/bw_simapro_csv/blocks/process.py @@ -1,5 +1,6 @@ from bw2parameters import Interpreter, ParameterSet +from ..constants import CONTEXT_MAPPING, MAGIC from ..parameters import ( FormulaSubstitutor, add_prefix_to_uppercase_input_parameters, @@ -10,7 +11,7 @@ from ..utils import asboolean, asdate, get_key_multiline_values, jump_to_nonempty from .base import SimaProCSVBlock from .calculated_parameters import DatasetCalculatedParameters -from .generic_biosphere import GenericUncertainBiosphere +from .generic_biosphere import GenericBiosphere, GenericUncertainBiosphere from .parameters import DatasetInputParameters from .products import Products from .technosphere_edges import TechnosphereEdges @@ -93,7 +94,7 @@ def pull_metadata_pair(self, block: list[list], header: dict) -> (str, str): self.index += 2 else: value = ( - " ⧺ ".join([elem for elem in block[self.index + 1][1] if elem]) + MAGIC.join([elem for elem in block[self.index + 1][1] if elem]) if block[self.index + 1][1] else "" ) @@ -157,3 +158,27 @@ def resolve_local_parameters(self, global_params: dict, substitutes: dict) -> No distribution(decimal_separator=self.header["decimal_separator"], **obj) ) clean_simapro_uncertainty_fields(obj) + + def supplement_biosphere_edges(self, blocks: list[SimaProCSVBlock]) -> None: + """Add comments and CAS numbers from the metadata blocks""" + for block in filter(lambda x: isinstance(x, GenericBiosphere), blocks): + try: + correspondent = self.blocks[CONTEXT_MAPPING[block.category]] + except KeyError: + continue + + data_dict = {o["name"]: o for o in block.parsed} + + for edge in correspondent.parsed: + try: + partner = data_dict[edge["name"]] + except KeyError: + continue + + if partner.get("cas_number"): + edge["cas_number"] = partner["cas_number"] + if partner.get("comment"): + if edge.get("comment"): + edge["comment"] += MAGIC + partner["comment"] + else: + edge["comment"] = partner["comment"] diff --git a/bw_simapro_csv/constants.py b/bw_simapro_csv/constants.py new file mode 100644 index 0000000..dcac481 --- /dev/null +++ b/bw_simapro_csv/constants.py @@ -0,0 +1,13 @@ +# Map from the context terms used in LCIA and metadata to the terms used in unit processes +CONTEXT_MAPPING = { + "Non material emissions": "Non material emissions", + "Airborne emissions": "Emissions to air", + "Waterborne emissions": "Emissions to water", + "Raw materials": "Resources", + "Final waste flows": "Final waste flows", + "Emissions to soil": "Emissions to soil", + "Social issues": "Social issues", + "Economic issues": "Economic issues", +} + +MAGIC = " ⧺ " diff --git a/bw_simapro_csv/csv_reader.py b/bw_simapro_csv/csv_reader.py index acb057f..ec23282 100644 --- a/bw_simapro_csv/csv_reader.py +++ b/bw_simapro_csv/csv_reader.py @@ -1,7 +1,7 @@ import itertools +import re from collections.abc import Iterator from typing import List -import re import ftfy diff --git a/bw_simapro_csv/main.py b/bw_simapro_csv/main.py index b813fb8..907fa0a 100644 --- a/bw_simapro_csv/main.py +++ b/bw_simapro_csv/main.py @@ -30,6 +30,7 @@ SystemDescription, Units, ) +from .csv_reader import BeKindRewind from .errors import IndeterminateBlockEnd from .header import parse_header from .parameters import ( @@ -40,7 +41,6 @@ substitute_in_formulas, ) from .units import normalize_units -from .csv_reader import BeKindRewind def dummy(data, *args): @@ -279,3 +279,4 @@ def resolve_parameters(self) -> None: for block in filter(lambda b: isinstance(b, Process), self): block.resolve_local_parameters(global_params=global_params, substitutes=substitutes) + block.supplement_biosphere_edges(blocks=self.blocks) diff --git a/tests/conftest.py b/tests/conftest.py index 18b7021..ea8af20 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,8 @@ """Fixtures for bw_simapro_csv""" from pathlib import Path -import platformdirs +import platformdirs import pytest FIXTURES_DIR = Path(__file__).parent / "fixtures" diff --git a/tests/test_csv_reader.py b/tests/test_csv_reader.py index f23a9ba..f5288ac 100644 --- a/tests/test_csv_reader.py +++ b/tests/test_csv_reader.py @@ -1,9 +1,6 @@ import pytest -from bw_simapro_csv.csv_reader import ( - BeKindRewind, - clean, -) +from bw_simapro_csv.csv_reader import BeKindRewind, clean def test_rewindable_generator(): diff --git a/tests/test_process.py b/tests/test_process.py new file mode 100644 index 0000000..552820a --- /dev/null +++ b/tests/test_process.py @@ -0,0 +1,53 @@ +from bw_simapro_csv.blocks import GenericBiosphere, Process + + +def test_supplement_biosphere_edges(): + class O: + pass + + class P(Process): + def __init__(self): + pass + + class B(GenericBiosphere): + def __init__(self): + pass + + o = O() + o.category = "Emissions to air" + o.parsed = [ + {"name": "first", "data": "yes please"}, + {"name": "second", "unit": "something", "comment": "already here"}, + { + "name": "third", + "lonely": True, + }, + ] + + p = P() + p.blocks = {"Emissions to air": o} + + b = B() + b.category = "Airborne emissions" + b.parsed = [ + {"name": "first", "cas_number": True}, + {"name": "second", "comment": "this"}, + {"name": "third", "comment": "hi mom"}, + ] + + expected = [ + { + "name": "first", + "data": "yes please", + "cas_number": True, + }, + {"name": "second", "unit": "something", "comment": "already here ⧺ this"}, + { + "name": "third", + "lonely": True, + "comment": "hi mom", + }, + ] + + p.supplement_biosphere_edges([b]) + assert p.blocks["Emissions to air"].parsed == expected diff --git a/tests/test_utils.py b/tests/test_utils.py index 0e90ef9..5b6a880 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -125,19 +125,18 @@ def test_get_key_multilines_value_stop_on_empty_block(): def test_get_numbers_re(): - assert get_numbers_re(",").match('1,11657894165076E-9') - assert get_numbers_re(";").match('1;11657894165076E-9') - assert get_numbers_re(".").match('1.11657894165076E-9') + assert get_numbers_re(",").match("1,11657894165076E-9") + assert get_numbers_re(";").match("1;11657894165076E-9") + assert get_numbers_re(".").match("1.11657894165076E-9") - assert get_numbers_re(",").match('1,11657894165076e-9') - assert get_numbers_re(";").match('1;11657894165076e-9') - assert get_numbers_re(".").match('1.11657894165076e-9') + assert get_numbers_re(",").match("1,11657894165076e-9") + assert get_numbers_re(";").match("1;11657894165076e-9") + assert get_numbers_re(".").match("1.11657894165076e-9") - assert get_numbers_re(",").match('1,11657894165076e9') - assert get_numbers_re(";").match('1;11657894165076e9') - assert get_numbers_re(".").match('1.11657894165076e9') + assert get_numbers_re(",").match("1,11657894165076e9") + assert get_numbers_re(";").match("1;11657894165076e9") + assert get_numbers_re(".").match("1.11657894165076e9") - assert get_numbers_re(",").match(' \t1,11657894165076E-9\n') - - assert not get_numbers_re(",").match('e1234') + assert get_numbers_re(",").match(" \t1,11657894165076E-9\n") + assert not get_numbers_re(",").match("e1234")