Skip to content

Commit

Permalink
Updated scenarios (#1025)
Browse files Browse the repository at this point in the history
* Adds logging for import errors from local database files (primarily Excel).

* Updating some help/tool tips in the AB to provide more explicit messages for the user

* Extra checks for scenario imports. 

* Improved structure of loading, fixes use of Exceptions for scenario parameter imports.

* Provides a save functionality for the combined scenario dataframes within the AB.

* Changes the location of error reports during the linking of activities in the scenario files. Improves the messaging and text provided in the popup and warning messages.

* Drops the index of the scenario dataframe when exporting this to the file system.

* Updates to the dataframe function for updating database names in scenario files.

* Bug correction for OverrideCursor.

* Adds corrections to the dataframe modules scenario_replace_databases function and the ABPopup save_dataframe method.

* Finalizing corrections to the dataframe module scenario_replace_databases function.

* Corrects and improves the current checks for both missing keys and bad keys when accessing activities from the BW SQL databases.

* Updates to the handling of poor process keys in the scenario files. 

* Corrections and adjustments to procedures for importing scenario difference files.

* Added conditions and exceptions to avoid minor bugs related to the retention of the loading cursor symbol.

* Bug fix: removes the object fields from the scenario dataframe that will be incorporated in the LCA analysis.

* Bug fix for importing feather files that should correctly convert the keys to tuples.

* Documentation improvements to changes in the scenario checking code.

* Adds checks to the exchange amounts from the scenario difference files and the associated errors.

* Adds a warning for unalignable scenario columns.

* Adds a new warning to the bwutils.errors module 'UnalignableScenarioColumnsWarning'. Contains significant changes to the manager module: Changes the execution of checking the scenario exchange values. Changes the use of scenario expansion errors. Vectorizes the approach used in merge_flows_to_self.

* Final corrections to the updated scenario code.

* Removes usage of the standard python print statement

* Corrections to merge_flows_to_self and 'error' messages related to parameter scenario files
  • Loading branch information
Zoophobus authored Aug 30, 2023
1 parent 4488bb8 commit 84c9b15
Show file tree
Hide file tree
Showing 25 changed files with 1,233 additions and 294 deletions.
2 changes: 2 additions & 0 deletions activity_browser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

def run_activity_browser():
qapp = QApplication(sys.argv)

# qapp.setFont(default_font)
# qapp.setStyleSheet(
# '''
Expand All @@ -41,6 +42,7 @@ def run_activity_browser():
# QWidget>QTableWidget { background-color: #FFFFFF; color: #101010 }
# '''
# )

application = Application()
application.show()
log.info("Qt Version:", qt_version)
Expand Down
13 changes: 13 additions & 0 deletions activity_browser/bwutils/calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
SuperstructureContributions, SuperstructureMLCA,
)
from bw2calc.errors import BW2CalcError
from .errors import ScenarioExchangeNotFoundError

from .errors import CriticalCalculationError


import logging
from activity_browser.logger import ABHandler
Expand All @@ -32,16 +36,25 @@ def do_LCA_calculations(data: dict):
contributions = SuperstructureContributions(mlca)
except AssertionError as e:
# This occurs if the superstructure itself detects something is wrong.
QApplication.restoreOverrideCursor()
raise BW2CalcError("Scenario LCA failed.", str(e)).with_traceback(e.__traceback__)
except ValueError as e:
# This occurs if the LCA matrix does not contain any of the
# exchanges mentioned in the superstructure data.
QApplication.restoreOverrideCursor()
raise BW2CalcError(
"Scenario LCA failed.",
"Constructed LCA matrix does not contain any exchanges from the superstructure"
).with_traceback(e.__traceback__)
except KeyError as e:
QApplication.restoreOverrideCursor()
raise BW2CalcError("LCA Failed", str(e)).with_traceback(e.__traceback__)
except CriticalCalculationError as e:
QApplication.restoreOverrideCursor()
raise Exception(e)
except ScenarioExchangeNotFoundError as e:
QApplication.restoreOverrideCursor()
raise CriticalCalculationError
else:
log.error('Calculation type must be: simple or scenario. Given:', cs_name)
raise ValueError
Expand Down
32 changes: 32 additions & 0 deletions activity_browser/bwutils/errors.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
# -*- coding: utf-8 -*-

"""
A series of defined Errors and Warnings for the Activity Browser
Both Warnings and Exceptions are customized to enable custom handling (in bulk) of non-critical
errors.
"""


class ABError(Exception):
    """Generic Activity-Browser error: raising it should never crash the AB."""


class ABWarning(Warning):
    """Generic Activity-Browser warning category."""

class ImportCanceledError(ABError):
"""Import of data was cancelled by the user."""
Expand Down Expand Up @@ -59,10 +70,31 @@ class DuplicatedScenarioExchangeWarning(ABWarning):
class CriticalCalculationError(ABError):
    """Raised when an action during a calculation run triggers a critical
    exception that will fail the calculation. Intended for use with a popup
    warning system that catches the original exception."""


class CriticalScenarioExtensionError(ABError):
    """Should be raised when combining multiple scenario files by extension leads to zero scenario
    columns, i.e. no scenario columns were found in common between the scenario files."""
    pass


class ScenarioDatabaseNotFoundError(ABError):
    """Raised when a process looked up from an SDF file carries database values
    that do not match any database present in the local AB/BW environment."""


class ScenarioExchangeNotFoundError(ABError):
    """Should be raised when looking up a process key from the metadata in a scenario difference
    file, if that process key cannot be located in the local databases."""
    pass


class ScenarioExchangeDataNotFoundError(ABError):
    """Raised when the scenario difference file contains no actual quantities
    for the exchanges."""


class UnalignableScenarioColumnsWarning(ABWarning):
    """Should be raised if there is a mismatch between the scenario columns from
    multiple scenario difference files."""
6 changes: 4 additions & 2 deletions activity_browser/bwutils/multilca.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ def get_labelled_contribution_dict(self, cont_dict, x_fields=None,
index = df.loc[df.index.difference(special_keys)].replace(0, np.nan).dropna(how='all').index.union(special_keys)
df = df.loc[index]

joined = None
if not mask:
joined = self.join_df_with_metadata(
df, x_fields=x_fields, y_fields=y_fields,
Expand All @@ -549,8 +550,9 @@ def get_labelled_contribution_dict(self, cont_dict, x_fields=None,
df = df.reindex(combined_keys, axis="index", fill_value=0.0)
df.index = self.get_labels(df.index, mask=mask)
joined = df

return joined.reset_index(drop=False)
if joined is not None:
return joined.reset_index(drop=False)
return

@staticmethod
def adjust_table_unit(df: pd.DataFrame, method: Optional[tuple]) -> pd.DataFrame:
Expand Down
3 changes: 2 additions & 1 deletion activity_browser/bwutils/superstructure/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
from .dataframe import (
scenario_names_from_df, superstructure_from_arrays
scenario_names_from_df, superstructure_from_arrays,
scenario_replace_databases
)
from .file_imports import (
ABFeatherImporter, ABCSVImporter, ABFileImporter
Expand Down
44 changes: 40 additions & 4 deletions activity_browser/bwutils/superstructure/activities.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def process_ad_flow(row) -> tuple:
return match, key


def constuct_ad_data(row) -> tuple:
def construct_ad_data(row) -> tuple:
"""Take a namedtuple from the method below and convert it into two tuples.
Used to fill out missing information in the superstructure.
Expand All @@ -64,8 +64,8 @@ def data_from_index(index: tuple) -> dict:
from it.
"""
from_key, to_key = index[0], index[1]
from_key, from_data = constuct_ad_data(ActivityDataset.get(database=from_key[0], code=from_key[1]))
to_key, to_data = constuct_ad_data(ActivityDataset.get(database=to_key[0], code=to_key[1]))
from_key, from_data = construct_ad_data(ActivityDataset.get(database=from_key[0], code=from_key[1]))
to_key, to_data = construct_ad_data(ActivityDataset.get(database=to_key[0], code=to_key[1]))
return {
"from activity name": from_data[0],
"from reference product": from_data[1],
Expand Down Expand Up @@ -123,7 +123,6 @@ def get_relevant_flows(df: pd.DataFrame, part: str = "from") -> dict:
flows = dict(map(process_ad_flow, query.iterator()))
return flows


def match_fields_for_key(df: pd.DataFrame, matchbook: dict) -> pd.Series:
def build_match(row):
if row.iat[4] == bw.config.biosphere:
Expand All @@ -142,3 +141,40 @@ def fill_df_keys_with_fields(df: pd.DataFrame) -> pd.DataFrame:
matches.update(get_relevant_activities(df, "to"))
df["to key"] = match_fields_for_key(df.loc[:, TO_ALL], matches)
return df


def get_activities_from_keys(df: pd.DataFrame, db: str = bw.config.biosphere) -> pd.DataFrame:
    """Return the exchanges in ``df`` whose keys do not resolve to an Activity in ``db``.

    Uses the BW SQL database (``ActivityDataset``) to check every 'from key' and
    'to key' that names ``db`` as its database.

    Parameters
    ----------
    df : pandas dataframe for a scenario
    db : the database name to check the Activities from the dataframe against
        (defaults to the configured biosphere database; NOTE the default is
        evaluated once at import time)

    Returns
    -------
    A pandas dataframe containing the rows whose 'from' or 'to' key could not be
    identified as an Activity in BW; empty if every key resolved.
    """
    def _existing_flows(codes) -> set:
        # Single SQL round-trip: fetch the (database, code) pairs from `db`
        # whose code appears in `codes`.
        qry = (ActivityDataset
               .select(ActivityDataset.code, ActivityDataset.database)
               .where((ActivityDataset.database == db) &
                      (ActivityDataset.code.in_(set(codes)))).namedtuples())
        return {(row.database, row.code) for row in qry.iterator()}

    data_f = df.loc[df['from database'] == db]
    data_t = df.loc[df['to database'] == db]
    flows = set()
    if not data_f.empty:
        # extract just the codes, discarding the database half of each key
        _, f_codes = zip(*data_f.loc[:, 'from key'])
        flows.update(_existing_flows(f_codes))
    if not data_t.empty:
        _, t_codes = zip(*data_t.loc[:, 'to key'])
        flows.update(_existing_flows(t_codes))
    # `absent` holds those exchanges where one of the keys was not found in the
    # respective database
    absent = pd.concat([data_f.loc[~(data_f['from key'].isin(flows)) & (data_f['from database'] == db)],
                        data_t.loc[~(data_t['to key'].isin(flows)) & (data_t['to database'] == db)]],
                       ignore_index=False, axis=0)
    return absent


147 changes: 145 additions & 2 deletions activity_browser/bwutils/superstructure/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
# -*- coding: utf-8 -*-
from typing import List, Tuple
from PySide2.QtWidgets import QApplication, QPushButton
from PySide2.QtCore import Qt
import sys
import ast

import brightway2 as bw
import numpy as np
import pandas as pd

from ..metadata import AB_metadata
from ..utils import Index
from .activities import data_from_index
from .utils import SUPERSTRUCTURE

from .file_dialogs import ABPopup
from ..errors import ScenarioDatabaseNotFoundError

def superstructure_from_arrays(samples: np.ndarray, indices: np.ndarray, names: List[str] = None) -> pd.DataFrame:
"""Process indices into the superstructure itself, the samples represent
Expand Down Expand Up @@ -64,3 +69,141 @@ def scenario_names_from_df(df: pd.DataFrame) -> List[str]:
return [
str(x).replace("\n", " ").replace("\r", "") for x in cols
]


def scenario_replace_databases(df_: pd.DataFrame, replacements: dict) -> pd.DataFrame:
    """Replace unidentified database names (and the matching activity keys) in a scenario dataframe.

    For a provided dataframe the function checks all rows for the presence of an unidentified
    database. If an unidentified database is found as a key in the ``replacements`` argument, the
    corresponding value is used as the alternative database, and the key for the activity from the
    unidentified database is collected from that alternative.

    If an activity cannot be identified within the provided database a warning popup is shown. The
    process can either be terminated, or proceed without replacement of those activities not
    identified (the unidentified database names are retained in these instances).

    Raises
    ------
    ScenarioDatabaseNotFoundError
        If one or more exchanges could not be relinked to the local databases.

    Parameters
    ----------
    df_ : the dataframe that is produced from the scenario files supplied to the AB
    replacements : a dictionary of key-value pairs where the key corresponds to the database in the
        supplied dataframe and the value corresponds to the respective database in the local
        brightway environment

    Returns
    -------
    ``df_`` with the database names and keys replaced on every row that could be relinked.
    """
    def exchange_replace_database(ds: pd.Series, replacements: dict, critical: dict, idx: pd.Index) -> pd.Series:
        """
        For a row in the scenario dataframe check the databases involved for whether replacement is
        required. If so, use the key-value pair within the replacements dictionary to replace the
        database names and obtain the correct activity key.

        Raises
        ------
        No exception; instead it records (up to five) rows that indicate non-linkable flows with
        the new database in ``critical``.

        Parameters
        ----------
        ds : data series (row) from a pandas dataframe containing the data from the scenario
            difference file
        replacements : a key -- value pair containing the old -- new database names
        critical : a dict of lists (initially empty) that is filled with the rows that fail in the
            relinking process
        idx : the index label of ``ds`` in the "parent" dataframe
        """
        ds_ = ds.copy()
        for i, field in enumerate([FROM_FIELDS, TO_FIELDS]):
            db_name = ds_[['from database', 'to database'][i]]
            # check for the relevance of the particular field
            if db_name not in replacements.keys():
                continue
            try:
                # try to find the matching records (after loaded into the metadata)
                if isinstance(ds_[field[1]], float):
                    # no categories value (NaN) -> try to find a technosphere record
                    key = metadata[(metadata[DB_FIELDS[0]] == ds_[field[0]]) &
                                   (metadata[DB_FIELDS[2]] == ds_[field[2]]) &
                                   (metadata[DB_FIELDS[3]] == ds_[field[3]])].copy()
                else:
                    # try to find a biosphere record
                    if isinstance(ds_[field[1]], str):
                        # categories may arrive as a string representation of a tuple
                        categories = ast.literal_eval(ds_[field[1]])
                    else:
                        categories = ds_[field[1]]
                    key = metadata[(metadata[DB_FIELDS[0]] == ds_[field[0]]) &
                                   (metadata[DB_FIELDS[1]] == categories)].copy()
                # replace the records that can be found
                for j, col in enumerate([['from key', 'from database'], ['to key', 'to database']][i]):
                    ds_.loc[col] = (key['database'][0], key['code'][0]) if j == 0 else key['database'][0]
            except Exception as e:
                # if the record cannot be found add an exception (to a maximum of five)
                if len(critical['from database']) <= 5:
                    critical['index'].append(idx)
                    critical['from database'].append(ds_['from database'])
                    critical['from activity name'].append(ds_['from activity name'])
                    critical['to database'].append(ds_['to database'])
                    critical['to activity name'].append(ds_['to activity name'])
        return ds_

    # Create a new dataframe from those records in the scenario files that include exchanges where
    # a replacement database is required
    df = df_.loc[(df_['from database'].isin(replacements.keys())) | (df_['to database'].isin(replacements.keys()))].copy(True)

    # A LIST OF FIELDS FOR ITERATION
    FROM_FIELDS = pd.Index([
        "from activity name", "from categories",
        "from reference product", "from location",
    ])
    TO_FIELDS = pd.Index(["to activity name", "to categories",
                          "to reference product", "to location"
                          ])
    DB_FIELDS = ['name', 'categories', 'reference product', 'location']

    # setting up the variables in case some exchanges cannot be relinked
    critical = {'index': [], 'from database': [], 'from activity name': [], 'to database': [], 'to activity name': []}  # filled inside the exchange_replace_database closure
    changes = ['from database', 'from key', 'to database', 'to key']

    # Load all required replacement databases into the metadata
    AB_metadata.add_metadata(replacements.values())
    metadata = AB_metadata.dataframe

    for idx in df.index:
        df.loc[idx, changes] = exchange_replace_database(df.loc[idx, :], replacements, critical, idx)[changes]
        # TODO check adaptation for the logger; NOTE(review): idx is an index label, not a
        # position, so the printed fraction assumes a RangeIndex starting at 0 — confirm
        sys.stdout.write("\r{}".format(idx/df.shape[0]))
        sys.stdout.flush()

    if critical['from database']:
        # prepare a warning message in case unlinkable activities were found in the scenario dataframe
        QApplication.restoreOverrideCursor()
        if len(critical['from database']) > 1:
            msg = f"Multiple activities could not be \"relinked\" to the local database.<br> The first five are provided. " \
                  f"If you want to save the dataframe you can either save those scenario exchanges where relinking failed "\
                  f"(check the excerpt box), or save the entire dataframe with a new column indicating failed relinking."\
                  f"<br> To abort the process press \'Cancel\'"
            critical_message = ABPopup.abCritical("Activities not found", msg, QPushButton('Save'), QPushButton('Cancel'), default=2)
            critical_message.save_options()
            critical_message.dataframe(pd.DataFrame(critical),
                                       ['from database', 'from activity name', 'to database', 'to activity name'])
            critical_message.dataframe_to_file(df_, critical['index'])
            response = critical_message.exec_()
        else:
            msg = f"An activity could not be \"relinked\" to the local database.<br> Some additional information is " \
                  f"provided. If you want to save the dataframe you can either save those scenario exchanges where " \
                  f"relinking failed (check the excerpt box), or save the entire dataframe with a new column indicating" \
                  f" failed relinking.<br>To abort the process press \'Cancel\'"
            critical_message = ABPopup.abCritical("Activity not found", msg, QPushButton('Save'), QPushButton('Cancel'), default=2)
            critical_message.save_options()
            critical_message.dataframe(pd.DataFrame(critical),
                                       ['from database', 'from activity name', 'to database', 'to activity name'])
            critical_message.dataframe_to_file(df_, critical['index'])
            response = critical_message.exec_()
        # restore the busy cursor for the caller before aborting the import
        QApplication.setOverrideCursor(Qt.WaitCursor)
        raise ScenarioDatabaseNotFoundError("Incompatible Databases in the scenario file, unable to complete further checks on the file")
    else:
        # write the relinked rows back into the full dataframe
        df_.loc[df.index] = df
    return df_
2 changes: 1 addition & 1 deletion activity_browser/bwutils/superstructure/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def get_header_index(document_path: Union[str, Path], import_sheet: int):
wb.close()
raise IndexError("Expected headers not found in file").with_traceback(e.__traceback__)
except UnicodeDecodeError as e:
print("Given document uses an unknown encoding: {}".format(e))
log.error("Given document uses an unknown encoding: {}".format(e))
wb.close()
raise ValueError("Could not find required headers in given document sheet.")

Expand Down
Loading

0 comments on commit 84c9b15

Please sign in to comment.