Skip to content

Commit

Permalink
Scenario bug and error fixes (#1028)
Browse files Browse the repository at this point in the history
Adds file type checks for scenario imports and numeric type checks for scenario exchange values (both include popups and associated Error types).
Provides more data for Error and Warning popup windows.
Moves Excel file checks to the same location as other file types. Fixes tables for the popup dialog.
  • Loading branch information
Zoophobus authored Sep 1, 2023
1 parent 84c9b15 commit 692feaf
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 44 deletions.
13 changes: 12 additions & 1 deletion activity_browser/bwutils/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,5 +96,16 @@ class ScenarioExchangeDataNotFoundError(ABError):
pass


class ScenarioExchangeDataNonNumericError(ABError):
    """Signal non-numeric exchange data in a scenario difference file.

    Intended to be raised when the data provided for the exchanges in a
    scenario difference file is not numeric.
    """


class UnalignableScenarioColumnsWarning(ABWarning):
    """Should be raised if there is a mismatch between the scenario columns from multiple scenario difference files."""


class WrongFileTypeImportError(ABError):
    """Signal that the wrong type of file was offered to an import.

    Example: a database file handed to the scenario import dialog, or the
    other way around.
    """
5 changes: 4 additions & 1 deletion activity_browser/bwutils/superstructure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
from .file_imports import (
ABFeatherImporter, ABCSVImporter, ABFileImporter
)
from .file_dialogs import (
ABPopup
)
from .excel import import_from_excel, get_sheet_names
from .manager import SuperstructureManager
from .mlca import SuperstructureMLCA, SuperstructureContributions
from .utils import SUPERSTRUCTURE, _time_it_
from .utils import SUPERSTRUCTURE, _time_it_, edit_superstructure_for_string
6 changes: 2 additions & 4 deletions activity_browser/bwutils/superstructure/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,7 @@ def exchange_replace_database(ds: pd.Series, replacements: dict, critical: list,
f"<br> To abort the process press \'Cancel\'"
critical_message = ABPopup.abCritical("Activities not found", msg, QPushButton('Save'), QPushButton('Cancel'), default=2)
critical_message.save_options()
critical_message.dataframe(pd.DataFrame(critical),
['from database', 'from activity name', 'to database', 'to activity name'])
critical_message.dataframe(df.loc[critical['index'], :], SUPERSTRUCTURE)
critical_message.dataframe_to_file(df_, critical['index'])
response = critical_message.exec_()
else:
Expand All @@ -198,8 +197,7 @@ def exchange_replace_database(ds: pd.Series, replacements: dict, critical: list,
f" failed relinking.<br>To abort the process press \'Cancel\'"
critical_message = ABPopup.abCritical("Activity not found", msg, QPushButton('Save'), QPushButton('Cancel'), default=2)
critical_message.save_options()
critical_message.dataframe(pd.DataFrame(critical),
['from database', 'from activity name', 'to database', 'to activity name'])
critical_message.dataframe(df.loc[critical['index'], :], SUPERSTRUCTURE)
critical_message.dataframe_to_file(df_, critical['index'])
response = critical_message.exec_()
QApplication.setOverrideCursor(Qt.WaitCursor)
Expand Down
34 changes: 19 additions & 15 deletions activity_browser/bwutils/superstructure/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,22 @@ def import_from_excel(document_path: Union[str, Path], import_sheet: int = 1) ->
'usecols' is used to exclude specific columns from the excel document.
'comment' is used to exclude specific rows from the excel document.
"""
header_idx = get_header_index(document_path, import_sheet)
data = pd.read_excel(
document_path, sheet_name=import_sheet, header=header_idx,
usecols=valid_cols, comment="*", na_values="", keep_default_na=False,
engine="openpyxl"
)
diff = SUPERSTRUCTURE.difference(data.columns)
if not diff.empty:
raise ValueError("Missing required column(s) for superstructure: {}".format(diff.to_list()))

# Convert specific columns that may have tuples as strings
columns = ["from categories", "from key", "to categories", "to key"]
data.loc[:, columns] = data[columns].applymap(convert_tuple_str)

return data
data = pd.DataFrame({})
try:
header_idx = get_header_index(document_path, import_sheet)
data = pd.read_excel(
document_path, sheet_name=import_sheet, header=header_idx,
usecols=valid_cols, comment="*", na_values="", keep_default_na=False,
engine="openpyxl"
)
diff = SUPERSTRUCTURE.difference(data.columns)
if not diff.empty:
raise ValueError("Missing required column(s) for superstructure: {}".format(diff.to_list()))

# Convert specific columns that may have tuples as strings
columns = ["from categories", "from key", "to categories", "to key"]
data.loc[:, columns] = data[columns].applymap(convert_tuple_str)
except:
# skip the error checks here, these now occur in the calling layout.tabs.LCA_setup module
pass
return data
12 changes: 9 additions & 3 deletions activity_browser/bwutils/superstructure/file_dialogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class ProblemDataModel(QtCore.QAbstractTableModel):
A simple table model for use in the ABPopup dialogs for error reporting.
Intentionally coupled with the ABPopup class and not intended for use externally.
"""
updated = QtCore.Signal()
def __init__(self):
Expand All @@ -46,9 +47,13 @@ def sync(self, *args, **kwargs) -> None:
assert('dataframe' in kwargs and 'columns in kwargs')
self.columns = kwargs['columns']
data = kwargs['dataframe']
self._dataframe = pd.DataFrame(data,columns=self.columns)
self._dataframe = pd.DataFrame(data, columns=self.columns)
self.updated.emit()

def headerData(self, section, orientation, role=QtCore.Qt.DisplayRole):
    """Return the label for a horizontal header section of the table.

    Only the combination of a horizontal orientation and the display role is
    answered, using the entry of ``self.columns`` at position ``section``.
    Every other orientation/role combination yields None.
    """
    wants_column_label = (
        orientation == QtCore.Qt.Horizontal and role == QtCore.Qt.DisplayRole
    )
    if not wants_column_label:
        return None
    return self.columns[section]


class ProblemDataFrame(QtWidgets.QTableView):
"""
Expand Down Expand Up @@ -116,13 +121,14 @@ def dataframe(self, data: pd.DataFrame, columns: list = None):
Arguments
---------
data: the dataframe that generates the error
data: a dataframe with the exchanges/rows that generate the error
columns: a list of columns to provide the dataframe with for the popup message
"""
dataframe = data
cols = pd.Index(columns)
dataframe = dataframe.loc[:, columns]
dataframe.index = dataframe.index.astype(str)
if not isinstance(dataframe.index, pd.MultiIndex):
dataframe.index = dataframe.index.astype(str)
self.data_frame.update(dataframe, cols)
self.data_frame.setHidden(False)
self.updateGeometry()
Expand Down
50 changes: 32 additions & 18 deletions activity_browser/bwutils/superstructure/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import List
import numpy as np
import pandas as pd
from pandas.api.types import is_numeric_dtype, is_number
from PySide2.QtWidgets import QApplication, QPushButton
from PySide2.QtCore import Qt
from typing import Union, Optional
Expand All @@ -11,7 +12,7 @@

from .activities import fill_df_keys_with_fields, get_activities_from_keys
from .dataframe import scenario_columns
from .utils import guess_flow_type, SUPERSTRUCTURE, _time_it_
from .utils import guess_flow_type, SUPERSTRUCTURE, _time_it_, edit_superstructure_for_string

import logging
from activity_browser.logger import ABHandler
Expand All @@ -22,7 +23,7 @@
from .file_dialogs import ABPopup
from ..errors import (CriticalScenarioExtensionError, ScenarioExchangeNotFoundError,
ImportCanceledError, ScenarioExchangeDataNotFoundError,
UnalignableScenarioColumnsWarning,
UnalignableScenarioColumnsWarning, ScenarioExchangeDataNonNumericError
)


Expand Down Expand Up @@ -376,7 +377,6 @@ def fill_empty_process_keys_in_exchanges(df: pd.DataFrame) -> pd.DataFrame:
_df = df.loc[df.loc[:, EXCHANGE_KEYS].isna().any(axis=1)]
if not _df.empty:
sdf_keys = SuperstructureManager.exchangesPopup()
sdf_keys.save_options()
sdf_keys.dataframe_to_file(df, _df.index)
QApplication.restoreOverrideCursor()
sdf_keys.exec_()
Expand Down Expand Up @@ -407,10 +407,8 @@ def verify_scenario_process_keys(df: pd.DataFrame) -> pd.DataFrame:
_ = get_activities_from_keys(df, db)
df_ = pd.concat([df_, _], axis=0, ignore_index=False)
if not df_.empty:
errors_df = pd.DataFrame(df_, index=None, columns=['from key', 'to key', 'flow type'])
sdf_keys = SuperstructureManager.exchangesPopup()
sdf_keys.save_options()
sdf_keys.dataframe(errors_df, errors_df.columns)
sdf_keys.dataframe(df_, SUPERSTRUCTURE)
sdf_keys.dataframe_to_file(df, df_.index)
QApplication.restoreOverrideCursor()
sdf_keys.exec_()
Expand All @@ -426,6 +424,7 @@ def check_scenario_exchange_values(df: pd.DataFrame, cols: pd.Index):
Raises
------
A ScenarioExchangeDataNotFoundError if no valid values are found in the scenario 'amounts'
A ScenarioExchangeDataNonNumericError if non-numeric values are found for the scenario 'amounts'
A logged warning before replacement of invalid scenario values
Parameters
Expand All @@ -435,8 +434,9 @@ def check_scenario_exchange_values(df: pd.DataFrame, cols: pd.Index):
cols: a pandas index that indicates the scenario columns holding the 'amounts' to be used in the scenario
calculations
"""
_df = df.copy()
assert len(cols) > 0
nas = df.loc[:, cols].isna()
nas = _df.loc[:, cols].isna()
if nas.all(axis=0).all():
msg = "<p>No exchange values could be observed in the last loaded scenario file. " + \
"Exchange values must be recorded in a labelled scenario column with a name distinguishable from the" + \
Expand All @@ -450,6 +450,29 @@ def check_scenario_exchange_values(df: pd.DataFrame, cols: pd.Index):
raise ScenarioExchangeDataNotFoundError
elif nas.any(axis=0).any():
log.warning("Replacing empty values from the last loaded scenario difference file")
if not is_numeric_dtype(np.array(_df.loc[:, cols])):
# converting to numeric only works on lists and with the coercive option
# any errors convert to np.nan and can then only be excluded if previous
# NaNs are masked by conversion to numeric values
_df.loc[:, cols].fillna(0, inplace=True)
bad_entries = pd.DataFrame(index=_df.index)
for col in cols:
bad_entries[col] = pd.to_numeric(df.loc[:, col], errors='coerce')
msg = "<p>Non-numeric data is present in the scenario exchange columns.</p><p> The Activity-Browser can "\
"only deal with numeric data for the calculations. To resolve this corrections will need to be made "\
"to these values in the scenario file.</p>"
critical = ABPopup.abCritical(
"Bad (non-numeric) input data",
msg,
QPushButton('Save'),
QPushButton('Cancel')
)
critical.dataframe(df[bad_entries.isna().any(axis=1)], SUPERSTRUCTURE)
critical.save_options()
critical.dataframe_to_file(df, bad_entries.isna().any(axis=1))
QApplication.restoreOverrideCursor()
critical.exec_()
raise ScenarioExchangeDataNonNumericError()

@staticmethod
@_time_it_
Expand Down Expand Up @@ -497,7 +520,7 @@ def check_duplicates(data: Optional[Union[pd.DataFrame, list]],
for file, frame in duplicated.items():
frame.insert(0, 'File', file, allow_duplicates=True)
warning = ABPopup.abWarning('Duplicate flow exchanges', msg, QPushButton('Ok'), QPushButton('Cancel'))
warning.dataframe(pd.concat([file for file in duplicated.values()]), index)
warning.dataframe(pd.concat([file for file in duplicated.values()]), ['File'] + SUPERSTRUCTURE)
QApplication.restoreOverrideCursor()
response = warning.exec_()
QApplication.setOverrideCursor(Qt.WaitCursor)
Expand Down Expand Up @@ -532,8 +555,7 @@ def _check_duplicate(data: pd.DataFrame, index: list = ['to key', 'from key', 'f
"scenarios for the same flow. The AB can deal with this by discarding all but the last row for this " \
"exchange.</p> <p>Press 'Ok' to proceed, press 'Cancel' to abort.</p>"
warning = ABPopup.abWarning('Duplicate flow exchanges', msg, QPushButton('Ok'), QPushButton('Cancel'))
warning.dataframe(df.loc[duplicates], index)

warning.dataframe(df.loc[duplicates], SUPERSTRUCTURE)
QApplication.restoreOverrideCursor()
response = warning.exec_()
QApplication.setOverrideCursor(Qt.WaitCursor)
Expand All @@ -542,11 +564,3 @@ def _check_duplicate(data: pd.DataFrame, index: list = ['to key', 'from key', 'f
data.drop_duplicates(index, keep='last', inplace=True)
return data

@staticmethod
def edit_superstructure_for_string():
    """Return every SUPERSTRUCTURE field as one string, each followed by " <br>"."""
    return "".join(f"{column} <br>" for column in SUPERSTRUCTURE)


24 changes: 23 additions & 1 deletion activity_browser/bwutils/superstructure/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,28 @@
"flow type",
])

def edit_superstructure_for_string(superstructure=SUPERSTRUCTURE, sep="<br>", fhighlight=""):
    """
    Produces a string format for the essential columns for the scenario difference files with html
    style formatting. Allows for different defined structures.

    Every field is followed by the separator, including the last one.

    Parameters
    ----------
    superstructure: an iterable of superstructure column headers (by default set to the SUPERSTRUCTURE
        index); the elements are rendered with string formatting, so they need a defined __str__ operator
    sep: a short string that is written after each column header, by default this is the html line
        break <br>
    fhighlight: this is provided as a means to highlight the fields. The first element is placed before
        each field and the last element after it, so "[]" produces "[field]" and a single '"' quotes the
        field. Any falsy value (the default empty string) disables highlighting

    Returns
    -------
    A formatted string with the required file fields
    """
    # Resolve the enclosing characters once instead of indexing on every iteration.
    if fhighlight:
        opening, closing = fhighlight[0], fhighlight[-1]
    else:
        opening = closing = ""
    # str.join avoids the repeated string reallocation of an accumulating "+=" loop.
    return "".join(f"{opening}{field}{closing}{sep}" for field in superstructure)

def guess_flow_type(row: pd.Series) -> str:
"""Given a series of input- and output keys, make a guess on the flow type.
Expand All @@ -47,4 +69,4 @@ def wrapper(*args):
result = func(*args)
log.info(f"{func} -- " + str(time.time() - now))
return result
return wrapper
return wrapper
16 changes: 15 additions & 1 deletion activity_browser/layouts/tabs/LCA_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ...settings import ab_settings
from ...bwutils.errors import (CriticalScenarioExtensionError, ScenarioExchangeNotFoundError,
ScenarioDatabaseNotFoundError, ImportCanceledError, ScenarioExchangeDataNotFoundError,
UnalignableScenarioColumnsWarning,)
UnalignableScenarioColumnsWarning, ScenarioExchangeDataNonNumericError,)
from ...signals import signals
from ...ui.icons import qicons
from ...ui.style import horizontal_line, header, style_group_box
Expand All @@ -26,6 +26,7 @@
)
from ...ui.widgets import ExcelReadDialog, ScenarioDatabaseDialog
from .base import BaseRightTab
from activity_browser.bwutils.superstructure import ABPopup, edit_superstructure_for_string

import logging
from activity_browser.logger import ABHandler
Expand Down Expand Up @@ -542,6 +543,19 @@ def load_action(self) -> None:
if query == QtWidgets.QMessageBox.No:
include_default = False
signals.parameter_scenario_sync.emit(self.index, df, include_default)
else:
# this is a wrong file type
msg = "The Activity-Browser is attempting to import a scenario file.<p>During the attempted import"\
" another file type was detected. Please check the file type of the attempted import, if it is"\
" a scenario file make sure it contains a valid format.</p>"\
"<p>A flow exchange scenario file requires the following headers:<br>" +\
edit_superstructure_for_string(sep=", ", fhighlight='"') + "</p>"\
"<p>A parameter scenario file requires the following:<br>" + edit_superstructure_for_string(
["name", "group"], sep=", ", fhighlight='"') + "</p>"
critical = ABPopup.abCritical("Wrong file type", msg, QtWidgets.QPushButton("Cancel"))
QtWidgets.QApplication.restoreOverrideCursor()
critical.exec_()
return
except CriticalScenarioExtensionError as e:
# Triggered when combining different scenario files by extension leads to no scenario columns
QtWidgets.QApplication.restoreOverrideCursor()
Expand Down

0 comments on commit 692feaf

Please sign in to comment.