Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additional scenario checks #1028

Merged
merged 1 commit into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion activity_browser/bwutils/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,5 +96,16 @@ class ScenarioExchangeDataNotFoundError(ABError):
pass


class ScenarioExchangeDataNonNumericError(ABError):
    """Signal non-numeric exchange data in a scenario difference file.

    Should be raised if non-numeric data is provided for the exchanges in a
    scenario difference file.
    """


class UnalignableScenarioColumnsWarning(ABWarning):
    """Warn about scenario columns that cannot be aligned across files.

    Should be raised if there is a mismatch between the scenario columns from
    multiple scenario difference files.
    """


class WrongFileTypeImportError(ABError):
    """Signal an import attempt with the wrong type of file.

    Should be raised when a user tries to import the wrong type of file for
    the import in question, for example a database file with the scenario
    import dialog, or vice versa.
    """
5 changes: 4 additions & 1 deletion activity_browser/bwutils/superstructure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
from .file_imports import (
ABFeatherImporter, ABCSVImporter, ABFileImporter
)
from .file_dialogs import (
ABPopup
)
from .excel import import_from_excel, get_sheet_names
from .manager import SuperstructureManager
from .mlca import SuperstructureMLCA, SuperstructureContributions
from .utils import SUPERSTRUCTURE, _time_it_
from .utils import SUPERSTRUCTURE, _time_it_, edit_superstructure_for_string
6 changes: 2 additions & 4 deletions activity_browser/bwutils/superstructure/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,7 @@ def exchange_replace_database(ds: pd.Series, replacements: dict, critical: list,
f"<br> To abort the process press \'Cancel\'"
critical_message = ABPopup.abCritical("Activities not found", msg, QPushButton('Save'), QPushButton('Cancel'), default=2)
critical_message.save_options()
critical_message.dataframe(pd.DataFrame(critical),
['from database', 'from activity name', 'to database', 'to activity name'])
critical_message.dataframe(df.loc[critical['index'], :], SUPERSTRUCTURE)
critical_message.dataframe_to_file(df_, critical['index'])
response = critical_message.exec_()
else:
Expand All @@ -198,8 +197,7 @@ def exchange_replace_database(ds: pd.Series, replacements: dict, critical: list,
f" failed relinking.<br>To abort the process press \'Cancel\'"
critical_message = ABPopup.abCritical("Activity not found", msg, QPushButton('Save'), QPushButton('Cancel'), default=2)
critical_message.save_options()
critical_message.dataframe(pd.DataFrame(critical),
['from database', 'from activity name', 'to database', 'to activity name'])
critical_message.dataframe(df.loc[critical['index'], :], SUPERSTRUCTURE)
critical_message.dataframe_to_file(df_, critical['index'])
response = critical_message.exec_()
QApplication.setOverrideCursor(Qt.WaitCursor)
Expand Down
34 changes: 19 additions & 15 deletions activity_browser/bwutils/superstructure/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,22 @@ def import_from_excel(document_path: Union[str, Path], import_sheet: int = 1) ->
'usecols' is used to exclude specific columns from the excel document.
'comment' is used to exclude specific rows from the excel document.
"""
header_idx = get_header_index(document_path, import_sheet)
data = pd.read_excel(
document_path, sheet_name=import_sheet, header=header_idx,
usecols=valid_cols, comment="*", na_values="", keep_default_na=False,
engine="openpyxl"
)
diff = SUPERSTRUCTURE.difference(data.columns)
if not diff.empty:
raise ValueError("Missing required column(s) for superstructure: {}".format(diff.to_list()))

# Convert specific columns that may have tuples as strings
columns = ["from categories", "from key", "to categories", "to key"]
data.loc[:, columns] = data[columns].applymap(convert_tuple_str)

return data
data = pd.DataFrame({})
try:
header_idx = get_header_index(document_path, import_sheet)
data = pd.read_excel(
document_path, sheet_name=import_sheet, header=header_idx,
usecols=valid_cols, comment="*", na_values="", keep_default_na=False,
engine="openpyxl"
)
diff = SUPERSTRUCTURE.difference(data.columns)
if not diff.empty:
raise ValueError("Missing required column(s) for superstructure: {}".format(diff.to_list()))

# Convert specific columns that may have tuples as strings
columns = ["from categories", "from key", "to categories", "to key"]
data.loc[:, columns] = data[columns].applymap(convert_tuple_str)
except:
# skip the error checks here, these now occur in the calling layout.tabs.LCA_setup module
pass
return data
12 changes: 9 additions & 3 deletions activity_browser/bwutils/superstructure/file_dialogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class ProblemDataModel(QtCore.QAbstractTableModel):
A simple table model for use in the ABPopup dialogs for error reporting.

Intentionally coupled with the ABPopup class and not intended for use externally.

"""
updated = QtCore.Signal()
def __init__(self):
Expand All @@ -46,9 +47,13 @@ def sync(self, *args, **kwargs) -> None:
assert('dataframe' in kwargs and 'columns in kwargs')
self.columns = kwargs['columns']
data = kwargs['dataframe']
self._dataframe = pd.DataFrame(data,columns=self.columns)
self._dataframe = pd.DataFrame(data, columns=self.columns)
self.updated.emit()

def headerData(self, section, orientation, role=QtCore.Qt.DisplayRole):
    """Return the label for a horizontal header section.

    Only requests for the horizontal orientation with the display role are
    answered, using the entry of ``self.columns`` at position ``section``.
    Any other request returns None.
    """
    if orientation != QtCore.Qt.Horizontal:
        return None
    if role != QtCore.Qt.DisplayRole:
        return None
    return self.columns[section]


class ProblemDataFrame(QtWidgets.QTableView):
"""
Expand Down Expand Up @@ -116,13 +121,14 @@ def dataframe(self, data: pd.DataFrame, columns: list = None):

Arguments
---------
data: the dataframe that generates the error
data: a dataframe with the exchanges/rows that generate the error
columns: a list of columns to provide the dataframe with for the popup message
"""
dataframe = data
cols = pd.Index(columns)
dataframe = dataframe.loc[:, columns]
dataframe.index = dataframe.index.astype(str)
if not isinstance(dataframe.index, pd.MultiIndex):
dataframe.index = dataframe.index.astype(str)
self.data_frame.update(dataframe, cols)
self.data_frame.setHidden(False)
self.updateGeometry()
Expand Down
50 changes: 32 additions & 18 deletions activity_browser/bwutils/superstructure/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import List
import numpy as np
import pandas as pd
from pandas.api.types import is_numeric_dtype, is_number
from PySide2.QtWidgets import QApplication, QPushButton
from PySide2.QtCore import Qt
from typing import Union, Optional
Expand All @@ -11,7 +12,7 @@

from .activities import fill_df_keys_with_fields, get_activities_from_keys
from .dataframe import scenario_columns
from .utils import guess_flow_type, SUPERSTRUCTURE, _time_it_
from .utils import guess_flow_type, SUPERSTRUCTURE, _time_it_, edit_superstructure_for_string

import logging
from activity_browser.logger import ABHandler
Expand All @@ -22,7 +23,7 @@
from .file_dialogs import ABPopup
from ..errors import (CriticalScenarioExtensionError, ScenarioExchangeNotFoundError,
ImportCanceledError, ScenarioExchangeDataNotFoundError,
UnalignableScenarioColumnsWarning,
UnalignableScenarioColumnsWarning, ScenarioExchangeDataNonNumericError
)


Expand Down Expand Up @@ -376,7 +377,6 @@ def fill_empty_process_keys_in_exchanges(df: pd.DataFrame) -> pd.DataFrame:
_df = df.loc[df.loc[:, EXCHANGE_KEYS].isna().any(axis=1)]
if not _df.empty:
sdf_keys = SuperstructureManager.exchangesPopup()
sdf_keys.save_options()
sdf_keys.dataframe_to_file(df, _df.index)
QApplication.restoreOverrideCursor()
sdf_keys.exec_()
Expand Down Expand Up @@ -407,10 +407,8 @@ def verify_scenario_process_keys(df: pd.DataFrame) -> pd.DataFrame:
_ = get_activities_from_keys(df, db)
df_ = pd.concat([df_, _], axis=0, ignore_index=False)
if not df_.empty:
errors_df = pd.DataFrame(df_, index=None, columns=['from key', 'to key', 'flow type'])
sdf_keys = SuperstructureManager.exchangesPopup()
sdf_keys.save_options()
sdf_keys.dataframe(errors_df, errors_df.columns)
sdf_keys.dataframe(df_, SUPERSTRUCTURE)
sdf_keys.dataframe_to_file(df, df_.index)
QApplication.restoreOverrideCursor()
sdf_keys.exec_()
Expand All @@ -426,6 +424,7 @@ def check_scenario_exchange_values(df: pd.DataFrame, cols: pd.Index):
Raises
------
A ScenarioExchangeDataNotFoundError if no valid values are found in the scenario 'amounts'
A ScenarioExchangeDataNonNumericError if non-numeric values are found for the scenario 'amounts'
A logged warning before replacement of invalid scenario values

Parameters
Expand All @@ -435,8 +434,9 @@ def check_scenario_exchange_values(df: pd.DataFrame, cols: pd.Index):
cols: a pandas index that indicates the scenario columns holding the 'amounts' to be used in the scenario
calculations
"""
_df = df.copy()
assert len(cols) > 0
nas = df.loc[:, cols].isna()
nas = _df.loc[:, cols].isna()
if nas.all(axis=0).all():
msg = "<p>No exchange values could be observed in the last loaded scenario file. " + \
"Exchange values must be recorded in a labelled scenario column with a name distinguishable from the" + \
Expand All @@ -450,6 +450,29 @@ def check_scenario_exchange_values(df: pd.DataFrame, cols: pd.Index):
raise ScenarioExchangeDataNotFoundError
elif nas.any(axis=0).any():
log.warning("Replacing empty values from the last loaded scenario difference file")
if not is_numeric_dtype(np.array(_df.loc[:, cols])):
# converting to numeric only works on lists and with the coercive option
# any errors convert to np.nan and can then only be excluded if previous
# NaNs are masked by conversion to numeric values
_df.loc[:, cols].fillna(0, inplace=True)
bad_entries = pd.DataFrame(index=_df.index)
for col in cols:
bad_entries[col] = pd.to_numeric(df.loc[:, col], errors='coerce')
msg = "<p>Non-numeric data is present in the scenario exchange columns.</p><p> The Activity-Browser can "\
"only deal with numeric data for the calculations. To resolve this corrections will need to be made "\
"to these values in the scenario file.</p>"
critical = ABPopup.abCritical(
"Bad (non-numeric) input data",
msg,
QPushButton('Save'),
QPushButton('Cancel')
)
critical.dataframe(df[bad_entries.isna().any(axis=1)], SUPERSTRUCTURE)
critical.save_options()
critical.dataframe_to_file(df, bad_entries.isna().any(axis=1))
QApplication.restoreOverrideCursor()
critical.exec_()
raise ScenarioExchangeDataNonNumericError()

@staticmethod
@_time_it_
Expand Down Expand Up @@ -497,7 +520,7 @@ def check_duplicates(data: Optional[Union[pd.DataFrame, list]],
for file, frame in duplicated.items():
frame.insert(0, 'File', file, allow_duplicates=True)
warning = ABPopup.abWarning('Duplicate flow exchanges', msg, QPushButton('Ok'), QPushButton('Cancel'))
warning.dataframe(pd.concat([file for file in duplicated.values()]), index)
warning.dataframe(pd.concat([file for file in duplicated.values()]), ['File'] + SUPERSTRUCTURE)
QApplication.restoreOverrideCursor()
response = warning.exec_()
QApplication.setOverrideCursor(Qt.WaitCursor)
Expand Down Expand Up @@ -532,8 +555,7 @@ def _check_duplicate(data: pd.DataFrame, index: list = ['to key', 'from key', 'f
"scenarios for the same flow. The AB can deal with this by discarding all but the last row for this " \
"exchange.</p> <p>Press 'Ok' to proceed, press 'Cancel' to abort.</p>"
warning = ABPopup.abWarning('Duplicate flow exchanges', msg, QPushButton('Ok'), QPushButton('Cancel'))
warning.dataframe(df.loc[duplicates], index)

warning.dataframe(df.loc[duplicates], SUPERSTRUCTURE)
QApplication.restoreOverrideCursor()
response = warning.exec_()
QApplication.setOverrideCursor(Qt.WaitCursor)
Expand All @@ -542,11 +564,3 @@ def _check_duplicate(data: pd.DataFrame, index: list = ['to key', 'from key', 'f
data.drop_duplicates(index, keep='last', inplace=True)
return data

@staticmethod
def edit_superstructure_for_string():
    """Return the SUPERSTRUCTURE fields as one string, each followed by ' <br>'."""
    return "".join(f"{field} <br>" for field in SUPERSTRUCTURE)


24 changes: 23 additions & 1 deletion activity_browser/bwutils/superstructure/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,28 @@
"flow type",
])

def edit_superstructure_for_string(superstructure=SUPERSTRUCTURE, sep="<br>", fhighlight=""):
    """
    Produces a string listing the essential columns for the scenario difference files with html
    style formatting. Allows for different defined structures.

    Parameters
    ----------
    superstructure: an iterable of column headers (by default the SUPERSTRUCTURE index); every
        element needs to have a defined __str__ operator
    sep: a short string written after every column header, including the last one; by default
        this is the html line break <br>
    fhighlight: optional characters used to enclose each field. An empty value (the default)
        disables highlighting. Otherwise the first element is written before and the last
        element after every field, e.g. "[]" gives "[field]" and '"' gives '"field"'

    Returns
    -------
    A formatted string with the required file fields
    """
    if fhighlight:
        # Resolve the enclosing characters once instead of on every iteration
        opening, closing = fhighlight[0], fhighlight[-1]
    else:
        opening = closing = ""
    # join avoids the repeated string re-allocation of "+=" inside a loop
    return "".join(f"{opening}{field}{closing}{sep}" for field in superstructure)

def guess_flow_type(row: pd.Series) -> str:
"""Given a series of input- and output keys, make a guess on the flow type.
Expand All @@ -47,4 +69,4 @@ def wrapper(*args):
result = func(*args)
log.info(f"{func} -- " + str(time.time() - now))
return result
return wrapper
return wrapper
16 changes: 15 additions & 1 deletion activity_browser/layouts/tabs/LCA_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ...settings import ab_settings
from ...bwutils.errors import (CriticalScenarioExtensionError, ScenarioExchangeNotFoundError,
ScenarioDatabaseNotFoundError, ImportCanceledError, ScenarioExchangeDataNotFoundError,
UnalignableScenarioColumnsWarning,)
UnalignableScenarioColumnsWarning, ScenarioExchangeDataNonNumericError,)
from ...signals import signals
from ...ui.icons import qicons
from ...ui.style import horizontal_line, header, style_group_box
Expand All @@ -26,6 +26,7 @@
)
from ...ui.widgets import ExcelReadDialog, ScenarioDatabaseDialog
from .base import BaseRightTab
from activity_browser.bwutils.superstructure import ABPopup, edit_superstructure_for_string

import logging
from activity_browser.logger import ABHandler
Expand Down Expand Up @@ -542,6 +543,19 @@ def load_action(self) -> None:
if query == QtWidgets.QMessageBox.No:
include_default = False
signals.parameter_scenario_sync.emit(self.index, df, include_default)
else:
# this is a wrong file type
msg = "The Activity-Browser is attempting to import a scenario file.<p>During the attempted import"\
" another file type was detected. Please check the file type of the attempted import, if it is"\
" a scenario file make sure it contains a valid format.</p>"\
"<p>A flow exchange scenario file requires the following headers:<br>" +\
edit_superstructure_for_string(sep=", ", fhighlight='"') + "</p>"\
"<p>A parameter scenario file requires the following:<br>" + edit_superstructure_for_string(
["name", "group"], sep=", ", fhighlight='"') + "</p>"
critical = ABPopup.abCritical("Wrong file type", msg, QtWidgets.QPushButton("Cancel"))
QtWidgets.QApplication.restoreOverrideCursor()
critical.exec_()
return
except CriticalScenarioExtensionError as e:
# Triggered when combining different scenario files by extension leads to no scenario columns
QtWidgets.QApplication.restoreOverrideCursor()
Expand Down
Loading