Skip to content

Commit

Permalink
Overhaul the flow for importing excel files (#447)
Browse files Browse the repository at this point in the history
* New strategy for the ABExcelImporter

* Customize the automated import method further

* Feed the database name into the importer and signal that params have (possibly) changed

* Add classmethod for asking for any kind of db

* Add excel fixing method for unlinked exchanges

* Add wiring and alter method for linking

* Cleanup of unused code
  • Loading branch information
dgdekoning authored Sep 25, 2020
1 parent 1d5aa87 commit 2412f9f
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 84 deletions.
80 changes: 40 additions & 40 deletions activity_browser/app/bwutils/importers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
# -*- coding: utf-8 -*-
import functools
from time import time
import warnings

import brightway2 as bw
from bw2io import ExcelImporter
from bw2io.errors import InvalidPackage, StrategyError
from bw2io.importers.excel import valid_first_cell
from bw2io.strategies import (
csv_restore_tuples, csv_restore_booleans, csv_numerize,
csv_drop_unknown, csv_add_missing_exchanges_section,
Expand All @@ -20,7 +18,7 @@
convert_activity_parameters_to_list
)

from .strategies import relink_exchanges_bw2package
from .strategies import relink_exchanges_bw2package, alter_database_name


INNER_FIELDS = ("name", "unit", "database", "location")
Expand All @@ -30,8 +28,26 @@
class ABExcelImporter(ExcelImporter):
"""Customized Excel importer for the AB."""

def __init__(self, filepath):
self.strategies = [
def write_database(self, **kwargs):
    """Write the imported data using the implementation above ExcelImporter.

    The call deliberately starts the method lookup *after* ExcelImporter,
    because we want a Database instance to be returned.
    Parameters are activated unless the caller explicitly passes
    ``activate_parameters`` itself.
    """
    # Only fills in the flag when the caller did not provide one.
    kwargs.setdefault('activate_parameters', True)
    return super(ExcelImporter, self).write_database(**kwargs)

@classmethod
def simple_automated_import(cls, filepath, db_name: str, relink: dict = None) -> list:
"""Handle a lot of the customizable things that can happen
when doing an import in a script or notebook.
"""
obj = cls(filepath)
obj.strategies = [
functools.partial(
alter_database_name,
old=obj.db_name,
new=db_name
),
csv_restore_tuples,
csv_restore_booleans,
csv_numerize,
Expand All @@ -56,47 +72,31 @@ def __init__(self, filepath):
convert_uncertainty_types_to_integers,
convert_activity_parameters_to_list,
]
start = time()
data = self.extractor.extract(filepath)
data = [(x, y) for x, y in data if valid_first_cell(x, y)]
print("Extracted {} worksheets in {:.2f} seconds".format(
len(data), time() - start))
if data and any(line for line in data):
self.db_name, self.metadata = self.get_database(data)
self.project_parameters = self.get_project_parameters(data)
self.database_parameters = self.get_database_parameters(data)
self.data = self.process_activities(data)
else:
warnings.warn("No data in workbook found")
obj.db_name = db_name

def write_database(self, **kwargs):
    """Write the imported data using the implementation above ExcelImporter.

    The call deliberately starts the method lookup *after* ExcelImporter,
    because we want a Database instance to be returned.
    Parameters are activated unless the caller explicitly passes
    ``activate_parameters`` itself.
    """
    # Only fills in the flag when the caller did not provide one.
    kwargs.setdefault('activate_parameters', True)
    return super(ExcelImporter, self).write_database(**kwargs)
# Test if the import contains any parameters.
has_params = any([
obj.project_parameters, obj.database_parameters,
any("parameters" in ds for ds in obj.data)
])

@classmethod
def simple_automated_import(cls, filepath, overwrite: bool = True, purge: bool = False,
linker: str = None, **kwargs) -> list:
"""Handle a lot of the customizable things that can happen
when doing an import in a script or notebook.
"""
obj = cls(filepath)
if obj.project_parameters:
obj.write_project_parameters(delete_existing=purge)
obj.write_project_parameters(delete_existing=False)
obj.apply_strategies()
if any(obj.unlinked) and linker:
# First try and match on the database field as well.
obj.link_to_technosphere(linker, fields=INNER_FIELDS)
# If there are still unlinked, use a rougher link.
if any(obj.unlinked):
obj.link_to_technosphere(linker)
if any(obj.unlinked) and relink:
for db in relink:
# First try and match on the database field as well.
obj.link_to_technosphere(db, fields=INNER_FIELDS)
# If there are still unlinked, use a rougher link.
if any(obj.unlinked):
obj.link_to_technosphere(db)
if any(obj.unlinked):
# Still have unlinked fields? Raise exception.
raise StrategyError([exc for exc in obj.unlinked])
db = obj.write_database(delete_existing=overwrite, activate_parameters=True)
excs = [exc for exc in obj.unlinked][:10]
raise StrategyError(excs)
db = obj.write_database(delete_existing=True, activate_parameters=True)
if has_params:
bw.parameters.recalculate()
return [db]

def link_to_technosphere(self, db_name: str, fields: tuple = None) -> None:
Expand Down
20 changes: 20 additions & 0 deletions activity_browser/app/bwutils/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,23 @@ def relink_exchanges_existing_db(db: bw.Database, other: bw.Database) -> None:
# this updates the 'depends' in metadata
db.process()
print("Finished relinking database, {} exchanges altered.".format(altered))


def alter_database_name(data: list, old: str, new: str) -> list:
    """For ABExcelImporter, go through data and replace all instances
    of the `old` database name with `new`.

    The datasets are modified in place; the same ``data`` list is returned
    so the function can be used as an import strategy.

    :param data: list of dataset dictionaries.
    :param old: database name to replace.
    :param new: database name to write instead.
    :return: the (mutated) ``data`` list.
    """
    if old == new:
        return data  # Avoid doing any work if the two are equal.
    for ds in data:
        # Alter db on activities.
        ds["database"] = new
        for exc in ds.get("exchanges", []):
            # Note: this will only alter database if the field exists in the
            # exchange and currently points at `old`; links to other
            # databases are left untouched.
            if exc.get("database") == old:
                exc["database"] = new
        # Any parameters found here are activity parameters and we can
        # overwrite the database without issue. Only the parameter
        # dictionaries are needed, not their names.
        for param in ds.get("parameters", {}).values():
            param["database"] = new
    return data
14 changes: 14 additions & 0 deletions activity_browser/app/ui/widgets/dialog.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ class DatabaseRelinkDialog(QtWidgets.QDialog):
"Relink exchanges from database '{}' with another database?"
"\n\nLink with:"
)
LINK_UNKNOWN = (
"Link exchanges from database '{}' with another database?"
"\n\nLink with:"
)

def __init__(self, parent=None):
super().__init__(parent)
Expand Down Expand Up @@ -287,3 +291,13 @@ def relink_existing(cls, parent: QtWidgets.QWidget, db: str, options: List[str])
obj.choice.addItems(options)
obj.choice.setEnabled(True)
return obj

@classmethod
def link_new(cls, parent, db: str, options: List[str]) -> 'DatabaseRelinkDialog':
    """Build a dialog asking which database the exchanges of `db` should link to.

    The dropdown is emptied, filled with `options` and enabled before the
    dialog is handed back to the caller (it is not shown here).
    """
    dialog = cls(parent)
    dialog.setWindowTitle("Database Linking")
    dialog.label.setText(cls.LINK_UNKNOWN.format(db))
    # Reset the dropdown so only the offered databases are selectable.
    dropdown = dialog.choice
    dropdown.clear()
    dropdown.addItems(options)
    dropdown.setEnabled(True)
    return dialog
81 changes: 37 additions & 44 deletions activity_browser/app/ui/wizards/db_import_wizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from PySide2 import QtWidgets, QtCore
from PySide2.QtCore import Signal, Slot

from ...bwutils.commontasks import is_technosphere_db
from ...bwutils.importers import ABExcelImporter, ABPackage
from ...signals import signals
from ..style import style_group_box
Expand Down Expand Up @@ -355,7 +354,7 @@ def initializePage(self):

def validatePage(self):
db_name = self.name_edit.text()
if db_name in bw.databases and not self.field("overwrite_db"):
if db_name in bw.databases:
warning = 'Database <b>{}</b> already exists in project <b>{}</b>!'.format(
db_name, bw.projects.current)
QtWidgets.QMessageBox.warning(self, 'Database exists!', warning)
Expand Down Expand Up @@ -484,6 +483,7 @@ def __init__(self, parent=None):
import_signals.download_complete.connect(self.update_download)
import_signals.unarchive_finished.connect(self.update_unarchive)
import_signals.missing_dbs.connect(self.fix_db_import)
import_signals.links_required.connect(self.fix_excel_import)

# Threads
self.main_worker_thread = MainWorkerThread(self.wizard.downloader, self)
Expand Down Expand Up @@ -534,9 +534,6 @@ def initializePage(self):
"archive_path": self.field("archive_path"),
"use_local": True,
"relink": self.relink_data,
"overwrite": self.field("overwrite_db"),
"purge": self.field("purge_params"),
"linker": self.field("link_db") if self.field("do_link") else None,
}
self.main_worker_thread.update(**kwargs)
else:
Expand Down Expand Up @@ -592,6 +589,8 @@ def update_download(self) -> None:
def fix_db_import(self, missing: set) -> None:
"""Halt and delete the importing thread, ask the user for input
and restart the worker thread with the new information.
Customized for ABPackage problems
"""
self.main_worker_thread.exit(1)

Expand All @@ -613,6 +612,33 @@ def fix_db_import(self, missing: set) -> None:
# Restart the page
self.initializePage()

@Slot(object, name="fixExcelImport")
def fix_excel_import(self, exchanges: list) -> None:
    """Stop the importing thread and ask the user which database to link to.

    Customized for ABExcelImporter problems: `exchanges` holds the
    exchanges that could not be linked. If the user picks a database it is
    added to the relink data and the page is restarted; otherwise a warning
    listing the unlinked exchanges is shown and nothing is restarted.
    """
    self.main_worker_thread.exit(1)

    # Offer every database in the current project as a linking target.
    dialog = DatabaseRelinkDialog.link_new(
        self, self.field("db_name"), bw.databases.list
    )
    if dialog.exec_() != DatabaseRelinkDialog.Accepted:
        # User declined: report the problem, leave the import stopped.
        warning = QtWidgets.QMessageBox(
            QtWidgets.QMessageBox.Warning, "Unlinked exchanges",
            "Excel data contains exchanges that could not be linked.",
            QtWidgets.QMessageBox.Ok, self
        )
        details = "\n\n".join(str(e) for e in exchanges)
        warning.setDetailedText(details)
        warning.exec_()
        return

    chosen = dialog.new_db
    self.relink_data[chosen] = chosen
    # Restart the page
    self.initializePage()


class MainWorkerThread(QtCore.QThread):
def __init__(self, downloader, parent=None):
Expand All @@ -625,17 +651,15 @@ def __init__(self, downloader, parent=None):
self.use_forwast = None
self.use_local = None
self.relink = {}
self.kwargs = {}

def update(self, db_name: str, archive_path=None, datasets_path=None,
use_forwast=False, use_local=False, relink=None, **kwargs) -> None:
use_forwast=False, use_local=False, relink=None) -> None:
self.db_name = db_name
self.archive_path = archive_path
self.datasets_path = datasets_path
self.use_forwast = use_forwast
self.use_local = use_local
self.relink = relink or {}
self.kwargs = kwargs

def run(self):
if self.use_forwast:
Expand Down Expand Up @@ -732,11 +756,10 @@ def run_local_import(self):
try:
import_signals.db_progress.emit(0, 0)
if os.path.splitext(self.archive_path)[1] in {".xlsx", ".xls"}:
if self.db_name in bw.databases and self.kwargs["overwrite"]:
del bw.databases[self.db_name]
result = ABExcelImporter.simple_automated_import(
self.archive_path, **self.kwargs
self.archive_path, self.db_name, self.relink
)
signals.parameters_changed.emit()
else:
result = ABPackage.import_file(self.archive_path, relink=self.relink)
if not import_signals.cancel_sentinel:
Expand Down Expand Up @@ -765,13 +788,10 @@ def run_local_import(self):
)
except StrategyError as e:
from pprint import pprint
del e.args[0][10:]
print("Could not link exchanges:")
print("Could not link exchanges, here are 10 examples.:")
pprint(e.args[0])
self.delete_canceled_db()
import_signals.import_failure.emit(
("Could not link exchanges", "One or more exchanges could not be linked.")
)
import_signals.links_required.emit(e.args[0])

def delete_canceled_db(self):
if self.db_name in bw.databases:
Expand Down Expand Up @@ -988,19 +1008,6 @@ def __init__(self, parent=None):
self.path.textChanged.connect(self.changed)
self.path_btn = QtWidgets.QPushButton("Browse")
self.path_btn.clicked.connect(self.browse)
self.overwrite_db = QtWidgets.QCheckBox("Overwrite database.")
self.overwrite_db.setToolTip("Will overwrite existing databases with the same name.")
self.overwrite_db.setChecked(True)
self.purge_params = QtWidgets.QCheckBox("Remove existing parameters from project.")
self.purge_params.setToolTip("Will only remove parameters of the type found in the file.")
self.purge_params.setChecked(False)
self.link_option = QtWidgets.QCheckBox("Link against existing technosphere.")
self.link_option.setToolTip("Attempts to find unlinked exchanges in the selected database.")
self.link_option.setChecked(False)
self.link_choice = QtWidgets.QComboBox()
self.link_choice.addItems([db for db in bw.databases if is_technosphere_db(db)])
self.link_choice.setHidden(True)
self.link_option.toggled.connect(self.toggle_dropdown)
self.complete = False

option_box = QtWidgets.QGroupBox("Import excel database file:")
Expand All @@ -1009,27 +1016,16 @@ def __init__(self, parent=None):
grid_layout.addWidget(QtWidgets.QLabel("Path to file*"), 0, 0, 1, 1)
grid_layout.addWidget(self.path, 0, 1, 1, 2)
grid_layout.addWidget(self.path_btn, 0, 3, 1, 1)
grid_layout.addWidget(self.overwrite_db, 1, 0, 1, 3)
grid_layout.addWidget(self.purge_params, 2, 0, 1, 3)
grid_layout.addWidget(self.link_option, 3, 0, 1, 2)
grid_layout.addWidget(self.link_choice, 3, 2, 1, 2)
option_box.setLayout(grid_layout)
option_box.setStyleSheet(style_group_box.border_title)
layout.addWidget(option_box)
self.setLayout(layout)

# Register field to ensure user cannot advance without selecting file.
self.registerField("excel_path*", self.path)
self.registerField("overwrite_db", self.overwrite_db)
self.registerField("purge_params", self.purge_params)
self.registerField("do_link", self.link_option)
self.registerField("link_db", self.link_choice, "currentText")

def initializePage(self):
self.path.clear()
self.overwrite_db.setChecked(True)
self.purge_params.setChecked(False)
self.link_option.setChecked(False)

def nextId(self):
self.wizard.setField("archive_path", self.path.text())
Expand Down Expand Up @@ -1058,10 +1054,6 @@ def changed(self) -> None:
self.complete = all([exists, valid])
self.completeChanged.emit()

@Slot(bool, name="toggleDropdown")
def toggle_dropdown(self, toggle: bool) -> None:
self.link_choice.setHidden(not toggle)

def isComplete(self):
return self.complete

Expand Down Expand Up @@ -1138,6 +1130,7 @@ class ImportSignals(QtCore.QObject):
connection_problem = Signal(tuple)
# Allow transmission of missing databases
missing_dbs = Signal(object)
links_required = Signal(object)


import_signals = ImportSignals()
Expand Down

0 comments on commit 2412f9f

Please sign in to comment.