-
Notifications
You must be signed in to change notification settings - Fork 52
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MRG] Fix GUI MPI available cores #871
base: master
Are you sure you want to change the base?
Changes from all commits
9efe7f7
5e717b3
bf002ce
ea55a18
acaca23
a385faa
0954871
b5a07da
6159e1f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,8 +7,9 @@ | |
import io | ||
import logging | ||
import mimetypes | ||
import multiprocessing | ||
import numpy as np | ||
import platform | ||
import psutil | ||
import sys | ||
import json | ||
import urllib.parse | ||
|
@@ -35,6 +36,7 @@ | |
get_L5Pyr_params_default) | ||
from hnn_core.hnn_io import dict_to_network, write_network_configuration | ||
from hnn_core.cells_default import _exp_g_at_dist | ||
from hnn_core.parallel_backends import _has_mpi4py, _has_psutil | ||
|
||
hnn_core_root = Path(hnn_core.__file__).parent | ||
default_network_configuration = (hnn_core_root / 'param' / | ||
|
@@ -320,6 +322,9 @@ def __init__(self, theme_color="#802989", | |
# load default parameters | ||
self.params = self.load_parameters(network_configuration) | ||
|
||
# Number of available cores | ||
self.n_cores = self._available_cores() | ||
|
||
# In-memory storage of all simulation and visualization related data | ||
self.simulation_data = defaultdict(lambda: dict(net=None, dpls=list())) | ||
|
||
|
@@ -336,15 +341,16 @@ def __init__(self, theme_color="#802989", | |
placeholder='ID of your simulation', | ||
description='Name:', | ||
disabled=False) | ||
self.widget_backend_selection = Dropdown(options=[('Joblib', 'Joblib'), | ||
('MPI', 'MPI')], | ||
value='Joblib', | ||
description='Backend:') | ||
self.widget_backend_selection = ( | ||
Dropdown(options=[('Joblib', 'Joblib'), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. don't bother exposing this to user |
||
('MPI', 'MPI')], | ||
value=self._check_backend(), | ||
description='Backend:')) | ||
self.widget_mpi_cmd = Text(value='mpiexec', | ||
placeholder='Fill if applies', | ||
description='MPI cmd:', disabled=False) | ||
self.widget_n_jobs = BoundedIntText(value=1, min=1, | ||
max=multiprocessing.cpu_count(), | ||
max=self.n_cores, | ||
description='Cores:', | ||
disabled=False) | ||
self.load_data_button = FileUpload( | ||
|
@@ -427,6 +433,30 @@ def __init__(self, theme_color="#802989", | |
self._init_ui_components() | ||
self.add_logging_window_logger() | ||
|
||
@staticmethod | ||
def _available_cores(): | ||
"""Return the number of available cores to the process. | ||
|
||
This is important for systems where the number of available cores is | ||
partitioned such as on HPC systems. Linux and Windows can return cpu | ||
affinity, which is the number of available cores. MacOS can only return | ||
total system cores. | ||
""" | ||
# For macos | ||
if platform.system() == 'Darwin': | ||
return psutil.cpu_count() | ||
# For Linux and Windows | ||
else: | ||
return len(psutil.Process().cpu_affinity()) | ||
|
||
@staticmethod | ||
def _check_backend(): | ||
"""Checks for MPI and returns the default backend name""" | ||
default_backend = 'Joblib' | ||
if _has_mpi4py() and _has_psutil(): | ||
default_backend = 'MPI' | ||
return default_backend | ||
|
||
def get_cell_parameters_dict(self): | ||
"""Returns the number of elements in the | ||
cell_parameters_dict dictionary. | ||
|
@@ -576,8 +606,9 @@ def _run_button_clicked(b): | |
self.widget_simulation_name, self._log_out, self.drive_widgets, | ||
self.data, self.widget_dt, self.widget_tstop, | ||
self.widget_ntrials, self.widget_backend_selection, | ||
self.widget_mpi_cmd, self.widget_n_jobs, self.params, | ||
self._simulation_status_bar, self._simulation_status_contents, | ||
self.widget_mpi_cmd, self.widget_n_jobs, | ||
self.params, self._simulation_status_bar, | ||
self._simulation_status_contents, | ||
self.connectivity_widgets, self.viz_manager, | ||
self.simulation_list_widget, self.cell_pameters_widgets) | ||
|
||
|
@@ -663,6 +694,10 @@ def compose(self, return_layout=True): | |
self.widget_ntrials, self.widget_backend_selection, | ||
self._backend_config_out]), | ||
], layout=self.layout['config_box']) | ||
# Displays the default backend options | ||
handle_backend_change(self.widget_backend_selection.value, | ||
self._backend_config_out, self.widget_mpi_cmd, | ||
self.widget_n_jobs) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was actually suggesting something simpler ... hnn-core already defaults to JoblibBackend if MPI is not available, so all you had to do was expose MPIBackend and get rid of the JoblibBackend. Anyhow @dylansdaniels do you want this merged for the GUI tutorial today? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That handler in the gui changes the displayed options based on the backend drop-down. It doesn't switch between backends. In hnn-core MPIBackend only switches to joblib if it's a single process, I don't think it checks for MPI and reverts to joblib if it can't find it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nevermind, you're right. It does revert to joblib. Though it runs on joblib with only 1 processor, is there a reason why it only uses 1? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's because specifying multiple processes with the joblib backend, unlike the MPI backend, deosn't make sense for all types of simulations. Since joblib parallelizes over multiple trials, it only works for simulations that include multiple trials. Plus parallel joblib processes are memory intensive, so silently switching from MPI to joblib can produce errors if the user wasn't anticipating needing the extra resources needed for a joblib task with many trials. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with @rythorpe . 
I would say the GUI should just use MPIBackend, and if not available, just run everything in a single core. It's probably a misnomer to call it "JoblibBackend" if it's running on a single core. Hence, I wouldn't bother exposing it to user. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we know if anyone who is actually using Your work @ntolley is unique in that your questions require lots of parameter sweeps, which are best served by using the API directly. Note that limited RAM is only an issue for Joblib, which I'm arguing should essentially default to running a single job within the context of the GUI. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's because specifying multiple processes with the joblib backend, unlike the MPI backend, doesn't make sense for all types of simulations. Since joblib parallelizes over multiple trials, it only works for simulations that include multiple trials. Plus parallel joblib processes are memory intensive, so silently switching from MPI to joblib can produce errors if the user wasn't anticipating needing the extra resources needed for a joblib task with many trials. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with @rythorpe . 
Learn more. I personally feel that once a feature is added, users find ways to justify keeping it. That's why one should start with minimal features first and then add based on necessity and feedback from user base. Anyway, call is yours @ntolley ! |
||
|
||
connectivity_configuration = Tab() | ||
|
||
|
@@ -1915,7 +1950,8 @@ def run_button_clicked(widget_simulation_name, log_out, drive_widgets, | |
print("start simulation") | ||
if backend_selection.value == "MPI": | ||
backend = MPIBackend( | ||
n_procs=multiprocessing.cpu_count() - 1, mpi_cmd=mpi_cmd.value) | ||
n_procs=n_jobs.value, | ||
mpi_cmd=mpi_cmd.value) | ||
else: | ||
backend = JoblibBackend(n_jobs=n_jobs.value) | ||
print(f"Using Joblib with {n_jobs.value} core(s).") | ||
|
@@ -2209,7 +2245,7 @@ def handle_backend_change(backend_type, backend_config, mpi_cmd, n_jobs): | |
backend_config.clear_output() | ||
with backend_config: | ||
if backend_type == "MPI": | ||
display(mpi_cmd) | ||
display(VBox(children=[n_jobs, mpi_cmd])) | ||
elif backend_type == "Joblib": | ||
display(n_jobs) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Usually, I've been told that one shouldn't use all the cores available when doing multiprocessing ... the user should be told how many cores are available and how many of those they want to use.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes agreed. I documented the GUI's behavior in issue #870
Using the joblib backend the number of cores is exposed and adjustable. With the MPI backend it is not. MPI uses the total number of cores-1. I was curious if there was a reason it was designed this way.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought you could adjust the number of cores in MPIBackend too. Is that not the case? I think n_cores - 1 may not be the best default ... but I'm not an MPI expert. I know it's a bad idea with joblibs
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would just set it to 1 rather than attempt to guess the total number of cores. During the tutorial, the user can be instructed to use more cores. The less magic there is in the code, the less likely it will fail.