forked from lava-nc/lava
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor methods without arguments into functions (lava-nc#685)
* Refactoring methods without arguments into functions. Signed-off-by: Mathis Richter <[email protected]> * Reverting unintentional change to tutorial. Signed-off-by: Mathis Richter <[email protected]> * Added new set of functions by @tim-shea, refactored. Signed-off-by: Mathis Richter <[email protected]> * Fixed linter error: unused import. Signed-off-by: Mathis Richter <[email protected]> * Fixed linter and security errors. Signed-off-by: Mathis Richter <[email protected]> * Fixed linter errors. Signed-off-by: Mathis Richter <[email protected]> * Trying to make nosec work. Signed-off-by: Mathis Richter <[email protected]> * Moved nosec back; does not seem to make a difference. Signed-off-by: Mathis Richter <[email protected]> * Unit tests for some of the new functions in the slurm module. Signed-off-by: Mathis Richter <[email protected]> * Full unit tests for SLURM module. Signed-off-by: Mathis Richter <[email protected]> * Full unit tests for lava_loihi module. Signed-off-by: Mathis Richter <[email protected]> * Deactivated linter error. Signed-off-by: Mathis Richter <[email protected]> * Trying different nosec variants. Signed-off-by: Mathis Richter <[email protected]> * Trying different nosec variants. Signed-off-by: Mathis Richter <[email protected]> * Trying different nosec variants. Signed-off-by: Mathis Richter <[email protected]> * Redesigned the API for lava.utils.{slurm,loihi}. Signed-off-by: Mathis Richter <[email protected]> * Another attempt at fixing linting. Signed-off-by: Mathis Richter <[email protected]> * Trying with noqa Signed-off-by: Mathis Richter <[email protected]> * Fixed module error in test patch Signed-off-by: Mathis Richter <[email protected]> * Renamed is_lava_loihi_installed to is_installed. Signed-off-by: Mathis Richter <[email protected]> * Fixed code duplication of patch decorators. Signed-off-by: Mathis Richter <[email protected]> * Added doc string to patch decorator. 
Signed-off-by: Mathis Richter <[email protected]> * Finished loihi and slurm docstrings. --------- Signed-off-by: Mathis Richter <[email protected]> Co-authored-by: Timothy Shea <[email protected]> Co-authored-by: Tim Shea <[email protected]>
- Loading branch information
1 parent
bcbd465
commit baed791
Showing
8 changed files
with
1,048 additions
and
417 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
# Copyright (C) 2022 Intel Corporation | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
# See: https://spdx.org/licenses/ | ||
|
||
import os | ||
import typing as ty | ||
import importlib.util | ||
import enum | ||
|
||
from lava.utils import slurm | ||
|
||
|
||
# Kind of host selected most recently: set to "SLURM" by use_slurm_host()
# and to "ETHERNET" by use_ethernet_host(); None until one of them succeeds.
host: ty.Optional[str] = None
|
||
|
||
@enum.unique
class ChipGeneration(enum.Enum):
    """Closed set of Loihi chip generations that can be selected.

    The value of every member is the identifier string that the host setup
    functions of this module export via the LOIHI_GEN environment variable.
    """
    N2A2 = "N2A2"
    N3B2 = "N3B2"
    # The majority of Loihi 2 systems available to the INRC are N3B3.
    N3B3 = "N3B3"
    # A number of Loihi 2 systems provided to INRC members are N3C1.
    N3C1 = "N3C1"
    N3D1 = "N3D1"
|
||
|
||
def use_slurm_host(
        partition: ty.Optional[str] = None,
        board: ty.Optional[str] = None,
        loihi_gen: ty.Optional[ChipGeneration] = ChipGeneration.N3B3
) -> None:
    """Use SLURM to run Lava models on Loihi 2.

    This function should be called prior to running models on the Intel
    neuromorphic research cloud, or if you have setup a SLURM scheduler on
    your local infrastructure.

    On success, SLURM usage is enabled, the LOIHI_GEN environment variable
    is set, the requested board and partition are applied through the slurm
    module, and the module-level ``host`` is set to "SLURM".

    Parameters
    ----------
    partition : Optional[str], default = None
        The SLURM partition from which a suitable node should be selected. If
        partition is specified, board should be None.
    board : Optional[str], default = None
        The SLURM board (node name) on which any Lava process should run. If
        board is specified, partition should be None.
    loihi_gen : Optional[ChipGeneration], default = ChipGeneration.N3B3
        The Loihi chip generation needed for the Lava processes. The value
        string of a generation (e.g. "N3B3") is accepted as well; None
        selects the default generation N3B3.

    Raises
    ------
    ImportError
        If Lava-Loihi is not installed.
    ValueError
        If loihi_gen is not a valid chip generation, if the SLURM controller
        is not available, or if the board or partition is rejected by the
        slurm module.
    """
    global host

    if not is_installed():
        raise ImportError("Attempting to use SLURM for Loihi2 but "
                          "Lava-Loihi is not installed.")

    # The parameter is Optional, so None must not crash on `.value`.
    # Looking the argument up in the enum returns members unchanged, also
    # accepts their value strings, and rejects anything else with a
    # ValueError before any environment state has been modified.
    if loihi_gen is None:
        loihi_gen = ChipGeneration.N3B3
    loihi_gen = ChipGeneration(loihi_gen)

    slurm.enable()

    os.environ["LOIHI_GEN"] = loihi_gen.value

    slurm.set_board(board, partition)
    slurm.set_partition(partition)

    host = "SLURM"
|
||
|
||
def use_ethernet_host(
        host_address: str,
        host_binary_path: ty.Optional[str] = "nxcore/bin/nx_driver_server",
        loihi_gen: ty.Optional[ChipGeneration] = ChipGeneration.N3B3
) -> None:
    """Set environment to connect directly to an Oheo Gulch host on the
    network.

    This should be used to run on Kapoho Point and Kapoho Point SC systems
    when not using SLURM. Call slurm.is_available() to determine whether
    SLURM is available.

    The function pings the host address once to ensure that the host is
    running and accessible. On success, SLURM usage is disabled, the
    environment variables NXSDKHOST, HOST_BINARY and LOIHI_GEN are set, and
    the module-level ``host`` is set to "ETHERNET".

    Parameters
    ----------
    host_address : str
        The IP address of the host system to use.
    host_binary_path : Optional[str]
        The path to the nxcore binary on the host. None selects the default
        path "nxcore/bin/nx_driver_server".
    loihi_gen : Optional[ChipGeneration]
        The generation of the Loihi board to compile. Supported
        values are N3B2, N3B3, and N3C1. The value string of a generation
        (e.g. "N3B3") is accepted as well; None selects the default N3B3.

    Raises
    ------
    ImportError
        If Lava-Loihi is not installed.
    ValueError
        If loihi_gen is not a valid chip generation or if pinging the host
        address fails.
    """
    global host

    if not is_installed():
        raise ImportError("Attempting to use Loihi2 but Lava-Loihi is "
                          "not installed.")

    # Both parameters are Optional; fall back to the documented defaults
    # instead of failing later with a TypeError/AttributeError.
    if host_binary_path is None:
        host_binary_path = "nxcore/bin/nx_driver_server"
    if loihi_gen is None:
        loihi_gen = ChipGeneration.N3B3
    # Enum lookup returns members unchanged and also accepts value strings.
    loihi_gen = ChipGeneration(loihi_gen)

    # Every argv element is passed to ping verbatim, so the option and its
    # value must be separate elements, and options are placed before the
    # destination because not every ping implementation parses options
    # that follow it.
    if not slurm.try_run_command(["ping", "-c", "1", host_address]):
        raise ValueError(f"Attempting to use ethernet host for Loihi2 "
                         f"but `ping {host_address}` failed.")

    slurm.disable()

    os.environ["NXSDKHOST"] = host_address
    os.environ["HOST_BINARY"] = host_binary_path
    os.environ["LOIHI_GEN"] = loihi_gen.value

    host = "ETHERNET"
|
||
|
||
def is_installed(module_name: str = "lava.utils.loihi2_profiler") -> bool:
    """Returns whether the Lava extension for Loihi is installed.

    Parameters
    ----------
    module_name : str
        Name of the module whose presence is used to detect the
        installation.

    Returns
    -------
    bool
        True iff a module spec can be found for module_name in this Python
        environment.
    """
    try:
        spec = importlib.util.find_spec(module_name)
    except ModuleNotFoundError:
        # For a dotted name, find_spec imports the parent package first and
        # raises if that package does not exist. A missing parent means
        # "not installed", not an error.
        return False

    return spec is not None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
# Copyright (C) 2022 Intel Corporation | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
# See: https://spdx.org/licenses/ | ||
|
||
from __future__ import annotations | ||
import os | ||
import subprocess # nosec - commands are trusted | ||
import typing as ty | ||
from dataclasses import dataclass | ||
|
||
|
||
def is_available() -> bool:
    """Report whether a SLURM controller is enabled on the current system.

    Returns
    -------
    bool
        True iff running `sinfo` through try_run_command yields a
        non-empty result.
    """
    sinfo_output = try_run_command(["sinfo"])
    return bool(sinfo_output)
|
||
|
||
def enable() -> None:
    """Switch Loihi execution over to SLURM.

    Sets the environment variable SLURM to "1" and removes NOSLURM.

    Raises
    ------
    ValueError
        If is_available() reports that no SLURM controller is available.
    """
    if is_available():
        os.environ["SLURM"] = "1"
        os.environ.pop("NOSLURM", None)
        return

    raise ValueError("Attempting to use SLURM for Loihi but "
                     "SLURM controller is not available.")
|
||
|
||
def disable() -> None:
    """Switch Loihi execution away from SLURM.

    Sets the environment variable NOSLURM to "1" and removes SLURM if it
    is present.
    """
    os.environ["NOSLURM"] = "1"
    os.environ.pop("SLURM", None)
|
||
|
||
def set_board(board: ty.Optional[str],
              partition: ty.Optional[str] = None) -> None:
    """Select the Loihi board that SLURM jobs should run on.

    The selection is stored in the environment variable BOARD. Passing
    None as board removes any previous selection. Use `get_boards` to see
    all available boards.

    Parameters
    ----------
    board : Optional[str]
        The Loihi board to use. If None, no board will be specified.
    partition : Optional[str], default = None
        The partition that is being used for SLURM jobs. If given, it is
        compared against the partition of the specified board.

    Raises
    ------
    ValueError
        If the board is unknown, if its state contains "down", or if a
        partition is given that differs from the partition of the board.
    """
    if board is None:
        os.environ.pop("BOARD", None)
        return

    info = get_board_info(board)

    unusable = info is None or "down" in info.state
    if unusable:
        raise ValueError(
            f"Attempting to use SLURM for Loihi but board {board} "
            f"is not found or board is down. Run sinfo to check "
            f"available boards.")

    # An empty or missing partition means there is nothing to cross-check.
    mismatch = bool(partition) and partition != info.partition
    if mismatch:
        raise ValueError(
            f"Attempting to use SLURM for Loihi with board {board} "
            f"and partition {partition} but board is not in partition. "
            f"Specify only board or partition.")

    os.environ["BOARD"] = board
|
||
|
||
def set_partition(partition: ty.Optional[str]) -> None:
    """Select the SLURM partition that jobs should run on.

    The selection is stored in the environment variable PARTITION. Passing
    None removes any previous selection.

    Parameters
    ----------
    partition : Optional[str]
        The partition to use. If None, no partition will be specified.

    Raises
    ------
    ValueError
        If the partition is unknown or its state contains "down".
    """
    if partition is None:
        os.environ.pop("PARTITION", None)
        return

    info = get_partition_info(partition)

    unusable = info is None or "down" in info.state
    if unusable:
        raise ValueError(
            f"Attempting to use SLURM for Loihi but partition {partition} "
            f"is not found or is down. Run sinfo to check available "
            f"partitions.")

    os.environ["PARTITION"] = partition
|
||
|
||
def partition() -> str:
    """Return the currently selected SLURM partition.

    Returns
    -------
    str
        The value of the environment variable PARTITION, or "Unspecified"
        if that variable is not set.
    """
    return os.environ.get("PARTITION", "Unspecified")
|
||
|
||
def get_partitions() -> ty.List[PartitionInfo]:
    """Returns the list of available partitions from the SLURM controller
    or an empty list if SLURM is not available or has no partitions.

    The partitions are parsed from the table printed by `sinfo`. The "*"
    suffix with which `sinfo` marks the default partition is removed, so
    that partitions can be looked up by their plain name.

    Returns
    -------
    List[PartitionInfo]
        A list of all available partitions.
    """
    if not is_available():
        return []

    # Drop the table header by slicing: unlike `del lines[0]` this cannot
    # raise if the command unexpectedly yields no output at all.
    rows = try_run_command(["sinfo"])[1:]

    def parse_partition(line: str) -> ty.Optional[PartitionInfo]:
        fields = line.split()

        # The command output ends with a newline, which produces a trailing
        # empty row; skip it and any other row that is too short to index.
        if len(fields) < 5:
            return None

        return PartitionInfo(name=fields[0].rstrip("*"),
                             available=fields[1],
                             timelimit=fields[2],
                             nodes=fields[3],
                             state=fields[4],
                             # Tolerate rows without a node list column.
                             nodelist=fields[5] if len(fields) > 5 else "")

    parsed = (parse_partition(row) for row in rows)

    return [info for info in parsed if info is not None]
|
||
|
||
def get_partition_info(partition_name: str) -> ty.Optional[PartitionInfo]:
    """Look up the SLURM info of a single partition by name.

    Parameters
    ----------
    partition_name : str
        The name of the partition to return.

    Returns
    -------
    Optional[PartitionInfo]
        The first entry of get_partitions() whose name equals
        partition_name, or None if there is no such entry.
    """
    for candidate in get_partitions():
        if candidate.name == partition_name:
            return candidate

    return None
|
||
|
||
@dataclass
class PartitionInfo:
    """Description of one SLURM partition, kept as raw strings.

    get_partitions() fills the attributes, in declaration order, from the
    whitespace-separated fields of one row of the `sinfo` table. Every
    attribute defaults to the empty string.
    """
    name: str = ""  # field 1: name of the partition
    available: str = ""  # field 2
    timelimit: str = ""  # field 3
    nodes: str = ""  # field 4
    state: str = ""  # field 5: checked for "down" by set_partition()
    nodelist: str = ""  # field 6
|
||
|
||
def get_boards() -> ty.List[BoardInfo]:
    """Returns the list of available boards from the SLURM controller
    or an empty list if SLURM is not available or has no boards.

    The boards are parsed from the table printed by `sinfo -N`. The "*"
    suffix with which `sinfo` marks the default partition is removed from
    the partition of each board, so that it can be compared against a
    plain partition name.

    Returns
    -------
    List[BoardInfo]
        A list of all available boards.
    """
    if not is_available():
        return []

    # Drop the table header by slicing: unlike `del lines[0]` this cannot
    # raise if the command unexpectedly yields no output at all.
    rows = try_run_command(["sinfo", "-N"])[1:]

    def parse_board(line: str) -> ty.Optional[BoardInfo]:
        fields = line.split()

        # The command output ends with a newline, which produces a trailing
        # empty row; skip it and any other row that is too short to index.
        if len(fields) < 4:
            return None

        # fields[1] (the second column) is not stored in BoardInfo.
        return BoardInfo(nodename=fields[0],
                         partition=fields[2].rstrip("*"),
                         state=fields[3])

    parsed = (parse_board(row) for row in rows)

    return [info for info in parsed if info is not None]
|
||
|
||
def get_board_info(nodename: str) -> ty.Optional[BoardInfo]:
    """Look up the SLURM info of a single board by its node name.

    Parameters
    ----------
    nodename : str
        The name of the board to return.

    Returns
    -------
    Optional[BoardInfo]
        The first entry of get_boards() whose nodename equals nodename,
        or None if there is no such entry.
    """
    for candidate in get_boards():
        if candidate.nodename == nodename:
            return candidate

    return None
|
||
|
||
@dataclass
class BoardInfo:
    """Description of one SLURM board (node), kept as raw strings.

    get_boards() fills the attributes from the whitespace-separated fields
    of one row of the `sinfo -N` table. Every attribute defaults to the
    empty string.
    """
    nodename: str = ""  # field 1: name of the board
    partition: str = ""  # field 3: partition the board belongs to
    state: str = ""  # field 4: checked for "down" by set_board()
|
||
|
||
def try_run_command(command: ty.List[str],
                    timeout: ty.Optional[float] = 1) -> ty.List[str]:
    """Executes a command, captures the output, and splits it into a list of
    lines (strings). Returns an empty list if executing the command raises
    an exception, i.e., if the command cannot be started, exits with a
    non-zero return code, or does not finish within the timeout.

    Parameters
    ----------
    command : List[str]
        Command and options, for instance 'sinfo -N' becomes ['sinfo', '-N']
    timeout : Optional[float], default = 1
        Number of seconds after which the command is aborted. None disables
        the timeout.

    Returns
    -------
    List[str]
        Output of stdout of the command, separated into a list of lines (str).
        Since the output is split at every newline, output that ends with a
        newline produces an empty string as the last element.
    """
    try:
        process = subprocess.run(command,  # nosec # noqa
                                 capture_output=True,
                                 check=True,
                                 text=True,
                                 timeout=timeout)
    except (subprocess.SubprocessError, OSError):
        # SubprocessError covers non-zero exit codes and timeouts. OSError
        # covers commands that cannot be started at all, e.g. because the
        # executable does not exist (FileNotFoundError).
        return []

    return process.stdout.split("\n")
Oops, something went wrong.