Skip to content

Commit

Permalink
Refactor methods without arguments into functions (lava-nc#685)
Browse files Browse the repository at this point in the history
* Refactoring methods without arguments into functions.

Signed-off-by: Mathis Richter <[email protected]>

* Reverting unintentional change to tutorial.

Signed-off-by: Mathis Richter <[email protected]>

* Added new set of functions by @tim-shea, refactored.

Signed-off-by: Mathis Richter <[email protected]>

* Fixed linter error: unused import.

Signed-off-by: Mathis Richter <[email protected]>

* Fixed linter and security errors.

Signed-off-by: Mathis Richter <[email protected]>

* Fixed linter errors.

Signed-off-by: Mathis Richter <[email protected]>

* Trying to make nosec work.

Signed-off-by: Mathis Richter <[email protected]>

* Moved nosec back; does not seem to make a difference.

Signed-off-by: Mathis Richter <[email protected]>

* Unit tests for some of the new functions in the slurm module.

Signed-off-by: Mathis Richter <[email protected]>

* Full unit tests for SLURM module.

Signed-off-by: Mathis Richter <[email protected]>

* Full unit tests for lava_loihi module.

Signed-off-by: Mathis Richter <[email protected]>

* Deactivated linter error.

Signed-off-by: Mathis Richter <[email protected]>

* Trying different nosec variants.

Signed-off-by: Mathis Richter <[email protected]>

* Trying different nosec variants.

Signed-off-by: Mathis Richter <[email protected]>

* Trying different nosec variants.

Signed-off-by: Mathis Richter <[email protected]>

* Redesigned the API for lava.utils.{slurm,loihi}.

Signed-off-by: Mathis Richter <[email protected]>

* Another attempt at fixing linting.

Signed-off-by: Mathis Richter <[email protected]>

* Trying with noqa

Signed-off-by: Mathis Richter <[email protected]>

* Fixed module error in test patch

Signed-off-by: Mathis Richter <[email protected]>

* Renamed is_lava_loihi_installed to is_installed.

Signed-off-by: Mathis Richter <[email protected]>

* Fixed code duplication of patch decorators.

Signed-off-by: Mathis Richter <[email protected]>

* Added doc string to patch decorator.

Signed-off-by: Mathis Richter <[email protected]>

* Finished loihi and slurm docstrings.

---------

Signed-off-by: Mathis Richter <[email protected]>
Co-authored-by: Timothy Shea <[email protected]>
Co-authored-by: Tim Shea <[email protected]>
  • Loading branch information
3 people committed Jul 18, 2023
1 parent bcbd465 commit baed791
Show file tree
Hide file tree
Showing 8 changed files with 1,048 additions and 417 deletions.
123 changes: 123 additions & 0 deletions src/lava/utils/loihi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright (C) 2022 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
# See: https://spdx.org/licenses/

import os
import typing as ty
import importlib.util
import enum

from lava.utils import slurm


# Name of the host connection most recently configured through this module:
# "SLURM" once use_slurm_host() has completed, "ETHERNET" once
# use_ethernet_host() has completed, and None before either has been called.
host: ty.Optional[str] = None


class ChipGeneration(enum.Enum):
    """Enumerates the valid Loihi chip generations.

    The string value of a member is what ``use_slurm_host`` and
    ``use_ethernet_host`` write to the ``LOIHI_GEN`` environment variable.
    """
    N2A2 = "N2A2"
    N3B2 = "N3B2"
    N3B3 = "N3B3"  # Most Loihi 2 systems available to the INRC are N3B3.
    N3C1 = "N3C1"  # Some Loihi 2 systems provided to INRC members are N3C1.
    N3D1 = "N3D1"


def use_slurm_host(
        partition: ty.Optional[str] = None,
        board: ty.Optional[str] = None,
        loihi_gen: ty.Optional[ChipGeneration] = ChipGeneration.N3B3
) -> None:
    """Use SLURM to run Lava models on Loihi 2.

    This function should be called prior to running models on the Intel
    neuromorphic research cloud, or if you have setup a SLURM scheduler on
    your local infrastructure.

    On success the environment variables ``SLURM`` and ``LOIHI_GEN`` (and
    ``BOARD`` / ``PARTITION`` when given) are configured and the module-level
    ``host`` is set to ``"SLURM"``.

    Parameters
    ----------
    partition : Optional[str], default = None
        The SLURM partition from which a suitable node should be selected. If
        partition is specified, board should be None.
    board : Optional[str], default = None
        The SLURM board (node name) on which any Lava process should run. If
        board is specified, partition should be None.
    loihi_gen : Optional[ChipGeneration], default = ChipGeneration.N3B3
        The Loihi chip generation needed for the Lava processes. Passing
        None selects the default generation.

    Raises
    ------
    ImportError
        If Lava-Loihi is not installed.
    ValueError
        Propagated from the slurm module if the SLURM controller is not
        available, or if the requested board or partition is not found, is
        down, or the board is not part of the requested partition.
    """
    global host

    if not is_installed():
        raise ImportError("Attempting to use SLURM for Loihi2 but "
                          "Lava-Loihi is not installed.")

    # The parameter is annotated Optional, so an explicit None must not fail
    # on `.value` below; fall back to the signature's default generation.
    if loihi_gen is None:
        loihi_gen = ChipGeneration.N3B3

    slurm.enable()

    os.environ["LOIHI_GEN"] = loihi_gen.value

    # set_board also receives the partition so that it can verify that the
    # board actually belongs to the requested partition.
    slurm.set_board(board, partition)
    slurm.set_partition(partition)

    host = "SLURM"


def use_ethernet_host(
        host_address: str,
        host_binary_path: ty.Optional[str] = "nxcore/bin/nx_driver_server",
        loihi_gen: ty.Optional[ChipGeneration] = ChipGeneration.N3B3
) -> None:
    """Set environment to connect directly to an Oheo Gulch host on the
    network.

    This should be used to run on Kapoho Point and Kapoho Point SC systems
    when not using SLURM. Call slurm.is_available() to determine whether
    SLURM is available.

    The function pings the host address once to ensure that the host is
    running and accessible. On success it disables SLURM, sets the
    environment variables ``NXSDKHOST``, ``HOST_BINARY`` and ``LOIHI_GEN``,
    and sets the module-level ``host`` to ``"ETHERNET"``.

    Parameters
    ----------
    host_address : str
        The IP address of the host system to use.
    host_binary_path : Optional[str], default = "nxcore/bin/nx_driver_server"
        The path to the nxcore binary on the host. Passing None selects the
        default path.
    loihi_gen : Optional[ChipGeneration], default = ChipGeneration.N3B3
        The generation of the Loihi board to compile. Passing None selects
        the default generation. The original documentation lists N3B2, N3B3,
        and N3C1 as supported values.
        NOTE(review): ChipGeneration defines further members - confirm
        whether they are supported here.

    Raises
    ------
    ImportError
        If Lava-Loihi is not installed.
    ValueError
        If pinging ``host_address`` fails.
    """
    global host

    if not is_installed():
        raise ImportError("Attempting to use Loihi2 but Lava-Loihi is "
                          "not installed.")

    # Both parameters are annotated Optional; an explicit None would fail
    # further down (os.environ only accepts strings, None has no `.value`),
    # so map None to the defaults from the signature.
    if host_binary_path is None:
        host_binary_path = "nxcore/bin/nx_driver_server"
    if loihi_gen is None:
        loihi_gen = ChipGeneration.N3B3

    # Pass the count option and its value as separate argv entries. A single
    # "-c 1" token reaches ping as the option value " 1" (with a leading
    # space), which stricter ping implementations reject.
    if not slurm.try_run_command(["ping", "-c", "1", host_address]):
        raise ValueError(f"Attempting to use ethernet host for Loihi2 "
                         f"but `ping {host_address}` failed.")

    slurm.disable()

    os.environ["NXSDKHOST"] = host_address
    os.environ["HOST_BINARY"] = host_binary_path
    os.environ["LOIHI_GEN"] = loihi_gen.value

    host = "ETHERNET"


def is_installed(module_name: str = "lava.utils.loihi2_profiler") -> bool:
    """Returns whether the Lava extension for Loihi is installed.

    Parameters
    ----------
    module_name : str, default = "lava.utils.loihi2_profiler"
        Name of the module whose presence indicates that the extension is
        installed.

    Returns
    -------
    bool
        True iff a module spec can be found for ``module_name`` in this
        Python environment.
    """
    try:
        spec = importlib.util.find_spec(module_name)
    except ImportError:
        # For a dotted name, find_spec first imports the parent package and
        # raises ModuleNotFoundError (an ImportError) when that parent does
        # not exist. A missing parent means the module is not installed.
        return False

    return spec is not None
235 changes: 235 additions & 0 deletions src/lava/utils/slurm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
# Copyright (C) 2022 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
# See: https://spdx.org/licenses/

from __future__ import annotations
import os
import subprocess # nosec - commands are trusted
import typing as ty
from dataclasses import dataclass


def is_available() -> bool:
    """Report whether a SLURM controller can be reached from this system.

    Returns
    -------
    bool
        True iff running ``sinfo`` through ``try_run_command`` yields a
        non-empty list of output lines.
    """
    sinfo_output = try_run_command(["sinfo"])
    return bool(sinfo_output)


def enable() -> None:
    """Switch the environment over to using SLURM for Loihi.

    Sets the environment variable ``SLURM`` to ``"1"`` and removes
    ``NOSLURM`` if it is present.

    Raises
    ------
    ValueError
        If no SLURM controller is available; the environment is left
        untouched in that case.
    """
    controller_found = is_available()
    if not controller_found:
        raise ValueError("Attempting to use SLURM for Loihi but "
                         "SLURM controller is not available.")

    # The two variables are independent of each other, so order is free.
    os.environ.pop("NOSLURM", None)
    os.environ["SLURM"] = "1"


def disable() -> None:
    """Switch the environment away from using SLURM for Loihi.

    Sets the environment variable ``NOSLURM`` to ``"1"`` and removes
    ``SLURM`` if it is present.
    """
    os.environ["NOSLURM"] = "1"

    if "SLURM" in os.environ:
        del os.environ["SLURM"]


def set_board(board: ty.Optional[str],
              partition: ty.Optional[str] = None) -> None:
    """Select the Loihi board that SLURM jobs should run on.

    The choice is stored in the environment variable ``BOARD``. Passing
    ``board=None`` clears any previously selected board. Use `get_boards`
    to see all available boards.

    Parameters
    ----------
    board : Optional[str]
        The Loihi board to use. If None, no board will be specified.
    partition : Optional[str], default = None
        The partition that is being used for SLURM jobs. If not None (and
        not empty), it is compared against the partition of the specified
        board.

    Raises
    ------
    ValueError
        If the board is not known to SLURM, if its state contains "down",
        or if ``partition`` is given and differs from the board's partition.
    """
    if board is not None:
        info = get_board_info(board)

        if info is None or "down" in info.state:
            raise ValueError(
                f"Attempting to use SLURM for Loihi but board {board} "
                f"is not found or board is down. Run sinfo to check "
                f"available boards.")

        # Only a non-empty partition is checked against the board.
        in_other_partition = bool(partition) and info.partition != partition
        if in_other_partition:
            raise ValueError(
                f"Attempting to use SLURM for Loihi with board {board} "
                f"and partition {partition} but board is not in partition. "
                f"Specify only board or partition.")

        os.environ["BOARD"] = board
        return

    # No board requested: forget any board selected earlier.
    os.environ.pop("BOARD", None)


def set_partition(partition: ty.Optional[str]) -> None:
    """Select the SLURM partition that jobs should run on.

    The choice is stored in the environment variable ``PARTITION``. Passing
    ``partition=None`` clears any previously selected partition.

    Parameters
    ----------
    partition : Optional[str]
        The partition to use. If None, no partition will be specified.

    Raises
    ------
    ValueError
        If the partition is not known to SLURM or its state contains "down".
    """
    if partition is not None:
        info = get_partition_info(partition)

        usable = info is not None and "down" not in info.state
        if not usable:
            raise ValueError(
                f"Attempting to use SLURM for Loihi but partition {partition} "
                f"is not found or is down. Run sinfo to check available "
                f"partitions.")

        os.environ["PARTITION"] = partition
        return

    # No partition requested: forget any partition selected earlier.
    os.environ.pop("PARTITION", None)


def partition() -> str:
    """Return the currently selected SLURM partition.

    Returns
    -------
    str
        The value of the environment variable ``PARTITION``, or the string
        "Unspecified" if that variable is not set.
    """
    return os.environ.get("PARTITION", "Unspecified")


def get_partitions() -> ty.List[PartitionInfo]:
    """Returns the list of available partitions from the SLURM controller
    or an empty list if SLURM is not available or has no partitions.

    Each non-blank row of the ``sinfo`` table (after the header) is split on
    whitespace and its first six columns are stored in a PartitionInfo.

    Returns
    -------
    List[PartitionInfo]
        A list of all available partitions.
    """
    if not is_available():
        return []

    # Drop the table header by slicing rather than `del lines[0]`, so an
    # empty result (e.g. this second sinfo call timing out) yields [] instead
    # of raising IndexError.
    rows = try_run_command(["sinfo"])[1:]

    def parse_partition(line: str) -> PartitionInfo:
        fields = line.split()

        return PartitionInfo(name=fields[0],
                             available=fields[1],
                             timelimit=fields[2],
                             nodes=fields[3],
                             state=fields[4],
                             nodelist=fields[5])

    # try_run_command splits stdout on "\n", so output that ends with a
    # newline produces a trailing empty entry; blank rows have no fields and
    # must be skipped.
    return [parse_partition(row) for row in rows if row.strip()]


def get_partition_info(partition_name: str) -> ty.Optional[PartitionInfo]:
    """Look up the SLURM info of a single partition by name.

    Parameters
    ----------
    partition_name : str
        The name of the partition to return.

    Returns
    -------
    Optional[PartitionInfo]
        The first entry returned by ``get_partitions`` whose name equals
        ``partition_name``, or None if there is no such entry.
    """
    for candidate in get_partitions():
        if candidate.name == partition_name:
            return candidate

    return None


@dataclass
class PartitionInfo:
    """One row of the partition table printed by ``sinfo``.

    Instances are created by ``get_partitions``, which fills the fields from
    the whitespace-separated columns of a row in the order listed below. All
    values are kept as the raw strings found in the output.
    """
    # Column 0; get_partition_info matches partitions on this field.
    name: str = ""
    # Column 1 (presumably sinfo's AVAIL column - confirm).
    available: str = ""
    # Column 2 (presumably sinfo's TIMELIMIT column - confirm).
    timelimit: str = ""
    # Column 3 (presumably sinfo's NODES column - confirm).
    nodes: str = ""
    # Column 4; set_partition rejects a partition if this contains "down".
    state: str = ""
    # Column 5 (presumably sinfo's NODELIST column - confirm).
    nodelist: str = ""


def get_boards() -> ty.List[BoardInfo]:
    """Returns the list of available boards from the SLURM controller
    or an empty list if SLURM is not available or has no boards.

    Each non-blank row of the ``sinfo -N`` table (after the header) is split
    on whitespace; columns 0, 2 and 3 are stored in a BoardInfo as node name,
    partition and state.

    Returns
    -------
    List[BoardInfo]
        A list of all available boards.
    """
    if not is_available():
        return []

    # Drop the table header by slicing rather than `del lines[0]`, so an
    # empty result (e.g. this second sinfo call timing out) yields [] instead
    # of raising IndexError.
    rows = try_run_command(["sinfo", "-N"])[1:]

    def parse_board(line: str) -> BoardInfo:
        fields = line.split()

        return BoardInfo(nodename=fields[0],
                         partition=fields[2],
                         state=fields[3])

    # try_run_command splits stdout on "\n", so output that ends with a
    # newline produces a trailing empty entry; blank rows have no fields and
    # must be skipped.
    return [parse_board(row) for row in rows if row.strip()]


def get_board_info(nodename: str) -> ty.Optional[BoardInfo]:
    """Look up the SLURM info of a single board by node name.

    Parameters
    ----------
    nodename : str
        The name of the board to return.

    Returns
    -------
    Optional[BoardInfo]
        The first entry returned by ``get_boards`` whose node name equals
        ``nodename``, or None if there is no such entry.
    """
    for candidate in get_boards():
        if candidate.nodename == nodename:
            return candidate

    return None


@dataclass
class BoardInfo:
    """One row of the node table printed by ``sinfo -N``.

    Instances are created by ``get_boards`` from the whitespace-separated
    columns of a row. All values are kept as the raw strings found in the
    output.
    """
    # Column 0; get_board_info matches boards on this field.
    nodename: str = ""
    # Column 2; set_board compares this against the requested partition.
    partition: str = ""
    # Column 3; set_board rejects a board if this contains "down".
    state: str = ""


def try_run_command(command: ty.List[str]) -> ty.List[str]:
    """Executes a command, captures the output, and splits it into a list of
    lines (strings). Returns an empty list if executing the command raises
    an exception.

    The command must finish within one second and exit with status 0;
    otherwise it counts as failed. A command that cannot be started at all
    (for instance because the executable is not installed) also counts as
    failed.

    Parameters
    ----------
    command : List[str]
        Command and options, for instance 'sinfo -N' becomes ['sinfo', '-N']

    Returns
    -------
    List[str]
        Output of stdout of the command, separated into a list of lines (str).
        Because stdout is split on newline characters, output that ends with
        a newline yields a trailing empty string. An empty list is returned
        if the command failed.
    """
    try:
        kwargs = dict(capture_output=True, check=True, timeout=1)
        process = subprocess.run(command, text=True, **kwargs)  # nosec # noqa
    except (subprocess.SubprocessError, OSError):
        # SubprocessError covers a non-zero exit status and the timeout.
        # OSError covers failures to launch the process, e.g. the
        # FileNotFoundError raised when the executable does not exist; it is
        # not a SubprocessError and would otherwise escape.
        return []

    return process.stdout.split("\n")
Loading

0 comments on commit baed791

Please sign in to comment.