Skip to content

Commit

Permalink
Add --cluster option for multi-cluster system (#62)
Browse files Browse the repository at this point in the history
* Add cluster option

* Add cluster to test

* add test and minor fix pr62

* trim space

* fix: add test for full coverage

* chore: version bump

---------

Co-authored-by: Joel Lafond-Lapalme <[email protected]>
Co-authored-by: Troy Comi <[email protected]>
  • Loading branch information
3 people authored Oct 3, 2024
1 parent 767a06c commit b826d80
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 5 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ directory to check for slurm outputs.
sacct formats and a comma separated list of key/value pairs.
Useful in combination with the 'since' option to query a specific range.
- `--partition`: Limit results to a specific partition.
- `--cluster/-M`: Select a specific cluster (for multi-cluster systems).
- `--node/-n`: Display information for multi-node jobs; requires additional
sacct fields from jobstats.
- `--node-and-gpu/-g`: Display information for multi-node jobs and GPU information;
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "reportseff"
version = "2.7.7"
version = "2.8.0"
description= "Tabular seff output"
authors = ["Troy Comi <[email protected]>"]
license = "MIT"
Expand Down
7 changes: 7 additions & 0 deletions src/reportseff/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@
default="",
help="Only include jobs with the specified partition",
)
@click.option(
"-M",
"--cluster",
default="",
help="Select specific cluster, for multi-cluster system only",
)
@click.option(
"--extra-args",
default="",
Expand Down Expand Up @@ -164,6 +170,7 @@ def get_jobs(args: ReportseffParameters) -> tuple[str, int]:
inquirer.set_until(args.until)

inquirer.set_partition(args.partition)
inquirer.set_cluster(args.cluster)

inquirer.set_extra_args(args.extra_args)

Expand Down
25 changes: 24 additions & 1 deletion src/reportseff/db_inquirer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ def set_partition(self, partition: str) -> None:
partition: partition name
"""

@abstractmethod
def set_cluster(self, cluster: str) -> None:
"""Restrict the queried jobs to the provided cluster.

Abstract hook: concrete inquirers decide how the cluster name is applied
to their queries.

Args:
cluster: cluster name
"""

@abstractmethod
def set_extra_args(self, extra_args: str) -> None:
"""Set extra arguments to be forwarded to sacct.
Expand Down Expand Up @@ -165,6 +173,7 @@ def __init__(self) -> None:
self.until: str | None = None
self.query_all_users: bool = False
self.partition: str | None = None
self.cluster: str | None = None
self.extra_args: str | None = None

def get_valid_formats(self) -> list[str]:
Expand Down Expand Up @@ -215,6 +224,8 @@ def set_sacct_args(self, jobs: list[str]) -> list[str]:
args += [f"--starttime={self.since}"]
if self.partition:
args += [f"--partition={self.partition}"]
if self.cluster:
args += [f"--cluster={self.cluster}"]
if self.until:
args += [f"--endtime={self.until}"]
if self.extra_args:
Expand Down Expand Up @@ -305,6 +316,14 @@ def set_partition(self, partition: str) -> None:
"""
self.partition = partition

def set_cluster(self, cluster: str) -> None:
"""Select the cluster to query in a multi-cluster environment.

Only records the name on the instance; nothing is validated or executed
here. A falsy value (such as the empty-string CLI default) means no
cluster option is added when commands are built later.

Args:
cluster: cluster name
"""
self.cluster = cluster

def set_extra_args(self, extra_args: str) -> None:
"""Set extra arguments to be forwarded to sacct.
Expand Down Expand Up @@ -438,7 +457,11 @@ def get_partition_timelimits(self) -> dict:
Raises:
RuntimeError: if scontrol raises an error
"""
command_args = "scontrol show partition".split()
args = ""
if self.cluster:
args = f"--cluster {self.cluster}"

command_args = f"scontrol {args} show partition".split()
cmd_result = subprocess.run(
args=command_args,
stdout=subprocess.PIPE,
Expand Down
1 change: 1 addition & 0 deletions src/reportseff/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class ReportseffParameters:
slurm_format: str = ""
user: str = ""
partition: str = ""
cluster: str = ""
extra_args: str = ""

def __post_init__(self) -> None:
Expand Down
47 changes: 46 additions & 1 deletion tests/test_db_inquirer.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,12 @@ def test_sacct_set_partition(sacct):
assert sacct.partition == "partition"


def test_sacct_set_cluster(sacct):
"""set_cluster stores the given name on the inquirer's cluster attribute."""
sacct.set_cluster("cluster")
assert sacct.cluster == "cluster"


def test_sacct_get_db_output_partition(sacct, mocker):
"""Subprocess call is affected by partition argument."""
mock_sacct = mocker.MagicMock()
Expand Down Expand Up @@ -680,7 +686,9 @@ def test_partition_timelimit(sacct, mocker):
" AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL\n"
" MaxNodes=UNLIMITED MaxTime=12-00:00:00 MinNodes=0\n"
)
mocker.patch("reportseff.db_inquirer.subprocess.run", return_value=mock_sacct)
mock_run = mocker.patch(
"reportseff.db_inquirer.subprocess.run", return_value=mock_sacct
)

limits = sacct.get_partition_timelimits()
assert limits == {
Expand All @@ -689,6 +697,43 @@ def test_partition_timelimit(sacct, mocker):
"gpu": "12-00:00:00",
}

assert mock_run.call_args.kwargs["args"] == "scontrol show partition".split()


def test_partition_timelimit_with_cluster(sacct, mocker):
"""Partition timelimits are parsed and the cluster is passed to scontrol.

Same scontrol output as the plain timelimit test, but a cluster is set
first so the command line handed to subprocess.run must include it.
"""
# Despite the name, this mock stands in for the completed scontrol call:
# it is what the patched subprocess.run returns below.
mock_sacct = mocker.MagicMock()
mock_sacct.returncode = 0
mock_sacct.stdout = (
"PartitionName=cpu\n"
" AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL\n"
" MaxNodes=UNLIMITED MaxTime=15-00:00:00 MinNodes=0\n"
"\n"
"PartitionName=datascience\n"
" AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL\n"
" MaxNodes=UNLIMITED MaxTime=MAXTIME MinNodes=0\n"
"\n"
"PartitionName=gpu\n"
" AllowGroups=ALL AllowAccounts=ALL AllowQos=ALL\n"
" MaxNodes=UNLIMITED MaxTime=12-00:00:00 MinNodes=0\n"
)
# Keep a handle on the patch so the issued command can be inspected.
mock_run = mocker.patch(
"reportseff.db_inquirer.subprocess.run", return_value=mock_sacct
)

sacct.set_cluster("Testing")
limits = sacct.get_partition_timelimits()
# Parsing is unaffected by the cluster selection.
assert limits == {
"cpu": "15-00:00:00",
"datascience": "MAXTIME",
"gpu": "12-00:00:00",
}

# The cluster option must appear before the "show partition" subcommand.
assert (
mock_run.call_args.kwargs["args"]
== "scontrol --cluster Testing show partition".split()
)


def test_partition_timelimit_issue_11(sacct, mocker):
"""Can process scontrol output from issue 11."""
Expand Down
4 changes: 2 additions & 2 deletions tests/test_reportseff.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def test_simple_user(mocker, console_jobs):

@pytest.mark.usefixtures("_mock_inquirer")
def test_simple_partition(mocker, console_jobs):
"""Can limit outputs by partition."""
"""Can limit outputs by partition and cluster."""
mocker.patch("reportseff.console.which", return_value=True)
runner = CliRunner()
sub_result = mocker.MagicMock()
Expand All @@ -234,7 +234,7 @@ def test_simple_partition(mocker, console_jobs):
mocker.patch("reportseff.db_inquirer.subprocess.run", return_value=sub_result)
result = runner.invoke(
console.main,
"--no-color --partition partition 24418435 25569410 "
"--no-color --partition partition --cluster cluster 24418435 25569410 "
"--format JobID%>,State,Elapsed%>,CPUEff,MemEff".split(),
)

Expand Down

0 comments on commit b826d80

Please sign in to comment.