-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Builder: additional workflow steps #351
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,6 +60,7 @@ def do_build(args: CensusBuildArgs, skip_completed_steps: bool = False) -> int: | |
do_create_reports, | ||
do_data_copy, | ||
do_the_release, | ||
do_report_copy, | ||
do_old_release_cleanup, | ||
] | ||
try: | ||
|
@@ -137,7 +138,7 @@ def do_validate_soma(args: CensusBuildArgs) -> bool: | |
def do_create_reports(args: CensusBuildArgs) -> bool: | ||
from .census_summary import display_diff, display_summary | ||
|
||
reports_dir = args.working_dir / "reports" | ||
reports_dir = args.working_dir / args.config.reports_dir | ||
reports_dir.mkdir(parents=True, exist_ok=True) | ||
|
||
logging.info("Creating summary report") | ||
|
@@ -152,7 +153,7 @@ def do_create_reports(args: CensusBuildArgs) -> bool: | |
|
||
|
||
def do_data_copy(args: CensusBuildArgs) -> bool: | ||
"""Copy data to S3""" | ||
"""Copy data to S3, in preparation for a release""" | ||
from .data_copy import sync_to_S3 | ||
|
||
sync_to_S3( | ||
|
@@ -178,7 +179,21 @@ def do_the_release(args: CensusBuildArgs) -> bool: | |
"s3_region": "us-west-2", | ||
}, | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suppose this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pushing all of the data layout assumptions down to the module feels like it pollutes the abstraction. I'll add some tests for the main.do_* methods instead. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added tests for all of the new workflow steps, called from the top-level |
||
make_a_release(args.config.cellxgene_census_S3_path, args.build_tag, release, make_latest=True) | ||
make_a_release( | ||
args.config.cellxgene_census_S3_path, args.build_tag, release, make_latest=True, dryrun=args.config.dryrun | ||
) | ||
return True | ||
|
||
|
||
def do_report_copy(args: CensusBuildArgs) -> bool: | ||
"""Copy build summary reports to S3 for posterity.""" | ||
from .data_copy import sync_to_S3 | ||
|
||
sync_to_S3( | ||
args.working_dir / args.config.reports_dir, | ||
urlcat(args.config.logs_S3_path, args.build_tag, args.config.reports_dir), | ||
dryrun=args.config.dryrun, | ||
) | ||
return True | ||
|
||
|
||
|
@@ -199,8 +214,8 @@ def do_log_copy(args: CensusBuildArgs) -> bool: | |
from .data_copy import sync_to_S3 | ||
|
||
sync_to_S3( | ||
args.working_dir / "logs", | ||
urlcat(args.config.logs_S3_path, args.build_tag), | ||
args.working_dir / args.config.log_dir, | ||
urlcat(args.config.logs_S3_path, args.build_tag, args.config.log_dir), | ||
dryrun=args.config.dryrun, | ||
) | ||
return True | ||
|
@@ -218,4 +233,5 @@ def create_args_parser() -> argparse.ArgumentParser: | |
return parser | ||
|
||
|
||
sys.exit(main()) | ||
if __name__ == "__main__": | ||
sys.exit(main()) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,9 +25,10 @@ | |
"verbose": 1, | ||
"log_dir": "logs", | ||
"log_file": "build.log", | ||
"reports_dir": "reports", | ||
"consolidate": True, | ||
"disable_dirty_git_check": True, | ||
"dryrun": False, # if True, will disable copy of data/logs/etc to S3 buckets | ||
"dryrun": False, # if True, will disable copy of data/logs/reports/release.json to S3 buckets. Will NOT disable local build, etc. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
# | ||
# Paths and census version name determined by spec. | ||
"cellxgene_census_S3_path": "s3://cellxgene-data-public/cell-census", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
import pathlib | ||
from typing import Callable | ||
from unittest import mock | ||
|
||
import pytest | ||
from cellxgene_census_builder.__main__ import do_data_copy, do_log_copy, do_report_copy, do_the_release | ||
from cellxgene_census_builder.build_state import CensusBuildArgs, CensusBuildConfig | ||
from cellxgene_census_builder.release_manifest import CensusVersionDescription | ||
|
||
TEST_BUCKET_PATH = "s3://bucket/path" | ||
|
||
|
||
@pytest.mark.parametrize("dryrun", [True, False]) | ||
@pytest.mark.parametrize("build_tag", ["2020-10-20", "1999-12-31"]) | ||
def test_do_data_copy(tmp_path: pathlib.Path, build_tag: str, dryrun: bool) -> None: | ||
args = CensusBuildArgs( | ||
working_dir=tmp_path, | ||
config=CensusBuildConfig( | ||
build_tag=build_tag, | ||
dryrun=dryrun, | ||
cellxgene_census_S3_path=TEST_BUCKET_PATH, | ||
), | ||
) | ||
from_path = tmp_path / build_tag | ||
from_path.mkdir(exist_ok=True, parents=True) | ||
to_path = f"{TEST_BUCKET_PATH}/{build_tag}" | ||
|
||
with mock.patch("subprocess.Popen") as popen_patch: | ||
popen_patch.return_value.__enter__.return_value.returncode = 0 | ||
popen_patch.return_value.__enter__.return_value.stdout = None | ||
do_data_copy(args) | ||
|
||
assert popen_patch.call_count == 1 | ||
expect = ["aws", "s3", "sync", from_path.as_posix(), to_path, "--no-progress"] | ||
if dryrun: | ||
expect += ["--dryrun"] | ||
assert popen_patch.call_args[0][0] == expect | ||
|
||
|
||
@pytest.mark.parametrize("step_func,dir_name", [(do_report_copy, "reports"), (do_log_copy, "logs")]) | ||
@pytest.mark.parametrize("dryrun", [True, False]) | ||
@pytest.mark.parametrize("build_tag", ["2020-10-20", "1999-12-31"]) | ||
def test_do_other_copy( | ||
tmp_path: pathlib.Path, build_tag: str, dryrun: bool, step_func: Callable[[CensusBuildArgs], None], dir_name: str | ||
) -> None: | ||
args = CensusBuildArgs( | ||
working_dir=tmp_path, | ||
config=CensusBuildConfig( | ||
build_tag=build_tag, | ||
dryrun=dryrun, | ||
logs_S3_path=TEST_BUCKET_PATH, | ||
), | ||
) | ||
from_path = tmp_path / dir_name | ||
from_path.mkdir(exist_ok=True, parents=True) | ||
to_path = f"{TEST_BUCKET_PATH}/{build_tag}/{dir_name}" | ||
|
||
with mock.patch("subprocess.Popen") as popen_patch: | ||
popen_patch.return_value.__enter__.return_value.returncode = 0 | ||
popen_patch.return_value.__enter__.return_value.stdout = None | ||
step_func(args) | ||
|
||
assert popen_patch.call_count == 1 | ||
expect = ["aws", "s3", "sync", from_path.as_posix(), to_path, "--no-progress"] | ||
if dryrun: | ||
expect += ["--dryrun"] | ||
assert popen_patch.call_args[0][0] == expect | ||
|
||
|
||
@pytest.mark.parametrize("dryrun", [True, False]) | ||
def test_do_the_release(tmp_path: pathlib.Path, dryrun: bool) -> None: | ||
build_tag = "2020-02-03" | ||
args = CensusBuildArgs( | ||
working_dir=tmp_path, | ||
config=CensusBuildConfig( | ||
build_tag=build_tag, | ||
cellxgene_census_S3_path=TEST_BUCKET_PATH, | ||
dryrun=dryrun, | ||
), | ||
) | ||
|
||
with mock.patch("cellxgene_census_builder.release_manifest.make_a_release") as make_a_release_patch: | ||
do_the_release(args) | ||
|
||
expected_rls_description: CensusVersionDescription = { | ||
"release_build": build_tag, | ||
"release_date": None, | ||
"soma": {"uri": f"{TEST_BUCKET_PATH}/{build_tag}/soma/", "s3_region": "us-west-2"}, | ||
"h5ads": {"uri": f"{TEST_BUCKET_PATH}/{build_tag}/h5ads/", "s3_region": "us-west-2"}, | ||
} | ||
assert make_a_release_patch.call_count == 1 | ||
assert make_a_release_patch.call_args == ( | ||
(TEST_BUCKET_PATH, build_tag, expected_rls_description), | ||
{"make_latest": True, "dryrun": dryrun}, | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should also sync reports, and I don't think that's currently handled.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was not in the build process the last time I checked, but it is easily added.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It wasn't; I just realized the omission today while doing the release follow-up tasks. Thank you!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added.