-
Notifications
You must be signed in to change notification settings - Fork 5.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Job Submission] Add stop API to http & sdk, with better status code + stacktrace #20094
Changes from all commits
27551c6
8e470ce
686ad40
e1a3dcf
5bb8a61
15e6e93
22b524b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
from pathlib import Path | ||
import sys | ||
import tempfile | ||
import requests | ||
|
||
import pytest | ||
|
||
|
@@ -10,9 +11,9 @@ | |
wait_until_server_available) | ||
from ray._private.job_manager import JobStatus | ||
from ray.dashboard.modules.job.sdk import JobSubmissionClient | ||
from ray.dashboard.modules.job.job_head import JOBS_API_ROUTE_SUBMIT | ||
|
||
logger = logging.getLogger(__name__) | ||
logger.setLevel(logging.DEBUG) | ||
|
||
|
||
@pytest.fixture | ||
|
@@ -31,6 +32,16 @@ def _check_job_succeeded(client: JobSubmissionClient, job_id: str) -> bool: | |
return status == JobStatus.SUCCEEDED | ||
|
||
|
||
def _check_job_failed(client: JobSubmissionClient, job_id: str) -> bool: | ||
status = client.get_job_status(job_id) | ||
return status == JobStatus.FAILED | ||
|
||
|
||
def _check_job_stopped(client: JobSubmissionClient, job_id: str) -> bool: | ||
status = client.get_job_status(job_id) | ||
return status == JobStatus.STOPPED | ||
|
||
|
||
@pytest.fixture( | ||
scope="function", | ||
params=["no_working_dir", "local_working_dir", "s3_working_dir"]) | ||
|
@@ -98,6 +109,96 @@ def test_submit_job(job_sdk_client, working_dir_option): | |
assert stderr == working_dir_option["expected_stderr"] | ||
|
||
|
||
def test_http_bad_request(job_sdk_client): | ||
""" | ||
Send bad requests to job http server and ensure right return code and | ||
error message is returned via http. | ||
""" | ||
client = job_sdk_client | ||
|
||
# 400 - HTTPBadRequest | ||
with pytest.raises(requests.exceptions.HTTPError) as e: | ||
_ = client._do_request( | ||
"POST", | ||
JOBS_API_ROUTE_SUBMIT, | ||
json_data={"key": "baaaad request"}, | ||
) | ||
|
||
ex_message = str(e.value) | ||
assert "400 Client Error" in ex_message | ||
assert "TypeError: __init__() got an unexpected keyword argument" in ex_message # noqa: E501 | ||
Comment on lines
+127
to
+129
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can use the |
||
|
||
# 405 - HTTPMethodNotAllowed | ||
with pytest.raises(requests.exceptions.HTTPError) as e: | ||
_ = client._do_request( | ||
"GET", | ||
JOBS_API_ROUTE_SUBMIT, | ||
json_data={"key": "baaaad request"}, | ||
) | ||
ex_message = str(e.value) | ||
assert "405 Client Error: Method Not Allowed" in ex_message | ||
|
||
# 500 - HTTPInternalServerError | ||
with pytest.raises(requests.exceptions.HTTPError) as e: | ||
_ = client.submit_job( | ||
entrypoint="echo hello", | ||
runtime_env={"working_dir": "s3://does_not_exist"}) | ||
ex_message = str(e.value) | ||
assert "500 Server Error" in ex_message | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be a 400, not a 500 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (500 means something went wrong, like an internal assertion failed or unexpected error occurred) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i wrapped everything within |
||
assert "Only .zip files supported for S3 URIs" in ex_message | ||
|
||
|
||
def test_submit_job_with_exception_in_driver(job_sdk_client): | ||
""" | ||
Submit a job that's expected to throw exception while executing. | ||
""" | ||
client = job_sdk_client | ||
|
||
with tempfile.TemporaryDirectory() as tmp_dir: | ||
path = Path(tmp_dir) | ||
driver_script = """ | ||
print('Hello !') | ||
raise RuntimeError('Intentionally failed.') | ||
""" | ||
test_script_file = path / "test_script.py" | ||
with open(test_script_file, "w+") as file: | ||
file.write(driver_script) | ||
|
||
job_id = client.submit_job( | ||
entrypoint="python test_script.py", | ||
runtime_env={"working_dir": tmp_dir}) | ||
|
||
wait_for_condition(_check_job_failed, client=client, job_id=job_id) | ||
stdout, stderr = client.get_job_logs(job_id) | ||
assert stdout == "Hello !" | ||
assert "RuntimeError: Intentionally failed." in stderr | ||
|
||
|
||
def test_stop_long_running_job(job_sdk_client): | ||
""" | ||
Submit a job that runs for a while and stop it in the middle. | ||
""" | ||
client = job_sdk_client | ||
|
||
with tempfile.TemporaryDirectory() as tmp_dir: | ||
path = Path(tmp_dir) | ||
driver_script = """ | ||
print('Hello !') | ||
import time | ||
time.sleep(300) # This should never finish | ||
raise RuntimeError('Intentionally failed.') | ||
""" | ||
test_script_file = path / "test_script.py" | ||
with open(test_script_file, "w+") as file: | ||
file.write(driver_script) | ||
|
||
job_id = client.submit_job( | ||
entrypoint="python test_script.py", | ||
runtime_env={"working_dir": tmp_dir}) | ||
assert client.stop_job(job_id) is True | ||
wait_for_condition(_check_job_stopped, client=client, job_id=job_id) | ||
|
||
|
||
def test_job_metadata(job_sdk_client): | ||
client = job_sdk_client | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a little bit ugly that we have to duplicate this code everywhere. One alternative would be to do this with a context manager:
No need to do it in this PR
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ahh nice this is definitely better than what we have here. i found it a bit weird towards the end as well but going too far to bother to change it back lol
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I give that a try and i think current dashboard modules don't really respect exceptions thrown from our own context manager and we can only do it by returning a response but set exception code. So i will leave it as is for now :/