Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue with --processes: Stop parent process from trying to kill children too fast #2478

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions locust/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import errno
import logging
import os
import signal
Expand Down Expand Up @@ -229,13 +230,32 @@ def sigint_handler(_signal, _frame):

def kill_workers(children):
exit_code = 0
logging.debug("Sending SIGINT to children")
start_time = time.time()
# give children some time to finish up (in case they had an error parsing arguments etc)
for child_pid in children[:]:
while time.time() < start_time + 3:
try:
_, child_status = os.waitpid(child_pid, os.WNOHANG)
children.remove(child_pid)
try:
if sys.version_info > (3, 8):
child_exit_code = os.waitstatus_to_exitcode(child_status)
exit_code = max(exit_code, child_exit_code)
except AttributeError:
pass # dammit python 3.8...
except OSError as e:
if e.errno == errno.EINTR:
time.sleep(0.1)
else:
logging.error(traceback.format_exc())
else:
break
for child_pid in children:
try:
logging.debug(f"Sending SIGINT to child with pid {child_pid}")
os.kill(child_pid, signal.SIGINT)
except ProcessLookupError:
pass # never mind, process was already dead
logging.debug("waiting for children to terminate")
for child_pid in children:
_, child_status = os.waitpid(child_pid, 0)
try:
Expand All @@ -245,7 +265,10 @@ def kill_workers(children):
except AttributeError:
pass # dammit python 3.8...
if exit_code > 1:
logging.error(f"bad response code from worker children: {exit_code}")
logging.error(f"Bad response code from worker children: {exit_code}")
# Ensure the master doesn't finish until output from the workers has arrived,
# otherwise the terminal output might look weird.
time.sleep(0.1)

atexit.register(kill_workers, children)

Expand Down Expand Up @@ -398,6 +421,8 @@ def kill_workers(children):
"Starting web interface at %s://0.0.0.0:%s (accepting connections from all network interfaces)"
% (protocol, options.web_port)
)
if options.web_auth:
logging.info("BasicAuth support is deprecated, it will be removed in a future release.")
web_ui = environment.create_web_ui(
host=web_host,
port=options.web_port,
Expand Down
23 changes: 23 additions & 0 deletions locust/test/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1925,3 +1925,26 @@ def my_task(self):

self.assertNotIn("Traceback", worker_stderr)
self.assertIn("Didn't get heartbeat from master in over ", worker_stderr)

def test_processes_error_doesnt_blow_up_completely(self):
    """Run locust with ``--processes 4`` and a user class name that does not exist.

    Asserts that stderr contains the "Unknown User(s)" error exactly five
    times and that no traceback is printed.
    """
    expected_error = "Unknown User(s): UserThatDoesntExist"
    with mock_locustfile() as locustfile:
        command = [
            "locust",
            "-f",
            locustfile.file_path,
            "--processes",
            "4",
            "-L",
            "DEBUG",
            "UserThatDoesntExist",
        ]
        process = subprocess.Popen(command, stdout=PIPE, stderr=PIPE, text=True)
        # Only stderr is inspected; stdout is captured and discarded.
        _, stderr = process.communicate()
        self.assertIn(expected_error, stderr)
        # the error message should repeat 4 times for the workers and once for the master
        occurrences = stderr.count(expected_error)
        self.assertEqual(occurrences, 5)
        self.assertNotIn("Traceback", stderr)
Loading