Skip to content

Commit

Permalink
Fix issue with --processes: Stop parent process from trying to kill children too fast (e.g. when the user specified invalid parameters or something)
Browse files Browse the repository at this point in the history
  • Loading branch information
cyberw committed Nov 21, 2023
1 parent c03228f commit 387bdbd
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 3 deletions.
31 changes: 28 additions & 3 deletions locust/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import errno
import logging
import os
import signal
Expand Down Expand Up @@ -229,13 +230,32 @@ def sigint_handler(_signal, _frame):

def kill_workers(children):
exit_code = 0
logging.debug("Sending SIGINT to children")
start_time = time.time()
# give children some time to finish up (in case they had an error parsing arguments etc)
for child_pid in children[:]:
while time.time() < start_time + 3:
try:
_, child_status = os.waitpid(child_pid, os.WNOHANG)
children.remove(child_pid)
try:
if sys.version_info > (3, 8):
child_exit_code = os.waitstatus_to_exitcode(child_status)
exit_code = max(exit_code, child_exit_code)
except AttributeError:
pass # dammit python 3.8...
except OSError as e:
if e.errno == errno.EINTR:
time.sleep(0.1)
else:
logging.error(traceback.format_exc())
else:
break
for child_pid in children:
try:
logging.debug(f"Sending SIGINT to child with pid {child_pid}")
os.kill(child_pid, signal.SIGINT)
except ProcessLookupError:
pass # never mind, process was already dead
logging.debug("waiting for children to terminate")
for child_pid in children:
_, child_status = os.waitpid(child_pid, 0)
try:
Expand All @@ -245,7 +265,10 @@ def kill_workers(children):
except AttributeError:
pass # dammit python 3.8...
if exit_code > 1:
logging.error(f"bad response code from worker children: {exit_code}")
logging.error(f"Bad response code from worker children: {exit_code}")
# ensure master doesn't finish until output from workers has arrived,
# otherwise the terminal might look weird.
time.sleep(0.1)

atexit.register(kill_workers, children)

Expand Down Expand Up @@ -398,6 +421,8 @@ def kill_workers(children):
"Starting web interface at %s://0.0.0.0:%s (accepting connections from all network interfaces)"
% (protocol, options.web_port)
)
if options.web_auth:
logging.info("BasicAuth support is deprecated, it will be removed in a future release.")
web_ui = environment.create_web_ui(
host=web_host,
port=options.web_port,
Expand Down
23 changes: 23 additions & 0 deletions locust/test/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1925,3 +1925,26 @@ def my_task(self):

self.assertNotIn("Traceback", worker_stderr)
self.assertIn("Didn't get heartbeat from master in over ", worker_stderr)

def test_processes_error_doesnt_blow_up_completely(self):
    """Run locust with ``--processes 4`` and a User class name that does not exist.

    Asserts that the "Unknown User(s)" error appears on stderr exactly five
    times and that stderr contains no traceback.
    """
    expected_error = "Unknown User(s): UserThatDoesntExist"
    with mock_locustfile() as locustfile:
        command = [
            "locust",
            "-f",
            locustfile.file_path,
            "--processes",
            "4",
            "-L",
            "DEBUG",
            "UserThatDoesntExist",
        ]
        process = subprocess.Popen(command, stdout=PIPE, stderr=PIPE, text=True)
        # stdout is captured only to keep it off the terminal; it is not inspected
        _, stderr = process.communicate()
        self.assertIn(expected_error, stderr)
        # the error message should repeat 4 times for the workers and once for the master
        occurrences = stderr.count(expected_error)
        self.assertEqual(occurrences, 5)
        self.assertNotIn("Traceback", stderr)

0 comments on commit 387bdbd

Please sign in to comment.