From 0f79ebbd750882840113a243035066d4f8f7bea1 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Wed, 14 Jul 2021 00:37:04 -0700 Subject: [PATCH] [Ray debugger] Sort breakpoints chronologically and print in table format (#17051) --- doc/source/ray-debugging.rst | 55 ++++++++++++++++++----------------- python/ray/scripts/scripts.py | 33 +++++++++++++++++---- python/ray/util/rpdb.py | 4 ++- 3 files changed, 59 insertions(+), 33 deletions(-) diff --git a/doc/source/ray-debugging.rst b/doc/source/ray-debugging.rst index 36e1c67dde1e..32cee6595fb0 100644 --- a/doc/source/ray-debugging.rst +++ b/doc/source/ray-debugging.rst @@ -43,7 +43,7 @@ Put the program into a file named ``debugging.py`` and execute it using: python debugging.py -Each of the 4 executed tasks will drop into a breakpoint when the line +Each of the 2 executed tasks will drop into a breakpoint when the line ``breakpoint()`` is executed. You can attach to the debugger by running the following command on the head node of the cluster: @@ -55,12 +55,12 @@ The ``ray debug`` command will print an output like this: .. code-block:: text - 2020-11-04 15:35:50,011 INFO worker.py:672 -- Connecting to existing Ray cluster at address: 192.168.1.105:6379 + 2021-07-13 16:30:40,112 INFO scripts.py:216 -- Connecting to Ray instance at 192.168.2.61:6379. + 2021-07-13 16:30:40,112 INFO worker.py:740 -- Connecting to existing Ray cluster at address: 192.168.2.61:6379 Active breakpoints: - 0: ray::f() | debugging.py:6 - - 1: ray::f() | debugging.py:6 - + index | timestamp | Ray task | filename:lineno + 0 | 2021-07-13 23:30:37 | ray::f() | debugging.py:6 + 1 | 2021-07-13 23:30:37 | ray::f() | debugging.py:6 Enter breakpoint index or press enter to refresh: @@ -71,11 +71,11 @@ of the execution: .. code-block:: text (Pdb) bt - /Users/pcmoritz/ray/python/ray/workers/default_worker.py(170)() + /home/ubuntu/ray/python/ray/workers/default_worker.py(170)() -> ray.worker.global_worker.main_loop() - /Users/pcmoritz/ray/python/ray/worker.py(385)main_loop() + /home/ubuntu/ray/python/ray/worker.py(385)main_loop() -> self.core_worker.run_task_loop() - > /Users/pcmoritz/tmp/debugging.py(7)f() + > /home/ubuntu/tmp/debugging.py(7)f() -> return x * x You can inspect the value of ``x`` with ``print(x)``. You can see the current source code with ``ll`` @@ -126,7 +126,7 @@ enter. This will result in the following output: .. code-block:: python Enter breakpoint index or press enter to refresh: 0 - > /Users/pcmoritz/tmp/stepping.py(14)() + > /home/ubuntu/tmp/stepping.py(14)() -> result_ref = fact.remote(5) (Pdb) @@ -141,7 +141,7 @@ the following output: *** Connection closed by remote host *** Continuing pdb session in different process... --Call-- - > /Users/pcmoritz/tmp/stepping.py(5)fact() + > /home/ubuntu/tmp/stepping.py(5)fact() -> @ray.remote (Pdb) ll 5 -> @ray.remote @@ -165,13 +165,13 @@ call site and use ``p(result)`` to print the result: .. code-block:: python Enter breakpoint index or press enter to refresh: 0 - > /Users/pcmoritz/tmp/stepping.py(14)() + > /home/ubuntu/tmp/stepping.py(14)() -> result_ref = fact.remote(5) (Pdb) remote *** Connection closed by remote host *** Continuing pdb session in different process... --Call-- - > /Users/pcmoritz/tmp/stepping.py(5)fact() + > /home/ubuntu/tmp/stepping.py(5)fact() -> @ray.remote (Pdb) p(n) 5 @@ -179,7 +179,7 @@ call site and use ``p(result)`` to print the result: *** Connection closed by remote host *** Continuing pdb session in different process... --Call-- - > /Users/pcmoritz/tmp/stepping.py(5)fact() + > /home/ubuntu/tmp/stepping.py(5)fact() -> @ray.remote (Pdb) p(n) 4 @@ -187,13 +187,13 @@ call site and use ``p(result)`` to print the result: *** Connection closed by remote host *** Continuing pdb session in different process... --Return-- - > /Users/pcmoritz/tmp/stepping.py(5)fact()->120 + > /home/ubuntu/tmp/stepping.py(5)fact()->120 -> @ray.remote (Pdb) get *** Connection closed by remote host *** Continuing pdb session in different process... --Return-- - > /Users/pcmoritz/tmp/stepping.py(14)()->None + > /home/ubuntu/tmp/stepping.py(14)()->None -> result_ref = fact.remote(5) (Pdb) p(result) 120 @@ -264,28 +264,29 @@ When the ``serve_debugging.py`` driver hits the breakpoint, it will tell you to .. code-block:: text Active breakpoints: - 0: ray::RayServeWorker_BoostingModel.handle_request() | /Users/pcmoritz/ray/python/ray/serve/backend_worker.py:249 + index | timestamp | Ray task | filename:lineno + 0 | 2021-07-13 23:49:14 | ray::RayServeWrappedReplica.handle_request() | /home/ubuntu/ray/python/ray/serve/backend_worker.py:249 Traceback (most recent call last): - File "/Users/pcmoritz/ray/python/ray/serve/backend_worker.py", line 244, in invoke_single - result = await method_to_call(arg) + File "/home/ubuntu/ray/python/ray/serve/backend_worker.py", line 242, in invoke_single + result = await method_to_call(*args, **kwargs) - File "/Users/pcmoritz/ray/python/ray/async_compat.py", line 29, in wrapper - return func(*args, **kwargs) - - File "serve_debugging.py", line 23, in __call__ + File "serve_debugging.py", line 24, in __call__ prediction = self.model.predict([payload])[0] - File "/Users/pcmoritz/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/_gb.py", line 2165, in predict + File "/home/ubuntu/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/_gb.py", line 1188, in predict raw_predictions = self.decision_function(X) - File "/Users/pcmoritz/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/_gb.py", line 2120, in decision_function + File "/home/ubuntu/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/_gb.py", line 1143, in decision_function X = check_array(X, dtype=DTYPE, order="C", accept_sparse='csr') - File "/Users/pcmoritz/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py", line 531, in check_array + File "/home/ubuntu/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py", line 63, in inner_f + return f(*args, **kwargs) + + File "/home/ubuntu/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py", line 673, in check_array array = np.asarray(array, order=order, dtype=dtype) - File "/Users/pcmoritz/anaconda3/lib/python3.7/site-packages/numpy/core/_asarray.py", line 83, in asarray + File "/home/ubuntu/anaconda3/lib/python3.7/site-packages/numpy/core/_asarray.py", line 83, in asarray return array(a, dtype, copy=False, order=order) ValueError: could not convert string to float: 'a' diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index aa37a72e1ddf..06622c4d053e 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -194,6 +194,16 @@ def continue_debug_session(): time.sleep(1.0) +def format_table(table): + """Format a table as a list of lines with aligned columns.""" + result = [] + col_width = [max(len(x) for x in col) for col in zip(*table)] + for line in table: + result.append(" | ".join( + "{0:{1}}".format(x, col_width[i]) for i, x in enumerate(line))) + return result + + @cli.command() @click.option( "--address", @@ -212,13 +222,26 @@ def debug(address): active_sessions = ray.experimental.internal_kv._internal_kv_list( "RAY_PDB_") print("Active breakpoints:") - for i, active_session in enumerate(active_sessions): + sessions_data = [] + for active_session in active_sessions: data = json.loads( ray.experimental.internal_kv._internal_kv_get(active_session)) - print( - str(i) + ": " + data["proctitle"] + " | " + data["filename"] + - ":" + str(data["lineno"])) - print(data["traceback"]) + sessions_data.append(data) + sessions_data = sorted( + sessions_data, key=lambda data: data["timestamp"], reverse=True) + table = [["index", "timestamp", "Ray task", "filename:lineno"]] + for i, data in enumerate(sessions_data): + date = datetime.utcfromtimestamp( + data["timestamp"]).strftime("%Y-%m-%d %H:%M:%S") + table.append([ + str(i), date, data["proctitle"], + data["filename"] + ":" + str(data["lineno"]) + ]) + for i, line in enumerate(format_table(table)): + print(line) + if i >= 1 and not sessions_data[i - 1]["traceback"].startswith( + "NoneType: None"): + print(sessions_data[i - 1]["traceback"]) inp = input("Enter breakpoint index or press enter to refresh: ") if inp == "": print() diff --git a/python/ray/util/rpdb.py b/python/ray/util/rpdb.py index 4a47b5723f0e..06a5667aaa07 100644 --- a/python/ray/util/rpdb.py +++ b/python/ray/util/rpdb.py @@ -11,6 +11,7 @@ import select import socket import sys +import time import uuid from pdb import Pdb import setproctitle @@ -211,7 +212,8 @@ def connect_ray_pdb(host=None, "pdb_address": pdb_address, "filename": parentframeinfo.filename, "lineno": parentframeinfo.lineno, - "traceback": "\n".join(traceback.format_exception(*sys.exc_info())) + "traceback": "\n".join(traceback.format_exception(*sys.exc_info())), + "timestamp": time.time(), } _internal_kv_put( "RAY_PDB_{}".format(breakpoint_uuid), json.dumps(data), overwrite=True)