From 089dd9b94924cccb85ba1affccc4b0c8907f192c Mon Sep 17 00:00:00 2001
From: Kai Fricke
Date: Thu, 26 Aug 2021 12:13:15 +0200
Subject: [PATCH] [release] Add release logs for 1.6.0 (#18067)

---
 benchmarks/distributed/test_many_actors.py    |  2 +-
 benchmarks/distributed/test_many_pgs.py       |  5 +-
 benchmarks/distributed/test_many_tasks.py     |  2 +-
 .../object_store.txt                          |  0
 .../single_node.txt                           |  0
 .../1.6.0/benchmarks/many_actors.txt          |  1 +
 .../1.6.0/benchmarks/many_nodes.txt           |  1 +
 .../1.6.0/benchmarks/many_pgs.txt             |  1 +
 release/release_logs/1.6.0/microbenchmark.txt | 28 +++++++++++
 .../1.6.0/scalability/object_store.txt        |  1 +
 .../1.6.0/scalability/single_node.txt         | 23 +++++++++
 .../1.6.0/stress_tests/dead_actors.txt        | 50 +++++++++++++++++++
 .../1.6.0/stress_tests/many_tasks.txt         | 50 +++++++++++++++++++
 .../1.6.0/stress_tests/placement_group.txt    | 50 +++++++++++++++++++
 14 files changed, 209 insertions(+), 5 deletions(-)
 rename release/release_logs/1.5.0/{sclability => scalability}/object_store.txt (100%)
 rename release/release_logs/1.5.0/{sclability => scalability}/single_node.txt (100%)
 create mode 100644 release/release_logs/1.6.0/benchmarks/many_actors.txt
 create mode 100644 release/release_logs/1.6.0/benchmarks/many_nodes.txt
 create mode 100644 release/release_logs/1.6.0/benchmarks/many_pgs.txt
 create mode 100644 release/release_logs/1.6.0/microbenchmark.txt
 create mode 100644 release/release_logs/1.6.0/scalability/object_store.txt
 create mode 100644 release/release_logs/1.6.0/scalability/single_node.txt
 create mode 100644 release/release_logs/1.6.0/stress_tests/dead_actors.txt
 create mode 100644 release/release_logs/1.6.0/stress_tests/many_tasks.txt
 create mode 100644 release/release_logs/1.6.0/stress_tests/placement_group.txt

diff --git a/benchmarks/distributed/test_many_actors.py b/benchmarks/distributed/test_many_actors.py
index 52a42bfa5e80..9b24a55402e3 100644
--- a/benchmarks/distributed/test_many_actors.py
+++ b/benchmarks/distributed/test_many_actors.py
@@ -46,7 +46,7 @@ def no_resource_leaks():
 
 rate = MAX_ACTORS_IN_CLUSTER / (end_time - start_time)
 
-print(f"Sucess! Started {MAX_ACTORS_IN_CLUSTER} actors in "
+print(f"Success! Started {MAX_ACTORS_IN_CLUSTER} actors in "
       f"{end_time - start_time}s. ({rate} actors/s)")
 
 if "TEST_OUTPUT_JSON" in os.environ:
diff --git a/benchmarks/distributed/test_many_pgs.py b/benchmarks/distributed/test_many_pgs.py
index 6e9dfb0190f9..41ac73844ba7 100644
--- a/benchmarks/distributed/test_many_pgs.py
+++ b/benchmarks/distributed/test_many_pgs.py
@@ -72,9 +72,8 @@ def no_resource_leaks():
 
 rate = MAX_PLACEMENT_GROUPS / (end_time - start_time)
 
-print(
-    f"Sucess! Started {MAX_PLACEMENT_GROUPS} pgs in {end_time - start_time}s. "
-    f"({rate} pgs/s)")
+print(f"Success! Started {MAX_PLACEMENT_GROUPS} pgs in "
+      f"{end_time - start_time}s. ({rate} pgs/s)")
 
 if "TEST_OUTPUT_JSON" in os.environ:
     out_file = open(os.environ["TEST_OUTPUT_JSON"], "w")
diff --git a/benchmarks/distributed/test_many_tasks.py b/benchmarks/distributed/test_many_tasks.py
index e811c6b9ec50..419cb8453a49 100644
--- a/benchmarks/distributed/test_many_tasks.py
+++ b/benchmarks/distributed/test_many_tasks.py
@@ -61,7 +61,7 @@ def test(num_tasks):
 
     rate = num_tasks / (end_time - start_time - sleep_time)
 
-    print(f"Sucess! Started {num_tasks} tasks in {end_time - start_time}s. "
+    print(f"Success! Started {num_tasks} tasks in {end_time - start_time}s. "
           f"({rate} tasks/s)")
 
     if "TEST_OUTPUT_JSON" in os.environ:
diff --git a/release/release_logs/1.5.0/sclability/object_store.txt b/release/release_logs/1.5.0/scalability/object_store.txt
similarity index 100%
rename from release/release_logs/1.5.0/sclability/object_store.txt
rename to release/release_logs/1.5.0/scalability/object_store.txt
diff --git a/release/release_logs/1.5.0/sclability/single_node.txt b/release/release_logs/1.5.0/scalability/single_node.txt
similarity index 100%
rename from release/release_logs/1.5.0/sclability/single_node.txt
rename to release/release_logs/1.5.0/scalability/single_node.txt
diff --git a/release/release_logs/1.6.0/benchmarks/many_actors.txt b/release/release_logs/1.6.0/benchmarks/many_actors.txt
new file mode 100644
index 000000000000..55bdaeac2de3
--- /dev/null
+++ b/release/release_logs/1.6.0/benchmarks/many_actors.txt
@@ -0,0 +1 @@
+Success! Started 10000 actors in 30.97493314743042s. (322.84169758828256 actors/s)
diff --git a/release/release_logs/1.6.0/benchmarks/many_nodes.txt b/release/release_logs/1.6.0/benchmarks/many_nodes.txt
new file mode 100644
index 000000000000..fe9766ca5c9a
--- /dev/null
+++ b/release/release_logs/1.6.0/benchmarks/many_nodes.txt
@@ -0,0 +1 @@
+Success! Started 1000 tasks in 652.3915314674377s. (2.837752643588723 tasks/s)
diff --git a/release/release_logs/1.6.0/benchmarks/many_pgs.txt b/release/release_logs/1.6.0/benchmarks/many_pgs.txt
new file mode 100644
index 000000000000..3851f435d57b
--- /dev/null
+++ b/release/release_logs/1.6.0/benchmarks/many_pgs.txt
@@ -0,0 +1 @@
+Success! Started 1000 pgs in 60.29739260673523s. (16.584465045148566 pgs/s)
diff --git a/release/release_logs/1.6.0/microbenchmark.txt b/release/release_logs/1.6.0/microbenchmark.txt
new file mode 100644
index 000000000000..af5bd577675f
--- /dev/null
+++ b/release/release_logs/1.6.0/microbenchmark.txt
@@ -0,0 +1,28 @@
+single client get calls per second 35445.18 +- 479.15
+single client put calls per second 37315.16 +- 201.45
+multi client put calls per second 166250.88 +- 982.89
+single client get calls (Plasma Store) per second 9894.23 +- 32.1
+single client put calls (Plasma Store) per second 6311.68 +- 26.46
+multi client put calls (Plasma Store) per second 8193.5 +- 255.16
+single client put gigabytes per second 19.31 +- 5.35
+multi client put gigabytes per second 35.46 +- 1.04
+single client tasks sync per second 1488.44 +- 18.2
+single client tasks async per second 13546.95 +- 235.34
+multi client tasks async per second 39337.24 +- 1659.78
+1:1 actor calls sync per second 2192.24 +- 14.88
+1:1 actor calls async per second 5904.3 +- 152.57
+1:1 actor calls concurrent per second 5342.01 +- 82.08
+1:n actor calls async per second 16097.03 +- 354.48
+n:n actor calls async per second 41152.98 +- 2660.84
+n:n actor calls with arg async per second 6681.45 +- 227.53
+1:1 async-actor calls sync per second 1494.21 +- 13.16
+1:1 async-actor calls async per second 3350.12 +- 38.39
+1:1 async-actor calls with args async per second 2233.81 +- 44.68
+1:n async-actor calls async per second 14958.35 +- 107.36
+n:n async-actor calls async per second 31716.54 +- 3552.69
+client: get calls per second 1608.18 +- 21.76
+client: put calls per second 874.96 +- 14.19
+client: remote put calls per second 52981.5 +- 368.59
+client: 1:1 actor calls sync per second 510.19 +- 4.2
+client: 1:1 actor calls async per second 555.63 +- 4.73
+client: 1:1 actor calls concurrent per second 555.76 +- 4.45
diff --git a/release/release_logs/1.6.0/scalability/object_store.txt b/release/release_logs/1.6.0/scalability/object_store.txt
new file mode 100644
index 000000000000..c6bfd8d50b87
--- /dev/null
+++ b/release/release_logs/1.6.0/scalability/object_store.txt
@@ -0,0 +1 @@
+Broadcast time: 605.0678841490001 (1073741824 B x 50 nodes)
diff --git a/release/release_logs/1.6.0/scalability/single_node.txt b/release/release_logs/1.6.0/scalability/single_node.txt
new file mode 100644
index 000000000000..de7d1bd9a505
--- /dev/null
+++ b/release/release_logs/1.6.0/scalability/single_node.txt
@@ -0,0 +1,23 @@
+Finished many args
+Finished many returns
+Putting test objects:
+Getting objects
+Asserting correctness
+Done with dese
+Putting test objects:
+Getting objects
+Asserting correctness
+Done with zero copy
+Finished ray.get on many objects
+Submitting many tasks
+Unblocking tasks
+Finished queueing many tasks
+Generating object
+Putting object
+Getting object
+Done
+Many args time: 13.604580292000009 (10000 args)
+Many returns time: 5.815769784000025 (3000 returns)
+Ray.get time: 26.803473274999988 (10000 args)
+Queued task time: 153.99888931100003 (1000000 tasks)
+Ray.get large object time: 261.104296373 (107374182400 bytes)
diff --git a/release/release_logs/1.6.0/stress_tests/dead_actors.txt b/release/release_logs/1.6.0/stress_tests/dead_actors.txt
new file mode 100644
index 000000000000..d4a55bda6917
--- /dev/null
+++ b/release/release_logs/1.6.0/stress_tests/dead_actors.txt
@@ -0,0 +1,50 @@
+(pid=3615, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 486, in ray._raylet.execute_task.function_executor
+(pid=3615, ip=172.31.66.245) File "/home/ray/anaconda3/lib/python3.7/site-packages/ray/_private/function_manager.py", line 563, in actor_method_executor
+(pid=3615, ip=172.31.66.245) return method(__ray_actor, *args, **kwargs)
+(pid=3615, ip=172.31.66.245) File "stress_tests/test_dead_actors.py", line 28, in ping
+(pid=3615, ip=172.31.66.245) SystemExit: -1
+2021-08-25 00:26:11,358 WARNING worker.py:1215 -- A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffffd8b5032c3d4f7a82841bf0d701000000 Worker ID: 4aae639cb0517941ee6a76f510cf203cc3e6238c1a4959133b94e75f Node ID: 2cda12d8ae2a084551458cb2a351fcbef83cbbc299b1ef080a474133 Worker IP address: 172.31.82.117 Worker port: 10124 Worker PID: 3021
+(pid=3021, ip=172.31.82.117) 2021-08-25 00:26:11,349 ERROR worker.py:428 -- SystemExit was raised from the worker
+(pid=3021, ip=172.31.82.117) Traceback (most recent call last):
+(pid=3021, ip=172.31.82.117) File "python/ray/_raylet.pyx", line 640, in ray._raylet.task_execution_handler
+(pid=3021, ip=172.31.82.117) File "python/ray/_raylet.pyx", line 488, in ray._raylet.execute_task
+(pid=3021, ip=172.31.82.117) File "python/ray/_raylet.pyx", line 525, in ray._raylet.execute_task
+(pid=3021, ip=172.31.82.117) File "python/ray/_raylet.pyx", line 532, in ray._raylet.execute_task
+(pid=3021, ip=172.31.82.117) File "python/ray/_raylet.pyx", line 536, in ray._raylet.execute_task
+(pid=3021, ip=172.31.82.117) File "python/ray/_raylet.pyx", line 486, in ray._raylet.execute_task.function_executor
+(pid=3021, ip=172.31.82.117) File "/home/ray/anaconda3/lib/python3.7/site-packages/ray/_private/function_manager.py", line 563, in actor_method_executor
+(pid=3021, ip=172.31.82.117) return method(__ray_actor, *args, **kwargs)
+(pid=3021, ip=172.31.82.117) File "stress_tests/test_dead_actors.py", line 28, in ping
+(pid=3021, ip=172.31.82.117) SystemExit: -1
+2021-08-25 00:26:12,652 WARNING worker.py:1215 -- A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffff0a017da19802a9ca28988a1701000000 Worker ID: 6de11bdd1e48682d0df1414a30306ab5962f1a811762f6483b213245 Node ID: c4c0d6c36c195a2df56f8aa08332c20e3303df4e348de0163d987741 Worker IP address: 172.31.66.245 Worker port: 10154 Worker PID: 3657
+(pid=3657, ip=172.31.66.245) 2021-08-25 00:26:12,647 ERROR worker.py:428 -- SystemExit was raised from the worker
+(pid=3657, ip=172.31.66.245) Traceback (most recent call last):
+(pid=3657, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 640, in ray._raylet.task_execution_handler
+(pid=3657, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 488, in ray._raylet.execute_task
+(pid=3657, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 525, in ray._raylet.execute_task
+(pid=3657, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 532, in ray._raylet.execute_task
+(pid=3657, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 536, in ray._raylet.execute_task
+(pid=3657, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 486, in ray._raylet.execute_task.function_executor
+(pid=3657, ip=172.31.66.245) File "/home/ray/anaconda3/lib/python3.7/site-packages/ray/_private/function_manager.py", line 563, in actor_method_executor
+(pid=3657, ip=172.31.66.245) return method(__ray_actor, *args, **kwargs)
+(pid=3657, ip=172.31.66.245) File "stress_tests/test_dead_actors.py", line 28, in ping
+(pid=3657, ip=172.31.66.245) SystemExit: -1
+2021-08-25 00:26:13,907 WARNING worker.py:1215 -- A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffffe62057dbd62605f076de5d3301000000 Worker ID: 8bcf9d4ecc267670d785b6c406cee8853f9d62f40d8fe4d83b23b7c0 Node ID: c4c0d6c36c195a2df56f8aa08332c20e3303df4e348de0163d987741 Worker IP address: 172.31.66.245 Worker port: 10155 Worker PID: 3702
+INFO:__main__:Finished trial 99
+Finished in: 141.16861081123352s
+Average iteration time: 1.411683669090271s
+Max iteration time: 3.7091996669769287s
+Min iteration time: 0.027825593948364258s
+PASSED.
+(pid=3702, ip=172.31.66.245) 2021-08-25 00:26:13,898 ERROR worker.py:428 -- SystemExit was raised from the worker
+(pid=3702, ip=172.31.66.245) Traceback (most recent call last):
+(pid=3702, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 640, in ray._raylet.task_execution_handler
+(pid=3702, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 488, in ray._raylet.execute_task
+(pid=3702, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 525, in ray._raylet.execute_task
+(pid=3702, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 532, in ray._raylet.execute_task
+(pid=3702, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 536, in ray._raylet.execute_task
+(pid=3702, ip=172.31.66.245) File "python/ray/_raylet.pyx", line 486, in ray._raylet.execute_task.function_executor
+(pid=3702, ip=172.31.66.245) File "/home/ray/anaconda3/lib/python3.7/site-packages/ray/_private/function_manager.py", line 563, in actor_method_executor
+(pid=3702, ip=172.31.66.245) return method(__ray_actor, *args, **kwargs)
+(pid=3702, ip=172.31.66.245) File "stress_tests/test_dead_actors.py", line 28, in ping
+(pid=3702, ip=172.31.66.245) SystemExit: -1
diff --git a/release/release_logs/1.6.0/stress_tests/many_tasks.txt b/release/release_logs/1.6.0/stress_tests/many_tasks.txt
new file mode 100644
index 000000000000..a2be8d3f315a
--- /dev/null
+++ b/release/release_logs/1.6.0/stress_tests/many_tasks.txt
@@ -0,0 +1,50 @@
+INFO:__main__:Submitted 0
+INFO:__main__:Submitted 0
+INFO:__main__:Submitted 800
+INFO:__main__:Submitted 1600
+INFO:__main__:Submitted 2400
+INFO:__main__:Submitted 3200
+INFO:__main__:Submitted 4000
+INFO:__main__:Submitted 4800
+INFO:__main__:Submitted 5600
+INFO:__main__:Submitted 6400
+INFO:__main__:Submitted 7200
+INFO:__main__:Submitted 8000
+INFO:__main__:Submitted 8800
+INFO:__main__:Submitted 9600
+INFO:__main__:Submitted 10400
+INFO:__main__:Submitted 11200
+INFO:__main__:Submitted 12000
+INFO:__main__:Submitted 12800
+INFO:__main__:Submitted 13600
+INFO:__main__:Submitted 14400
+INFO:__main__:Submitted 15200
+INFO:__main__:Submitted 16000
+INFO:__main__:Submitted 16800
+INFO:__main__:Submitted 17600
+INFO:__main__:Submitted 18400
+INFO:__main__:Submitted 19200
+INFO:__main__:Submitted 20000
+INFO:__main__:Submitted 20800
+INFO:__main__:Submitted 21600
+INFO:__main__:Submitted 22400
+INFO:__main__:Submitted 23200
+INFO:__main__:Submitted 24000
+INFO:__main__:Submitted 24800
+INFO:__main__:Submitted 25600
+INFO:__main__:Submitted 26400
+INFO:__main__:Submitted 27200
+INFO:__main__:Submitted 28000
+INFO:__main__:Submitted 28800
+INFO:__main__:Submitted 29600
+INFO:__main__:Submitted 30400
+INFO:__main__:Submitted 31200
+INFO:__main__:Finished stage 3 in 5.727146863937378 seconds.
+INFO:__main__:Scheduling many tasks for spillback.
+INFO:__main__:Spread: 1.1657548800000086 Last: 642.309683662 First: 641.143928782
+INFO:__main__:Spread: 1.114729925000006 Last: 642.193462636 First: 641.078732711
+INFO:__main__:Spread: 1.1137836179999567 Last: 641.053006067 First: 639.939222449
+INFO:__main__:Spread: 1.1135457709999628 Last: 642.067508841 First: 640.95396307
+INFO:__main__:Spread: 1.1130653850000272 Last: 641.806138764 First: 640.693073379
+INFO:__main__:Avg spread: 1.1241759157999922
+PASSED.
diff --git a/release/release_logs/1.6.0/stress_tests/placement_group.txt b/release/release_logs/1.6.0/stress_tests/placement_group.txt
new file mode 100644
index 000000000000..1e4e71126b8c
--- /dev/null
+++ b/release/release_logs/1.6.0/stress_tests/placement_group.txt
@@ -0,0 +1,50 @@
+INFO:__main__:remove_group iteration 622
+INFO:__main__:remove_group iteration 623
+INFO:__main__:remove_group iteration 624
+INFO:__main__:remove_group iteration 625
+INFO:__main__:remove_group iteration 626
+INFO:__main__:remove_group iteration 627
+INFO:__main__:remove_group iteration 628
+INFO:__main__:remove_group iteration 629
+INFO:__main__:remove_group iteration 630
+INFO:__main__:remove_group iteration 631
+INFO:__main__:remove_group iteration 632
+INFO:__main__:remove_group iteration 633
+INFO:__main__:remove_group iteration 634
+INFO:__main__:remove_group iteration 635
+INFO:__main__:remove_group iteration 636
+INFO:__main__:remove_group iteration 637
+INFO:__main__:remove_group iteration 638
+INFO:__main__:remove_group iteration 639
+INFO:__main__:remove_group iteration 640
+INFO:__main__:remove_group iteration 641
+INFO:__main__:remove_group iteration 642
+INFO:__main__:remove_group iteration 643
+INFO:__main__:remove_group iteration 644
+INFO:__main__:remove_group iteration 645
+INFO:__main__:remove_group iteration 646
+INFO:__main__:remove_group iteration 647
+INFO:__main__:remove_group iteration 648
+INFO:__main__:remove_group iteration 649
+INFO:__main__:remove_group iteration 650
+INFO:__main__:remove_group iteration 651
+INFO:__main__:remove_group iteration 652
+INFO:__main__:remove_group iteration 653
+INFO:__main__:remove_group iteration 654
+INFO:__main__:remove_group iteration 655
+INFO:__main__:remove_group iteration 656
+INFO:__main__:remove_group iteration 657
+INFO:__main__:remove_group iteration 658
+INFO:__main__:remove_group iteration 659
+INFO:__main__:remove_group iteration 660
+INFO:__main__:remove_group iteration 661
+INFO:__main__:remove_group iteration 662
+INFO:__main__:remove_group iteration 663
+INFO:__main__:remove_group iteration 664
+INFO:__main__:remove_group iteration 665
+2021-08-25 00:24:46,958 WARNING worker.py:1215 -- WARNING: 8 PYTHON worker processes have been started on node: 4bc8d5ab28cef16a5ad3d30c4502a260d1e1df3d588225a348183a5b with address: 172.31.50.22. This could be a result of using a large number of actors, or due to tasks blocked in ray.get() calls (see https://github.com/ray-project/ray/issues/3644 for some discussion of workarounds).
+2021-08-25 00:24:48,990 WARNING worker.py:1215 -- WARNING: 10 PYTHON worker processes have been started on node: 4bc8d5ab28cef16a5ad3d30c4502a260d1e1df3d588225a348183a5b with address: 172.31.50.22. This could be a result of using a large number of actors, or due to tasks blocked in ray.get() calls (see https://github.com/ray-project/ray/issues/3644 for some discussion of workarounds).
+2021-08-25 00:24:50,791 WARNING worker.py:1215 -- WARNING: 12 PYTHON worker processes have been started on node: 4bc8d5ab28cef16a5ad3d30c4502a260d1e1df3d588225a348183a5b with address: 172.31.50.22. This could be a result of using a large number of actors, or due to tasks blocked in ray.get() calls (see https://github.com/ray-project/ray/issues/3644 for some discussion of workarounds).
+Avg placement group creating time: 0.6881522147153959 ms
+Avg placement group removing time: 4.041917145644407 ms
+PASSED.