From 342ef5ca7eb7df0ee02dfa01714b6391cdbe87bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Ho=C5=99e=C5=88ovsk=C3=BD?=
 <martin.horenovsky@gmail.com>
Date: Wed, 27 Oct 2021 14:26:07 +0200
Subject: [PATCH] Cleanup the shard integration test script

---
 tests/TestScripts/testSharding.py | 136 ++++++++++++++++++++++--------
 1 file changed, 102 insertions(+), 34 deletions(-)

diff --git a/tests/TestScripts/testSharding.py b/tests/TestScripts/testSharding.py
index fcea03e2e1..eb3fad9935 100644
--- a/tests/TestScripts/testSharding.py
+++ b/tests/TestScripts/testSharding.py
@@ -5,86 +5,154 @@
 This is done by running the binary multiple times, once to list all the tests,
 once per shard to list the tests for that shard, and once again per shard to
 execute the tests. The sharded lists are compared to the full list to ensure
-none are skipped, duplicated, and that the order remains the same. This process
-is repeated for multiple command line argument combinations to ensure sharding
-works with different filters and test orderings.
+none are skipped or duplicated, and that the order remains the same.
 """
 
+import random
 import subprocess
 import sys
 import xml.etree.ElementTree as ET
 
 from collections import namedtuple
 
+from typing import List, Dict
+
+seed = random.randint(0, 2 ** 32 - 1)
+number_of_shards = 5
+
 def make_base_commandline(self_test_exe):
     return [
         self_test_exe,
         '--reporter', 'xml',
-        "--shard-count", "5",
-        "--shard-index", "2",
+        '--order', 'rand',  # randomized test order
+        '--rng-seed', str(seed),  # module-level seed, so every invocation in this run passes the same one
         "[generators]~[benchmarks]~[.]"
     ]
 
-def list_tests(self_test_exe):
-    cmd = make_base_commandline(self_test_exe) + ['--list-tests']
-
-    process = subprocess.Popen(
-            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    stdout, stderr = process.communicate()
-    if stderr:
-        raise RuntimeError("Unexpected error output:\n" + process.stderr)
 
-    root = ET.fromstring(stdout)
+def list_tests(self_test_exe: str, extra_args: List[str] = None):
+    """Return the test names that `self_test_exe` lists, given `extra_args`."""
+    cmd = make_base_commandline(self_test_exe) + ['--list-tests']
+    if extra_args:
+        cmd.extend(extra_args)
+
+    try:
+        ret = subprocess.run(cmd, stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE, timeout=10,
+                             check=True, universal_newlines=True)
+    except subprocess.CalledProcessError as ex:
+        print('Could not list tests:\n{}'.format(ex.stderr))
+        # `ret` is unbound on failure; re-raise instead of falling through
+        # to an unrelated UnboundLocalError below.
+        raise
+
+    if ret.stderr:
+        raise RuntimeError("Unexpected error output:\n" + ret.stderr)
+
+    root = ET.fromstring(ret.stdout)
     result = [elem.text for elem in root.findall('./TestCase/Name')]
 
     if len(result) < 2:
         raise RuntimeError("Unexpectedly few tests listed (got {})".format(
             len(result)))
 
     return result
 
 
-def execute_tests(self_test_exe):
+def execute_tests(self_test_exe: str, extra_args: List[str] = None):
+    """Run the tests in `self_test_exe` and return the names of those executed."""
     cmd = make_base_commandline(self_test_exe)
-
-    process = subprocess.Popen(
-            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    stdout, stderr = process.communicate()
-    if stderr:
-        raise RuntimeError("Unexpected error output:\n" + process.stderr)
+    if extra_args:
+        cmd.extend(extra_args)
+
+    try:
+        ret = subprocess.run(cmd, stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE, timeout=10,
+                             check=True, universal_newlines=True)
+    except subprocess.CalledProcessError as ex:
+        print('Could not execute tests:\n{}'.format(ex.stderr))
+        # `ret` is unbound on failure; re-raise instead of falling through.
+        raise
+
+    if ret.stderr:
+        raise RuntimeError("Unexpected error output:\n" + ret.stderr)
 
-    root = ET.fromstring(stdout)
+    root = ET.fromstring(ret.stdout)
     result = [elem.attrib["name"] for elem in root.findall('./TestCase')]
 
     if len(result) < 2:
         raise RuntimeError("Unexpectedly few tests listed (got {})".format(
             len(result)))
+
     return result
 
 
-def check_listed_and_executed_tests_match(listed_tests, executed_tests):
-        listed_names = set(listed_tests)
-        executed_names = set(executed_tests)
+def test_sharded_listing(self_test_exe: str) -> Dict[int, List[str]]:
+    """
+    Asks the test binary for the list of all tests, and also for the lists
+    of tests in each shard.
+
+    The shards are then checked to be balanced and, when concatenated in
+    shard order, to match the full list of all tests exactly.
 
-        listed_string = "\n".join(listed_names)
-        exeucted_string = "\n".join(executed_names)
+    Returns the dictionary of shard-index => listed tests; key -1 holds the full list.
+    """
+    all_tests = list_tests(self_test_exe)
+    big_shard_tests = list_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])
 
-        assert listed_names == executed_names, (
-            "Executed tests do not match the listed tests:\nExecuted:\n{}\n\nListed:\n{}".format(exeucted_string, listed_string)
+    assert all_tests == big_shard_tests, (
+        "No-sharding test list does not match the listing of big shard:\nNo shard:\n{}\n\nWith shard:\n{}\n".format(
+            '\n'.join(all_tests),
+            '\n'.join(big_shard_tests)
         )
+    )
 
+    shard_listings = {}
+    for shard_idx in range(number_of_shards):
+        shard_listings[shard_idx] = list_tests(self_test_exe, ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)])
 
-def test_sharding(self_test_exe):
-    listed_tests = list_tests(self_test_exe)
-    executed_tests = execute_tests(self_test_exe)
+    shard_sizes = [len(shard_listings[idx]) for idx in range(number_of_shards)]
+    assert len(all_tests) == sum(shard_sizes), "Shards list {} tests in total, expected {}".format(sum(shard_sizes), len(all_tests))
 
-    check_listed_and_executed_tests_match(listed_tests, executed_tests)
+    # Check that the shards are balanced (e.g. we don't have all tests in
+    # a single shard and the others empty). Comparing only neighbouring
+    # shards would let the sizes drift by one per shard (1, 2, 3, 4, 5).
+    assert max(shard_sizes) - min(shard_sizes) <= 1, "A shard has weird size: {}".format(shard_sizes)
+
+    combined_shards = [test for idx in range(number_of_shards) for test in shard_listings[idx]]
+    assert all_tests == combined_shards, (
+        "All tests and combined shards disagree.\nNo shard:\n{}\n\nCombined:\n{}\n\n".format(
+            '\n'.join(all_tests),
+            '\n'.join(combined_shards)
+        )
+    )
+    shard_listings[-1] = all_tests
+
+    return shard_listings
+
+
+def test_sharded_execution(self_test_exe: str, listings: Dict[int, List[str]]):
+    """
+    Runs the test binary, unsharded and once per shard, and checks that
+    the executed tests match the previously listed tests in `listings`
+    (`listings[-1]` being the full, unsharded list).
+    """
+    all_tests = execute_tests(self_test_exe)
+    big_shard_tests = execute_tests(self_test_exe, ['--shard-count', '1', '--shard-index', '0'])
+    assert all_tests == big_shard_tests, "Unsharded run and single-shard run executed different tests"
+    assert listings[-1] == all_tests, "Executed tests do not match the listed tests"
+
+    for shard_idx in range(number_of_shards):
+        shard_args = ['--shard-count', str(number_of_shards), '--shard-index', str(shard_idx)]
+        executed = execute_tests(self_test_exe, shard_args)
+        assert listings[shard_idx] == executed, (
+            "Shard {} executed tests that differ from its listing".format(shard_idx))
 
 
 def main():
     self_test_exe, = sys.argv[1:]
-
-    test_sharding(self_test_exe)
+    listings = test_sharded_listing(self_test_exe)  # shard-index => listed tests (-1 => full list)
+    test_sharded_execution(self_test_exe, listings)  # executed tests must match those listings
 
 if __name__ == '__main__':
     sys.exit(main())