From c6acbb530e72bdc88fd6c40c435f73c8c83747fe Mon Sep 17 00:00:00 2001 From: Cuong Nguyen Date: Mon, 10 Apr 2023 16:59:35 -0700 Subject: [PATCH] @aslonnie's comments Signed-off-by: Cuong Nguyen --- release/ray_release/reporter/db.py | 24 +++++++++++++++---- release/ray_release/tests/test_db_reporter.py | 4 ++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/release/ray_release/reporter/db.py b/release/ray_release/reporter/db.py index 67b78758bbaf..9b8a42e66b1f 100644 --- a/release/ray_release/reporter/db.py +++ b/release/ray_release/reporter/db.py @@ -19,13 +19,13 @@ def __init__(self): def compute_crash_pattern(self, logs: str) -> str: stack_trace = self._compute_stack_trace(logs.splitlines()) - return self._compute_unique_pattern(stack_trace)[:CRASH_PATTERN_MAX_LENGTH] + return self._compute_signature(stack_trace)[:CRASH_PATTERN_MAX_LENGTH] - def _compute_unique_pattern(self, stack_trace: List[str]) -> str: + def _compute_signature(self, stack_trace: List[str]) -> str: """ - Compute unique pattern from stack trace, by remove factors such as date, time, - temp directory, line numbers, etc. This help to aggregate similar logs into - same bug patterns + Compute signature pattern from stack trace, by removing factors such as date, + time, temp directory, line numbers, etc. This helps to aggregate similar logs + into the same bug patterns """ massaged_trace = [] for line in stack_trace: @@ -54,34 +54,48 @@ def _compute_stack_trace(self, logs: List[str]) -> List[str]: while i < len(logs): stack = [] trace = error_stacktrace + # Search for lines that are either + # ... ERROR ... + # or + # ... ERROR ... 
+ # Traceback (most recent call last): if "ERROR" in logs[i]: stack.append(logs[i]) next = i + 1 if i + 1 < len(logs) and logs[i + 1].startswith("Traceback"): stack.append(logs[i + 1]) next = i + 2 + # Or if the line with ERROR does not exist, just search for the line with + # Traceback (most recent call last): elif logs[i].startswith("Traceback"): stack.append(logs[i]) trace = stacktrace next = i + 1 + # Or else, skip this line and continue else: i = i + 1 continue + # If the line that contains ERROR, Traceback, etc. is found, scan the logs + # until the line no longer has indentation. This is because stack trace + # is always indented, and stops when the line is no longer indented while next < len(logs): if logs[next].startswith((" ", "\t")): stack.append(logs[next]) next = next + 1 else: break + # Finished capturing the entire stack trace if next < len(logs): stack.append(logs[next]) if stack: trace.append(stack) i = next + 1 + # Favor stack trace that contains the ERROR keyword if error_stacktrace: return error_stacktrace[-1] + # Otherwise any stack trace is fine if stacktrace: return stacktrace[-1] diff --git a/release/ray_release/tests/test_db_reporter.py b/release/ray_release/tests/test_db_reporter.py index be7482ac5b88..92871dffd014 100644 --- a/release/ray_release/tests/test_db_reporter.py +++ b/release/ray_release/tests/test_db_reporter.py @@ -16,8 +16,8 @@ def test_compute_stack_pattern(): ) -def test_compute_unique_pattern(): - assert (DBReporter())._compute_unique_pattern( +def test_compute_signature(): + assert (DBReporter())._compute_signature( [ "Traceback (most recent call last):", ' File "/tmp/something", line 584',