Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] eliminate hash collisions in test caching #4808

Merged
merged 2 commits into from
Nov 29, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 30 additions & 11 deletions tests/Tester.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import textwrap
import pickle
import hashlib
import hedy
import hedy_translation
Expand Down Expand Up @@ -89,10 +90,12 @@ class HedyTester(unittest.TestCase):
def setUpClass(cls):
os.environ["ENABLE_SKIP_FAULTY"] = 'True' # Always test with skipping faulty enabled

def snippet_already_tested_with_current_hedy_version(self, snippet, level):
def snippet_already_tested_with_current_hedy_version(self, test_hash):
try:
hash_language_plus_snippet_and_level = create_hash(get_hedy_source_hash(), snippet, level)
filename = get_hash_filename(hash_language_plus_snippet_and_level)
total_hash_incl_the_hedy_language = create_hash(get_hedy_source_hash(), test_hash)
if total_hash_incl_the_hedy_language is None:
return False
filename = get_hash_filename(total_hash_incl_the_hedy_language)
already_successful = os.path.isfile(filename)
return already_successful
except UnicodeEncodeError: # some tests (generated by Hypothesis) can't be hashed
Expand Down Expand Up @@ -230,7 +233,24 @@ def single_level_tester(
):
if level is None: # no level set (from the multi-tester)? grap current level from class
level = self.level
if not self.snippet_already_tested_with_current_hedy_version(code, level):

# To speed up test execution, we calculate a hash of the test.
# It is important to capture all the parameters that are passed to the function,
# as sometimes we expect a snippet to fail and sometimes we expect it to succeed.
# We do this with `locals()`, which captures the arguments of this function as a dict, and then we hash that.
# This means we have to collect the locals at the beginning, because otherwise there will be
# things other than arguments in the `locals()` output.

all_args = locals()
del all_args["self"]
try:
# we use pickle instead of hash for consistency across test-runs
# see PYTHONHASHSEED
test_hash = pickle.dumps(all_args)
except AttributeError:
test_hash = None

if not self.snippet_already_tested_with_current_hedy_version(test_hash):
if skipped_mappings is not None:
result = hedy.transpile(code, level, lang, skip_faulty=skip_faulty)
for skipped in skipped_mappings:
Expand Down Expand Up @@ -282,7 +302,7 @@ def single_level_tester(
self.assertTrue(extra_check_function(result))

# all ok? -> save hash!
hash_of_run = create_hash(get_hedy_source_hash(), code, level)
hash_of_run = create_hash(get_hedy_source_hash(), test_hash)
if hash_of_run:
filename = get_hash_filename(hash_of_run)
os.makedirs(os.path.dirname(filename), mode=0o777, exist_ok=True)
Expand Down Expand Up @@ -436,12 +456,11 @@ def translate_keywords_in_snippets(snippets):
return snippets


# Three-argument form of create_hash: joins the snippet, the level and the
# hedy_language string with "|\n" separators and returns the SHA-1 hex digest
# of the UTF-8 encoding of that text.
# NOTE(review): a two-argument `create_hash(hedy_language, test_hash)` is defined
# directly after this one; presumably this is the pre-change side of the diff and
# is superseded by it -- confirm before relying on this version.
def create_hash(hedy_language, snippet, level):
try:
# the key only covers snippet + level + hedy_language
t = snippet + "|\n" + str(level) + "|\n" + hedy_language
return hashlib.sha1(t.encode('utf-8')).hexdigest()
except UnicodeEncodeError: # some tests can't be hashed
# an empty string signals "no hash available" to the caller
return ''
def create_hash(hedy_language, test_hash):
    """Return a SHA-1 hex digest identifying one test run, or None.

    The digest is computed over ``str(test_hash) + "|\\n" + hedy_language``
    encoded as UTF-8.

    Args:
        hedy_language: string that is appended to the key (callers in this
            file pass the result of ``get_hedy_source_hash()``).
        test_hash: the serialized test arguments (callers in this file pass
            the output of ``pickle.dumps``), or None when the arguments
            could not be serialized.

    Returns:
        A 40-character hexadecimal string, or None when no hash can be
        produced. Callers treat a falsy result as "do not use the cache".
    """
    if test_hash is None:
        return None

    combined = str(test_hash) + "|\n" + hedy_language
    try:
        return hashlib.sha1(combined.encode('utf-8')).hexdigest()
    except UnicodeEncodeError:
        # Text that cannot be encoded as UTF-8 (e.g. lone surrogates) cannot
        # be hashed; report that the same way as an unserializable test so
        # the caller skips caching instead of crashing.
        return None


def get_hash_filename(input_hash):
Expand Down
Loading