Skip to content

Commit

Permalink
gh-111495: improve test coverage of codecs C API (GH-126030)
Browse files Browse the repository at this point in the history
For now, skip some crashers (tracked in gh-123378).
  • Loading branch information
picnixz authored Nov 1, 2024
1 parent 6c67446 commit 32e07fd
Showing 1 changed file with 115 additions and 23 deletions.
138 changes: 115 additions & 23 deletions Lib/test/test_capi/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,49 @@ def test_codec_stream_writer(self):

class CAPICodecErrors(unittest.TestCase):

@classmethod
def _generate_exception_args(cls):
for objlen in range(5):
maxind = 2 * max(2, objlen)
for start in range(-maxind, maxind + 1):
for end in range(-maxind, maxind + 1):
yield objlen, start, end

@classmethod
def generate_encode_errors(cls):
return tuple(
UnicodeEncodeError('utf-8', '0' * objlen, start, end, 'why')
for objlen, start, end in cls._generate_exception_args()
)

@classmethod
def generate_decode_errors(cls):
return tuple(
UnicodeDecodeError('utf-8', b'0' * objlen, start, end, 'why')
for objlen, start, end in cls._generate_exception_args()
)

@classmethod
def generate_translate_errors(cls):
return tuple(
UnicodeTranslateError('0' * objlen, start, end, 'why')
for objlen, start, end in cls._generate_exception_args()
)

@classmethod
def setUpClass(cls):
cls.unicode_encode_errors = cls.generate_encode_errors()
cls.unicode_decode_errors = cls.generate_decode_errors()
cls.unicode_translate_errors = cls.generate_translate_errors()
cls.all_unicode_errors = (
cls.unicode_encode_errors
+ cls.unicode_decode_errors
+ cls.unicode_translate_errors
)
cls.bad_unicode_errors = (
ValueError(),
)

def test_codec_register_error(self):
# for cleaning up between tests
from _codecs import _unregister_error as _codecs_unregister_error
Expand Down Expand Up @@ -780,33 +823,82 @@ def test_codec_lookup_error(self):
self.assertIs(codec_lookup_error('ignore'), codecs.ignore_errors)
self.assertIs(codec_lookup_error('replace'), codecs.replace_errors)
self.assertIs(codec_lookup_error('xmlcharrefreplace'), codecs.xmlcharrefreplace_errors)
self.assertIs(codec_lookup_error('backslashreplace'), codecs.backslashreplace_errors)
self.assertIs(codec_lookup_error('namereplace'), codecs.namereplace_errors)
self.assertRaises(LookupError, codec_lookup_error, 'unknown')

def test_codec_error_handlers(self):
exceptions = [
# A UnicodeError with an empty message currently crashes:
# See: https://github.com/python/cpython/issues/123378
# UnicodeEncodeError('bad', '', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'x', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'xyz123', 0, 1, 'reason'),
UnicodeEncodeError('bad', 'xyz123', 1, 4, 'reason'),
]

strict_handler = _testcapi.codec_strict_errors
def test_codec_strict_errors_handler(self):
    """Check that ``_testcapi.codec_strict_errors`` raises what it is given.

    Every generated Unicode error and every entry of
    ``bad_unicode_errors`` is passed to the handler, which is expected to
    raise an exception of that same type.
    """
    strict = _testcapi.codec_strict_errors
    candidates = self.all_unicode_errors + self.bad_unicode_errors
    for exc in candidates:
        with self.subTest(handler=strict, exc=exc):
            with self.assertRaises(type(exc)):
                strict(exc)

def test_codec_ignore_errors_handler(self):
    """Run the shared handler checks on ``_testcapi.codec_ignore_errors``
    with every generated Unicode error as an accepted input.
    """
    self.do_test_codec_errors_handler(
        _testcapi.codec_ignore_errors,
        self.all_unicode_errors,
    )

def test_codec_replace_errors_handler(self):
    """Run the shared handler checks on ``_testcapi.codec_replace_errors``
    with every generated Unicode error as an accepted input.
    """
    self.do_test_codec_errors_handler(
        _testcapi.codec_replace_errors,
        self.all_unicode_errors,
    )

def test_codec_xmlcharrefreplace_errors_handler(self):
    """Run the shared handler checks on
    ``_testcapi.codec_xmlcharrefreplace_errors``.

    Only the generated ``UnicodeEncodeError`` instances are passed as
    accepted inputs; the shared helper then expects ``TypeError`` for the
    remaining generated errors.
    """
    self.do_test_codec_errors_handler(
        _testcapi.codec_xmlcharrefreplace_errors,
        self.unicode_encode_errors,
    )

def test_codec_backslashreplace_errors_handler(self):
    """Run the shared handler checks on
    ``_testcapi.codec_backslashreplace_errors`` with every generated
    Unicode error as an accepted input.
    """
    self.do_test_codec_errors_handler(
        _testcapi.codec_backslashreplace_errors,
        self.all_unicode_errors,
    )

def test_codec_namereplace_errors_handler(self):
    """Run the shared handler checks on
    ``_testlimitedcapi.codec_namereplace_errors``.

    Only the generated ``UnicodeEncodeError`` instances are passed as
    accepted inputs; the shared helper then expects ``TypeError`` for the
    remaining generated errors.
    """
    self.do_test_codec_errors_handler(
        _testlimitedcapi.codec_namereplace_errors,
        self.unicode_encode_errors,
    )

def do_test_codec_errors_handler(self, handler, exceptions):
at_least_one = False
for exc in exceptions:
with self.subTest(handler=strict_handler, exc=exc):
self.assertRaises(UnicodeEncodeError, strict_handler, exc)

for handler in [
_testcapi.codec_ignore_errors,
_testcapi.codec_replace_errors,
_testcapi.codec_xmlcharrefreplace_errors,
_testlimitedcapi.codec_namereplace_errors,
]:
for exc in exceptions:
with self.subTest(handler=handler, exc=exc):
self.assertIsInstance(handler(exc), tuple)
# See https://github.com/python/cpython/issues/123378 and related
# discussion and issues for details.
if self._exception_may_crash(exc):
continue

at_least_one = True
with self.subTest(handler=handler, exc=exc):
# test that the handler does not crash
self.assertIsInstance(handler(exc), tuple)

if exceptions:
self.assertTrue(at_least_one, "all exceptions are crashing")

for bad_exc in (
self.bad_unicode_errors
+ tuple(e for e in self.all_unicode_errors if e not in exceptions)
):
with self.subTest('bad type', handler=handler, exc=bad_exc):
self.assertRaises(TypeError, handler, bad_exc)

@classmethod
def _exception_may_crash(cls, exc):
"""Indicate whether a Unicode exception might currently crash
the interpreter when used by a built-in codecs error handler.
Until gh-123378 is fixed, we skip the tests for these exceptions.
This should only be used by "do_test_codec_errors_handler".
"""
message, start, end = exc.object, exc.start, exc.end
match exc:
case UnicodeEncodeError():
return end < start or (end - start) >= len(message)
case UnicodeDecodeError():
# The case "end - start >= len(message)" does not crash.
return end < start
case UnicodeTranslateError():
# Test "end <= start" because PyCodec_ReplaceErrors checks
# the Unicode kind of a 0-length string which by convention
# is PyUnicode_1BYTE_KIND and not PyUnicode_2BYTE_KIND as
# the handler currently expects.
return end <= start or (end - start) >= len(message)
return False


if __name__ == "__main__":
Expand Down

0 comments on commit 32e07fd

Please sign in to comment.