forked from python/cpython
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
gh-99612: Fix PyUnicode_DecodeUTF8Stateful() for ASCII-only data (GH-99613)
Previously *consumed was not set in this case.
- Loading branch information
1 parent
d460c8e
commit f08e52c
Showing
4 changed files
with
95 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import unittest | ||
from test.support import import_helper | ||
|
||
_testcapi = import_helper.import_module('_testcapi') | ||
|
||
|
||
class CAPITest(unittest.TestCase):
    """Checks for the UTF-8 decoding C API exposed through ``_testcapi``."""

    def test_decodeutf8(self):
        """Test PyUnicode_DecodeUTF8()

        Well-formed input must round-trip, malformed input must raise
        under the default error handler, and the 'replace' handler must
        substitute U+FFFD for the malformed bytes.
        """
        decode = _testcapi.unicode_decodeutf8

        # Samples cover characters encoded as 1, 2, 3 and 4 UTF-8 bytes.
        samples = ('abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600')
        for text in samples:
            encoded = text.encode('utf-8')
            self.assertEqual(decode(encoded), text)
            self.assertEqual(decode(encoded, 'strict'), text)

        # Malformed input, including a truncated trailing sequence, is an
        # error when no error handler is given.
        for malformed in (b'\x80', b'\xc0', b'\xff', b'a\xf0\x9f'):
            with self.assertRaises(UnicodeDecodeError):
                decode(malformed)

        # With 'replace', the incomplete sequence becomes a single U+FFFD.
        self.assertEqual(decode(b'a\xf0\x9f', 'replace'), 'a\ufffd')
        self.assertEqual(decode(b'a\xf0\x9fb', 'replace'), 'a\ufffdb')

        # An unknown error handler name is reported as LookupError.
        with self.assertRaises(LookupError):
            decode(b'a\x80', 'foo')
        # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and
        #       negative size.

    def test_decodeutf8stateful(self):
        """Test PyUnicode_DecodeUTF8Stateful()

        The wrapper returns a ``(decoded, consumed)`` pair.  Complete
        input must be fully consumed, while an incomplete trailing
        sequence is left unconsumed instead of raising.
        """
        decode = _testcapi.unicode_decodeutf8stateful

        # Samples cover characters encoded as 1, 2, 3 and 4 UTF-8 bytes;
        # every byte of a complete input is expected to be consumed.
        samples = ('abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600')
        for text in samples:
            encoded = text.encode('utf-8')
            expected = (text, len(encoded))
            self.assertEqual(decode(encoded), expected)
            self.assertEqual(decode(encoded, 'strict'), expected)

        # Bytes that are invalid outright still raise.
        for malformed in (b'\x80', b'\xc0', b'\xff'):
            with self.assertRaises(UnicodeDecodeError):
                decode(malformed)

        # A truncated sequence at the very end is not an error here: only
        # the leading 'a' is decoded and reported as consumed, whatever
        # the error handler.
        self.assertEqual(decode(b'a\xf0\x9f'), ('a', 1))
        self.assertEqual(decode(b'a\xf0\x9f', 'replace'), ('a', 1))

        # The same truncated sequence followed by more data is malformed.
        with self.assertRaises(UnicodeDecodeError):
            decode(b'a\xf0\x9fb')
        self.assertEqual(decode(b'a\xf0\x9fb', 'replace'), ('a\ufffdb', 4))

        # An unknown error handler name is reported as LookupError.
        with self.assertRaises(LookupError):
            decode(b'a\x80', 'foo')
        # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and
        #       negative size.
        # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of
        #       "consumed".
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
2 changes: 2 additions & 0 deletions
2
Misc/NEWS.d/next/C API/2022-11-20-09-52-50.gh-issue-99612.eBHksg.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Fix :c:func:`PyUnicode_DecodeUTF8Stateful` for ASCII-only data: | ||
``*consumed`` was not set. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters