From 1c1c5a37ef0849a859476ae6096f7edc584b523f Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 18 Jul 2024 14:03:41 -0700 Subject: [PATCH 1/5] GH-120754: Remove unbounded reads from zipfile A read without a size may read an unbounded amount of data and allocate unbounded-size buffers. Move to capped-size reads to prevent potential issues. --- Lib/zipfile/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index e2aaf8bab4913d..085f805c34bac2 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -309,7 +309,7 @@ def _EndRecData(fpin): fpin.seek(-sizeEndCentDir, 2) except OSError: return None - data = fpin.read() + data = fpin.read(sizeEndCentDir + 1) if (len(data) == sizeEndCentDir and data[0:4] == stringEndArchive and data[-2:] == b"\000\000"): @@ -331,7 +331,7 @@ def _EndRecData(fpin): # number does not appear in the comment. maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) fpin.seek(maxCommentStart, 0) - data = fpin.read() + data = fpin.read(ZIP_MAX_COMMENT + sizeEndCentDir + 1) start = data.rfind(stringEndArchive) if start >= 0: # found the magic number; attempt to unpack and interpret From 08cdd79831858b9357ca744f0dea42c5ea7a7946 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 22 Jul 2024 11:33:45 -0700 Subject: [PATCH 2/5] Update Lib/zipfile/__init__.py Co-authored-by: Daniel Hillier --- Lib/zipfile/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 085f805c34bac2..84a1b4d2db10ed 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -329,7 +329,7 @@ def _EndRecData(fpin): # record signature. The comment is the last item in the ZIP file and may be # up to 64K long. It is assumed that the "end of central directory" magic # number does not appear in the comment. 
- maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) + maxCommentStart = max(filesize - ZIP_MAX_COMMENT - sizeEndCentDir, 0) fpin.seek(maxCommentStart, 0) data = fpin.read(ZIP_MAX_COMMENT + sizeEndCentDir + 1) start = data.rfind(stringEndArchive) From 8961b0b0a4dfdef1cf1b78fcb6575343015d1d84 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 22 Jul 2024 11:34:25 -0700 Subject: [PATCH 3/5] Update Lib/zipfile/__init__.py Co-authored-by: Daniel Hillier --- Lib/zipfile/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 84a1b4d2db10ed..3375254c5f56fe 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -331,7 +331,7 @@ def _EndRecData(fpin): # number does not appear in the comment. maxCommentStart = max(filesize - ZIP_MAX_COMMENT - sizeEndCentDir, 0) fpin.seek(maxCommentStart, 0) - data = fpin.read(ZIP_MAX_COMMENT + sizeEndCentDir + 1) + data = fpin.read(ZIP_MAX_COMMENT + sizeEndCentDir) start = data.rfind(stringEndArchive) if start >= 0: # found the magic number; attempt to unpack and interpret From 6dc8a0b188d99905315a70717bb6797d0db38e23 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Mon, 22 Jul 2024 19:16:46 -0700 Subject: [PATCH 4/5] Don't require reading an EOF The seek just before this _should_ result in this read hitting EOF here or getting fewer bytes. 
Co-authored-by: Daniel Hillier --- Lib/zipfile/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 3375254c5f56fe..08c83cfb760250 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -309,7 +309,7 @@ def _EndRecData(fpin): fpin.seek(-sizeEndCentDir, 2) except OSError: return None - data = fpin.read(sizeEndCentDir + 1) + data = fpin.read(sizeEndCentDir) if (len(data) == sizeEndCentDir and data[0:4] == stringEndArchive and data[-2:] == b"\000\000"): From 0d6151e1f67ad48f393d840fe0e269362a0d23f9 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 02:24:51 +0000 Subject: [PATCH 5/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2024-07-23-02-24-50.gh-issue-120754.nHb5mG.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2024-07-23-02-24-50.gh-issue-120754.nHb5mG.rst diff --git a/Misc/NEWS.d/next/Library/2024-07-23-02-24-50.gh-issue-120754.nHb5mG.rst b/Misc/NEWS.d/next/Library/2024-07-23-02-24-50.gh-issue-120754.nHb5mG.rst new file mode 100644 index 00000000000000..6c33e7b7ec7716 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-23-02-24-50.gh-issue-120754.nHb5mG.rst @@ -0,0 +1 @@ +Update unbounded ``read`` calls in :mod:`zipfile` to specify an explicit ``size`` putting a limit on how much data they may read. This also updates handling around ZIP max comment size to match the standard instead of reading comments that are one byte too long.