Skip to content

Commit

Permalink
MAINT: Validate PDF magic byte in strict mode (#814)
Browse files: browse the repository at this point in the history
Closes #626
  • Loading branch information
MartinThoma authored Apr 24, 2022
1 parent 63b4c91 commit 5bc7219
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 3 deletions.
6 changes: 6 additions & 0 deletions PyPDF2/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1831,6 +1831,12 @@ def read(self, stream):
stream.seek(-1, 2)
if not stream.tell():
raise PdfReadError('Cannot read an empty file')
if self.strict:
stream.seek(0, 0)
header_byte = stream.read(5)
if header_byte != b"%PDF-":
raise PdfReadError("PDF starts with '{}', but '%PDF-' expected".format(header_byte.decode("utf8")))
stream.seek(-1, 2)
last1M = stream.tell() - 1024 * 1024 + 1 # offset of last MB of stream
line = b_('')
while line[:5] != b_("%%EOF"):
Expand Down
3 changes: 1 addition & 2 deletions Tests/test_papersizes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ def test_din_a0():
area_square_inch = area_square_pixels / 72**2

# 25.4 millimeter is equal to 1 inches
area_square_mm = area_square_inch * (25.4)**2
area_square_mm = area_square_inch * (25.4) ** 2
assert abs(area_square_mm - 999949) < 100
conversion_factor = 72 / 25.4
assert (dim.width - 841 * conversion_factor) < 1
assert (dim.width - 1189 * conversion_factor) < 1



@pytest.mark.parametrize("dimensions", papersizes._din_a)
def test_din_a_ratio(dimensions):
assert abs(dimensions.height - dimensions.width * 2**0.5) <= 2.5
Expand Down
8 changes: 7 additions & 1 deletion Tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,9 +338,15 @@ def test_read_empty():
assert exc.value.args[0] == "Cannot read an empty file"


def test_read_malformed():
def test_read_malformed_header():
    """A stream that does not begin with '%PDF-' must raise PdfReadError.

    The three bytes ``foo`` are echoed back in the error message, which is
    compared verbatim.
    """
    bogus_stream = io.BytesIO(b"foo")
    with pytest.raises(PdfReadError) as excinfo:
        PdfFileReader(bogus_stream)
    # Checked after the context manager exits; code following the raising
    # call inside the ``with`` body would never execute.
    message = excinfo.value.args[0]
    assert message == "PDF starts with 'foo', but '%PDF-' expected"


def test_read_malformed_body():
    """A stream holding only the '%PDF-' header must raise PdfReadError.

    The header itself is valid, so the failure is expected to come from the
    rest of the parsing and to carry the generic malformed-file message.
    """
    header_only_stream = io.BytesIO(b"%PDF-")
    with pytest.raises(PdfReadError) as excinfo:
        PdfFileReader(header_only_stream)
    message = excinfo.value.args[0]
    assert message == "Could not read malformed PDF file"


Expand Down

0 comments on commit 5bc7219

Please sign in to comment.