Skip to content

Commit

Permalink
BUG: Process CMYK images without a filter correctly (#2557)
Browse files (browse the repository at this point in the history)
Closes #2522
  • Loading branch information
pubpub-zz authored Mar 30, 2024
1 parent 42f970e commit 7883580
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
11 changes: 9 additions & 2 deletions pypdf/filters.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -817,9 +817,16 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes,
".tiff",
False,
)
elif mode == "CMYK":
img, image_format, extension, invert_color = (
Image.frombytes(mode, size, data),
"TIFF",
".tif",
False,
)
elif mode == "":
raise PdfReadError(f"ColorSpace field not found in {x_object_obj}")
else:
if mode == "":
raise PdfReadError(f"ColorSpace field not found in {x_object_obj}")
img, image_format, extension, invert_color = (
Image.frombytes(mode, size, data),
"PNG",
Expand Down
9 changes: 9 additions & 0 deletions tests/test_images.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -246,3 +246,12 @@ def test_bi_in_text():
reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
assert reader.pages[0].images.keys() == ["~0~"]
assert reader.pages[0].images[0].name == "~0~.png"


@pytest.mark.enable_socket()
def test_cmyk_no_filter():
    """
    Regression test for issue #2522.

    Fetches the sample PDF attached to the issue and reads the ``image``
    attribute of the first image on the first page. There is no assertion on
    the value: the test passes as long as that access does not raise.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/14614887/out3.pdf",
        name="iss2522.pdf",
    )
    pdf_reader = PdfReader(BytesIO(pdf_bytes))
    first_image = pdf_reader.pages[0].images[0]
    # The attribute access is the check itself; the value is deliberately unused.
    _ = first_image.image

0 comments on commit 7883580

Please sign in to comment.