You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Scanning contents ━━━━━━━━━━━━━━━━━━╺━━━━━━━━━━━━━━━━━━━━━ 45% 40/88 0:00:01
An exception occurred while executing the pipeline _common.py:284
Traceback (most recent call last):
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/_pipelines/_c
ommon.py", line 249, in cli_exception_handler
return fn(options, plugin_manager)
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/_pipelines/oc
r.py", line 174, in _run_pipeline
pdfinfo = get_pdfinfo(
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/_pipeline.py"
, line 186, in get_pdfinfo
return PdfInfo(
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/pdfinfo/info.
py", line 1133, in __init__
self._pages = _pdf_pageinfo_concurrent(
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/pdfinfo/info.
py", line 793, in _pdf_pageinfo_concurrent
executor(
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/_concurrent.p
y", line 78, in __call__
self._execute(
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/builtin_plugi
ns/concurrency.py", line 144, in _execute
result = future.result()
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 451, in
result
return self.__get_result()
File "/usr/lib/python3.10/concurrent/futures/_base.py", line 403, in
__get_result
raise self._exception
File "/usr/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/pdfinfo/info.
py", line 742, in _pdf_pageinfo_sync
return PageInfo(pdf, pageno, infile, check_pages, detailed_analysis)
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/pdfinfo/info.
py", line 857, in __init__
self._gather_pageinfo(pdf, pageno, infile, check_pages,
detailed_analysis)
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/pdfinfo/info.
py", line 908, in _gather_pageinfo
for info in _process_content_streams(
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/pdfinfo/info.
py", line 653, in _process_content_streams
yield from _find_regular_images(container, contentsinfo)
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/pdfinfo/info.
py", line 569, in _find_regular_images
yield ImageInfo(name=draw.name, pdfimage=pdfimage,
shorthand=draw.shorthand)
File
"/home/terrapin/.local/lib/python3.10/site-packages/ocrmypdf/pdfinfo/info.
py", line 369, in __init__
pim = PdfImage(pdfimage)
File
"/home/terrapin/.local/lib/python3.10/site-packages/pikepdf/models/image.p
y", line 831, in __init__
self._jpxpil = self.as_pil_image()
File
"/home/terrapin/.local/lib/python3.10/site-packages/pikepdf/models/image.p
y", line 740, in as_pil_image
return Image.open(bio)
File "/home/terrapin/.local/lib/python3.10/site-packages/PIL/Image.py",
line 3323, in open
im = _open_core(
File "/home/terrapin/.local/lib/python3.10/site-packages/PIL/Image.py",
line 3304, in _open_core
im = factory(fp, filename)
File
"/home/terrapin/.local/lib/python3.10/site-packages/PIL/ImageFile.py",
line 137, in __init__
self._open()
File
"/home/terrapin/.local/lib/python3.10/site-packages/PIL/Jpeg2KImagePlugin.
py", line 224, in _open
header = _parse_jp2_header(self.fp)
File
"/home/terrapin/.local/lib/python3.10/site-packages/PIL/Jpeg2KImagePlugin.
py", line 185, in _parse_jp2_header
palette.getcolor(header.read_fields(">" + ("B" * npc)))
File
"/home/terrapin/.local/lib/python3.10/site-packages/PIL/ImagePalette.py",
line 144, in getcolor
raise ValueError(msg)
ValueError: cannot add non-opaque RGBA color to RGB palette
Steps to reproduce
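1. Run `ocrmypdf 1.pdf 1-ocr.pdf`
2. Observe that the run aborts with the stack trace shown above (`ValueError: cannot add non-opaque RGBA color to RGB palette`).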
In testing on two different machines, this issue does not seem to reproduce with ocrmypdf version 16.2.0. With version 16.4.2, however, I can provide 50+ documents that result in this stack trace.
Describe the bug
Steps to reproduce
Files
This PDF is in public domain
1.pdf
How did you download and install the software?
Linux package manager (apt, dnf, etc.)
OCRmyPDF version
16.4.2
Relevant log output
No response
The text was updated successfully, but these errors were encountered: