Skip to content

Commit

Permalink
ROB : fix errors/warnings on no /resources with extract_text
Browse files Browse the repository at this point in the history
fix  py-pdf#1272 (in text) and py-pdf#1269 (in Xform)
  • Loading branch information
pubpub-zz committed Aug 25, 2022
1 parent 2ddc48a commit 755023d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
5 changes: 4 additions & 1 deletion PyPDF2/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,7 +1140,10 @@ def _extract_text(
cmaps: Dict[
str, Tuple[str, float, Union[str, Dict[int, str]], Dict[str, str]]
] = {}
resources_dict = cast(DictionaryObject, obj["/Resources"])
try:
resources_dict = cast(DictionaryObject, obj["/Resources"])
except Exception:
return "" # no resources means no text is possible (no font)
if "/Font" in resources_dict:
for f in cast(DictionaryObject, resources_dict["/Font"]):
cmaps[f] = build_char_map(f, space_width, obj)
Expand Down
9 changes: 8 additions & 1 deletion tests/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,13 @@ def test_extract_text_single_quote_op():
page.extract_text()


def test_no_ressources_on_text_extract():
    """Smoke test: run extract_text() over every page of the sample PDF.

    There are no assertions; the test passes as long as no page raises.
    Judging by the test name, the sample is expected to contain content
    without a /Resources entry -- confirm against the linked issue.
    """
    url = "https://raw.githubusercontent.com/eagletrt/wiki/0f3f16309604f665a47595c890d15af1b3aec6d6/fenice-telemetry-tx/PCB%20Outputs/Pdf/Edge%20Mount%20SMA/TelemetryTX_EM.pdf"
    # Fetch the sample bytes first, then wrap them in an in-memory stream.
    pdf_bytes = get_pdf_from_url(url, name="tika-964029.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    for page in reader.pages:
        page.extract_text()


def test_iss_1142():
# check fix for problem of context save/restore (q/Q)
url = "https://github.com/py-pdf/PyPDF2/files/9150656/ST.2019.PDF"
Expand Down Expand Up @@ -280,7 +287,7 @@ def test_extract_text_page_pdf_impossible_decode_xform(caplog):
for page in reader.pages:
page.extract_text()
warn_msgs = normalize_warnings(caplog.text)
assert warn_msgs == [" impossible to decode XFormObject /Meta203"]
assert warn_msgs == [""] # text extraction recognises no text


def test_extract_text_operator_t_star(): # L1266, L1267
Expand Down

0 comments on commit 755023d

Please sign in to comment.