Skip to content

Commit

Permalink
Merge pull request #3959 from amitdo/amitdo-pdf-Ignore-non-text-blocks
Browse files (browse the repository at this point in the history)
pdfrenderer.cpp: Ignore non-text blocks
  • Loading branch information
stweil authored Nov 10, 2022
2 parents c01ddc0 + c196456 commit fd83f3d
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/api/pdfrenderer.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <allheaders.h>
#include <tesseract/baseapi.h>
#include <tesseract/publictypes.h> // for PTIsTextType()
#include <tesseract/renderer.h>
#include <cmath>
#include <cstring>
Expand Down Expand Up @@ -354,6 +355,12 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
const std::unique_ptr</*non-const*/ ResultIterator> res_it(api->GetIterator());
while (!res_it->Empty(RIL_BLOCK)) {
if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
auto block_type = res_it->BlockType();
if (!PTIsTextType(block_type)) {
// ignore non-text blocks
res_it->Next(RIL_BLOCK);
continue;
}
pdf_str << "BT\n3 Tr"; // Begin text object, use invisible ink
old_fontsize = 0; // Every block will declare its fontsize
new_block = true; // Every block will declare its affine matrix
Expand Down

0 comments on commit fd83f3d

Please sign in to comment.