Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring of ocrd_tesserocr common functionality into core #268

Merged
merged 19 commits into from
Aug 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,28 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

Added:

* many utility methods for image manipulation and coordinate handling, #268, OCR-D/ocrd_tesserocr#49
* `bbox_from_points`
* `bbox_from_xywh`
* `bbox_from_polygon`
* `coordinates_for_segment`
* `coordinates_of_segment`
* `crop_image`
* `membername`
* `image_from_polygon`
* `points_from_bbox`
* `points_from_polygon`
* `points_from_xywh`
* `polygon_from_bbox`
* `polygon_from_x0y0x1y1`
* `polygon_from_xywh`
* `polygon_mask`
* `rotate_coordinates`
* `xywh_from_bbox`
* Spec-conformant handling of AlternativeImage, OCR-D/spec#116, OCR-D/ocrd_tesserocr#33, #284

Changed:

* workspace bagger will create files with extension
Expand Down Expand Up @@ -526,9 +548,12 @@ Fixed
Initial Release

<!-- link-labels -->
[1.0.0b15]: ../../compare/v1.0.0b15...v1.0.0b14
[1.0.0b14]: ../../compare/v1.0.0b14...v1.0.0b13
[1.0.0b13]: ../../compare/v1.0.0b13...v1.0.0b12
[1.0.0b12]: ../../compare/v1.0.0b12...v1.0.0b11
[1.0.0b11]: ../../compare/v1.0.0b11...v1.0.0b10
[1.0.0b10]: ../../compare/v1.0.0b10...v1.0.0b9
Expand Down
200 changes: 198 additions & 2 deletions ocrd/ocrd/workspace.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
# import os
import io
from os import makedirs, chdir, getcwd, unlink
from os.path import join as pjoin, isdir

import cv2
from PIL import Image
import numpy as np
from atomicwrites import atomic_write
from deprecated.sphinx import deprecated

from ocrd_models import OcrdMets, OcrdExif
from ocrd_utils import getLogger, is_local_filename, abspath, pushd_popd
from ocrd_utils import (
abspath,
coordinates_of_segment,
crop_image,
getLogger,
image_from_polygon,
is_local_filename,
polygon_from_points,
xywh_from_points,
pushd_popd,
)

from .workspace_backup import WorkspaceBackupManager

log = getLogger('ocrd.workspace')


bertsky marked this conversation as resolved.
Show resolved Hide resolved
class Workspace():
"""
A workspace is a temporary directory set up for a processor. It's the
Expand Down Expand Up @@ -194,7 +206,11 @@ def resolve_image_exif(self, image_url):
self.image_cache['exif'][image_url] = OcrdExif(Image.open(image_filename))
return self.image_cache['exif'][image_url]

@deprecated(version='1.0.0', reason="Use workspace.image_from_page and workspace.image_from_segment")
def resolve_image_as_pil(self, image_url, coords=None):
    """Deprecated public entry point for resolving an image URL to a PIL image.

    Kept only for backwards compatibility; it forwards both arguments
    unchanged to ``_resolve_image_as_pil`` and returns that result.

    Args:
        image_url: URL of the image, passed through as-is.
        coords: Optional coordinates, passed through as-is.
    """
    return self._resolve_image_as_pil(image_url, coords=coords)

def _resolve_image_as_pil(self, image_url, coords=None):
"""
Resolve an image URL to a PIL image.

Expand Down Expand Up @@ -230,3 +246,183 @@ def resolve_image_as_pil(self, image_url, coords=None):
np.min(poly[:, 0]):np.max(poly[:, 0])
]
return Image.fromarray(region_cut)

def image_from_page(self, page, page_id):
    """Extract the Page image from the workspace.

    Given a PageType object, `page`, extract its PIL.Image from
    AlternativeImage if it exists. Otherwise extract the PIL.Image
    from imageFilename and crop it if a Border exists. Otherwise
    just return it.

    When cropping, respect any orientation angle annotated for
    the page (from page-level deskewing) by rotating the
    cropped image, respectively.

    If the resulting page image is larger than the bounding box of
    `page`, pass down the page's box coordinates with an offset of
    half the width/height difference.

    Args:
        page: the PAGE page object to extract the image for.
        page_id: identifier of the page, used in log messages only.

    Returns:
        A tuple ``(page_image, page_xywh, page_image_info)``: the
        extracted image, the absolute coordinates of the page's
        bounding box / border as a dict with keys ``x``, ``y``, ``w``,
        ``h`` and ``angle`` (for passing down), and an OcrdExif
        instance associated with the original image.
    """
    page_image = self._resolve_image_as_pil(page.imageFilename)
    page_image_info = OcrdExif(page_image)
    # region angle: PAGE orientation is defined clockwise,
    # whereas PIL/ndimage rotation is in mathematical direction:
    page_angle = -(page.get_orientation() or 0)
    # without a border, the page box is the full original image:
    page_xywh = {'x': 0,
                 'y': 0,
                 'w': page_image.width,
                 'h': page_image.height}
    # FIXME: remove PrintSpace here as soon as GT abides by the PAGE standard:
    border = page.get_Border() or page.get_PrintSpace()
    if border:
        page_points = border.get_Coords().points
        log.debug("Using explictly set page border '%s' for page '%s'",
                  page_points, page_id)
        page_xywh = xywh_from_points(page_points)
    # Attach the angle only now: page_xywh is replaced wholesale when a
    # border exists, so setting it any earlier would silently drop the
    # annotated orientation (and the rotation below would never run).
    page_xywh['angle'] = page_angle

    alternative_image = page.get_AlternativeImage()
    if alternative_image:
        # (e.g. from page-level cropping, binarization, deskewing or despeckling)
        # assumes implicit cropping (i.e. page_xywh has been applied already)
        log.debug("Using AlternativeImage %d (%s) for page '%s'",
                  len(alternative_image), alternative_image[-1].get_comments(),
                  page_id)
        page_image = self._resolve_image_as_pil(
            alternative_image[-1].get_filename())
    elif border:
        # get polygon outline of page border:
        page_polygon = np.array(polygon_from_points(page_points))
        # create a mask from the page polygon:
        page_image = image_from_polygon(page_image, page_polygon)
        # recrop into page rectangle:
        page_image = crop_image(page_image,
                                box=(page_xywh['x'],
                                     page_xywh['y'],
                                     page_xywh['x'] + page_xywh['w'],
                                     page_xywh['y'] + page_xywh['h']))
        if page_angle:
            log.info("About to rotate page '%s' by %.2f°",
                     page_id, page_angle)
            # expand=True enlarges the canvas to hold the rotated image;
            # the default resampling filter is kept on purpose
            # (BILINEAR was considered but left disabled).
            page_image = page_image.rotate(page_angle,
                                           expand=True,
                                           fillcolor='white')
    # subtract offset from any increase in binary region size over source:
    page_xywh['x'] -= round(0.5 * max(0, page_image.width - page_xywh['w']))
    page_xywh['y'] -= round(0.5 * max(0, page_image.height - page_xywh['h']))
    return page_image, page_xywh, page_image_info

def image_from_segment(self, segment, parent_image, parent_xywh):
    """Extract a segment image from its parent's image.

    `segment` is a PAGE segment (TextRegion / TextLine / Word / Glyph)
    logically contained in the parent whose PIL.Image is `parent_image`
    and whose absolute coordinates are `parent_xywh`.

    If the segment carries an AlternativeImage, the last one listed is
    loaded and used as-is. Otherwise the segment is cut out of
    `parent_image`: its polygon (made relative to the parent image by
    ``coordinates_of_segment``) is used as a mask, the result is cropped
    to the segment's bounding rectangle, and, if the segment has a
    non-zero orientation annotated, the crop is rotated accordingly.

    If the resulting image is larger than the segment's bounding box,
    the returned box is shifted by half the width/height difference.

    Args:
        segment: the PAGE segment object to extract.
        parent_image: PIL.Image of the parent.
        parent_xywh: absolute coordinates (dict) of the parent.

    Returns:
        A tuple ``(segment_image, segment_xywh)``: the extracted image
        and the absolute coordinates of the segment's bounding box
        (for passing down).
    """
    segment_xywh = xywh_from_points(segment.get_Coords().points)
    if 'orientation' in segment.__dict__:
        # PAGE counts orientation clockwise, PIL/ndimage rotates in
        # mathematical (counter-clockwise) direction, hence the sign flip:
        segment_xywh['angle'] = -(segment.get_orientation() or 0)

    alternatives = segment.get_AlternativeImage()
    if alternatives:
        # (e.g. from segment-level cropping, binarization, deskewing or despeckling)
        newest = alternatives[-1]
        log.debug("Using AlternativeImage %d (%s) for segment '%s'",
                  len(alternatives), newest.get_comments(),
                  segment.id)
        segment_image = self._resolve_image_as_pil(newest.get_filename())
    else:
        # polygon outline of the segment, relative to the parent image:
        outline = coordinates_of_segment(segment, parent_image, parent_xywh)
        # blank out everything outside that polygon:
        masked = image_from_polygon(parent_image, outline)
        # cut down to the segment rectangle, expressed in parent-image pixels:
        left = segment_xywh['x'] - parent_xywh['x']
        top = segment_xywh['y'] - parent_xywh['y']
        segment_image = crop_image(masked,
                                   box=(left,
                                        top,
                                        left + segment_xywh['w'],
                                        top + segment_xywh['h']))
        # NOTE: overlapping neighbouring segments are deliberately not
        # masked here. On raw (non-binary) data the clipping rules are
        # hard to get right: for each intersection one must decide whether
        # it belongs to the segment or the neighbour (an ImageRegion
        # properly containing our TextRegion should be ignored, one
        # properly contained in it should be masked, partial overlap is
        # harder). On binary data, connected component analysis could mask
        # foreground originating in neighbours, but that would require
        # binarized input or an ad-hoc binarization. A dedicated processor
        # (producing a clipped AlternativeImage or reduced polygon
        # coordinates) is therefore preferable.
        angle = segment_xywh.get('angle')
        if angle:
            log.info("About to rotate segment '%s' by %.2f°",
                     segment.id, angle)
            # default resampling filter kept (BILINEAR left disabled):
            segment_image = segment_image.rotate(angle,
                                                 expand=True,
                                                 fillcolor='white')

    # subtract offset from any increase in binary region size over source:
    extra_width = max(0, segment_image.width - segment_xywh['w'])
    extra_height = max(0, segment_image.height - segment_xywh['h'])
    segment_xywh['x'] -= round(0.5 * extra_width)
    segment_xywh['y'] -= round(0.5 * extra_height)
    return segment_image, segment_xywh

# pylint: disable=redefined-builtin
def save_image_file(self, image,
                    file_id,
                    page_id=None,
                    file_grp='OCR-D-IMG',  # or -BIN?
                    format='PNG',
                    force=True):
    """Store and reference an image as file into the workspace.

    The PIL.Image `image` is serialized in memory using `format` and
    registered through ``self.add_file`` under the fileGrp `file_grp`,
    the METS file ID `file_id` and the physical page `page_id`. The
    file name is built as ``<file_grp>/<file_id>.<format in lowercase>``
    and the MIME type as ``image/<format in lowercase>``.

    Args:
        image: PIL.Image to store.
        file_id: ID to use for the file in METS.
        page_id: physical page ID to reference (optional).
        file_grp: fileGrp to store the file under.
        format: image format name handed to ``image.save``.
        force: passed on to ``self.add_file``.

    Returns:
        The path ``<file_grp>/<file_id>.<ext>`` that was passed to
        ``add_file`` as ``local_filename``.
    """
    buffer = io.BytesIO()
    image.save(buffer, format=format)
    suffix = format.lower()
    file_path = pjoin(file_grp, file_id + '.' + suffix)
    add_file_args = dict(
        ID=file_id,
        file_grp=file_grp,
        pageId=page_id,
        local_filename=file_path,
        mimetype='image/' + suffix,
        content=buffer.getvalue(),
        force=force,
    )
    out = self.add_file(**add_file_args)
    log.info('created file ID: %s, file_grp: %s, path: %s',
             file_id, file_grp, out.local_filename)
    return file_path
4 changes: 2 additions & 2 deletions ocrd/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ bagit_profile >= 1.3.0
click >=7
requests
lxml
Pillow == 5.4.1
numpy
Pillow >= 5.3.0
opencv-python-headless
Flask
jsonschema
pyyaml
atomicwrites >= 1.3.0
Deprecated == 1.2.0
Loading