-
Notifications
You must be signed in to change notification settings - Fork 3
/
mainProcessing.py
185 lines (141 loc) · 6.4 KB
/
mainProcessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import numpy as np
import cv2
import matplotlib.pyplot as plt
import argparse
import pageBordering
import lineRemoval
import circleRemoval
import connectedComponentsProcessing
import lineClustering
import wordAnalysis
def img_resize(img, scale):
    """
    Scale an image uniformly by a decimal factor.

    Both dimensions are multiplied by ``scale`` and truncated to integers, so the
    aspect ratio is kept up to that truncation. A scale equal to 1 hands back the
    very same ``img`` object without calling OpenCV.
    """
    if scale != 1:
        height, width = img.shape[0], img.shape[1]
        # The target size is passed as (width, height), i.e. shape[1] first.
        return cv2.resize(img, (int(width * scale), int(height * scale)))
    return img
def restricted_float(x):
    """
    Convert an argparsed argument x to a float and ensure it is in the range (0, 1].

    Meant to be used as an argparse ``type=`` callable.
    Credit: https://stackoverflow.com/questions/12116685/how-can-i-require-my-python-scripts-argument-to-be-a-float-between-0-0-1-0-usin

    Parameters:
        x (str): The raw argument value

    Returns:
        (float): The parsed value, satisfying 0 < value <= 1

    Raises:
        argparse.ArgumentTypeError: If x is not a floating-point literal, or the
            parsed value is outside (0, 1] (this includes NaN)
    """
    try:
        value = float(x)
    except ValueError:
        raise argparse.ArgumentTypeError("%r not a floating-point literal" % (x,))

    # Expressed as a positive range test so that NaN, which fails every
    # comparison, is rejected instead of slipping through.
    if not 0.0 < value <= 1.0:
        raise argparse.ArgumentTypeError("%r not in range (0.0, 1.0]" % (value,))
    return value
def save_intermediate_img(config, img, name, plot=False):
    """
    Write one intermediate preprocessing image to disk when enabled in config.

    Nothing happens unless config['inter'] is truthy. Otherwise the file is
    written to <inter_path>/<inter_saved><name>.png and config['inter_saved']
    is incremented by one.

    With plot=False, img is rescaled by config['inter_scale'] and written with
    cv2.imwrite. With plot=True, img is not used: the current matplotlib figure
    is saved and plt.figure is then called with the pre-increment counter value.
    """
    if not config['inter']:
        return

    index = config['inter_saved']
    path = config['inter_path'] + '/' + str(index) + name + ".png"
    if plot:
        plt.savefig(path)
        plt.figure(index)
    else:
        cv2.imwrite(path, img_resize(img, scale=config['inter_scale']))
    config['inter_saved'] = index + 1
def get_canny(img, c=(40, 50), apertureSize=3):
    """
    Run OpenCV's Canny edge detector on img.

    c supplies the two threshold values handed to cv2.Canny, in order, and
    apertureSize is forwarded unchanged.
    """
    first_threshold = c[0]
    second_threshold = c[1]
    return cv2.Canny(img, first_threshold, second_threshold, apertureSize=apertureSize)
class ProcessedPage:
    """
    A class to handle the preprocessing pipeline and final output.

    Constructing an instance runs the whole pipeline: the image is loaded,
    cleaned, edge-detected and split into lines and words.

    Attributes
    ----------
    config : dict
        A dictionary describing a data loading/saving configuration.
    img : np.ndarray
        The original image loaded from file.
    cleaned : np.ndarray
        A cleaned image of img.
    canny : np.ndarray
        A canny edges image of cleaned.
    lines : list[Line]
        A list of Line objects containing information about each line.

    Methods
    -------
    preprocess_page():
        Preprocess the image into a cleaned image and a canny image.
    clean_page():
        Cleans the image by bordering it and removing any page holes/lines.
    get_words():
        Creates lines and word classes and images.
    """

    # Number of times line removal is applied to the page
    # (repeating for multiple iterations gives better results).
    LINE_REMOVAL_PASSES = 3

    def __init__(self, config):
        """
        Load config['image'] and run the full preprocessing pipeline on it.

        Raises:
            IOError: If the image could not be read.
            Exception: If the page could not be bordered (see clean_page).
        """
        self.config = config
        self.img = cv2.imread(config['image'])
        if self.img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # rather than raising, so fail here with a clear message.
            raise IOError("The image at %r could not be read." % (config['image'],))
        self.config['save_inter_func'](self.config, self.img, "img")

        self.cleaned, self.canny = self.preprocess_page()
        self.get_words()

    def preprocess_page(self):
        """
        Preprocess the image into a cleaned image and a canny image.

        Returns:
            (tuple): The cleaned image and the canny edges image computed from it
        """
        # Clean the image
        cleaned = self.clean_page()

        # Convert to grayscale
        gray = cv2.cvtColor(cleaned, cv2.COLOR_BGR2GRAY)
        self.config['save_inter_func'](self.config, gray, "gray")

        # Blur the gray-scale image
        blurred = cv2.medianBlur(gray, 5)
        self.config['save_inter_func'](self.config, blurred, "blurred")

        # Perform canny edge detection
        canny = get_canny(blurred)
        self.config['save_inter_func'](self.config, canny, "canny")

        return cleaned, canny

    def clean_page(self):
        """
        Cleans the image by bordering it and removing any page holes/lines.

        Returns:
            The image after bordering, hole removal and line removal

        Raises:
            Exception: If pageBordering.page_border reports an error.
        """
        # Border the image to page (on a copy, so self.img is passed on untouched)
        error, bordered = pageBordering.page_border(self.img.copy())
        if error:
            raise Exception("The image provided could not be bordered.")
        self.config['save_inter_func'](self.config, bordered, "bordered")

        # Removes page holes
        holes_removed = circleRemoval.page_hole_removal(bordered)
        self.config['save_inter_func'](self.config, holes_removed, "holes_removed")

        # Remove lines on lined paper (repeating for multiple iterations gives better results)
        lines_removed = holes_removed
        for _ in range(self.LINE_REMOVAL_PASSES):
            # The second returned value is not needed here.
            lines_removed, _ = lineRemoval.lines_removal(lines_removed)
        self.config['save_inter_func'](self.config, lines_removed, "lines_removed")

        return lines_removed

    def get_words(self):
        """
        Creates the lines and their words, and optionally saves the word images.

        The result is stored in self.lines; nothing is returned. Word images
        are written only when config['words_path'] is not None.
        """
        components = connectedComponentsProcessing.connected_components(self.canny, self.config)
        line_components = lineClustering.line_clustering(components, self.config)
        self.lines = wordAnalysis.get_words_in_line(
            self.cleaned, components, line_components, self.config)

        # To save all word images, iterate lines, words in a line, and images corresponding to a word
        if self.config['words_path'] is not None:
            for i, line in enumerate(self.lines):
                for j, word in enumerate(line.words):
                    for k, img in enumerate(word.images):
                        cv2.imwrite(self.config['words_path'] + "/word{}_{}-{}.jpg".format(i, j, k), img)
def preprocess(image_path, words_path, intermediate_path=None, scale=1):
    """
    Preprocess an image and return the resulting ProcessedPage.

    Parameters:
        image_path (str): Path to the image to preprocess
        words_path (str or None): Directory to save word images in; None skips saving them
        intermediate_path (str), optional: Directory to save intermediate preprocessing
            images in; None disables saving them
        scale (float), optional: Scale applied to the saved intermediate images

    Returns:
        (ProcessedPage): The processed page; its lines attribute holds the detected lines
    """
    config = {
        'image': image_path,
        'words_path': words_path,
        'inter_path': intermediate_path,
        # Running counter used to number the intermediate image files
        'inter_saved': 0,
        'inter': intermediate_path is not None,
        'inter_scale': scale,
        'save_inter_func': save_intermediate_img,
    }
    return ProcessedPage(config)
if __name__ == "__main__":
    # Describe the command-line interface
    parser = argparse.ArgumentParser()
    parser.add_argument('image', type=str, help='Load image from path')
    parser.add_argument(
        '-w', '--words',
        type=str,
        default=None,
        help='Save path for word images',
    )
    parser.add_argument(
        '-i', '--intermediate',
        type=str,
        default=None,
        help='Save path for intermediate preprocessing images',
    )
    parser.add_argument(
        '-s', '--scale',
        type=restricted_float,
        default=1,
        help='Scale of saved intermediate preprocessing images',
    )
    args = parser.parse_args()

    # Run the pipeline on the requested image
    processed = preprocess(
        args.image,
        args.words,
        args.intermediate,
        scale=args.scale,
    )