-
Notifications
You must be signed in to change notification settings - Fork 25
/
captcha_producer.py
75 lines (63 loc) · 2.58 KB
/
captcha_producer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 27 21:10:47 2016
@author: [email protected]
"""
import tensorflow as tf
import numpy as np
from captcha.image import ImageCaptcha
import re
import random
import string
import argparse
import os
from captcha_config import config
def main():
    """Generate random 4-digit captcha images, then pack them into a TFRecord file.

    Command-line options:
        -p / --path    Directory in which to store the generated images.
                       Falls back to ``config['images_path']`` when omitted.
        -n / --number  Number of images to generate (required, integer).

    Each image is saved as ``<index>_<digits>.png`` so the label can later be
    recovered from the file name by ``generate_tfrecords``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-p",
                        "--path",
                        required=False,
                        default=None,
                        help="path to store generated images")
    parser.add_argument("-n",
                        "--number",
                        required=True,
                        type=int,  # let argparse reject non-integer input
                        help="number of images generated")
    args = vars(parser.parse_args())

    # argparse always creates the "path" key (its value is None when the
    # option is omitted), so the value has to be tested, not key membership.
    path = args["path"] or config['images_path']
    number = args["number"]

    producer = ImageCaptcha(width=config['image_width'],
                            height=config['image_height'],
                            font_sizes=[40])

    if not os.path.exists(path):
        os.makedirs(path)

    print("Starting generating %d images in %s" % (number, path))
    for i in range(number):
        number_to_write = "".join(random.choice(string.digits) for _ in range(4))
        image_file = os.path.join(path, str(i) + "_" + number_to_write + ".png")
        producer.write(number_to_write, image_file)
    print("images generated!")
    print("-" * 30)
    print("Staring convert tfrecords of these generated images!")
    generate_tfrecords(path)
    print("tfrecords generated!")
def generate_tfrecords(path):
    """Serialize the captcha images found in *path* into one TFRecord file.

    Every entry of ``path`` whose file name contains ``_<digits>.`` (for
    example ``12_0473.png``) becomes one ``tf.train.Example`` with two bytes
    features:

    * ``"filename"`` -- ``path`` joined with the file name, UTF-8 encoded.
    * ``"label"``    -- the label digits as the raw bytes of an int32 array.

    Entries without such a label in their name are skipped.

    The records are written to ``tfrecords/<name>.tfrecords`` relative to the
    current working directory, where ``<name>`` is the second ``/``-separated
    component of ``path``, or its last component when there is no second one.
    The ``tfrecords`` directory is created if it does not exist.

    Args:
        path: Directory holding the generated captcha images.
    """
    label_pattern = re.compile(r"_(\d+)\.")

    def convert_filename_to_label_str(image_name):
        """Return the label of *image_name* as int32 bytes, or None if it has none."""
        # Search the base name only, so underscores or dots in the directory
        # part of the path cannot be mistaken for the label.
        match = label_pattern.search(os.path.basename(image_name))
        if match is None:
            return None
        image_label_np = np.array([int(digit) for digit in match.group(1)],
                                  dtype=np.int32)
        # tobytes() replaces the deprecated ndarray.tostring().
        return image_label_np.tobytes()

    image_names = [os.path.join(path, image_name_) for image_name_ in os.listdir(path)]

    # Keep the historical naming (second path component) where it exists, and
    # fall back to the last component instead of raising IndexError.
    components = path.split("/")
    if len(components) > 1 and components[1]:
        name = components[1]
    else:
        name = os.path.basename(os.path.normpath(path))

    output_dir = "tfrecords"
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # tf.python_io is absent from TensorFlow 2.x; prefer tf.io when it
    # provides the writer and fall back to the legacy location otherwise.
    writer_cls = (getattr(getattr(tf, "io", None), "TFRecordWriter", None)
                  or tf.python_io.TFRecordWriter)
    tfrecord_writer = writer_cls(os.path.join(output_dir, name + ".tfrecords"))
    try:
        for image_name in image_names:
            image_label_np_str = convert_filename_to_label_str(image_name)
            if image_label_np_str is None:
                continue  # not a labelled captcha image
            # BytesList only accepts bytes; on Python 3 the path is a str.
            if isinstance(image_name, bytes):
                filename_bytes = image_name
            else:
                filename_bytes = image_name.encode("utf-8")
            example = tf.train.Example(features=tf.train.Features(feature={
                "filename": tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[filename_bytes])),
                "label": tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image_label_np_str])),
            }))
            tfrecord_writer.write(example.SerializeToString())
    finally:
        # Close the writer even if a record fails, so the file handle is
        # always released.
        tfrecord_writer.close()
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()