-
Notifications
You must be signed in to change notification settings - Fork 9
/
prepare.py
46 lines (39 loc) · 1.24 KB
/
prepare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import pandas as pd
import logging as log
import glob
import numpy as np
import os.path as path
from scipy import misc
def prepare_dataset(folder):
    """Build a labelled training matrix from the PNG images in *folder*.

    Every ``*.png`` file directly inside *folder* is read, flattened to a
    1-D vector and divided by 255 (which maps 8-bit pixel values into
    [0.0, 1.0]). The label of each image is the first character of its
    file name, which must be a decimal digit.

    Args:
        folder: Directory containing the ``*.png`` training images.

    Returns:
        A ``pandas.DataFrame`` with one row per image: column 0 holds the
        label and the remaining columns hold the flattened, scaled pixels.
        An empty DataFrame is returned when no PNG file is found.

    Raises:
        ValueError: If a file name does not start with a digit.
    """
    log.info("loading images from %s ...", folder)

    # glob returns files in an OS-dependent order; sort so that the row
    # order of the resulting dataset is reproducible.
    image_paths = sorted(glob.glob(path.join(folder, '*.png')))

    # NOTE: scipy.misc.imread is deprecated since SciPy 1.0 and absent from
    # newer releases; this function needs a SciPy version that still has it.
    # The loop variable is deliberately NOT called `path`: that would shadow
    # the `os.path` alias (and, on Python 2, clobber it for the code below).
    # Dividing by 255.0 forces true division regardless of Python version.
    pixel_rows = [
        misc.imread(image_path).flatten() / 255.0
        for image_path in image_paths
    ]

    log.info("vectorializing %d samples ...", len(image_paths))

    dataset = []
    for image_path, pixels in zip(image_paths, pixel_rows):
        name = path.basename(image_path)
        try:
            # the label is encoded as the leading digit of the file name
            label = int(name[0])
        except ValueError:
            raise ValueError(
                "cannot derive label from %r: file name must start with a "
                "digit" % name
            )
        # label first, then the pixels
        dataset.append(np.insert(pixels, 0, label))

    return pd.DataFrame(dataset)
def prepare_input(path, size=(20, 20)):
    """Load a single image and turn it into a one-sample input matrix.

    The image is read from *path*, resized to *size*, divided by 255 and
    flattened.

    Args:
        path: File name of the image to load. Inside this function the
            parameter shadows the module-level ``os.path`` alias of the
            same name, so ``os.path`` helpers are not reachable here.
        size: Target size passed to ``misc.imresize``. Defaults to
            ``(20, 20)``, the value that was previously hard-coded.

    Returns:
        A ``numpy.ndarray`` whose first axis has length 1 and whose single
        row is the flattened, scaled image.
    """
    # NOTE: scipy.misc.imread / scipy.misc.imresize are deprecated since
    # SciPy 1.0 and absent from newer releases; this function needs a SciPy
    # version that still provides them.
    log.debug("vectorializing %s ...", path)
    x = misc.imread(path)
    log.debug("shape : %s", x.shape)

    x = misc.imresize(x, size)
    log.debug("resized : %s", x.shape)

    # 255.0 forces true division regardless of Python version; presumably
    # the resized pixels are 8-bit, so this maps them into [0.0, 1.0].
    x = x / 255.0
    x = x.flatten()
    log.debug("flat : %s", x.shape)

    # wrap in a list so the result is a batch containing exactly one sample
    return np.asarray([x])