Skip to content
This repository has been archived by the owner on Oct 21, 2024. It is now read-only.

Convert saved drawing images/files (png/jpg) to same numpy (npy) bitmaps for prediction #34

Open
shubhank008 opened this issue Jun 7, 2019 · 2 comments

Comments

@shubhank008
Copy link

I have been scratching my head for over 5 days now, trying various models and code repos, and still have not been able to make it work. The model trains well and evaluates well, but I am failing at actual predictions.
Instead of models based on drawing strokes, I have been playing with models that use actual drawing images to predict (like an image classifier), and most of these models use the numpy bitmaps dataset (npy files).

Everything is well and good except the part that feeds the model a drawing from a saved image file (most of these articles and code repos feed it via canvas, JS, or Android). I tried to replicate their prediction code (mainly the image processing) as closely as I can in Python, but the predictions are still way, way wrong.

Here is my image processing and prediction code:

from PIL import Image
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from random import randint
from scipy.misc.pilutil import imsave, imread, imresize
%matplotlib inline  

# `walk` is used below but is not imported anywhere in this snippet.
from os import walk

# Fetch one sample drawing and save it as a PNG so the file-based prediction
# path further down can be exercised.
# NOTE(review): `qd` is not defined in this snippet; presumably a QuickDraw
# data handle created in an earlier notebook cell -- confirm.
clock = qd.get_drawing("circle")
apple = clock
apple.image.save("apple.png")


# Directory holding one data file per class; the position of a file name in
# txt_name_list is used later to turn a predicted index back into a name.
mypath = "data/"
txt_name_list = []
for (dirpath, dirnames, filenames) in walk(mypath):
    # `filenames` is a list, so the former `filenames != '.DS_Store'` test
    # was always true and filtered nothing; skip the entry per file name.
    txt_name_list.extend(name for name in filenames if name != '.DS_Store')
    # Only the top level of `mypath` is listed.
    break
    
    

def adjust_gamma(image, gamma=1.5):
    """Apply gamma correction to ``image`` through a 256-entry lookup table.

    Every level ``i`` in 0..255 is mapped to
    ``((i / 255) ** (1 / gamma)) * 255`` and truncated to ``uint8``; the
    table is then applied with ``cv.LUT``.

    Args:
        image: image array handed unchanged to ``cv.LUT``.
        gamma: gamma value; the exponent used is its reciprocal.

    Returns:
        The result of ``cv.LUT(image, table)``.
    """
    exponent = 1.0 / gamma
    levels = np.arange(0, 256)
    lookup = np.array(
        [((level / 255.0) ** exponent) * 255 for level in levels]
    )
    return cv.LUT(image, lookup.astype("uint8"))


def preprocess(img):
    """Turn a sketch image into an inverted, adaptively thresholded mask.

    Pipeline: bilateral filter -> one erosion pass -> gamma correction
    (gamma 1.1) -> Gaussian adaptive threshold with ``THRESH_BINARY_INV``.
    Intended for sketches rather than canvas drawings.

    Args:
        img: input image; the caller in this script passes the result of
            ``imread(..., mode='L')``.

    Returns:
        The thresholded image produced by ``cv.adaptiveThreshold``.
    """
    smoothed = cv.bilateralFilter(img, 9, 75, 75)
    eroded = cv.erode(smoothed, None, iterations=1)
    corrected = adjust_gamma(eroded, 1.1)

    # Block size 11, constant 2; output value 255 where the inverted
    # threshold test passes.
    return cv.adaptiveThreshold(
        corrected,
        255,
        cv.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv.THRESH_BINARY_INV,
        11,
        2,
    )
  
  
  
print("debug")

# Load the saved drawing as a single-channel image and binarise it.
x = imread('apple.png', mode='L')
x = preprocess(x)

# The training script in this thread (lenet) feeds the network 28x28 bitmaps
# that are zero-padded by 2 px on every side to 32x32. Stretching the drawing
# straight to 32x32, as was done before, gives the model a differently scaled
# image with no border, so resize to 28x28 here and pad afterwards.
x = imresize(x, (28, 28))

x = x.astype('float32')
# The training pipeline divides the pixels by 255 twice (once in readData and
# once more in lenet), so the trained model expects values in [0, 1/255].
# Mirror that here so the inputs are on the same scale.
# NOTE(review): if the double scaling is ever removed from training, drop one
# of these two divisions as well.
x /= 255
x /= 255

# Same zero padding as training, then add batch and channel axes.
x = np.pad(x, ((2, 2), (2, 2)), 'constant')
x = x.reshape(1, 32, 32, 1)

print(txt_name_list)

# NOTE(review): `model` is not defined in this snippet; presumably the model
# saved by the training script, loaded in an earlier cell -- confirm.
out = model.predict(x)
predicted = np.argmax(out, axis=1)
print(predicted)
index = predicted[0]

# Class names come from the file listing, so it must be in the same order as
# the listing used when the model was trained.
print(txt_name_list[index])

plt.imshow(x.squeeze())

There is quite a difference between how the image looks in the numpy dataset and how it comes out after I process it.

chrome_2019-06-06_10-01-14

chrome_2019-06-06_10-01-27

Here is my full model:

from __future__ import print_function
import  numpy  as  np
import matplotlib.pyplot as plt
from  sklearn.model_selection  import train_test_split
from os import walk, getcwd
import h5py
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
import cv2 as cv
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, BatchNormalization, AveragePooling2D
from keras import backend as K
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from keras.optimizers import SGD
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping,ModelCheckpoint
from sklearn.metrics import confusion_matrix

#For Multi GPU
from keras.utils import multi_gpu_model
from keras import metrics

# Directory scanned for class data files, and the list readData() extends
# with the file names found there.
mypath = "data/"
txt_name_list = []

# Training settings handed to model.fit().
batch_size = 128
epochs = 40

# Height and width the samples are reshaped to before padding.
img_rows = img_cols = 28

# Upper bound on the number of samples kept from each class file.
#slice_train = 30500
slice_train = 10000

def top_3_acc(y_true, y_pred):
    """Top-3 categorical accuracy, usable as a Keras metric.

    Thin wrapper that fixes ``k=3`` on
    ``metrics.top_k_categorical_accuracy``.
    """
    top_k = 3
    return metrics.top_k_categorical_accuracy(y_true, y_pred, k=top_k)

def readData():
    """Load every class file under ``mypath`` and split into train/val/test.

    Each file found directly inside ``mypath`` is loaded with ``np.load`` and
    treated as one class whose label is the file's position in
    ``txt_name_list``. At most ``slice_train`` samples are kept per class and
    the values are scaled by 1/255.

    Side effect: the discovered file names are appended to the module-level
    ``txt_name_list``.

    Returns:
        ``(x_train, x_val, x_test, y_train, y_val, y_test, classescount)``.
        30% of all samples form the test set and 20% of the remainder the
        validation set; both splits use fixed random seeds.

    Raises:
        ValueError: if no class files are found under ``mypath``.
    """
    for (dirpath, dirnames, filenames) in walk(mypath):
        # `filenames` is a list, so the former `filenames != '.DS_Store'`
        # test was always true and filtered nothing; skip it per file name.
        txt_name_list.extend(
            name for name in filenames if name != '.DS_Store')
        # Only the top level of `mypath` is scanned.
        break

    samples = []
    labels = []
    for i, txt_name in enumerate(txt_name_list):
        x = np.load(mypath + txt_name)
        print(txt_name)
        print(i)
        # Truncate first so only the kept samples are converted and scaled.
        x = x[:slice_train].astype('float32') / 255.  ##scale images
        samples.append(x)
        labels.append(np.full(len(x), i))

    classescount = len(samples)
    if not samples:
        raise ValueError("no class files found under " + repr(mypath))

    # Concatenate once instead of re-copying the growing array per class.
    # Newest class first, matching the order the previous prepend-style
    # accumulation produced, so the seeded splits below stay the same.
    xtotal = np.concatenate(samples[::-1], axis=0)
    ytotal = np.concatenate(labels[::-1], axis=0)

    print(classescount)
    print("xshape = ", xtotal.shape)
    print("yshape = ", ytotal.shape)
    x_train, x_test, y_train, y_test = train_test_split(xtotal, ytotal, test_size=0.3, random_state=42)
    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=1)

    return x_train, x_val, x_test, y_train, y_val, y_test, classescount


def lenet(x_train, x_val, x_test, y_train, y_val, y_test, num_classes):
    """Train, evaluate, save and report on a small LeNet-style CNN.

    The samples are reshaped to ``img_rows`` x ``img_cols`` single-channel
    images, divided by 255, and zero-padded by 2 pixels on each spatial side
    before being fed to the network.

    Args:
        x_train, x_val, x_test: sample arrays reshapeable to
            ``(n, img_rows, img_cols)``.
        y_train, y_val, y_test: integer class labels, one-hot encoded here
            with ``keras.utils.to_categorical``.
        num_classes: number of classes (width of the softmax layer).

    Side effects:
        Uses a ``ModelCheckpoint`` callback writing to
        ``saved/weightslenet.{epoch:02d}.h5``, saves the final model to
        ``cnnOld2.h5``, reloads it, and prints shapes, test loss/accuracy,
        the confusion matrix, the accuracy and the classification report.

    Returns:
        None.
    """
    pad = 2
    # Pick the layout-dependent shapes once. Previously the padding and the
    # model input shape were hard-coded for channels-last, so the
    # channels-first branch padded the wrong axes and could not match the
    # model input.
    if K.image_data_format() == 'channels_first':
        sample_shape = (1, img_rows, img_cols)
        pad_width = ((0, 0), (0, 0), (pad, pad), (pad, pad))
        input_shape = (1, img_rows + 2 * pad, img_cols + 2 * pad)
    else:
        sample_shape = (img_rows, img_cols, 1)
        pad_width = ((0, 0), (pad, pad), (pad, pad), (0, 0))
        input_shape = (img_rows + 2 * pad, img_cols + 2 * pad, 1)

    def _prepare(x):
        # Reshape to image tensors, scale, then zero-pad the spatial axes.
        x = x.reshape((x.shape[0],) + sample_shape).astype('float32')
        # NOTE(review): readData() has already divided by 255, so data coming
        # from main() is scaled twice. It is kept as-is because a model
        # trained this way expects inputs on that scale; anything fed to the
        # trained model must be scaled identically.
        x /= 255
        return np.pad(x, pad_width, 'constant')

    x_train = _prepare(x_train)
    x_val = _prepare(x_val)
    x_test = _prepare(x_test)

    # convert class vectors to one-hot matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    y_val = keras.utils.to_categorical(y_val, num_classes)

    print('x_train shape:', x_train.shape)
    print(x_train.shape[0], 'train samples')
    print(x_val.shape[0], 'validation samples')
    print(x_test.shape[0], 'test samples')

    print(y_train.shape)

    print(input_shape)

    model = Sequential()

    model.add(Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(AveragePooling2D())

    model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
    model.add(AveragePooling2D())

    model.add(Flatten())

    model.add(Dense(units=120, activation='relu'))

    model.add(Dense(units=84, activation='relu'))

    model.add(Dense(units=num_classes, activation='softmax'))

    filepath = "saved/weightslenet.{epoch:02d}.h5"
    ES = EarlyStopping(patience=5)
    check = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=False, mode='max')

    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy', top_3_acc])

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1,
              validation_data=(x_val, y_val), callbacks=[ES, check])

    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    model.save('cnnOld2.h5')
    print("Saved model to disk")
    print(y_test)

    # Reload from disk so the report reflects the saved model; the custom
    # metric has to be supplied again for loading to succeed.
    loaded_model = keras.models.load_model('cnnOld2.h5', custom_objects={"top_3_acc": top_3_acc})
    print("test")

    y_pred = loaded_model.predict(x_test)
    print(y_pred)

    # The predicted class is the argmax of each probability row. The earlier
    # in-place rewrite of y_pred into one-hot rows before taking argmax again
    # produced exactly these indices, so it is dropped.
    predicted = np.argmax(y_pred, axis=1)
    actual = y_test.argmax(axis=1)

    cm = confusion_matrix(actual, predicted)
    acc = accuracy_score(actual, predicted, normalize=True, sample_weight=None)
    cr = classification_report(actual, predicted)
    print(cm)
    print(acc)
    print(cr)

def main():
    """Load the dataset and train/evaluate the LeNet model on it."""
    # readData() returns its seven values in exactly the positional order
    # that lenet() takes them.
    dataset = readData()
    lenet(*dataset)


if __name__ == '__main__':
    main()
@shubhank008
Copy link
Author

Another, different approach I tried has the same problem when running predictions on saved images. This model also works on Android, but I cannot seem to process an image file the same way to get a prediction.

https://colab.research.google.com/github/zaidalyafeai/zaidalyafeai.github.io/blob/master/sketcher/Sketcher.ipynb

@rachitanayak
Copy link

Hi, were you able to do the conversion to .npy format?

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants