diff --git a/recognition/45838464-improvedUNET-ISIC/README.MD b/recognition/45838464-improvedUNET-ISIC/README.MD
new file mode 100644
index 0000000000..01c9f23194
--- /dev/null
+++ b/recognition/45838464-improvedUNET-ISIC/README.MD
@@ -0,0 +1,41 @@
+# Skin Mole Segmentation on the ISIC 2017 Data Set Using the Improved UNet
+
+## Author
+Name: Michael Smith
+Student Number: 45838464
+
+## Problem Description
+Image segmentation is important in modern medicine because it allows patients to gain important information without the immediate presence of a doctor. This is especially beneficial for people who don't have reliable access to a hospital or GP. Image segmentation is the process of separating an image into its constituent classes; in this example the two classes are the skin and the mole. The goal is to segment a test data set of lesions and compare the results to the manually segmented images. The degree of similarity is measured by the Dice coefficient (described at https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient), and this model should aim to achieve a Dice coefficient higher than 0.8.
+
+## Why Improved UNet and How it Works
+The improved UNet model is an autoencoder network that provides boosts in performance over the original UNet through changes to the "... architecture of the context pathway, normalization schemes, number of feature maps throughout the network, nonlinearity and the structure of the upsampling pathway" (Isensee et al., 2018). Both networks are made up of encoder and decoder submodules: the input image is encoded down to a low-dimensional latent space, then decoded back up to a segmentation map. The detailed architecture is shown in the figure below.
+
+![image](./report-images/improved-unet-diagram.PNG)
+
+Each block can be built from layers provided in the tensorflow.keras.layers library. The details for the blocks (context, localisation, segmentation etc.) are in modules.py. These submodules are then combined into the full ImprovedUNet model at the end of modules.py.
+
+An important difference between this model and the one in the paper is that this problem uses 2D data, not 3D. The input is a 128x128x3 array (3 for the R, G, B channels) and the output is 128x128x2 (one channel per class).
+
+## Data Preprocessing
+The ISIC data needs to be preprocessed for optimal performance of the model. First, the "superpixel" files were removed using shell commands. The image files were transformed into tensors using the io and image libraries from TensorFlow. The data preprocessing functions are located in dataset.py. Fortunately, the ISIC 2017 data has already been separated into training, validation and testing splits, so these can be used directly. The training size is 2000, the validation size is 150 and the testing size is 600. Note that once the data has been preprocessed it helps to save the resulting arrays using np.save(); they can later be reloaded with np.load(), which avoids repeating the preprocessing.
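+
+A minimal sketch of this caching step (the .npy file names are arbitrary choices, and the glob paths assume the data layout used in this repository):
+
+```python
+import numpy as np
+from dataset import preprocess_data, preprocess_masks
+
+# Preprocess once, then cache the arrays to disk.
+x_train = preprocess_data("./data/ISIC-2017_Training_Data/*.jpg")
+y_train = preprocess_masks("./data/ISIC-2017_Training_Part1_GroundTruth/*.png")
+np.save("x_train.npy", x_train)
+np.save("y_train.npy", y_train)
+
+# On later runs, load the cached arrays instead of preprocessing again.
+x_train = np.load("x_train.npy")
+y_train = np.load("y_train.npy")
+```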
+
+## Training Results
+The model was trained with a batch size of 32 for 30 epochs. The dice similarity is shown to be sufficiently high.
+
+![image](./report-images/dice_accuracy.png)
+
+![image](./report-images/loss.png)
+
+## Dependencies
+- python 3.9.13
+- tensorflow 2.9.1
+- matplotlib 3.5.2
+- numpy 1.23.3
+
+To use the GPU when training:
+
+- tensorflow-gpu 2.6.0
+
+## References
+Isensee, F., Kickingereder, P., Wick, W., Bendszus, M., & Maier-Hein, K. H. (2018, February 28). Brain tumor segmentation and radiomics survival prediction: Contribution to the BRATS 2017 challenge. arXiv. Retrieved October 19, 2022, from https://arxiv.org/abs/1802.10508v1
\ No newline at end of file
diff --git a/recognition/45838464-improvedUNET-ISIC/dataset.py b/recognition/45838464-improvedUNET-ISIC/dataset.py
new file mode 100644
index 0000000000..3e3eec8f22
--- /dev/null
+++ b/recognition/45838464-improvedUNET-ISIC/dataset.py
@@ -0,0 +1,47 @@
+import tensorflow as tf
+from tensorflow.keras.utils import to_categorical
+import numpy as np
+import glob
+
+def preprocess_data(path):
+    """
+    Returns an array of the image data found at the given glob path,
+    resized to 128x128 and normalized to [0, 1].
+    """
+
+    images = []
+    image_locations = sorted(glob.glob(path))
+
+    for file in image_locations:
+
+        # load image
+        image = tf.io.read_file(file)
+        image = tf.io.decode_jpeg(image, channels=3)
+
+        # resize and normalize
+        image = tf.image.resize_with_pad(image, 128, 128)
+        image = image / 255.0
+        images.append(image)
+
+    images = np.array(images)
+    return images
+
+def preprocess_masks(path):
+    """
+    Returns a one-hot encoded array of the segmentation masks found at
+    the given glob path, resized to 128x128.
+    """
+
+    masks = []
+    mask_locations = sorted(glob.glob(path))
+
+    for file in mask_locations:
+
+        # load mask
+        mask = tf.io.read_file(file)
+        mask = tf.io.decode_png(mask, channels=1)
+
+        # resize and normalize
+        mask = tf.image.resize_with_pad(mask, 128, 128)
+        mask = mask / 255.0
+        masks.append(mask)
+
+    # convert to one-hot encoding
+    masks = np.array(masks)
+    masks = to_categorical(masks)
+    return masks
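+
+# A minimal usage sketch (illustrative only): the glob patterns below assume
+# the ISIC 2017 data layout used elsewhere in this repository.
+if __name__ == "__main__":
+    x = preprocess_data("./data/ISIC-2017_Training_Data/*.jpg")
+    y = preprocess_masks("./data/ISIC-2017_Training_Part1_GroundTruth/*.png")
+    print(x.shape)  # expected: (num_images, 128, 128, 3)
+    print(y.shape)  # expected: (num_images, 128, 128, 2) after one-hot encoding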
diff --git a/recognition/45838464-improvedUNET-ISIC/modules.py b/recognition/45838464-improvedUNET-ISIC/modules.py
new file mode 100644
index 0000000000..4895966bf2
--- /dev/null
+++ b/recognition/45838464-improvedUNET-ISIC/modules.py
@@ -0,0 +1,109 @@
+import tensorflow as tf
+from tensorflow.keras.layers import Conv2D, LeakyReLU, Dropout, UpSampling2D, Input, concatenate, Add
+from tensorflow.keras import Model
+import tensorflow_addons as tfa
+
+IMAGE_SIZE = 128
+CHANNELS = 3
+KERNEL_SIZE = 3
+FIRST_DEPTH = 16
+DROPOUT = 0.3
+activation_func = LeakyReLU(alpha=0.01)
+
+def context_module(input, depth):
+    """
+    From "Brain Tumor Segmentation and Radiomics Survival Prediction:
+    Contribution to the BRATS 2017 Challenge" -> "Each context module is in fact
+    a pre-activation residual block [13] with two 3x3x3 convolutional layers and
+    a dropout layer (pdrop = 0.3) in between."
+    """
+    block = tfa.layers.InstanceNormalization()(input)
+    block = Conv2D(depth, KERNEL_SIZE, padding="same", activation=activation_func)(block)
+    block = Dropout(DROPOUT)(block)
+    block = tfa.layers.InstanceNormalization()(block)
+    block = Conv2D(depth, KERNEL_SIZE, padding='same', activation=activation_func)(block)
+    return block
+
+def localization_module(input, depth):
+    """
+    "A localization module consists of a 3x3x3 convolution followed by a 1x1x1
+    convolution that halves the number of feature maps."
+    """
+    block = Conv2D(depth, KERNEL_SIZE, padding='same', activation=activation_func)(input)
+    block = Conv2D(depth, (1, 1), padding='same', activation=activation_func)(block)
+    return block
+
+def encoding_layer(input, depth, stride):
+    """
+    Building block for the encoder network as described in the paper.
+    """
+    conv = Conv2D(depth, KERNEL_SIZE, padding='same', activation=activation_func, strides=stride)(input)
+    contxt = context_module(conv, depth)
+    add = Add()([conv, contxt])
+
+    return add
+
+def decoding_layer(input, add, depth):
+    """
+    Decoding building block as described in the paper:
+    "... which is done by means of a simple upscale that repeats the feature
+    voxels twice in each spatial dimension, followed by a 3x3x3 convolution that
+    halves the number of feature maps"
+    """
+    block = UpSampling2D(size=(2, 2))(input)
+    block = Conv2D(depth, KERNEL_SIZE, activation=activation_func, padding='same')(block)
+
+    conc = concatenate([block, add])
+    loc = localization_module(conc, depth)
+
+    return loc
+
+def Improved_UNet():
+    """
+    Improved UNet architecture built from the blocks defined above.
+    """
+
+    input_layer = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, CHANNELS))
+
+    ### Encoder ###
+    sum1 = encoding_layer(input_layer, FIRST_DEPTH*2, stride=(1, 1))
+    sum2 = encoding_layer(sum1, FIRST_DEPTH*2, stride=(2, 2))
+    sum3 = encoding_layer(sum2, FIRST_DEPTH*(2**2), stride=(2, 2))
+    sum4 = encoding_layer(sum3, FIRST_DEPTH*(2**3), stride=(2, 2))
+    sum5 = encoding_layer(sum4, FIRST_DEPTH*(2**4), stride=(2, 2))
+
+    ### Decoder ###
+    loc1 = decoding_layer(sum5, sum4, FIRST_DEPTH*(2**3))
+
+    loc2 = decoding_layer(loc1, sum3, FIRST_DEPTH*(2**2))
+
+    # segmentation layers are upsampled and summed element-wise (deep supervision)
+    seg1 = Conv2D(3, (1, 1), padding='same')(loc2)
+    seg1 = UpSampling2D((2, 2))(seg1)
+
+    loc3 = decoding_layer(loc2, sum2, FIRST_DEPTH*2)
+
+    seg2 = Conv2D(3, (1, 1), padding='same')(loc3)
+    seg2 = Add()([seg1, seg2])
+    seg2 = UpSampling2D((2, 2))(seg2)
+
+    lastup = UpSampling2D((2, 2))(loc3)
+    lastup = Conv2D(FIRST_DEPTH, KERNEL_SIZE, padding='same', activation=activation_func)(lastup)
+    lastconc = concatenate([lastup, sum1])
+
+    lastconv = Conv2D(FIRST_DEPTH*2, KERNEL_SIZE, strides=(1, 1), padding='same')(lastconc)
+    seg3 = Conv2D(3, (1, 1), padding='same')(lastconv)
+
+    final_seg = Add()([seg2, seg3])
+
+    # softmax over the two classes (one-hot encoded output)
+    output_layer = Conv2D(2, (1, 1), activation='softmax')(final_seg)
+    model = Model(name="Improved-uNET", inputs=input_layer, outputs=output_layer)
+
+    return model
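+
+# A minimal sketch of building the model (uses this file's defaults of
+# 128x128 RGB inputs); model.summary() prints the resulting architecture.
+if __name__ == "__main__":
+    model = Improved_UNet()
+    model.summary()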
diff --git a/recognition/45838464-improvedUNET-ISIC/notebook.ipynb b/recognition/45838464-improvedUNET-ISIC/notebook.ipynb
new file mode 100644
index 0000000000..023b87880e
--- /dev/null
+++ b/recognition/45838464-improvedUNET-ISIC/notebook.ipynb
@@ -0,0 +1,119 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "from tensorflow.keras.utils import to_categorical\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import glob\n",
+    "from dataset import *\n",
+    "\n",
+    "data_location = \"./data/ISIC-2017_Training_Data/*.jpg\"\n",
+    "mask_location = \"./data/ISIC-2017_Training_Part1_GroundTruth/*.png\"\n",
+    "\n",
+    "x = preprocess_data(data_location)\n",
+    "y = preprocess_masks(mask_location)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "type(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "type(x[0])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.7.12 ('tf-isic')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.13"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "63b7f0ca92ed5fdf48e8242447823e1637e55d3cf1bc39c2261b2158c4676ed4"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/recognition/45838464-improvedUNET-ISIC/predict.py b/recognition/45838464-improvedUNET-ISIC/predict.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/recognition/45838464-improvedUNET-ISIC/report-images/dice_accuracy.png b/recognition/45838464-improvedUNET-ISIC/report-images/dice_accuracy.png
new file mode 100644
index 0000000000..cfac78c04f
Binary files /dev/null and b/recognition/45838464-improvedUNET-ISIC/report-images/dice_accuracy.png differ
diff --git a/recognition/45838464-improvedUNET-ISIC/report-images/improved-unet-diagram.PNG b/recognition/45838464-improvedUNET-ISIC/report-images/improved-unet-diagram.PNG
new file mode 100644
index 0000000000..f1f2789d70
Binary files /dev/null and b/recognition/45838464-improvedUNET-ISIC/report-images/improved-unet-diagram.PNG differ
diff --git a/recognition/45838464-improvedUNET-ISIC/report-images/loss.png b/recognition/45838464-improvedUNET-ISIC/report-images/loss.png
new file mode 100644
index 0000000000..4a9e49a992
Binary files /dev/null and b/recognition/45838464-improvedUNET-ISIC/report-images/loss.png differ
diff --git a/recognition/45838464-improvedUNET-ISIC/train.py b/recognition/45838464-improvedUNET-ISIC/train.py
new file mode 100644
index 0000000000..c15f7b7765
--- /dev/null
+++ b/recognition/45838464-improvedUNET-ISIC/train.py
@@ -0,0 +1,45 @@
+from dataset import preprocess_data, preprocess_masks
+from modules import Improved_UNet
+from utils import dice_coefficient, dice_coefficient_loss, plot_metrics
+from tensorflow.keras.optimizers import Adam
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def train(datapaths, batch_size, epochs):
+    """
+    Trains the Improved UNet model on the given data.
+    datapaths is a list of the form:
+    [train_data_path/*jpg, train_truth_path/*png, val_data_path/*jpg, val_truth_path/*png]
+    The learning rate (5e-4) is as per the paper.
+    """
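+
+    # The preprocessing calls below only need to be run once; afterwards the
+    # cached .npy arrays (saved with np.save) are loaded instead, which avoids
+    # repeating the expensive preprocessing on every training run.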
+ """ + + # process data + # x_train = preprocess_data(datapaths[0]) + # y_train = preprocess_masks(datapaths[1]) + + # x_val = preprocess_data(datapaths[2]) + # y_val = preprocess_masks(datapaths[3]) + + x_train = np.load('x_train.npy') + y_train = np.load('y_train.npy') + x_val = np.load('x_val.npy') + y_val = np.load('y_val.npy') + + # build up Improved UNet model + model = Improved_UNet() + model.compile(optimizer = Adam(0.0005), loss=dice_coefficient_loss, metrics=[dice_coefficient]) + + history = model.fit(x_train, y_train, validation_data= (x_val, y_val), + batch_size=batch_size,shuffle='True',epochs=epochs) + + # save model + model.save('./trained-model', include_optimizer=True, save_format='tf') + # plot learning + plot_metrics(history) + +# if __name__ == "__main__": +# epochs = 30 +# batch_size = 32 + +# train() \ No newline at end of file diff --git a/recognition/45838464-improvedUNET-ISIC/utils.py b/recognition/45838464-improvedUNET-ISIC/utils.py new file mode 100644 index 0000000000..271108f550 --- /dev/null +++ b/recognition/45838464-improvedUNET-ISIC/utils.py @@ -0,0 +1,48 @@ +import tensorflow.keras.backend as K +import matplotlib.pyplot as plt +def dice_coefficient(a, b): + """ + Dice Coefficient function from : https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient + Used to determine how closely two set overlap each other. In this case we use it to see how close the + predicted mask matches the ground truth mask. + """ + + + a = K.flatten(a) + b = K.flatten(b) + a_union_b = K.sum(a * b) + mag_a = K.sum(a) + mag_b = K.sum(b) + + return (2.0 * a_union_b) / (mag_a + mag_b) + +def dice_coefficient_loss(truth, predition): + """ + Loss function as described in the Improved Unet paper. + """ + return 1 - dice_coefficient(truth, predition) + +def plot_metrics(history): + """ + Plots the dice coefficient and the loss of the model throughout the training process. + """ + plt.figure(1) + plt.plot(history.history['loss'], label='Training Loss') + plt.plot(history.history['val_loss'], label="Validation Loss") + plt.xlabel("Epochs") + plt.ylabel("Loss") + plt.title("Training and Validation Loss") + plt.legend(loc='lower right') + plt.savefig("./report-images/loss.png") + + plt.figure(2) + plt.plot(history.history['dice_coefficient'], label='Training Accuracy') + plt.plot(history.history['val_dice_coefficient'], label="Validation Accuracy") + plt.xlabel("Epochs") + plt.ylabel("Dice Similarity") + plt.title("Training and Validation Accuracy") + plt.legend(loc='lower right') + plt.savefig("./report-images/dice_accuracy.png") + + +