From 98fdf1d87db6f8cbc124941952001ba358ae5ac1 Mon Sep 17 00:00:00 2001 From: zhenming lin <3575188313@qq.com> Date: Tue, 19 Sep 2023 07:09:15 +0800 Subject: [PATCH 01/15] =?UTF-8?q?=E6=B7=BB=E5=8A=A0UWMGI=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E9=9B=86=E7=9A=84=E8=BD=AC=E6=8D=A2=E8=84=9A=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/data/convert_uwmgi.py | 147 ++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 tools/data/convert_uwmgi.py diff --git a/tools/data/convert_uwmgi.py b/tools/data/convert_uwmgi.py new file mode 100644 index 0000000000..210d8df6ac --- /dev/null +++ b/tools/data/convert_uwmgi.py @@ -0,0 +1,147 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import os +import random +import shutil +import zipfile + +import cv2 +import numpy as np +import pandas as pd +from PIL import Image +from tqdm import tqdm + + +def get_image(fp): + image = np.array(Image.open(fp)) + image = image.astype('float32') + image = image / np.max(image) * 255 + image = np.tile(image[..., None], [1, 1, 3]) + image = image.astype('uint8') + return image + + +def to_image_id(image_filepath): + image_dirs = image_filepath.replace('/', '\\').split('\\') + image_dirs = [image_dirs[2]] + image_dirs[4].split('_')[:2] + image_id = '_'.join(image_dirs) + return image_id + + +def rle_decode(mask_rle, image_shape): + s = mask_rle.split() + starts, lengths = [np.asarray(x, dtype=int) + for x in (s[0:][::2], s[1:][::2])] + starts -= 1 + ends = starts + lengths + img = np.zeros(image_shape[0] * image_shape[1], dtype='uint8') + for low, high in zip(starts, ends): + img[low:high] = 1 + return img.reshape(image_shape) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "input", + help="the directory of original UWMGI dataset zip file", + type=str) + parser.add_argument( + "output", + help="the directory to save converted UWMGI dataset", + type=str) + parser.add_argument( + '--train_proportion', + help='the proportion of train dataset', + type=float, + default=0.8) + parser.add_argument( + '--val_proportion', + help='the proportion of validation dataset', + type=float, + default=0.2) + args = parser.parse_args() + + assert os.path.exists(args.input), \ + f"The directory({args.input}) of " \ + f"original UWMGI dataset does not exist!" + assert zipfile.is_zipfile(args.input) + + assert 0 < args.train_proportion <= 1 + assert 0 <= args.val_proportion < 1 + assert args.train_proportion + args.val_proportion == 1 + + if not os.path.exists(args.output): + os.makedirs(args.output, exist_ok=True) + else: + if os.listdir(args.output): + shutil.rmtree(args.output) + os.makedirs(os.path.join(args.output, 'images/train')) + os.makedirs(os.path.join(args.output, 'annotations/train')) + os.makedirs(os.path.join(args.output, 'images/val')) + os.makedirs(os.path.join(args.output, 'annotations/val')) + + with zipfile.ZipFile(args.input, 'r') as zip_fp: + total_df = pd.read_csv(zip_fp.open('train.csv', 'r')) + + total_image_namelist = [] + for name in zip_fp.namelist(): + if os.path.splitext(name)[1] == '.png': + total_image_namelist.append(name) + train_image_namelist = random.sample( + total_image_namelist, int( + len(total_image_namelist) * args.train_proportion)) + val_image_namelist = np.setdiff1d( + total_image_namelist, train_image_namelist) + + pbar = tqdm(total=len(total_image_namelist)) + for image_namelist, split in zip( + [train_image_namelist, val_image_namelist], ['train', 'val']): + txt_lines = [] + for image_name in image_namelist: + with zip_fp.open(image_name, 'r') as fp: + image = get_image(fp) + image_id = to_image_id(image_name) + anns = total_df[total_df['id'] == image_id] + height, width = image.shape[:2] + mask = np.zeros([height, width * 3], dtype='uint8') + for _, ann in anns.iterrows(): + if not pd.isna(ann['segmentation']): + if ann['class'] == 'large_bowel': + mask[:, 0:width] = rle_decode( + ann['segmentation'], (height, width)) + elif ann['class'] == 'small_bowel': + mask[:, width:width * 2] = rle_decode( + ann['segmentation'], (height, width)) + else: # ann['class'] == 'stomach' + mask[:, width * 2:] = rle_decode( + ann['segmentation'], (height, width)) + cv2.imwrite(os.path.join( + args.output, 'images', split, image_id + '.jpg'), image) + cv2.imwrite(os.path.join( + args.output, 'annotations', split, image_id + '.png'), mask) + txt_lines.append( + os.path.join('images', split, image_id + '.jpg') + + ' ' + os.path.join('annotations', split, image_id + '.png')) + pbar.update() + + with open(os.path.join(args.output, split + '.txt'), 'w') as fp: + fp.write('\n'.join(txt_lines)) + + +if __name__ == '__main__': + main() From 420047a08a7ecba0e9e99e996798b5e9bfab9367 Mon Sep 17 00:00:00 2001 From: zhenming lin <3575188313@qq.com> Date: Tue, 19 Sep 2023 07:10:58 +0800 Subject: [PATCH 02/15] =?UTF-8?q?=E4=BF=AE=E6=94=B9Dataset=E5=92=8CCompose?= =?UTF-8?q?=20op=E4=BD=BF=E5=85=B6=E9=80=82=E9=85=8D=E8=AF=BB=E5=8F=96?= =?UTF-8?q?=E5=A4=9A=E6=A0=87=E7=AD=BE=E6=95=B0=E6=8D=AE=E7=9A=84=E6=83=85?= =?UTF-8?q?=E5=86=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddleseg/datasets/dataset.py | 3 ++- paddleseg/transforms/transforms.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/paddleseg/datasets/dataset.py b/paddleseg/datasets/dataset.py index d518f5b4f8..f2a0c8593f 100644 --- a/paddleseg/datasets/dataset.py +++ b/paddleseg/datasets/dataset.py @@ -155,7 +155,8 @@ def __getitem__(self, idx): data['gt_fields'] = [] if self.mode == 'val': data = self.transforms(data) - data['label'] = data['label'][np.newaxis, :, :] + if data['label'].ndim == 2: + data['label'] = data['label'][np.newaxis, :, :] else: data['gt_fields'].append('label') diff --git a/paddleseg/transforms/transforms.py b/paddleseg/transforms/transforms.py index eb298a4a6c..77b3e1a427 100644 --- a/paddleseg/transforms/transforms.py +++ b/paddleseg/transforms/transforms.py @@ -82,6 +82,11 @@ def __call__(self, data): if 'label' in data.keys() and isinstance(data['label'], str): data['label'] = np.asarray(Image.open(data['label'])) + img_h, img_w = data['img'].shape[:2] + if data['label'].shape[0] != img_h: + data['label'] = data['label'].reshape([-1, img_h, img_w]).transpose([1, 2, 0]) + elif data['label'].shape[1] != img_w: + data['label'] = data['label'].reshape([img_h, -1, img_w]).transpose([0, 2, 1]) # the `trans_info` will save the process of image shape, and will be used in evaluation and prediction. if 'trans_info' not in data.keys(): @@ -93,6 +98,8 @@ def __call__(self, data): if data['img'].ndim == 2: data['img'] = data['img'][..., np.newaxis] data['img'] = np.transpose(data['img'], (2, 0, 1)) + if 'label' in data and data['label'].ndim == 3: + data['label'] = np.transpose(data['label'], (2, 0, 1)) return data From 64d8548dbfc942455097945570b87c9c2c460fc7 Mon Sep 17 00:00:00 2001 From: zhenming lin <3575188313@qq.com> Date: Tue, 19 Sep 2023 07:11:44 +0800 Subject: [PATCH 03/15] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=AF=B9=E5=A4=9A?= =?UTF-8?q?=E6=A0=87=E7=AD=BE=E6=A8=A1=E5=BC=8F=E4=B8=8B=E7=9A=84=E6=8E=A8?= =?UTF-8?q?=E7=90=86=E7=BB=93=E6=9E=9C=E7=9A=84=E5=8F=AF=E8=A7=86=E5=8C=96?= =?UTF-8?q?=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddleseg/utils/visualize.py | 38 ++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/paddleseg/utils/visualize.py b/paddleseg/utils/visualize.py index 27211c4113..d6e5842ff1 100644 --- a/paddleseg/utils/visualize.py +++ b/paddleseg/utils/visualize.py @@ -19,7 +19,7 @@ from PIL import Image as PILImage -def visualize(image, result, color_map, save_dir=None, weight=0.6): +def visualize(image, result, color_map, save_dir=None, weight=0.6, use_multilabel=False): """ Convert predict result to color image, and save added image. @@ -29,6 +29,7 @@ def visualize(image, result, color_map, save_dir=None, weight=0.6): color_map (list): The color used to save the prediction results. save_dir (str): The directory for saving visual image. Default: None. weight (float): The image weight of visual image, and the result weight is (1 - weight). Default: 0.6 + use_multilabel (bool, optional): Whether to enable multilabel mode. Default: False. Returns: vis_result (np.ndarray): If `save_dir` is None, return the visualized result. @@ -36,14 +37,29 @@ def visualize(image, result, color_map, save_dir=None, weight=0.6): color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)] color_map = np.array(color_map).astype("uint8") - # Use OpenCV LUT for color mapping - c1 = cv2.LUT(result, color_map[:, 0]) - c2 = cv2.LUT(result, color_map[:, 1]) - c3 = cv2.LUT(result, color_map[:, 2]) - pseudo_img = np.dstack((c3, c2, c1)) im = cv2.imread(image) - vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0) + if not use_multilabel: + # Use OpenCV LUT for color mapping + c1 = cv2.LUT(result, color_map[:, 0]) + c2 = cv2.LUT(result, color_map[:, 1]) + c3 = cv2.LUT(result, color_map[:, 2]) + pseudo_img = np.dstack((c3, c2, c1)) + + vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0) + else: + vis_result = im.copy() + for i in range(result.shape[0]): + mask = result[i] + c1 = np.where(mask, color_map[i, 0], vis_result[..., 0]) + c2 = np.where(mask, color_map[i, 1], vis_result[..., 1]) + c3 = np.where(mask, color_map[i, 2], vis_result[..., 2]) + pseudo_img = np.dstack((c3, c2, c1)).astype('uint8') + + contour, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + vis_result = cv2.addWeighted(vis_result, weight, pseudo_img, 1 - weight, 0) + contour_color = (int(color_map[i, 0]), int(color_map[i, 1]), int(color_map[i, 2])) + vis_result = cv2.drawContours(vis_result, contour, -1, contour_color, 1) if save_dir is not None: if not os.path.exists(save_dir): @@ -55,7 +71,7 @@ def visualize(image, result, color_map, save_dir=None, weight=0.6): return vis_result -def get_pseudo_color_map(pred, color_map=None): +def get_pseudo_color_map(pred, color_map=None, use_multilabel=False): """ Get the pseudo color image. @@ -63,10 +79,16 @@ def get_pseudo_color_map(pred, color_map=None): pred (numpy.ndarray): the origin predicted image. color_map (list, optional): the palette color map. Default: None, use paddleseg's default color map. + use_multilabel (bool, optional): Whether to enable multilabel mode. Default: False. Returns: (numpy.ndarray): the pseduo image. """ + if use_multilabel: + bg_pred = (pred.sum(axis=0, keepdims=True) == 0).astype('int32') + pred = np.concatenate([bg_pred, pred], axis=0) + gray_idx = np.arange(pred.shape[0]).astype(np.uint8) + pred = (pred * gray_idx[:, None, None]).sum(axis=0) pred_mask = PILImage.fromarray(pred.astype(np.uint8), mode='P') if color_map is None: color_map = get_color_map_list(256) From a045e4c565c4f1888d3e48180ddeb831d916e4fb Mon Sep 17 00:00:00 2001 From: zhenming lin <3575188313@qq.com> Date: Tue, 19 Sep 2023 07:12:14 +0800 Subject: [PATCH 04/15] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=AF=B9=E5=A4=9A?= =?UTF-8?q?=E6=A0=87=E7=AD=BE=E6=A8=A1=E5=BC=8F=E4=B8=8B=E7=9A=84=E8=AF=AD?= =?UTF-8?q?=E4=B9=89=E5=88=86=E5=89=B2=E4=BB=BB=E5=8A=A1=E8=AF=84=E4=BC=B0?= =?UTF-8?q?=E6=8C=87=E6=A0=87=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddleseg/utils/metrics.py | 56 +++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/paddleseg/utils/metrics.py b/paddleseg/utils/metrics.py index 5327a464f0..fd7b0c3ba4 100644 --- a/paddleseg/utils/metrics.py +++ b/paddleseg/utils/metrics.py @@ -18,7 +18,7 @@ import sklearn.metrics as skmetrics -def calculate_area(pred, label, num_classes, ignore_index=255): +def calculate_area(pred, label, num_classes, ignore_index=255, use_multilabel=False): """ Calculate intersect, prediction and label area @@ -27,36 +27,42 @@ def calculate_area(pred, label, num_classes, ignore_index=255): label (Tensor): The ground truth of image. num_classes (int): The unique number of target classes. ignore_index (int): Specifies a target value that is ignored. Default: 255. + use_multilabel (bool, optional): Whether to enable multilabel mode. Default: False. Returns: Tensor: The intersection area of prediction and the ground on all class. Tensor: The prediction area on all class. Tensor: The ground truth area on all class """ - if len(pred.shape) == 4: - pred = paddle.squeeze(pred, axis=1) - if len(label.shape) == 4: - label = paddle.squeeze(label, axis=1) - if not pred.shape == label.shape: - raise ValueError('Shape of `pred` and `label should be equal, ' - 'but there are {} and {}.'.format(pred.shape, - label.shape)) - pred_area = [] - label_area = [] - intersect_area = [] - mask = label != ignore_index - - for i in range(num_classes): - pred_i = paddle.logical_and(pred == i, mask) - label_i = label == i - intersect_i = paddle.logical_and(pred_i, label_i) - pred_area.append(paddle.sum(paddle.cast(pred_i, "int64"))) - label_area.append(paddle.sum(paddle.cast(label_i, "int64"))) - intersect_area.append(paddle.sum(paddle.cast(intersect_i, "int64"))) - - pred_area = paddle.stack(pred_area) - label_area = paddle.stack(label_area) - intersect_area = paddle.stack(intersect_area) + if not use_multilabel: + if len(pred.shape) == 4: + pred = paddle.squeeze(pred, axis=1) + if len(label.shape) == 4: + label = paddle.squeeze(label, axis=1) + if not pred.shape == label.shape: + raise ValueError('Shape of `pred` and `label should be equal, ' + 'but there are {} and {}.'.format(pred.shape, + label.shape)) + pred_area = [] + label_area = [] + intersect_area = [] + mask = label != ignore_index + + for i in range(num_classes): + pred_i = paddle.logical_and(pred == i, mask) + label_i = label == i + intersect_i = paddle.logical_and(pred_i, label_i) + pred_area.append(paddle.sum(paddle.cast(pred_i, "int64"))) + label_area.append(paddle.sum(paddle.cast(label_i, "int64"))) + intersect_area.append(paddle.sum(paddle.cast(intersect_i, "int64"))) + + pred_area = paddle.stack(pred_area) + label_area = paddle.stack(label_area) + intersect_area = paddle.stack(intersect_area) + else: + pred_area = pred.sum([0, 2, 3]).astype('int64') + label_area = label.sum([0, 2, 3]).astype('int64') + intersect_area = (pred * label).sum([0, 2, 3]).astype('int64') return intersect_area, pred_area, label_area From 7dc44ddca64c554dd841db6434f6d236d688723e Mon Sep 17 00:00:00 2001 From: zhenming lin <3575188313@qq.com> Date: Tue, 19 Sep 2023 07:13:11 +0800 Subject: [PATCH 05/15] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=AF=B9=E5=A4=9A?= =?UTF-8?q?=E6=A0=87=E7=AD=BE=E6=A8=A1=E5=BC=8F=E4=B8=8B=EF=BC=8C=E4=BC=A0?= =?UTF-8?q?=E5=85=A5--use=5Fmultilabel=E5=8F=82=E6=95=B0=E7=9A=84=E6=94=AF?= =?UTF-8?q?=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddleseg/core/infer.py | 20 +++++++++++++++----- paddleseg/core/predict.py | 15 ++++++++++----- paddleseg/core/val.py | 19 +++++++++++++------ tools/predict.py | 9 +++++++++ tools/train.py | 12 ++++++++++++ tools/val.py | 8 ++++++++ 6 files changed, 67 insertions(+), 16 deletions(-) diff --git a/paddleseg/core/infer.py b/paddleseg/core/infer.py index d5df03e86e..66a529164b 100644 --- a/paddleseg/core/infer.py +++ b/paddleseg/core/infer.py @@ -136,7 +136,8 @@ def inference(model, trans_info=None, is_slide=False, stride=None, - crop_size=None): + crop_size=None, + use_multilabel=False): """ Inference for image. @@ -147,6 +148,7 @@ def inference(model, is_slide (bool): Whether to infer by sliding window. Default: False. crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + use_multilabel (bool, optional): Whether to enable multilabel mode. Default: False. Returns: Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned. @@ -167,7 +169,10 @@ def inference(model, logit = logit.transpose((0, 3, 1, 2)) if trans_info is not None: logit = reverse_transform(logit, trans_info, mode='bilinear') - pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32') + if not use_multilabel: + pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32') + else: + pred = (F.sigmoid(logit) > 0.5).astype('int32') return pred, logit else: return logit @@ -181,7 +186,8 @@ def aug_inference(model, flip_vertical=False, is_slide=False, stride=None, - crop_size=None): + crop_size=None, + use_multilabel=False): """ Infer with augmentation. @@ -195,6 +201,7 @@ def aug_inference(model, is_slide (bool): Whether to infer by sliding wimdow. Default: False. crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + use_multilabel (bool, optional): Whether to enable multilabel mode. Default: False. Returns: Tensor: Prediction of image with shape (1, 1, h, w) is returned. @@ -229,6 +236,9 @@ def aug_inference(model, # comparable to single-scale logits final_logit /= num_augs final_logit = reverse_transform(final_logit, trans_info, mode='bilinear') - pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32') + if not use_multilabel: + pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32') + else: + pred = (F.sigmoid(final_logit) > 0.5).astype('int32') - return pred, final_logit + return pred, final_logit \ No newline at end of file diff --git a/paddleseg/core/predict.py b/paddleseg/core/predict.py index 73d2f866de..016a93878c 100644 --- a/paddleseg/core/predict.py +++ b/paddleseg/core/predict.py @@ -58,7 +58,8 @@ def predict(model, is_slide=False, stride=None, crop_size=None, - custom_color=None): + custom_color=None, + use_multilabel=False): """ predict and visualize the image_list. @@ -79,6 +80,7 @@ def predict(model, crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height. It should be provided when `is_slide` is True. custom_color (list, optional): Save images with a custom color map. Default: None, use paddleseg's default color map. + use_multilabel (bool, optional): Whether to enable multilabel mode. Default: False. """ utils.utils.load_entire_model(model, model_path) @@ -110,7 +112,8 @@ def predict(model, flip_vertical=flip_vertical, is_slide=is_slide, stride=stride, - crop_size=crop_size) + crop_size=crop_size, + use_multilabel=use_multilabel) else: pred, _ = infer.inference( model, @@ -118,7 +121,8 @@ def predict(model, trans_info=data['trans_info'], is_slide=is_slide, stride=stride, - crop_size=crop_size) + crop_size=crop_size, + use_multilabel=use_multilabel) pred = paddle.squeeze(pred) pred = pred.numpy().astype('uint8') @@ -132,13 +136,14 @@ def predict(model, # save added image added_image = utils.visualize.visualize( - im_path, pred, color_map, weight=0.6) + im_path, pred, color_map, weight=0.6, use_multilabel=use_multilabel) added_image_path = os.path.join(added_saved_dir, im_file) mkdir(added_image_path) cv2.imwrite(added_image_path, added_image) # save pseudo color prediction - pred_mask = utils.visualize.get_pseudo_color_map(pred, color_map) + pred_mask = utils.visualize.get_pseudo_color_map( + pred, color_map, use_multilabel=use_multilabel) pred_saved_path = os.path.join( pred_saved_dir, os.path.splitext(im_file)[0] + ".png") mkdir(pred_saved_path) diff --git a/paddleseg/core/val.py b/paddleseg/core/val.py index 80a820b6bc..437c9acf4f 100644 --- a/paddleseg/core/val.py +++ b/paddleseg/core/val.py @@ -38,7 +38,8 @@ def evaluate(model, amp_level='O1', num_workers=0, print_detail=True, - auc_roc=False): + auc_roc=False, + use_multilabel=False): """ Launch evalution. @@ -59,6 +60,7 @@ def evaluate(model, num_workers (int, optional): Num workers for data loader. Default: 0. print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True. auc_roc(bool, optional): whether add auc_roc metric + use_multilabel (bool, optional): Whether to enable multilabel mode. Default: False. Returns: float: The mIoU of validation datasets. @@ -120,7 +122,8 @@ def evaluate(model, flip_vertical=flip_vertical, is_slide=is_slide, stride=stride, - crop_size=crop_size) + crop_size=crop_size, + use_multilabel=use_multilabel) else: pred, logits = infer.aug_inference( model, @@ -131,7 +134,8 @@ def evaluate(model, flip_vertical=flip_vertical, is_slide=is_slide, stride=stride, - crop_size=crop_size) + crop_size=crop_size, + use_multilabel=use_multilabel) else: if precision == 'fp16': with paddle.amp.auto_cast( @@ -148,7 +152,8 @@ def evaluate(model, trans_info=data['trans_info'], is_slide=is_slide, stride=stride, - crop_size=crop_size) + crop_size=crop_size, + use_multilabel=use_multilabel) else: pred, logits = infer.inference( model, @@ -156,13 +161,15 @@ def evaluate(model, trans_info=data['trans_info'], is_slide=is_slide, stride=stride, - crop_size=crop_size) + crop_size=crop_size, + use_multilabel=use_multilabel) intersect_area, pred_area, label_area = metrics.calculate_area( pred, label, eval_dataset.num_classes, - ignore_index=eval_dataset.ignore_index) + ignore_index=eval_dataset.ignore_index, + use_multilabel=use_multilabel) # Gather from all ranks if nranks > 1: diff --git a/tools/predict.py b/tools/predict.py index 09302d2052..9e1016cc5c 100644 --- a/tools/predict.py +++ b/tools/predict.py @@ -98,6 +98,13 @@ def parse_args(): help='Save images with a custom color map. Default: None, use paddleseg\'s default color map.', type=int) + # Set multi-label mode + parser.add_argument( + '--use_multilabel', + action='store_true', + default=False, + help='Whether to enable multilabel mode. Default: False.') + return parser.parse_args() @@ -118,6 +125,8 @@ def merge_test_config(cfg, args): test_config['stride'] = args.stride if args.custom_color: test_config['custom_color'] = args.custom_color + if args.use_multilabel: + test_config['use_multilabel'] = args.use_multilabel return test_config diff --git a/tools/train.py b/tools/train.py index 09d864499a..b9ce6cf7af 100644 --- a/tools/train.py +++ b/tools/train.py @@ -124,6 +124,12 @@ def parse_args(): ) parser.add_argument( '--opts', help='Update the key-value pairs of all options.', nargs='+') + # Set multi-label mode + parser.add_argument( + '--use_multilabel', + action='store_true', + default=False, + help='Whether to enable multilabel mode. Default: False.') return parser.parse_args() @@ -145,6 +151,12 @@ def main(args): utils.set_device(args.device) utils.set_cv2_num_threads(args.num_workers) + if args.use_multilabel: + if 'test_config' not in cfg.dic: + cfg.dic['test_config'] = {'use_multilabel': True} + else: + cfg.dic['test_config']['use_multilabel'] = True + # TODO refactor # Only support for the DeepLabv3+ model if args.data_format == 'NHWC': diff --git a/tools/val.py b/tools/val.py index 454737608b..2ce837972f 100644 --- a/tools/val.py +++ b/tools/val.py @@ -97,6 +97,12 @@ def parse_args(): help='Update the key-value pairs of all options.', default=None, nargs='+') + # Set multi-label mode + parser.add_argument( + '--use_multilabel', + action='store_true', + default=False, + help='Whether to enable multilabel mode. Default: False.') return parser.parse_args() @@ -112,6 +118,8 @@ def merge_test_config(cfg, args): test_config['is_slide'] = args.is_slide test_config['crop_size'] = args.crop_size test_config['stride'] = args.stride + if args.use_multilabel: + test_config['use_multilabel'] = args.use_multilabel return test_config From 0a5adbb44b0fd55db65db84450e23ace6a2e6c98 Mon Sep 17 00:00:00 2001 From: zhenming lin <3575188313@qq.com> Date: Tue, 19 Sep 2023 07:14:28 +0800 Subject: [PATCH 06/15] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=A4=9A=E6=A0=87?= =?UTF-8?q?=E7=AD=BE=E8=AF=AD=E4=B9=89=E5=88=86=E5=89=B2=E4=BB=BB=E5=8A=A1?= =?UTF-8?q?=E5=9C=A8UWMGI=E6=95=B0=E6=8D=AE=E9=9B=86=E4=B8=8A=E7=9A=84?= =?UTF-8?q?=E5=AE=9E=E4=BE=8B=E9=85=8D=E7=BD=AE=E6=96=87=E4=BB=B6=E5=92=8C?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- configs/_base_/uwmgi.yml | 53 +++++++ configs/multilabelseg/README.md | 137 ++++++++++++++++++ configs/multilabelseg/README_cn.md | 137 ++++++++++++++++++ ...plabv3_resnet50_os8_uwmgi_256x256_160k.yml | 18 +++ .../pp_mobileseg_tiny_uwmgi_256x256_160k.yml | 34 +++++ 5 files changed, 379 insertions(+) create mode 100644 configs/_base_/uwmgi.yml create mode 100644 configs/multilabelseg/README.md create mode 100644 configs/multilabelseg/README_cn.md create mode 100644 configs/multilabelseg/deeplabv3_resnet50_os8_uwmgi_256x256_160k.yml create mode 100644 configs/multilabelseg/pp_mobileseg_tiny_uwmgi_256x256_160k.yml diff --git a/configs/_base_/uwmgi.yml b/configs/_base_/uwmgi.yml new file mode 100644 index 0000000000..51989b876f --- /dev/null +++ b/configs/_base_/uwmgi.yml @@ -0,0 +1,53 @@ +batch_size: 8 +iters: 160000 + +train_dataset: + type: Dataset + dataset_root: data/UWMGI + transforms: + - type: ResizeStepScaling + min_scale_factor: 0.5 + max_scale_factor: 2.0 + scale_step_size: 0.25 + - type: RandomPaddingCrop + crop_size: [256, 256] + - type: RandomHorizontalFlip + - type: RandomVerticalFlip + - type: RandomDistort + brightness_range: 0.4 + contrast_range: 0.4 + saturation_range: 0.4 + num_classes: 3 + train_path: data/UWMGI/train.txt + mode: train + +val_dataset: + type: Dataset + dataset_root: data/UWMGI + transforms: + - type: Resize + target_size: [256, 256] + - type: Normalize + num_classes: 3 + val_path: data/UWMGI/val.txt + mode: val + +optimizer: + type: SGD + momentum: 0.9 + weight_decay: 4.0e-5 + +lr_scheduler: + type: PolynomialDecay + learning_rate: 0.001 + end_lr: 0 + power: 0.9 + +loss: + types: + - type: MixedLoss + losses: + - type: BCELoss + - type: LovaszHingeLoss + coef: [0.5, 0.5] + coef: [1] diff --git a/configs/multilabelseg/README.md b/configs/multilabelseg/README.md new file mode 100644 index 0000000000..6fac88273a --- /dev/null +++ b/configs/multilabelseg/README.md @@ -0,0 +1,137 @@ +English | [简体中文](README_cn.md) + +# Multi-label semantic segmentation based on PaddleSeg + +## 1. introduction + +Multi-label semantic segmentation is an image segmentation task that aims to assign each pixel in an image to multiple categories, rather than just one category. This can better express complex information in the image, such as overlapping, occlusion, boundaries, etc. of different objects. Multi label semantic segmentation has many application scenarios, such as medical image analysis, remote sensing image interpretation, autonomous driving, and so on. + +
+ + + +
+ ++ *The above effect shows the inference results obtained from the model trained using images in the [UWMGI](https://www.kaggle.com/competitions/uw-madison-gi-tract-image-segmentation/) dataset* + +## 2. Supported models and loss functions + +| Model | Loss | +|:-------------------------------------------------------------------------------------------:|:------------------------:| +| DeepLabV3, DeepLabV3P, MobileSeg,+ + + +
+ ++ *以上效果展示图基于 [UWMGI](https://www.kaggle.com/competitions/uw-madison-gi-tract-image-segmentation/)数据集中的图片使用训练的模型所得到的推理结果。* + +## 2. 已支持的模型和损失函数 + +| Model | Loss | +|:-------------------------------------------------------------------------------------------:|:------------------------:| +| DeepLabV3, DeepLabV3P, MobileSeg,- - - + + + + + +
+ *The above effect shows the inference results obtained from the model trained using images in the [UWMGI](https://www.kaggle.com/competitions/uw-madison-gi-tract-image-segmentation/) dataset* @@ -76,18 +79,17 @@ train_dataset: type: Dataset dataset_root: data/UWMGI transforms: - - type: ResizeStepScaling - min_scale_factor: 0.5 - max_scale_factor: 2.0 - scale_step_size: 0.25 - - type: RandomPaddingCrop - crop_size: [256, 256] + - type: Resize + target_size: [256, 256] - type: RandomHorizontalFlip - type: RandomVerticalFlip - type: RandomDistort brightness_range: 0.4 contrast_range: 0.4 saturation_range: 0.4 + - type: Normalize + mean: [0.0, 0.0, 0.0] + std: [1.0, 1.0, 1.0] num_classes: 3 train_path: data/UWMGI/train.txt mode: train @@ -99,6 +101,8 @@ val_dataset: - type: Resize target_size: [256, 256] - type: Normalize + mean: [0.0, 0.0, 0.0] + std: [1.0, 1.0, 1.0] num_classes: 3 val_path: data/UWMGI/val.txt mode: val diff --git a/configs/multilabelseg/README_cn.md b/configs/multilabelseg/README_cn.md index 6412a862ce..2fb3d98f42 100644 --- a/configs/multilabelseg/README_cn.md +++ b/configs/multilabelseg/README_cn.md @@ -7,9 +7,12 @@ 多标签语义分割是一种图像分割任务,它的目的是将图像中的每个像素分配到多个类别中,而不是只有一个类别。这样可以更好地表达图像中的复杂信息,例如不同物体的重叠、遮挡、边界等。多标签语义分割有许多应用场景,例如医学图像分析、遥感图像解译、自动驾驶等。- - - + + + + + +
+ *以上效果展示图基于 [UWMGI](https://www.kaggle.com/competitions/uw-madison-gi-tract-image-segmentation/)数据集中的图片使用训练的模型所得到的推理结果。* @@ -76,18 +79,17 @@ train_dataset: type: Dataset dataset_root: data/UWMGI transforms: - - type: ResizeStepScaling - min_scale_factor: 0.5 - max_scale_factor: 2.0 - scale_step_size: 0.25 - - type: RandomPaddingCrop - crop_size: [256, 256] + - type: Resize + target_size: [256, 256] - type: RandomHorizontalFlip - type: RandomVerticalFlip - type: RandomDistort brightness_range: 0.4 contrast_range: 0.4 saturation_range: 0.4 + - type: Normalize + mean: [0.0, 0.0, 0.0] + std: [1.0, 1.0, 1.0] num_classes: 3 train_path: data/UWMGI/train.txt mode: train @@ -99,6 +101,8 @@ val_dataset: - type: Resize target_size: [256, 256] - type: Normalize + mean: [0.0, 0.0, 0.0] + std: [1.0, 1.0, 1.0] num_classes: 3 val_path: data/UWMGI/val.txt mode: val From 00f9f37fffd70557641b01644605a0abbd74b180 Mon Sep 17 00:00:00 2001 From: zhenming lin <3575188313@qq.com> Date: Fri, 22 Sep 2023 05:46:38 +0800 Subject: [PATCH 13/15] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=9B=BE=E7=89=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- configs/multilabelseg/README.md | 3 --- configs/multilabelseg/README_cn.md | 3 --- 2 files changed, 6 deletions(-) diff --git a/configs/multilabelseg/README.md b/configs/multilabelseg/README.md index 5a74906712..322aaf7f54 100644 --- a/configs/multilabelseg/README.md +++ b/configs/multilabelseg/README.md @@ -10,9 +10,6 @@ Multi-label semantic segmentation is an image segmentation task that aims to ass - - - + *The above effect shows the inference results obtained from the model trained using images in the [UWMGI](https://www.kaggle.com/competitions/uw-madison-gi-tract-image-segmentation/) dataset* diff --git a/configs/multilabelseg/README_cn.md b/configs/multilabelseg/README_cn.md index 2fb3d98f42..07653e2f03 100644 --- a/configs/multilabelseg/README_cn.md +++ b/configs/multilabelseg/README_cn.md @@ -10,9 +10,6 @@ - - - + *以上效果展示图基于 [UWMGI](https://www.kaggle.com/competitions/uw-madison-gi-tract-image-segmentation/)数据集中的图片使用训练的模型所得到的推理结果。* From f50c33f578a490debf3c666ed43d02baf54d6138 Mon Sep 17 00:00:00 2001 From: zhenming lin <3575188313@qq.com> Date: Fri, 22 Sep 2023 08:07:37 +0800 Subject: [PATCH 14/15] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=84=9A=E6=9C=AC?= =?UTF-8?q?=EF=BC=8C=E4=BD=BF=E5=85=B6=E6=94=AF=E6=8C=81`UWMGI`=20?= =?UTF-8?q?=E5=92=8C=E4=B8=BB=E6=B5=81=E7=9A=84COCO=E7=B1=BB=E5=9E=8B?= =?UTF-8?q?=E6=A0=87=E6=B3=A8=E8=BD=AC=E6=8D=A2=E4=B8=BAppseg=20dataset=20?= =?UTF-8?q?api=E6=94=AF=E6=8C=81=E7=9A=84=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/data/convert_multilabel.py | 254 +++++++++++++++++++++++++++++++ tools/data/convert_uwmgi.py | 147 ------------------ 2 files changed, 254 insertions(+), 147 deletions(-) create mode 100644 tools/data/convert_multilabel.py delete mode 100644 tools/data/convert_uwmgi.py diff --git a/tools/data/convert_multilabel.py b/tools/data/convert_multilabel.py new file mode 100644 index 0000000000..0fe6372030 --- /dev/null +++ b/tools/data/convert_multilabel.py @@ -0,0 +1,254 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +File: convert_multilabel.py +This file is used to convert `uwmgi` or `coco` type dataset to support multi-label dataset format. +Examples of usage are as follows: +1. convert UWMGI dataset +python convert_multilabel.py --dataset_type uwmgi --zip_input ${uwmgi_origin_zip_file} --output ${save_dir} --train_proportion 0.8 --val_proportion 0.2 +2. convert COCO type dataset +2.1 not yet split training and validation dataset +python convert_multilabel.py --dataset_type coco --img_input ${img_dir} --ann_input ${ann_dir} --output ${save_dir} --train_proportion 0.8 --val_proportion 0.2 +2.2 training and validation dataset split +python convert_multilabel.py --dataset_type coco --img_input ${train_img_dir} --ann_input ${train_ann_dir} --output ${save_dir} --train_proportion 1.0 --val_proportion 0.0 +python convert_multilabel.py --dataset_type coco --img_input ${val_img_dir} --ann_input ${val_ann_dir} --output ${save_dir} --train_proportion 0.0 --val_proportion 1.0 +""" + +import argparse +import os +import random +import zipfile + +import cv2 +import numpy as np +import pandas as pd +from PIL import Image +from pycocotools.coco import COCO +from tqdm import tqdm + + +def uwmgi_get_image(fp): + image = np.array(Image.open(fp)) + image = image.astype('float32') + image = image / np.max(image) * 255 + image = np.tile(image[..., None], [1, 1, 3]) + image = image.astype('uint8') + return image + + +def uwmgi_get_image_id(image_filepath): + image_dirs = image_filepath.replace('/', '\\').split('\\') + image_dirs = [image_dirs[2]] + image_dirs[4].split('_')[:2] + image_id = '_'.join(image_dirs) + return image_id + + +def uwmgi_rle_decode(mask_rle, image_shape): + s = mask_rle.split() + starts, lengths = [np.asarray(x, dtype=int) + for x in (s[0:][::2], s[1:][::2])] + starts -= 1 + ends = starts + lengths + img = np.zeros(image_shape[0] * image_shape[1], dtype='uint8') + for low, high in zip(starts, ends): + img[low:high] = 1 + return img.reshape(image_shape) + + +def uwmgi_to_multilabel_format(args): + with zipfile.ZipFile(args.input, 'r') as zip_fp: + total_df = pd.read_csv(zip_fp.open('train.csv', 'r')) + + total_image_namelist = [] + for name in zip_fp.namelist(): + if os.path.splitext(name)[1] == '.png': + total_image_namelist.append(name) + train_image_namelist = random.sample( + total_image_namelist, int( + len(total_image_namelist) * args.train_proportion)) + val_image_namelist = np.setdiff1d( + total_image_namelist, train_image_namelist) + + pbar = tqdm(total=len(total_image_namelist)) + for image_namelist, split in zip( + [train_image_namelist, val_image_namelist], ['train', 'val']): + txt_lines = [] + for image_name in image_namelist: + with zip_fp.open(image_name, 'r') as fp: + image = uwmgi_get_image(fp) + image_id = uwmgi_get_image_id(image_name) + anns = total_df[total_df['id'] == image_id] + height, width = image.shape[:2] + mask = np.zeros([height, width * 3], dtype='uint8') + for _, ann in anns.iterrows(): + if not pd.isna(ann['segmentation']): + if ann['class'] == 'large_bowel': + mask[:, 0:width] = uwmgi_rle_decode( + ann['segmentation'], (height, width)) + elif ann['class'] == 'small_bowel': + mask[:, width:width * 2] = uwmgi_rle_decode( + ann['segmentation'], (height, width)) + else: # ann['class'] == 'stomach' + mask[:, width * 2:] = uwmgi_rle_decode( + ann['segmentation'], (height, width)) + cv2.imwrite(os.path.join( + args.output, 'images', split, image_id + '.jpg'), image) + cv2.imwrite(os.path.join( + args.output, 'annotations', split, image_id + '.png'), mask) + txt_lines.append( + os.path.join('images', split, image_id + '.jpg') + + ' ' + os.path.join('annotations', split, image_id + '.png')) + pbar.update() + + with open(os.path.join(args.output, split + '.txt'), 'w') as fp: + fp.write('\n'.join(txt_lines)) + + +def coco_to_multilabel_format(args): + coco = COCO(args.ann_input) + cat_id_map = { + old_cat_id: new_cat_id + for new_cat_id, old_cat_id in enumerate(coco.getCatIds()) + } + num_classes = len(list(cat_id_map.keys())) + + assert 'annotations' in coco.dataset, \ + 'Annotation file: {} does not contains ground truth!!!'.format(args.ann_input) + + total_img_id_list = sorted(list(coco.imgToAnns.keys())) + train_img_id_list = random.sample( + total_img_id_list, int(len(total_img_id_list) * args.train_proportion)) + val_img_id_list = np.setdiff1d(total_img_id_list, train_img_id_list) + + pbar = tqdm(total=len(total_img_id_list)) + for img_id_list, split in zip( + [train_img_id_list, val_img_id_list], ['train', 'val']): + txt_lines = [] + for img_id in img_id_list: + img_info = coco.loadImgs([img_id])[0] + img_filename = img_info['file_name'] + img_w = img_info['width'] + img_h = img_info['height'] + + img_filepath = os.path.join(args.img_input, img_filename) + if not os.path.exists(img_filepath): + print('Illegal image file: {}, ' + 'and it will be ignored'.format(img_filepath)) + continue + + if img_w < 0 or img_h < 0: + print('Illegal width: {} or height: {} in annotation, ' + 'and im_id: {} will be ignored'.format(img_w, img_h, img_id)) + continue + + ann_ids = coco.getAnnIds(imgIds=[img_id]) + anns = coco.loadAnns(ann_ids) + + mask = np.zeros([img_h, num_classes * img_w], dtype='uint8') + for ann in anns: + cat_id = cat_id_map[ann['category_id']] + one_cls_mask = coco.annToMask(ann) + mask[:, cat_id * img_w: (cat_id + 1) * img_w] = np.where( + one_cls_mask, one_cls_mask, + mask[:, cat_id * img_w: (cat_id + 1) * img_w]) + + image = cv2.imread(img_filepath, cv2.IMREAD_COLOR) + cv2.imwrite(os.path.join( + args.output, 'images', split, + os.path.splitext(img_filename)[0] + '.jpg'), image) + cv2.imwrite(os.path.join( + args.output, 'annotations', split, + os.path.splitext(img_filename)[0] + '.png'), mask) + txt_lines.append(os.path.join( + 'images', split, os.path.splitext(img_filename)[0] + '.jpg') + + ' ' + os.path.join( + 'annotations', split, os.path.splitext(img_filename)[0] + '.png')) + pbar.update() + + with open(os.path.join(args.output, split + '.txt'), 'w') as fp: + fp.write('\n'.join(txt_lines)) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + '--dataset_type', + help='the type of dataset, can be `uwmgi` or `coco`', + type=str) + parser.add_argument( + "--zip_input", + help="the directory of original dataset zip file", + type=str) + parser.add_argument( + "--img_input", + help="the directory of original dataset image file", + type=str) + parser.add_argument( + "--ann_input", + help="the directory of original dataset annotation file", + type=str) + parser.add_argument( + "--output", + help="the directory to save converted dataset", + type=str) + parser.add_argument( + '--train_proportion', + help='the proportion of train dataset', + type=float, + default=0.8) + parser.add_argument( + '--val_proportion', + help='the proportion of validation dataset', + type=float, + default=0.2) + args = parser.parse_args() + + assert args.dataset_type in ['uwmgi', 'coco'], \ + "Now only support the `uwmgi` and `coco`!!!" + + assert 0 <= args.train_proportion <= 1 + assert 0 <= args.val_proportion <= 1 + assert args.train_proportion + args.val_proportion == 1 + + if not os.path.exists(args.output): + os.makedirs(args.output, exist_ok=True) + + os.makedirs(os.path.join(args.output, 'images/train'), exist_ok=True) + os.makedirs(os.path.join(args.output, 'annotations/train'), exist_ok=True) + os.makedirs(os.path.join(args.output, 'images/val'), exist_ok=True) + os.makedirs(os.path.join(args.output, 'annotations/val'), exist_ok=True) + + if args.dataset_type == 'uwmgi': + assert os.path.exists(args.zip_input), \ + f"The directory({args.zip_input}) of " \ + f"original UWMGI dataset does not exist!" + assert zipfile.is_zipfile(args.input) + + uwmgi_to_multilabel_format(args) + + else: # args.dataset_type == 'coco' + assert os.path.exists(args.img_input), \ + f"The directory({args.img_input}) of " \ + f"original image file does not exist!" + assert os.path.exists(args.ann_input), \ + f"The directory({args.ann_input}) of " \ + f"original annotation file does not exist!" + + coco_to_multilabel_format(args) + + print("Dataset converts success, the data path: {}".format(args.output)) + + +if __name__ == '__main__': + main() diff --git a/tools/data/convert_uwmgi.py b/tools/data/convert_uwmgi.py deleted file mode 100644 index 210d8df6ac..0000000000 --- a/tools/data/convert_uwmgi.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import argparse -import os -import random -import shutil -import zipfile - -import cv2 -import numpy as np -import pandas as pd -from PIL import Image -from tqdm import tqdm - - -def get_image(fp): - image = np.array(Image.open(fp)) - image = image.astype('float32') - image = image / np.max(image) * 255 - image = np.tile(image[..., None], [1, 1, 3]) - image = image.astype('uint8') - return image - - -def to_image_id(image_filepath): - image_dirs = image_filepath.replace('/', '\\').split('\\') - image_dirs = [image_dirs[2]] + image_dirs[4].split('_')[:2] - image_id = '_'.join(image_dirs) - return image_id - - -def rle_decode(mask_rle, image_shape): - s = mask_rle.split() - starts, lengths = [np.asarray(x, dtype=int) - for x in (s[0:][::2], s[1:][::2])] - starts -= 1 - ends = starts + lengths - img = np.zeros(image_shape[0] * image_shape[1], dtype='uint8') - for low, high in zip(starts, ends): - img[low:high] = 1 - return img.reshape(image_shape) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "input", - help="the directory of original UWMGI dataset zip file", - type=str) - parser.add_argument( - "output", - help="the directory to save converted UWMGI dataset", - type=str) - parser.add_argument( - '--train_proportion', - help='the proportion of train dataset', - type=float, - default=0.8) - parser.add_argument( - '--val_proportion', - help='the proportion of validation dataset', - type=float, - default=0.2) - args = parser.parse_args() - - assert os.path.exists(args.input), \ - f"The directory({args.input}) of " \ - f"original UWMGI dataset does not exist!" - assert zipfile.is_zipfile(args.input) - - assert 0 < args.train_proportion <= 1 - assert 0 <= args.val_proportion < 1 - assert args.train_proportion + args.val_proportion == 1 - - if not os.path.exists(args.output): - os.makedirs(args.output, exist_ok=True) - else: - if os.listdir(args.output): - shutil.rmtree(args.output) - os.makedirs(os.path.join(args.output, 'images/train')) - os.makedirs(os.path.join(args.output, 'annotations/train')) - os.makedirs(os.path.join(args.output, 'images/val')) - os.makedirs(os.path.join(args.output, 'annotations/val')) - - with zipfile.ZipFile(args.input, 'r') as zip_fp: - total_df = pd.read_csv(zip_fp.open('train.csv', 'r')) - - total_image_namelist = [] - for name in zip_fp.namelist(): - if os.path.splitext(name)[1] == '.png': - total_image_namelist.append(name) - train_image_namelist = random.sample( - total_image_namelist, int( - len(total_image_namelist) * args.train_proportion)) - val_image_namelist = np.setdiff1d( - total_image_namelist, train_image_namelist) - - pbar = tqdm(total=len(total_image_namelist)) - for image_namelist, split in zip( - [train_image_namelist, val_image_namelist], ['train', 'val']): - txt_lines = [] - for image_name in image_namelist: - with zip_fp.open(image_name, 'r') as fp: - image = get_image(fp) - image_id = to_image_id(image_name) - anns = total_df[total_df['id'] == image_id] - height, width = image.shape[:2] - mask = np.zeros([height, width * 3], dtype='uint8') - for _, ann in anns.iterrows(): - if not pd.isna(ann['segmentation']): - if ann['class'] == 'large_bowel': - mask[:, 0:width] = rle_decode( - ann['segmentation'], (height, width)) - elif ann['class'] == 'small_bowel': - mask[:, width:width * 2] = rle_decode( - ann['segmentation'], (height, width)) - else: # ann['class'] == 'stomach' - mask[:, width * 2:] = rle_decode( - ann['segmentation'], (height, width)) - cv2.imwrite(os.path.join( - args.output, 'images', split, image_id + '.jpg'), image) - cv2.imwrite(os.path.join( - args.output, 'annotations', split, image_id + '.png'), mask) - txt_lines.append( - os.path.join('images', split, image_id + '.jpg') - + ' ' + os.path.join('annotations', split, image_id + '.png')) - pbar.update() - - with open(os.path.join(args.output, split + '.txt'), 'w') as fp: - fp.write('\n'.join(txt_lines)) - - -if __name__ == '__main__': - main() From 8cf3d517adb9e9f4546d97f02dd02dc3f7ec0c5b Mon Sep 17 00:00:00 2001 From: zhenming lin <3575188313@qq.com> Date: Fri, 22 Sep 2023 08:08:06 +0800 Subject: [PATCH 15/15] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E5=92=8C=E8=BD=AC=E6=8D=A2=E8=84=9A=E6=9C=AC=E7=9A=84=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=E5=91=BD=E4=BB=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- configs/multilabelseg/README.md | 7 ++++--- configs/multilabelseg/README_cn.md | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/configs/multilabelseg/README.md b/configs/multilabelseg/README.md index 322aaf7f54..9ae964adfc 100644 --- a/configs/multilabelseg/README.md +++ b/configs/multilabelseg/README.md @@ -34,9 +34,10 @@ In the multi-label semantic segmentation task, the shape of the annotated graysc Download the raw data compression package of the UWMGI dataset and convert it to a format supported by PaddleSeg's [Dataset](../../paddleseg/datasets/dataset.py) API using the provided script. ```shell wget https://storage.googleapis.com/kaggle-competitions-data/kaggle-v2/27923/3495119/bundle/archive.zip?GoogleAccessId=web-data@kaggle-161607.iam.gserviceaccount.com&Expires=1693533809&Signature=ThCLjIYxSXfk85lCbZ5Cz2Ta4g8AjwJv0%2FgRpqpchlZLLYxk3XRnrZqappboha0moC7FuqllpwlLfCambQMbKoUjCLylVQqF0mEsn0IaJdYwprWYY%2F4FJDT2lG0HdQfAxJxlUPonXeZyZ4pZjOrrVEMprxuiIcM2kpGk35h7ry5ajkmdQbYmNQHFAJK2iO%2F4a8%2F543zhZRWsZZVbQJHid%2BjfO6ilLWiAGnMFpx4Sh2B01TUde9hBCwpxgJv55Gs0a4Z1KNsBRly6uqwgZFYfUBAejySx4RxFB7KEuRowDYuoaRT8NhSkzT2i7qqdZjgHxkFZJpRMUlDcf1RSJVkvEA%3D%3D&response-content-disposition=attachment%3B+filename%3Duw-madison-gi-tract-image-segmentation.zip -python tools/data/convert_uwmgi.py \ - ./uw-madison-gi-tract-image-segmentation.zip \ - ./data/UWMGI/ \ +python tools/data/convert_multilabel.py \ + --dataset_type uwmgi \ + --zip_input ./uw-madison-gi-tract-image-segmentation.zip \ + --output ./data/UWMGI/ \ --train_proportion 0.8 \ --val_proportion 0.2 # optional diff --git a/configs/multilabelseg/README_cn.md b/configs/multilabelseg/README_cn.md index 07653e2f03..2342d1ac0b 100644 --- a/configs/multilabelseg/README_cn.md +++ b/configs/multilabelseg/README_cn.md @@ -34,9 +34,10 @@ 下载UWMGI数据集的原始数据压缩包,并使用提供的脚本转换为PaddleSeg的[Dataset](../../paddleseg/datasets/dataset.py) API支持的格式。 ```shell wget https://storage.googleapis.com/kaggle-competitions-data/kaggle-v2/27923/3495119/bundle/archive.zip?GoogleAccessId=web-data@kaggle-161607.iam.gserviceaccount.com&Expires=1693533809&Signature=ThCLjIYxSXfk85lCbZ5Cz2Ta4g8AjwJv0%2FgRpqpchlZLLYxk3XRnrZqappboha0moC7FuqllpwlLfCambQMbKoUjCLylVQqF0mEsn0IaJdYwprWYY%2F4FJDT2lG0HdQfAxJxlUPonXeZyZ4pZjOrrVEMprxuiIcM2kpGk35h7ry5ajkmdQbYmNQHFAJK2iO%2F4a8%2F543zhZRWsZZVbQJHid%2BjfO6ilLWiAGnMFpx4Sh2B01TUde9hBCwpxgJv55Gs0a4Z1KNsBRly6uqwgZFYfUBAejySx4RxFB7KEuRowDYuoaRT8NhSkzT2i7qqdZjgHxkFZJpRMUlDcf1RSJVkvEA%3D%3D&response-content-disposition=attachment%3B+filename%3Duw-madison-gi-tract-image-segmentation.zip -python tools/data/convert_uwmgi.py \ - ./uw-madison-gi-tract-image-segmentation.zip \ - ./data/UWMGI/ \ +python tools/data/convert_multilabel.py \ + --dataset_type uwmgi \ + --zip_input ./uw-madison-gi-tract-image-segmentation.zip \ + --output ./data/UWMGI/ \ --train_proportion 0.8 \ --val_proportion 0.2 # 可选