diff --git a/configs/dcn/yolov3_enhance_reader.yml b/configs/dcn/yolov3_enhance_reader.yml new file mode 100644 index 0000000000..228e5558aa --- /dev/null +++ b/configs/dcn/yolov3_enhance_reader.yml @@ -0,0 +1,104 @@ +TrainReader: + inputs_def: + fields: ['image', 'gt_bbox', 'gt_class', 'gt_score'] + num_max_boxes: 50 + use_fine_grained_loss: true + dataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + - !RandomCrop {} + - !RandomFlipImage + is_normalized: false + - !NormalizeBox {} + - !PadBox + num_max_boxes: 50 + - !BboxXYXY2XYWH {} + batch_transforms: + - !RandomShape + sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608] + random_inter: True + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: False + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + # Gt2YoloTarget is only used when use_fine_grained_loss set as true, + # this operator will be deleted automatically if use_fine_grained_loss + # is set as false + - !Gt2YoloTarget + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + downsample_ratios: [32, 16, 8] + batch_size: 8 + shuffle: true + drop_last: true + worker_num: 8 + bufsize: 32 + use_process: true + +EvalReader: + inputs_def: + image_shape: [3, 608, 608] + fields: ['image', 'im_size', 'im_id'] + num_max_boxes: 50 + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_val2017.json + image_dir: val2017 + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + with_mixup: false + - !ResizeImage + interp: 2 + target_size: 608 + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: False + is_channel_first: false + - !Permute + to_bgr: 
false + channel_first: True + batch_size: 8 + drop_empty: false + worker_num: 8 + bufsize: 32 + +TestReader: + inputs_def: + image_shape: [3, 608, 608] + fields: ['image', 'im_size', 'im_id'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + with_mixup: false + - !ResizeImage + interp: 2 + target_size: 608 + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: False + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 1 diff --git a/configs/dcn/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco.yml b/configs/dcn/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco.yml similarity index 85% rename from configs/dcn/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco.yml rename to configs/dcn/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco.yml index bc067249c7..8a7fde7970 100755 --- a/configs/dcn/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco.yml +++ b/configs/dcn/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco.yml @@ -1,12 +1,12 @@ architecture: YOLOv3 use_gpu: true -max_iters: 55000 +max_iters: 85000 log_smooth_window: 20 save_dir: output snapshot_iter: 10000 metric: COCO -pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar -weights: output/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco/model_final +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar +weights: output/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco/model_final num_classes: 80 use_fine_grained_loss: true @@ -39,6 +39,7 @@ YOLOv3Head: nms_top_k: 1000 normalized: false score_threshold: 0.01 + drop_block: true YOLOv3Loss: batch_size: 8 @@ -58,8 +59,8 @@ LearningRate: - !PiecewiseDecay gamma: 0.1 milestones: - - 40000 - - 50000 + - 55000 + - 75000 - !LinearWarmup start_factor: 0. 
steps: 4000 @@ -72,4 +73,4 @@ OptimizerBuilder: factor: 0.0005 type: L2 -_READER_: '../yolov3_reader.yml' +_READER_: 'yolov3_enhance_reader.yml' diff --git a/configs/dcn/yolov3_r50vd_dcn_db_obj365_pretrained_coco.yml b/configs/dcn/yolov3_r50vd_dcn_db_obj365_pretrained_coco.yml new file mode 100755 index 0000000000..8d4e0200e4 --- /dev/null +++ b/configs/dcn/yolov3_r50vd_dcn_db_obj365_pretrained_coco.yml @@ -0,0 +1,70 @@ +architecture: YOLOv3 +use_gpu: true +max_iters: 85000 +log_smooth_window: 20 +save_dir: output +snapshot_iter: 10000 +metric: COCO +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar +weights: output/yolov3_r50vd_dcn_db_obj365_pretrained_coco/model_final +num_classes: 80 +use_fine_grained_loss: true + +YOLOv3: + backbone: ResNet + yolo_head: YOLOv3Head + use_fine_grained_loss: true + +ResNet: + norm_type: sync_bn + freeze_at: 0 + freeze_norm: false + norm_decay: 0. + depth: 50 + feature_maps: [3, 4, 5] + variant: d + dcn_v2_stages: [5] + +YOLOv3Head: + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + norm_decay: 0. + yolo_loss: YOLOv3Loss + nms: + background_label: -1 + keep_top_k: 100 + nms_threshold: 0.45 + nms_top_k: 1000 + normalized: false + score_threshold: 0.01 + drop_block: true + +YOLOv3Loss: + batch_size: 8 + ignore_thresh: 0.7 + label_smooth: false + use_fine_grained_loss: true + +LearningRate: + base_lr: 0.001 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 55000 + - 75000 + - !LinearWarmup + start_factor: 0. 
+ steps: 4000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +_READER_: 'yolov3_enhance_reader.yml' diff --git a/configs/dcn/yolov3_r50vd_dcn_obj365_pretrained_coco.yml b/configs/dcn/yolov3_r50vd_dcn_obj365_pretrained_coco.yml index 9e66437407..7042d9538d 100755 --- a/configs/dcn/yolov3_r50vd_dcn_obj365_pretrained_coco.yml +++ b/configs/dcn/yolov3_r50vd_dcn_obj365_pretrained_coco.yml @@ -1,18 +1,19 @@ architecture: YOLOv3 use_gpu: true -max_iters: 55000 +max_iters: 85000 log_smooth_window: 20 save_dir: output snapshot_iter: 10000 metric: COCO -pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar +pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar weights: output/yolov3_r50vd_dcn_obj365_pretrained_coco/model_final num_classes: 80 -use_fine_grained_loss: false +use_fine_grained_loss: true YOLOv3: backbone: ResNet yolo_head: YOLOv3Head + use_fine_grained_loss: true ResNet: norm_type: sync_bn @@ -43,6 +44,7 @@ YOLOv3Loss: batch_size: 8 ignore_thresh: 0.7 label_smooth: false + use_fine_grained_loss: true LearningRate: base_lr: 0.001 @@ -50,8 +52,8 @@ LearningRate: - !PiecewiseDecay gamma: 0.1 milestones: - - 40000 - - 50000 + - 55000 + - 75000 - !LinearWarmup start_factor: 0. 
steps: 4000 @@ -64,106 +66,4 @@ OptimizerBuilder: factor: 0.0005 type: L2 -TrainReader: - inputs_def: - fields: ['image', 'gt_bbox', 'gt_class', 'gt_score'] - num_max_boxes: 50 - dataset: - !COCODataSet - image_dir: train2017 - anno_path: annotations/instances_train2017.json - dataset_dir: dataset/coco - with_background: false - sample_transforms: - - !DecodeImage - to_rgb: True - - !RandomCrop {} - - !RandomFlipImage - is_normalized: false - - !NormalizeBox {} - - !PadBox - num_max_boxes: 50 - - !BboxXYXY2XYWH {} - batch_transforms: - - !RandomShape - sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608] - random_inter: True - - !NormalizeImage - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - is_scale: False - is_channel_first: false - - !Permute - to_bgr: false - channel_first: True - # Gt2YoloTarget is only used when use_fine_grained_loss set as true, - # this operator will be deleted automatically if use_fine_grained_loss - # is set as false - - !Gt2YoloTarget - anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - anchors: [[10, 13], [16, 30], [33, 23], - [30, 61], [62, 45], [59, 119], - [116, 90], [156, 198], [373, 326]] - downsample_ratios: [32, 16, 8] - batch_size: 8 - shuffle: true - drop_last: true - worker_num: 8 - bufsize: 32 - use_process: true - -EvalReader: - inputs_def: - image_shape: [3, 608, 608] - fields: ['image', 'im_size', 'im_id'] - num_max_boxes: 50 - dataset: - !COCODataSet - dataset_dir: dataset/coco - anno_path: annotations/instances_val2017.json - image_dir: val2017 - with_background: false - sample_transforms: - - !DecodeImage - to_rgb: True - with_mixup: false - - !ResizeImage - interp: 2 - target_size: 608 - - !NormalizeImage - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - is_scale: False - is_channel_first: false - - !Permute - to_bgr: false - channel_first: True - batch_size: 8 - drop_empty: false - worker_num: 8 - bufsize: 32 - -TestReader: - inputs_def: - image_shape: [3, 608, 608] - fields: ['image', 
'im_size', 'im_id'] - dataset: - !ImageFolder - anno_path: annotations/instances_val2017.json - with_background: false - sample_transforms: - - !DecodeImage - to_rgb: True - with_mixup: false - - !ResizeImage - interp: 2 - target_size: 608 - - !NormalizeImage - mean: [0.485, 0.456, 0.406] - std: [0.229, 0.224, 0.225] - is_scale: True - is_channel_first: false - - !Permute - to_bgr: false - channel_first: True - batch_size: 1 +_READER_: 'yolov3_enhance_reader.yml' diff --git a/demo/dropblock.png b/demo/dropblock.png new file mode 100644 index 0000000000..66b8a88e39 Binary files /dev/null and b/demo/dropblock.png differ diff --git a/docs/featured_model/YOLOv3_ENHANCEMENT.md b/docs/featured_model/YOLOv3_ENHANCEMENT.md index a154ebc232..78694af6f9 100644 --- a/docs/featured_model/YOLOv3_ENHANCEMENT.md +++ b/docs/featured_model/YOLOv3_ENHANCEMENT.md @@ -1,6 +1,7 @@ # YOLOv3增强模型 --- + ## 简介 [YOLOv3](https://arxiv.org/abs/1804.02767) 是由 [Joseph Redmon](https://arxiv.org/search/cs?searchtype=author&query=Redmon%2C+J) 和 [Ali Farhadi](https://arxiv.org/search/cs?searchtype=author&query=Farhadi%2C+A) 提出的单阶段检测器, 该检测 @@ -8,12 +9,22 @@ PaddleDetection实现版本中使用了 [Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/abs/1902.04103v3) 中提出的图像增强和label smooth等优化方法,精度优于darknet框架的实现版本,在COCO-2017数据集上,YOLOv3(DarkNet)达到`mAP(0.50:0.95)= 38.9`的精度,比darknet实现版本的精度(33.0)要高5.9。同时,在推断速度方面,基于Paddle预测库的加速方法,推断速度比darknet高30%。 -在此基础上,PaddleDetection对YOLOv3进一步改进,得到了更大的精度和速度优势。 +在此基础上,PaddleDetection对YOLOv3进一步改进,进一步提升了速度和精度,最终在COCO mAP上可以达到43.2。 ## 方法描述 -将YOLOv3骨架网络更换为ResNet50-vd,同时在最后一个Residual block中引入[Deformable convolution v2](https://arxiv.org/abs/1811.11168)(可变形卷积)替代原始卷积操作。另外,使用[object365数据集](https://www.objects365.org/download.html)训练得到的模型作为coco数据集上的预训练模型,进一步提高YOLOv3的精度。 +1.将[YOLOv3](https://arxiv.org/pdf/1804.02767.pdf)骨架网络更换为[ResNet50-VD](https://arxiv.org/pdf/1812.01187.pdf)。ResNet50-VD网络相比原生的DarkNet53网络在速度和精度上都有一定的优势,且相较DarkNet53 
ResNet系列更容易扩展,针对自己业务场景可以选择ResNet18、34、101等不同结构作为检测模型的主干网络。 + +2.引入[Deformable Convolution v2](https://arxiv.org/abs/1811.11168)(可变形卷积)替代原始卷积操作,Deformable Convolution已经在多个视觉任务中广泛验证过其效果,在YOLOv3增强模型中考虑到速度与精度的平衡,我们仅使用Deformable Convolution替换了主干网络中Stage5部分的3x3卷积。 + +3.在FPN部分增加[DropBlock](https://arxiv.org/abs/1810.12890)模块,提高模型泛化能力。Dropout操作如下图(b)中所示是分类网络中广泛使用的增强模型泛化能力的重要手段之一。DropBlock算法相比于Dropout算法,在Drop特征的时候会集中Drop掉某一块区域,更适应于在检测任务中提高网络泛化能力。 + +![image-20200204141739840](../images/dropblock.png) + +4.YOLOv3作为一阶段检测网络,在定位精度上相比Faster RCNN,Cascade RCNN等网络结构有着其天然的劣势,增加[IoU Loss](https://arxiv.org/abs/1908.03851)分支,可以一定程度上提高BBox定位精度,缩小一阶段和两阶段检测网络的差距。 + +5.使用[Object365数据集](https://www.objects365.org/download.html)训练得到的模型作为coco数据集上的预训练模型,Object365数据集包含约60万张图片以及365种类别,相比coco数据集进行预训练可以进一步提高YOLOv3的精度。 ## 使用方法 @@ -21,15 +32,17 @@ PaddleDetection实现版本中使用了 [Bag of Freebies for Training Object Det ```bash export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -python tools/train.py -c configs/dcn/yolov3_r50vd_dcn.yml +python tools/train.py -c configs/dcn/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco.yml ``` 更多模型参数请使用``python tools/train.py --help``查看,或参考[训练、评估及参数说明](../tutorials/GETTING_STARTED_cn.md)文档 ### 模型效果 -| 模型 | 预训练模型 | 验证集 mAP | P4预测速度 | 下载 | -| :---------------------:|:-----------------: | :-------------: | :----------------------:|:-----------------------------------------------------: | -| YOLOv3 DarkNet | [DarkNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar) | 38.9 | 原生:88.3ms
tensorRT-FP32: 42.5ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) | -| YOLOv3 ResNet50_vd dcn | [ImageNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 39.1 | 原生:74.4ms
tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_imagenet.tar) | -| YOLOv3 ResNet50_vd dcn | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar) | 41.4 | 原生:74.4ms
tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_obj365.tar) | +| 模型 | 预训练模型 | 验证集 mAP | P4预测速度 | 下载 | +| :--------------------------------------: | :----------------------------------------------------------: | :--------: | :------------------------------------: | :----------------------------------------------------------: | +| YOLOv3 DarkNet | [DarkNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar) | 38.9 | 原生:88.3ms
tensorRT-FP32: 42.5ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) | +| YOLOv3 ResNet50_vd DCN | [ImageNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 39.1 | 原生:74.4ms
tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_imagenet.tar) | +| YOLOv3 ResNet50_vd DCN | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar) | 42.5 | 原生:74.4ms
tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_obj365_v2.tar) | +| YOLOv3 ResNet50_vd DCN DropBlock | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar) | 42.8 | 原生:74.4ms
tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_db_obj365.tar) | +| YOLOv3 ResNet50_vd DCN DropBlock IoULoss | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar) | 43.2 | 原生:74.4ms
tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_db_obj365.tar) | diff --git a/docs/images/dropblock.png b/docs/images/dropblock.png new file mode 100644 index 0000000000..66b8a88e39 Binary files /dev/null and b/docs/images/dropblock.png differ