diff --git a/configs/dcn/yolov3_enhance_reader.yml b/configs/dcn/yolov3_enhance_reader.yml
new file mode 100644
index 0000000000..228e5558aa
--- /dev/null
+++ b/configs/dcn/yolov3_enhance_reader.yml
@@ -0,0 +1,104 @@
+TrainReader:
+ inputs_def:
+ fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
+ num_max_boxes: 50
+ use_fine_grained_loss: true
+ dataset:
+ !COCODataSet
+ image_dir: train2017
+ anno_path: annotations/instances_train2017.json
+ dataset_dir: dataset/coco
+ with_background: false
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: True
+ - !RandomCrop {}
+ - !RandomFlipImage
+ is_normalized: false
+ - !NormalizeBox {}
+ - !PadBox
+ num_max_boxes: 50
+ - !BboxXYXY2XYWH {}
+ batch_transforms:
+ - !RandomShape
+ sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+ random_inter: True
+ - !NormalizeImage
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ is_scale: False
+ is_channel_first: false
+ - !Permute
+ to_bgr: false
+ channel_first: True
+ # Gt2YoloTarget is only used when use_fine_grained_loss set as true,
+ # this operator will be deleted automatically if use_fine_grained_loss
+ # is set as false
+ - !Gt2YoloTarget
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ downsample_ratios: [32, 16, 8]
+ batch_size: 8
+ shuffle: true
+ drop_last: true
+ worker_num: 8
+ bufsize: 32
+ use_process: true
+
+EvalReader:
+ inputs_def:
+ image_shape: [3, 608, 608]
+ fields: ['image', 'im_size', 'im_id']
+ num_max_boxes: 50
+ dataset:
+ !COCODataSet
+ dataset_dir: dataset/coco
+ anno_path: annotations/instances_val2017.json
+ image_dir: val2017
+ with_background: false
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: True
+ with_mixup: false
+ - !ResizeImage
+ interp: 2
+ target_size: 608
+ - !NormalizeImage
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ is_scale: False
+ is_channel_first: false
+ - !Permute
+ to_bgr: false
+ channel_first: True
+ batch_size: 8
+ drop_empty: false
+ worker_num: 8
+ bufsize: 32
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 608, 608]
+ fields: ['image', 'im_size', 'im_id']
+ dataset:
+ !ImageFolder
+ anno_path: annotations/instances_val2017.json
+ with_background: false
+ sample_transforms:
+ - !DecodeImage
+ to_rgb: True
+ with_mixup: false
+ - !ResizeImage
+ interp: 2
+ target_size: 608
+ - !NormalizeImage
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ is_scale: False
+ is_channel_first: false
+ - !Permute
+ to_bgr: false
+ channel_first: True
+ batch_size: 1
diff --git a/configs/dcn/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco.yml b/configs/dcn/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco.yml
similarity index 85%
rename from configs/dcn/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco.yml
rename to configs/dcn/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco.yml
index bc067249c7..8a7fde7970 100755
--- a/configs/dcn/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco.yml
+++ b/configs/dcn/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco.yml
@@ -1,12 +1,12 @@
architecture: YOLOv3
use_gpu: true
-max_iters: 55000
+max_iters: 85000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
metric: COCO
-pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar
-weights: output/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco/model_final
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar
+weights: output/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco/model_final
num_classes: 80
use_fine_grained_loss: true
@@ -39,6 +39,7 @@ YOLOv3Head:
nms_top_k: 1000
normalized: false
score_threshold: 0.01
+ drop_block: true
YOLOv3Loss:
batch_size: 8
@@ -58,8 +59,8 @@ LearningRate:
- !PiecewiseDecay
gamma: 0.1
milestones:
- - 40000
- - 50000
+ - 55000
+ - 75000
- !LinearWarmup
start_factor: 0.
steps: 4000
@@ -72,4 +73,4 @@ OptimizerBuilder:
factor: 0.0005
type: L2
-_READER_: '../yolov3_reader.yml'
+_READER_: 'yolov3_enhance_reader.yml'
diff --git a/configs/dcn/yolov3_r50vd_dcn_db_obj365_pretrained_coco.yml b/configs/dcn/yolov3_r50vd_dcn_db_obj365_pretrained_coco.yml
new file mode 100755
index 0000000000..8d4e0200e4
--- /dev/null
+++ b/configs/dcn/yolov3_r50vd_dcn_db_obj365_pretrained_coco.yml
@@ -0,0 +1,70 @@
+architecture: YOLOv3
+use_gpu: true
+max_iters: 85000
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 10000
+metric: COCO
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar
+weights: output/yolov3_r50vd_dcn_db_obj365_pretrained_coco/model_final
+num_classes: 80
+use_fine_grained_loss: true
+
+YOLOv3:
+ backbone: ResNet
+ yolo_head: YOLOv3Head
+ use_fine_grained_loss: true
+
+ResNet:
+ norm_type: sync_bn
+ freeze_at: 0
+ freeze_norm: false
+ norm_decay: 0.
+ depth: 50
+ feature_maps: [3, 4, 5]
+ variant: d
+ dcn_v2_stages: [5]
+
+YOLOv3Head:
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ norm_decay: 0.
+ yolo_loss: YOLOv3Loss
+ nms:
+ background_label: -1
+ keep_top_k: 100
+ nms_threshold: 0.45
+ nms_top_k: 1000
+ normalized: false
+ score_threshold: 0.01
+ drop_block: true
+
+YOLOv3Loss:
+ batch_size: 8
+ ignore_thresh: 0.7
+ label_smooth: false
+ use_fine_grained_loss: true
+
+LearningRate:
+ base_lr: 0.001
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 55000
+ - 75000
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
+
+_READER_: 'yolov3_enhance_reader.yml'
diff --git a/configs/dcn/yolov3_r50vd_dcn_obj365_pretrained_coco.yml b/configs/dcn/yolov3_r50vd_dcn_obj365_pretrained_coco.yml
index 9e66437407..7042d9538d 100755
--- a/configs/dcn/yolov3_r50vd_dcn_obj365_pretrained_coco.yml
+++ b/configs/dcn/yolov3_r50vd_dcn_obj365_pretrained_coco.yml
@@ -1,18 +1,19 @@
architecture: YOLOv3
use_gpu: true
-max_iters: 55000
+max_iters: 85000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
metric: COCO
-pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar
-weights: output/yolov3_r50vd_dcn_obj365_pretrained_coco/model_final
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar
+weights: output/yolov3_r50vd_dcn_obj365_pretrained_coco/model_final
num_classes: 80
-use_fine_grained_loss: false
+use_fine_grained_loss: true
YOLOv3:
backbone: ResNet
yolo_head: YOLOv3Head
+ use_fine_grained_loss: true
ResNet:
norm_type: sync_bn
@@ -43,6 +44,7 @@ YOLOv3Loss:
batch_size: 8
ignore_thresh: 0.7
label_smooth: false
+ use_fine_grained_loss: true
LearningRate:
base_lr: 0.001
@@ -50,8 +52,8 @@ LearningRate:
- !PiecewiseDecay
gamma: 0.1
milestones:
- - 40000
- - 50000
+ - 55000
+ - 75000
- !LinearWarmup
start_factor: 0.
steps: 4000
@@ -64,106 +66,4 @@ OptimizerBuilder:
factor: 0.0005
type: L2
-TrainReader:
- inputs_def:
- fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
- num_max_boxes: 50
- dataset:
- !COCODataSet
- image_dir: train2017
- anno_path: annotations/instances_train2017.json
- dataset_dir: dataset/coco
- with_background: false
- sample_transforms:
- - !DecodeImage
- to_rgb: True
- - !RandomCrop {}
- - !RandomFlipImage
- is_normalized: false
- - !NormalizeBox {}
- - !PadBox
- num_max_boxes: 50
- - !BboxXYXY2XYWH {}
- batch_transforms:
- - !RandomShape
- sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
- random_inter: True
- - !NormalizeImage
- mean: [0.485, 0.456, 0.406]
- std: [0.229, 0.224, 0.225]
- is_scale: False
- is_channel_first: false
- - !Permute
- to_bgr: false
- channel_first: True
- # Gt2YoloTarget is only used when use_fine_grained_loss set as true,
- # this operator will be deleted automatically if use_fine_grained_loss
- # is set as false
- - !Gt2YoloTarget
- anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
- anchors: [[10, 13], [16, 30], [33, 23],
- [30, 61], [62, 45], [59, 119],
- [116, 90], [156, 198], [373, 326]]
- downsample_ratios: [32, 16, 8]
- batch_size: 8
- shuffle: true
- drop_last: true
- worker_num: 8
- bufsize: 32
- use_process: true
-
-EvalReader:
- inputs_def:
- image_shape: [3, 608, 608]
- fields: ['image', 'im_size', 'im_id']
- num_max_boxes: 50
- dataset:
- !COCODataSet
- dataset_dir: dataset/coco
- anno_path: annotations/instances_val2017.json
- image_dir: val2017
- with_background: false
- sample_transforms:
- - !DecodeImage
- to_rgb: True
- with_mixup: false
- - !ResizeImage
- interp: 2
- target_size: 608
- - !NormalizeImage
- mean: [0.485, 0.456, 0.406]
- std: [0.229, 0.224, 0.225]
- is_scale: False
- is_channel_first: false
- - !Permute
- to_bgr: false
- channel_first: True
- batch_size: 8
- drop_empty: false
- worker_num: 8
- bufsize: 32
-
-TestReader:
- inputs_def:
- image_shape: [3, 608, 608]
- fields: ['image', 'im_size', 'im_id']
- dataset:
- !ImageFolder
- anno_path: annotations/instances_val2017.json
- with_background: false
- sample_transforms:
- - !DecodeImage
- to_rgb: True
- with_mixup: false
- - !ResizeImage
- interp: 2
- target_size: 608
- - !NormalizeImage
- mean: [0.485, 0.456, 0.406]
- std: [0.229, 0.224, 0.225]
- is_scale: True
- is_channel_first: false
- - !Permute
- to_bgr: false
- channel_first: True
- batch_size: 1
+_READER_: 'yolov3_enhance_reader.yml'
diff --git a/demo/dropblock.png b/demo/dropblock.png
new file mode 100644
index 0000000000..66b8a88e39
Binary files /dev/null and b/demo/dropblock.png differ
diff --git a/docs/featured_model/YOLOv3_ENHANCEMENT.md b/docs/featured_model/YOLOv3_ENHANCEMENT.md
index a154ebc232..78694af6f9 100644
--- a/docs/featured_model/YOLOv3_ENHANCEMENT.md
+++ b/docs/featured_model/YOLOv3_ENHANCEMENT.md
@@ -1,6 +1,7 @@
# YOLOv3增强模型
---
+
## 简介
[YOLOv3](https://arxiv.org/abs/1804.02767) 是由 [Joseph Redmon](https://arxiv.org/search/cs?searchtype=author&query=Redmon%2C+J) 和 [Ali Farhadi](https://arxiv.org/search/cs?searchtype=author&query=Farhadi%2C+A) 提出的单阶段检测器, 该检测
@@ -8,12 +9,22 @@
PaddleDetection实现版本中使用了 [Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/abs/1902.04103v3) 中提出的图像增强和label smooth等优化方法,精度优于darknet框架的实现版本,在COCO-2017数据集上,YOLOv3(DarkNet)达到`mAP(0.50:0.95)= 38.9`的精度,比darknet实现版本的精度(33.0)要高5.9。同时,在推断速度方面,基于Paddle预测库的加速方法,推断速度比darknet高30%。
-在此基础上,PaddleDetection对YOLOv3进一步改进,得到了更大的精度和速度优势。
+在此基础上,PaddleDetection对YOLOv3做了进一步改进,同时提升了速度和精度,最终COCO mAP可以达到43.2。
## 方法描述
-将YOLOv3骨架网络更换为ResNet50-vd,同时在最后一个Residual block中引入[Deformable convolution v2](https://arxiv.org/abs/1811.11168)(可变形卷积)替代原始卷积操作。另外,使用[object365数据集](https://www.objects365.org/download.html)训练得到的模型作为coco数据集上的预训练模型,进一步提高YOLOv3的精度。
+1. 将[YOLOv3](https://arxiv.org/pdf/1804.02767.pdf)骨架网络更换为[ResNet50-VD](https://arxiv.org/pdf/1812.01187.pdf)。ResNet50-VD网络相比原生的DarkNet53网络在速度和精度上都有一定的优势;且相较DarkNet53,ResNet系列更容易扩展,可以针对自己的业务场景选择ResNet18、34、101等不同结构作为检测模型的主干网络。
+
+2. 引入[Deformable Convolution v2](https://arxiv.org/abs/1811.11168)(可变形卷积)替代原始卷积操作。Deformable Convolution已经在多个视觉任务中广泛验证过其效果,在YOLOv3增强模型中,考虑到速度与精度的平衡,我们仅使用Deformable Convolution替换了主干网络中Stage5部分的3x3卷积。
+
+3. 在FPN部分增加[DropBlock](https://arxiv.org/abs/1810.12890)模块,提高模型泛化能力。如下图(b)所示,Dropout是分类网络中广泛使用的增强模型泛化能力的重要手段之一。DropBlock算法相比于Dropout算法,在Drop特征的时候会集中Drop掉某一块区域,更适用于在检测任务中提高网络泛化能力。
+
+![DropBlock示意图](../images/dropblock.png)
+
+4. YOLOv3作为一阶段检测网络,在定位精度上相比Faster RCNN、Cascade RCNN等网络结构有着天然的劣势。增加[IoU Loss](https://arxiv.org/abs/1908.03851)分支,可以在一定程度上提高BBox定位精度,缩小一阶段和两阶段检测网络的差距。
+
+5. 使用[Object365数据集](https://www.objects365.org/download.html)训练得到的模型作为COCO数据集上的预训练模型。Object365数据集包含约60万张图片以及365种类别,相比COCO数据集,使用它进行预训练可以进一步提高YOLOv3的精度。
## 使用方法
@@ -21,15 +32,17 @@ PaddleDetection实现版本中使用了 [Bag of Freebies for Training Object Det
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-python tools/train.py -c configs/dcn/yolov3_r50vd_dcn.yml
+python tools/train.py -c configs/dcn/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco.yml
```
更多模型参数请使用``python tools/train.py --help``查看,或参考[训练、评估及参数说明](../tutorials/GETTING_STARTED_cn.md)文档
### 模型效果
-| 模型 | 预训练模型 | 验证集 mAP | P4预测速度 | 下载 |
-| :---------------------:|:-----------------: | :-------------: | :----------------------:|:-----------------------------------------------------: |
-| YOLOv3 DarkNet | [DarkNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar) | 38.9 | 原生:88.3ms
tensorRT-FP32: 42.5ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) |
-| YOLOv3 ResNet50_vd dcn | [ImageNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 39.1 | 原生:74.4ms
tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_imagenet.tar) |
-| YOLOv3 ResNet50_vd dcn | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar) | 41.4 | 原生:74.4ms
tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_obj365.tar) |
+| 模型 | 预训练模型 | 验证集 mAP | P4预测速度 | 下载 |
+| :--------------------------------------: | :----------------------------------------------------------: | :--------: | :------------------------------------: | :----------------------------------------------------------: |
+| YOLOv3 DarkNet | [DarkNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar) | 38.9 | 原生:88.3ms<br>tensorRT-FP32: 42.5ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) |
+| YOLOv3 ResNet50_vd DCN | [ImageNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 39.1 | 原生:74.4ms<br>tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_imagenet.tar) |
+| YOLOv3 ResNet50_vd DCN | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar) | 42.5 | 原生:74.4ms<br>tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_obj365_v2.tar) |
+| YOLOv3 ResNet50_vd DCN DropBlock | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar) | 42.8 | 原生:74.4ms<br>tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_db_obj365.tar) |
+| YOLOv3 ResNet50_vd DCN DropBlock IoULoss | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar) | 43.2 | 原生:74.4ms<br>tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_db_obj365.tar) |
diff --git a/docs/images/dropblock.png b/docs/images/dropblock.png
new file mode 100644
index 0000000000..66b8a88e39
Binary files /dev/null and b/docs/images/dropblock.png differ