Add support for cpu-only mode. Also enable use of TF's work sharders.
Ahmed Ammar committed Nov 21, 2016
1 parent 3098ddc commit a6db9e0
Showing 9 changed files with 311 additions and 132 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -85,8 +85,9 @@ The demo performs detection using a VGG16 network trained for detection on PASCAL
6. Run script to train and test model
```Shell
cd $FRCN_ROOT
-./experiments/scripts/faster_rcnn_end2end.sh GPU_ID VGG16 pascal_voc
+./experiments/scripts/faster_rcnn_end2end.sh $DEVICE $DEVICE_ID VGG16 pascal_voc
```
+DEVICE is either cpu or gpu.
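For example (a usage note, not part of the diff): `./experiments/scripts/faster_rcnn_end2end.sh gpu 0 VGG16 pascal_voc` trains and tests on GPU 0, while `./experiments/scripts/faster_rcnn_end2end.sh cpu 0 VGG16 pascal_voc` runs the same experiment in CPU-only mode; the device id is presumably unused when DEVICE is cpu.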

### The result of testing on PASCAL VOC 2007

14 changes: 7 additions & 7 deletions experiments/scripts/faster_rcnn_end2end.sh
Expand Up @@ -12,14 +12,14 @@ set -e

export PYTHONUNBUFFERED="True"

-GPU_ID=$1
-NET=$2
-NET_lc=${NET,,}
-DATASET=$3
+DEV=$1
+DEV_ID=$2
+NET=$3
+DATASET=$4

array=( $@ )
len=${#array[@]}
-EXTRA_ARGS=${array[@]:3:$len}
+EXTRA_ARGS=${array[@]:4:$len}
EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_}

case $DATASET in
@@ -48,7 +48,7 @@ LOG="experiments/logs/faster_rcnn_end2end_${NET}_${EXTRA_ARGS_SLUG}.txt.`date +'
exec &> >(tee -a "$LOG")
echo Logging output to "$LOG"

-time python ./tools/train_net.py --gpu ${GPU_ID} \
+time python ./tools/train_net.py --device ${DEV} --device_id ${DEV_ID} \
--weights data/pretrain_model/VGG_imagenet.npy \
--imdb ${TRAIN_IMDB} \
--iters ${ITERS} \
@@ -60,7 +60,7 @@ set +x
NET_FINAL=`grep -B 1 "done solving" ${LOG} | grep "Wrote snapshot" | awk '{print $4}'`
set -x

-time python ./tools/test_net.py --gpu ${GPU_ID} \
+time python ./tools/test_net.py --device ${DEV} --device_id ${DEV_ID} \
--weights ${NET_FINAL} \
--imdb ${TEST_IMDB} \
--cfg experiments/cfgs/faster_rcnn_end2end.yml \
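The EXTRA_ARGS slice moves from index 3 to 4 because the script now takes four fixed positional arguments (DEV, DEV_ID, NET, DATASET) instead of three. train_net.py and test_net.py are among the nine changed files, but their hunks are not loaded on this page. Below is a minimal sketch of how the new `--device`/`--device_id` flags might be parsed and mapped to a TensorFlow device string; the flag names come from the calls above, while the parser and helper are illustrative assumptions, not the repository's actual code.

```python
import argparse

import tensorflow as tf


def parse_device_args():
    # --device/--device_id replace the old --gpu flag (see the calls above).
    parser = argparse.ArgumentParser(description='Train or test a Faster R-CNN network')
    parser.add_argument('--device', dest='device', default='gpu',
                        choices=['cpu', 'gpu'], help='device to use: cpu or gpu')
    parser.add_argument('--device_id', dest='device_id', default=0, type=int,
                        help='device index, e.g. 0 for /gpu:0')
    args, _ = parser.parse_known_args()
    return args


if __name__ == '__main__':
    args = parse_device_args()
    # Build a TensorFlow device string such as '/cpu:0' or '/gpu:0' from the flags.
    device_name = '/{}:{:d}'.format(args.device, args.device_id)
    print('Pinning the graph to', device_name)
    with tf.device(device_name):
        x = tf.constant([1.0, 2.0])  # ops created in this scope are placed on device_name
```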
13 changes: 9 additions & 4 deletions lib/fast_rcnn/config.py
@@ -19,6 +19,7 @@
import os
import os.path as osp
import numpy as np
+from distutils import spawn
# `pip install easydict` if you don't have it
from easydict import EasyDict as edict

@@ -225,11 +226,15 @@
# Place outputs under an experiments directory
__C.EXP_DIR = 'default'

-# Use GPU implementation of non-maximum suppression
-__C.USE_GPU_NMS = True
-
-# Default GPU device id
-__C.GPU_ID = 0
+if spawn.find_executable("nvcc"):
+    # Use GPU implementation of non-maximum suppression
+    __C.USE_GPU_NMS = True
+
+    # Default GPU device id
+    __C.GPU_ID = 0
+else:
+    __C.USE_GPU_NMS = False


def get_output_dir(imdb, weights_filename):
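In other words, GPU NMS is enabled only when the CUDA compiler is found on the PATH. A standalone illustration of that heuristic (not part of the diff):

```python
# distutils.spawn.find_executable returns the full path of a program on PATH, or None.
from distutils import spawn

nvcc_path = spawn.find_executable("nvcc")
if nvcc_path:
    print("nvcc found at", nvcc_path, "-> USE_GPU_NMS and a default GPU_ID are set")
else:
    print("nvcc not found -> USE_GPU_NMS is set to False")
```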
3 changes: 2 additions & 1 deletion lib/fast_rcnn/nms_wrapper.py
@@ -6,7 +6,8 @@
# --------------------------------------------------------

from fast_rcnn.config import cfg
-from nms.gpu_nms import gpu_nms
+if cfg.USE_GPU_NMS:
+    from nms.gpu_nms import gpu_nms
from nms.cpu_nms import cpu_nms

def nms(dets, thresh, force_cpu=False):
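The body of nms() is cut off above. After this change it presumably dispatches on both cfg.USE_GPU_NMS and the force_cpu argument, roughly as sketched below (an assumption based on the guarded import, not the verbatim file contents):

```python
def nms(dets, thresh, force_cpu=False):
    """Dispatch to the GPU or CPU non-maximum suppression implementation."""
    if dets.shape[0] == 0:
        return []
    if cfg.USE_GPU_NMS and not force_cpu:
        # gpu_nms exists only when USE_GPU_NMS is True, thanks to the guarded import above.
        return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    return cpu_nms(dets, thresh)
```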
22 changes: 17 additions & 5 deletions lib/make.sh
@@ -1,15 +1,27 @@
TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
echo $TF_INC

CUDA_PATH=/usr/local/cuda/
+CXXFLAGS=''
+
+if [[ "$OSTYPE" == "darwin"* ]]; then
+    CXXFLAGS+='-undefined dynamic_lookup'
+fi

cd roi_pooling_layer

-nvcc -std=c++11 -c -o roi_pooling_op.cu.o roi_pooling_op_gpu.cu.cc \
-    -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52
+if [ -d "$CUDA_PATH" ]; then
+    nvcc -std=c++11 -c -o roi_pooling_op.cu.o roi_pooling_op_gpu.cu.cc \
+        -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CXXFLAGS \
+        -arch=sm_37
+
+    g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \
+        roi_pooling_op.cu.o -I $TF_INC -D GOOGLE_CUDA=1 -fPIC $CXXFLAGS \
+        -lcudart -L $CUDA_PATH/lib64
+else
+    g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \
+        -I $TF_INC -fPIC $CXXFLAGS
+fi
-
-g++ -std=c++11 -shared -o roi_pooling.so roi_pooling_op.cc \
-    roi_pooling_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64
cd ..

#cd feature_extrapolating_layer
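Net effect of this change (a usage note, not part of the diff): running `cd $FRCN_ROOT/lib && ./make.sh` on a machine without CUDA under `/usr/local/cuda/` now builds a CPU-only `roi_pooling.so` instead of failing at the `nvcc` step, and on macOS the `-undefined dynamic_lookup` flag is added, which is generally required when linking TensorFlow custom ops there.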
(The diffs for the remaining four changed files in this commit did not load and are not shown.)
