Skip to content

Commit

Permalink
fix write by a single thread
Browse files Browse the repository at this point in the history
  • Loading branch information
masahi committed Dec 30, 2020
1 parent 919f40b commit 44183ae
Showing 1 changed file with 6 additions and 14 deletions.
20 changes: 6 additions & 14 deletions python/tvm/topi/cuda/nms.py
Original file line number Diff line number Diff line change
Expand Up @@ -532,16 +532,11 @@ def nms_inner_loop(ib, j):
# The box j is valid, invalidate other boxes that overlap with j above iou_threshold

# When return_indices is False, no need to populate box_indices
# Only one thread needs to do this write
if return_indices:
orig_idx = sorted_index[i * num_anchors + j]
box_indices[i, num_valid_boxes_local[0]] = indices[i, orig_idx]

# TODO(masahi): Want to do this instead of above, but the following is eliminated
# during codegen
# # Only one thread needs to do this write
# with ib.if_scope(tx == 0):
# orig_idx = sorted_index[i * num_anchors + j]
# box_indices[i, num_valid_boxes_local[0]] = indices[i, orig_idx]
with ib.if_scope(tx + 0 == 0):
orig_idx = sorted_index[i * num_anchors + j]
box_indices[i, num_valid_boxes_local[0]] = indices[i, orig_idx]

num_valid_boxes_local[0] += 1

Expand Down Expand Up @@ -593,11 +588,8 @@ def nms_inner_loop(ib, j):
with ib.else_scope():
nms_inner_loop(ib, j)

num_valid_boxes[i] = num_valid_boxes_local[0]
# TODO(masahi): Want to do this instead of above, but the following is eliminated
# during codegen
# with ib.if_scope(tx == 0):
# num_valid_boxes[i] = num_valid_boxes_local[0]
with ib.if_scope(tx + 0 == 0):
num_valid_boxes[i] = num_valid_boxes_local[0]

with ib.else_scope():
num_valid_boxes[i] = 0
Expand Down

0 comments on commit 44183ae

Please sign in to comment.