Skip to content

Commit

Permalink
revert "chore(core): update launch backend to subprocess (#158)" (#176)
Browse files Browse the repository at this point in the history
This reverts commit 9ac889 to fix a training bug.
  • Loading branch information
FateScript authored Jul 26, 2021
1 parent 44d837d commit 8ba73ad
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 33 deletions.
6 changes: 3 additions & 3 deletions tools/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from yolox.core import Trainer, launch
from yolox.exp import get_exp
from yolox.utils import configure_nccl, get_num_devices
from yolox.utils import configure_nccl


def make_parser():
Expand Down Expand Up @@ -106,8 +106,8 @@ def main(exp, args):
exp = get_exp(args.exp_file, args.name)
exp.merge(args.opts)

num_gpu = get_num_devices() if args.devices is None else args.devices
assert num_gpu <= get_num_devices()
num_gpu = torch.cuda.device_count() if args.devices is None else args.devices
assert num_gpu <= torch.cuda.device_count()

dist_url = "auto" if args.dist_url is None else args.dist_url
launch(
Expand Down
20 changes: 9 additions & 11 deletions yolox/core/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,15 @@ def launch(
port = _find_free_port()
dist_url = f"tcp://127.0.0.1:{port}"

processes = []
for rank in range(num_gpus_per_machine):
p = mp.Process(
target=_distributed_worker,
args=(
rank, main_func, world_size, num_gpus_per_machine,
machine_rank, backend, dist_url, args))
p.start()
processes.append(p)
for p in processes:
p.join()
mp.spawn(
_distributed_worker,
nprocs=num_gpus_per_machine,
args=(
main_func, world_size, num_gpus_per_machine,
machine_rank, backend, dist_url, args
),
daemon=False,
)
else:
main_func(*args)

Expand Down
2 changes: 1 addition & 1 deletion yolox/data/datasets/mosaicdetection.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,4 @@ def mixup(self, origin_img, origin_labels, input_dim):
origin_img = origin_img.astype(np.float32)
origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)

return origin_img, origin_labels
return origin_img.astype(np.uint8), origin_labels
10 changes: 1 addition & 9 deletions yolox/utils/boxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

__all__ = [
"filter_box", "postprocess", "bboxes_iou", "matrix_iou",
"adjust_box_anns", "xyxy2xywh", "xyxy2cxcywh",
"adjust_box_anns", "xyxy2xywh",
]


Expand Down Expand Up @@ -113,11 +113,3 @@ def xyxy2xywh(bboxes):
bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
return bboxes


def xyxy2cxcywh(bboxes):
bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5
bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5
return bboxes
7 changes: 0 additions & 7 deletions yolox/utils/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,12 @@
__all__ = [
"AverageMeter",
"MeterBuffer",
"get_num_devices",
"get_total_and_free_memory_in_Mb",
"occupy_mem",
"gpu_mem_usage",
]


def get_num_devices():
devices_list_info = os.popen("nvidia-smi -L")
devices_list_info = devices_list_info.read().strip().split("\n")
return len(devices_list_info)


def get_total_and_free_memory_in_Mb(cuda_device):
devices_info_str = os.popen(
"nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader"
Expand Down
2 changes: 0 additions & 2 deletions yolox/utils/setup_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,3 @@ def configure_module(ulimit_value=8192):
except Exception:
# cv2 version mismatch might raise exceptions.
pass

os.environ["OMP_NUM_THREADS"] = str(1)

0 comments on commit 8ba73ad

Please sign in to comment.