You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
{{ message }}
This repository has been archived by the owner on Nov 1, 2024. It is now read-only.
When training, the following error is raised:
ERROR: Unexpected segmentation fault encountered in worker.
Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 986, in _try_get_data
data = self._data_queue.get(timeout=timeout)
File "/usr/lib/python3.6/queue.py", line 173, in get
self.not_empty.wait(remaining)
File "/usr/lib/python3.6/threading.py", line 299, in wait
gotit = waiter.acquire(True, timeout)
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/signal_handling.py", line 66, in handler
_error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 624) is killed by signal: Segmentation fault.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/workspace/pycls/train1.0.1/train_net.py", line 24, in <module>
main()
File "/workspace/pycls/train1.0.1/train_net.py", line 20, in main
dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.train_model)
File "/workspace/pycls/train1.0.1/pycls/core/distributed.py", line 146, in multi_proc_run
fun(*fun_args, **fun_kwargs)
File "/workspace/pycls/train1.0.1/pycls/core/trainer.py", line 185, in train_model
train_epoch(*params, cur_epoch)
File "/workspace/pycls/train1.0.1/pycls/core/trainer.py", line 88, in train_epoch
for cur_iter, (inputs, labels) in enumerate(loader):
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 517, in __next__
data = self._next_data()
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1182, in _next_data
idx, data = self._get_data()
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1138, in _get_data
success, data = self._try_get_data()
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 999, in _try_get_data
raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
RuntimeError: DataLoader worker (pid(s) 624) exited unexpectedly
The text was updated successfully, but these errors were encountered:
When training, the following error is raised:
ERROR: Unexpected segmentation fault encountered in worker.
Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 986, in _try_get_data
data = self._data_queue.get(timeout=timeout)
File "/usr/lib/python3.6/queue.py", line 173, in get
self.not_empty.wait(remaining)
File "/usr/lib/python3.6/threading.py", line 299, in wait
gotit = waiter.acquire(True, timeout)
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/signal_handling.py", line 66, in handler
_error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 624) is killed by signal: Segmentation fault.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/workspace/pycls/train1.0.1/train_net.py", line 24, in <module>
main()
File "/workspace/pycls/train1.0.1/train_net.py", line 20, in main
dist.multi_proc_run(num_proc=cfg.NUM_GPUS, fun=trainer.train_model)
File "/workspace/pycls/train1.0.1/pycls/core/distributed.py", line 146, in multi_proc_run
fun(*fun_args, **fun_kwargs)
File "/workspace/pycls/train1.0.1/pycls/core/trainer.py", line 185, in train_model
train_epoch(*params, cur_epoch)
File "/workspace/pycls/train1.0.1/pycls/core/trainer.py", line 88, in train_epoch
for cur_iter, (inputs, labels) in enumerate(loader):
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 517, in __next__
data = self._next_data()
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1182, in _next_data
idx, data = self._get_data()
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 1138, in _get_data
success, data = self._try_get_data()
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 999, in _try_get_data
raise RuntimeError('DataLoader worker (pid(s) {}) exited unexpectedly'.format(pids_str)) from e
RuntimeError: DataLoader worker (pid(s) 624) exited unexpectedly
The text was updated successfully, but these errors were encountered: