
RuntimeError: DataLoader worker (pid 33959) is killed by signal: Killed. / indices should be either on cpu or on the same device as the indexed tensor (cpu) #249

@deffery

Description

I encountered this problem while training stage 2.
Traceback (most recent call last):
File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1134, in _try_get_data
data = self._data_queue.get(timeout=timeout)
File "/opt/conda/lib/python3.8/multiprocessing/queues.py", line 107, in get
if not self._poll(timeout):
File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 257, in poll
return self._poll(timeout)
File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 424, in _poll
r = wait([self], timeout)
File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 936, in wait
timeout = deadline - time.monotonic()
File "/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/signal_handling.py", line 66, in handler
_error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 33959) is killed by signal: Killed.
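
(For context on this first error: a DataLoader worker killed by SIGKILL usually means the host OOM killer reclaimed its memory, so reducing the number of loader workers, e.g. `data.workers_per_gpu` in mmdet-style configs, is a common mitigation. Below is only a minimal, generic PyTorch sketch of that knob with a made-up dataset, not the actual UniAD config.)

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

# Hypothetical stand-in dataset; the real one is built from the UniAD config.
dataset = TensorDataset(torch.randn(100, 3), torch.randint(0, 2, (100,)))

# num_workers=0 keeps loading in the main process, which lowers host-memory
# pressure; a common workaround when workers are reaped by the OOM killer.
loader = DataLoader(dataset, batch_size=4, num_workers=0, pin_memory=False)

for images, labels in loader:
    pass  # training step would run here
```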
Traceback (most recent call last):
File "/home/gpu4/UniAD/./tools/train.py", line 256, in
main()
File "/home/gpu4/UniAD/./tools/train.py", line 245, in main
custom_train_model(
File "/home/gpu4/UniAD/projects/mmdet3d_plugin/uniad/apis/train.py", line 21, in custom_train_model
custom_train_detector(
File "/home/gpu4/UniAD/projects/mmdet3d_plugin/uniad/apis/mmdet_train.py", line 194, in custom_train_detector
runner.run(data_loaders, cfg.workflow)
File "/home/gpu4/mmcv/mmcv/runner/epoch_based_runner.py", line 136, in run
epoch_runner(data_loaders[i], **kwargs)
File "/home/gpu4/mmcv/mmcv/runner/epoch_based_runner.py", line 53, in train
self.run_iter(data_batch, train_mode=True, **kwargs)
File "/home/gpu4/mmcv/mmcv/runner/epoch_based_runner.py", line 31, in run_iter
outputs = self.model.train_step(data_batch, self.optimizer,
File "/home/gpu4/mmcv/mmcv/parallel/distributed.py", line 63, in train_step
output = self.module.train_step(*inputs[0], **kwargs[0])
File "/home/gpu4/miniconda3/envs/uniad2.0/lib/python3.9/site-packages/mmdet/models/detectors/base.py", line 248, in train_step
losses = self(**data)
File "/home/gpu4/miniconda3/envs/uniad2.0/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/gpu4/UniAD/projects/mmdet3d_plugin/uniad/detectors/uniad_e2e.py", line 81, in forward
return self.forward_train(**kwargs)
File "/home/gpu4/mmcv/mmcv/runner/fp16_utils.py", line 116, in new_func
return old_func(*args, **kwargs)
File "/home/gpu4/UniAD/projects/mmdet3d_plugin/uniad/detectors/uniad_e2e.py", line 187, in forward_train
ret_dict_motion = self.motion_head.forward_train(bev_embed,
File "/home/gpu4/UniAD/projects/mmdet3d_plugin/uniad/dense_heads/motion_head.py", line 137, in forward_train
losses = self.loss(*loss_inputs)
File "/home/gpu4/mmcv/mmcv/runner/fp16_utils.py", line 205, in new_func
return old_func(*args, **kwargs)
File "/home/gpu4/UniAD/projects/mmdet3d_plugin/uniad/dense_heads/motion_head.py", line 416, in loss
gt_fut_traj_all, gt_fut_traj_mask_all = self.compute_matched_gt_traj(
File "/home/gpu4/UniAD/projects/mmdet3d_plugin/uniad/dense_heads/motion_head.py", line 475, in compute_matched_gt_traj
bboxes = track_bbox_results[i][0].tensor[valid_traj_masks]
RuntimeError: indices should be either on cpu or on the same device as the indexed tensor (cpu)
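
If it helps with triage: this second RuntimeError is the standard message PyTorch raises when a CPU tensor is indexed with a CUDA index or mask tensor. Below is a minimal, hypothetical sketch (made-up names, not the UniAD code) that reproduces the error and shows the usual device-alignment workaround.

```python
import torch

# Hypothetical tensors reproducing the mismatch: a CPU box tensor indexed by a
# CUDA boolean mask (requires a CUDA device to trigger the error).
boxes = torch.randn(5, 9)                                    # lives on CPU
mask = torch.tensor([True, False, True, False, True], device="cuda")

# boxes[mask]  # -> RuntimeError: indices should be either on cpu or on the
#              #    same device as the indexed tensor (cpu)

# Moving the mask to the tensor's device (or vice versa) avoids the error.
selected = boxes[mask.to(boxes.device)]
print(selected.shape)  # torch.Size([3, 9])
```

In compute_matched_gt_traj the same idea would mean making sure valid_traj_masks and track_bbox_results[i][0].tensor end up on the same device, though I have not verified which of the two is the one left on CPU here.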
