Skip to content

Commit f06c560

Browse files
committed
use global rank for flight recorder
1 parent af5a4ff commit f06c560

File tree

1 file changed

+1
-2
lines changed

1 file changed

+1
-2
lines changed

torchft/process_group.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ def __init__(self, timeout: timedelta = timedelta(seconds=60.0)) -> None:
738 738           self._use_abort: bool = torch.cuda.nccl.version() >= (2, 25)
739 739
740 740           self._errored: Optional[Exception] = None
741     -         self._rank: int = 0
    741 +         self._rank: int = dist.get_rank()
742 742
743 743           NONBLOCKING_TIMEOUT_ENV = "TORCH_NCCL_NONBLOCKING_TIMEOUT"
744 744           if NONBLOCKING_TIMEOUT_ENV not in os.environ:
@@ -788,7 +788,6 @@ def _create_pg(self, store: Store, rank: int, world_size: int) -> BaseProcessGro…
788 788           from torch.distributed import ProcessGroupNCCL as BaseProcessGroupNCCL
789 789
790 790           self._errored = None
791     -         self._rank = rank
792 791
793 792           # pyre-fixme[16]: no attribute ProcessGroupNCCL
794 793           opts = BaseProcessGroupNCCL.Options()

0 commit comments

Comments
 (0)