Skip to content

Commit fef4abc

Browse files
committed
use http transport
Summary: Use HTTPTransport instead of PGTransport for checkpoint transport -- PGTransport fails to resolve the address when running locally.
1 parent 949a981 commit fef4abc

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

train_diloco.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
ProcessGroupGloo,
3535
ProcessGroupNCCL,
3636
)
37-
from torchft.checkpointing.pg_transport import PGTransport
37+
from torchft.checkpointing.http_transport import HTTPTransport
3838
from torchft.local_sgd import DiLoCo
3939

4040
logging.basicConfig(level=logging.INFO)
@@ -67,13 +67,12 @@ def state_dict():
6767
timeout=timedelta(seconds=10),
6868
)
6969
if torch.cuda.is_available() and USE_NCCL
70-
else ProcessGroupGloo(timeout=timedelta(seconds=5))
70+
else ProcessGroupGloo(timeout=timedelta(seconds=10))
7171
)
7272

73-
transport = PGTransport(
74-
pg,
73+
transport = HTTPTransport(
7574
timeout=timedelta(seconds=10),
76-
device=device,
75+
num_chunks=0,
7776
)
7877

7978
manager = Manager(

0 commit comments

Comments
 (0)