1 file changed: +4 -3 lines changed
Original file line number | Diff line number | Diff line change
14
14
os .environ ["NCCL_HOSTID" ] = str (REPLICA_GROUP_ID )
15
15
16
16
USE_STREAMING = os .getenv ("USE_STREAMING" , "False" ) == "True"
17
+ USE_NCCL = os .getenv ("USE_NCCL" , "False" ) == "True"
17
18
18
19
import torch
19
20
import torch .nn .functional as F
@@ -60,19 +61,19 @@ def state_dict():
60
61
"outer_optim" : outer_optimizer .state_dict (),
61
62
}
62
63
63
- device = "cuda" if torch .cuda .is_available () else "cpu"
64
+ device = torch . device ( "cuda" if torch .cuda .is_available () else "cpu" )
64
65
pg = (
65
66
ProcessGroupNCCL (
66
67
timeout = timedelta (seconds = 10 ),
67
68
)
68
- if torch .cuda .is_available ()
69
+ if torch .cuda .is_available () and USE_NCCL
69
70
else ProcessGroupGloo (timeout = timedelta (seconds = 5 ))
70
71
)
71
72
72
73
transport = PGTransport (
73
74
pg ,
74
75
timeout = timedelta (seconds = 10 ),
75
- device = ( "cuda" if torch . cuda . is_available () else "cpu" ) ,
76
+ device = device ,
76
77
)
77
78
78
79
manager = Manager (
You can’t perform that action at this time.
0 commit comments