Lines Matching +full:nvidia +full:- +full:smi
6 # b) an increasing number of processes. This produces a 1-GPU baseline,
7 # an 8-GPU baseline (if applicable), as well as measurements for however
38 return allgather_object(proc.stdout.decode("utf-8"))
57 optimizer = optim.SGD(model.parameters(), 0.001, momentum=0.9, weight_decay=1e-4)
77 measurements.append(time.time() - start)
103 prefix = f"{len(ranks):4} GPUs -- {prefix}"
134 # Multi-machine benchmarks
196 parser.add_argument("--rank", type=int, default=os.environ["RANK"])
197 parser.add_argument("--world-size", type=int, required=True)
198 parser.add_argument("--distributed-backend", type=str, default="nccl")
199 parser.add_argument("--bucket-size", type=int, default=25)
200 parser.add_argument("--master-addr", type=str, required=True)
201 parser.add_argument("--master-port", type=str, required=True)
202 parser.add_argument("--model", type=str)
204 "--json", type=str, metavar="PATH", help="Write file with benchmark results"
220 output = allgather_run("nvidia-smi topo -m")
222 print('Output of "nvidia-smi topo -m" differs between machines')
226 print("-----------------------------------")
228 print("-----------------------------------")
235 print("--- nvidia-smi topo -m ---")
238 print("--------------------------")