# NVIDIA NCCL 2
# A package of optimized primitives for collective multi-GPU communication.

load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
load(
    "@local_config_nccl//:build_defs.bzl",
    "cuda_rdc_library",
    "gen_device_srcs",
)

licenses(["notice"])

exports_files(["LICENSE.txt"])

# Headers addressed relative to "src/" (i.e. "nccl.h" and
# "include/collectives.h" once the prefix is stripped).
cc_library(
    name = "src_hdrs",
    hdrs = [
        "src/include/collectives.h",
        "src/nccl.h",
    ],
    strip_include_prefix = "src",
)

# Everything under src/include, addressed relative to that directory.
cc_library(
    name = "include_hdrs",
    hdrs = glob(["src/include/**"]),
    strip_include_prefix = "src/include",
    deps = ["@local_config_cuda//cuda:cuda_headers"],
)

# Device-side collective headers, addressed relative to their own directory.
cc_library(
    name = "device_hdrs",
    hdrs = glob(["src/collectives/device/*.h"]),
    strip_include_prefix = "src/collectives/device",
)

# NCCL compiles the same source files with different NCCL_OP/NCCL_TYPE defines.
# RDC compilation requires that each compiled module has a unique ID. Clang
# derives the module ID from the path only so we need to copy the files to get
# different IDs for different parts of compilation. NVCC does not have that
# problem because it generates IDs based on preprocessed content.
gen_device_srcs(
    name = "device_srcs",
    srcs = [
        "src/collectives/device/all_gather.cu.cc",
        "src/collectives/device/all_reduce.cu.cc",
        "src/collectives/device/broadcast.cu.cc",
        "src/collectives/device/reduce.cu.cc",
        "src/collectives/device/reduce_scatter.cu.cc",
        "src/collectives/device/sendrecv.cu.cc",
    ],
)

# Device-side library: functions.cu.cc plus the per-op/per-type copies
# produced by :device_srcs.
cuda_rdc_library(
    name = "device",
    srcs = [
        "src/collectives/device/functions.cu.cc",
        ":device_srcs",
    ] + glob([
        # Required for header inclusion checking; see the comment on the
        # trailing srcs entries of the :nccl target for details.
        "src/collectives/device/*.h",
        "src/nccl.h",
    ]),
    deps = [
        ":device_hdrs",
        ":include_hdrs",
        ":src_hdrs",
        "@local_config_cuda//cuda:cuda_headers",
    ],
)

# Primary NCCL target.
#
# This needs to be cuda_library instead of cc_library so that clang uses the
# correct name for kernel host stubs (function pointers to initialize ncclKerns
# in enqueue.cc) after https://reviews.llvm.org/D68578.
cuda_library(
    name = "nccl",
    srcs = glob(
        include = [
            "src/**/*.cc",
            # Required for header inclusion checking, see below for details.
            "src/graph/*.h",
        ],
        # Exclude device-library code.
        exclude = ["src/collectives/device/**"],
    ) + [
        # Required for header inclusion checking (see
        # http://docs.bazel.build/versions/master/be/c-cpp.html#hdrs).
        # Files in src/ which #include "nccl.h" load it from there rather than
        # from the virtual includes directory.
        "src/include/collectives.h",
        "src/nccl.h",
    ],
    hdrs = ["src/nccl.h"],
    include_prefix = "third_party/nccl",
    # -lrt is passed on every platform except the macOS configuration.
    linkopts = select({
        "@org_tensorflow//tensorflow:macos": [],
        "//conditions:default": ["-lrt"],
    }),
    strip_include_prefix = "src",
    visibility = ["//visibility:public"],
    deps = [
        ":device",
        ":include_hdrs",
        ":src_hdrs",
    ],
)