1exports_files(["LICENSE"]) 2 3load( 4 "@org_tensorflow//third_party/mkl:build_defs.bzl", 5 "if_mkl", 6) 7load( 8 "@org_tensorflow//tensorflow:tensorflow.bzl", 9 "tf_openmp_copts", 10) 11load( 12 "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl", 13 "if_mkldnn_openmp", 14) 15load( 16 "@org_tensorflow//third_party/mkl:build_defs.bzl", 17 "if_mkl_ml", 18) 19load( 20 "@org_tensorflow//third_party:common.bzl", 21 "template_rule", 22) 23 24_DNNL_RUNTIME_OMP = { 25 "#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}": "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_OMP", 26 "#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}": "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_OMP", 27 "#cmakedefine DNNL_GPU_RUNTIME DNNL_RUNTIME_${DNNL_GPU_RUNTIME}": "#define DNNL_GPU_RUNTIME DNNL_RUNTIME_NONE", 28 "#cmakedefine DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE": "#undef DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE", 29 "#cmakedefine DNNL_WITH_SYCL": "#undef DNNL_WITH_SYCL", 30 "#cmakedefine DNNL_WITH_LEVEL_ZERO": "#undef DNNL_WITH_LEVEL_ZERO", 31 "#cmakedefine DNNL_SYCL_CUDA": "#undef DNNL_SYCL_CUDA", 32 "#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER", 33 "#cmakedefine DNNL_EXPERIMENTAL": "#undef DNNL_EXPERIMENTAL", 34 "#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1", 35 "#cmakedefine01 BUILD_INFERENCE": "#define BUILD_INFERENCE 0", 36 "#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1", 37 "#cmakedefine01 BUILD_BATCH_NORMALIZATION": "#define BUILD_BATCH_NORMALIZATION 0", 38 "#cmakedefine01 BUILD_BINARY": "#define BUILD_BINARY 0", 39 "#cmakedefine01 BUILD_CONCAT": "#define BUILD_CONCAT 0", 40 "#cmakedefine01 BUILD_CONVOLUTION": "#define BUILD_CONVOLUTION 0", 41 "#cmakedefine01 BUILD_DECONVOLUTION": "#define BUILD_DECONVOLUTION 0", 42 "#cmakedefine01 BUILD_ELTWISE": "#define BUILD_ELTWISE 0", 43 "#cmakedefine01 BUILD_INNER_PRODUCT": "#define BUILD_INNER_PRODUCT 0", 44 "#cmakedefine01 BUILD_LAYER_NORMALIZATION": "#define BUILD_LAYER_NORMALIZATION 0", 45 "#cmakedefine01 BUILD_LRN": "#define BUILD_LRN 0", 46 "#cmakedefine01 BUILD_MATMUL": "#define BUILD_MATMUL 0", 47 "#cmakedefine01 BUILD_POOLING": "#define BUILD_POOLING 0", 48 "#cmakedefine01 BUILD_PRELU": "#define BUILD_PRELU 0", 49 "#cmakedefine01 BUILD_REDUCTION": "#define BUILD_REDUCTION 0", 50 "#cmakedefine01 BUILD_REORDER": "#define BUILD_REORDER 0", 51 "#cmakedefine01 BUILD_RESAMPLING": "#define BUILD_RESAMPLING 0", 52 "#cmakedefine01 BUILD_RNN": "#define BUILD_RNN 0", 53 "#cmakedefine01 BUILD_SHUFFLE": "#define BUILD_SHUFFLE 0", 54 "#cmakedefine01 BUILD_SOFTMAX": "#define BUILD_SOFTMAX 0", 55 "#cmakedefine01 BUILD_SUM": "#define BUILD_SUM 0", 56 "#cmakedefine01 BUILD_PRIMITIVE_CPU_ISA_ALL": "#define BUILD_PRIMITIVE_CPU_ISA_ALL 1", 57 "#cmakedefine01 BUILD_SSE41": "#define BUILD_SSE41 0", 58 "#cmakedefine01 BUILD_AVX2": "#define BUILD_AVX2 0", 59 "#cmakedefine01 BUILD_AVX512": "#define BUILD_AVX512 0", 60 "#cmakedefine01 BUILD_AMX": "#define BUILD_AMX 0", 61 "#cmakedefine01 BUILD_PRIMITIVE_GPU_ISA_ALL": "#define BUILD_PRIMITIVE_GPU_ISA_ALL 0", 62 "#cmakedefine01 BUILD_GEN9": "#define BUILD_GEN9 0", 63 "#cmakedefine01 BUILD_GEN11": "#define BUILD_GEN11 0", 64 "#cmakedefine01 BUILD_XELP": "#define BUILD_XELP 0", 65 "#cmakedefine01 BUILD_XEHPG": "#define BUILD_XEHPG 0", 66 "#cmakedefine01 BUILD_XEHPC": "#define BUILD_XEHPC 0", 67 "#cmakedefine01 BUILD_XEHP": "#define BUILD_XEHP 0", 68} 69 70_DNNL_RUNTIME_THREADPOOL = { 71 "#cmakedefine DNNL_CPU_THREADING_RUNTIME 
_DNNL_RUNTIME_THREADPOOL = {
    "#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}": "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_THREADPOOL",
    "#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}": "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_THREADPOOL",
    "#cmakedefine DNNL_GPU_RUNTIME DNNL_RUNTIME_${DNNL_GPU_RUNTIME}": "#define DNNL_GPU_RUNTIME DNNL_RUNTIME_NONE",
    "#cmakedefine DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE": "#undef DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE",
    "#cmakedefine DNNL_WITH_SYCL": "#undef DNNL_WITH_SYCL",
    "#cmakedefine DNNL_WITH_LEVEL_ZERO": "#undef DNNL_WITH_LEVEL_ZERO",
    "#cmakedefine DNNL_SYCL_CUDA": "#undef DNNL_SYCL_CUDA",
    "#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER",
    "#cmakedefine DNNL_EXPERIMENTAL": "#undef DNNL_EXPERIMENTAL",
    "#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1",
    "#cmakedefine01 BUILD_INFERENCE": "#define BUILD_INFERENCE 0",
    "#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1",
    "#cmakedefine01 BUILD_BATCH_NORMALIZATION": "#define BUILD_BATCH_NORMALIZATION 0",
    "#cmakedefine01 BUILD_BINARY": "#define BUILD_BINARY 0",
    "#cmakedefine01 BUILD_CONCAT": "#define BUILD_CONCAT 0",
    "#cmakedefine01 BUILD_CONVOLUTION": "#define BUILD_CONVOLUTION 0",
    "#cmakedefine01 BUILD_DECONVOLUTION": "#define BUILD_DECONVOLUTION 0",
    "#cmakedefine01 BUILD_ELTWISE": "#define BUILD_ELTWISE 0",
    "#cmakedefine01 BUILD_INNER_PRODUCT": "#define BUILD_INNER_PRODUCT 0",
    "#cmakedefine01 BUILD_LAYER_NORMALIZATION": "#define BUILD_LAYER_NORMALIZATION 0",
    "#cmakedefine01 BUILD_LRN": "#define BUILD_LRN 0",
    "#cmakedefine01 BUILD_MATMUL": "#define BUILD_MATMUL 0",
    "#cmakedefine01 BUILD_POOLING": "#define BUILD_POOLING 0",
    "#cmakedefine01 BUILD_PRELU": "#define BUILD_PRELU 0",
    "#cmakedefine01 BUILD_REDUCTION": "#define BUILD_REDUCTION 0",
    "#cmakedefine01 BUILD_REORDER": "#define BUILD_REORDER 0",
    "#cmakedefine01 BUILD_RESAMPLING": "#define BUILD_RESAMPLING 0",
    "#cmakedefine01 BUILD_RNN": "#define BUILD_RNN 0",
    "#cmakedefine01 BUILD_SHUFFLE": "#define BUILD_SHUFFLE 0",
    "#cmakedefine01 BUILD_SOFTMAX": "#define BUILD_SOFTMAX 0",
    "#cmakedefine01 BUILD_SUM": "#define BUILD_SUM 0",
    "#cmakedefine01 BUILD_PRIMITIVE_CPU_ISA_ALL": "#define BUILD_PRIMITIVE_CPU_ISA_ALL 1",
    "#cmakedefine01 BUILD_SSE41": "#define BUILD_SSE41 0",
    "#cmakedefine01 BUILD_AVX2": "#define BUILD_AVX2 0",
    "#cmakedefine01 BUILD_AVX512": "#define BUILD_AVX512 0",
    "#cmakedefine01 BUILD_AMX": "#define BUILD_AMX 0",
    "#cmakedefine01 BUILD_PRIMITIVE_GPU_ISA_ALL": "#define BUILD_PRIMITIVE_GPU_ISA_ALL 0",
    "#cmakedefine01 BUILD_GEN9": "#define BUILD_GEN9 0",
    "#cmakedefine01 BUILD_GEN11": "#define BUILD_GEN11 0",
    "#cmakedefine01 BUILD_XELP": "#define BUILD_XELP 0",
    "#cmakedefine01 BUILD_XEHPG": "#define BUILD_XEHPG 0",
    "#cmakedefine01 BUILD_XEHPC": "#define BUILD_XEHPC 0",
    "#cmakedefine01 BUILD_XEHP": "#define BUILD_XEHP 0",
}

template_rule(
    name = "dnnl_config_h",
    src = "include/oneapi/dnnl/dnnl_config.h.in",
    out = "include/oneapi/dnnl/dnnl_config.h",
    substitutions = select({
        "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_openmp": _DNNL_RUNTIME_OMP,
        "//conditions:default": _DNNL_RUNTIME_THREADPOOL,
    }),
)
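
# The rule above expands dnnl_config.h.in by plain string substitution, so the
# generated dnnl_config.h contains, e.g.,
# "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_THREADPOOL" in the default
# (threadpool) build, or the OMP variants when the build_with_mkldnn_openmp
# config setting is active.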

# Create the file dnnl_version.h with DNNL version numbers.
# Currently, the version numbers are hard-coded here. If DNNL is upgraded, the
# version numbers have to be updated manually. They can be obtained from the
# PROJECT_VERSION setting in CMakeLists.txt, which is set to
# "version_major.version_minor.version_patch". The git hash can be set to N/A.
# TODO(agramesh1): Automatically get the version numbers from CMakeLists.txt.
template_rule(
    name = "dnnl_version_h",
    src = "include/oneapi/dnnl/dnnl_version.h.in",
    out = "include/oneapi/dnnl/dnnl_version.h",
    substitutions = {
        "@DNNL_VERSION_MAJOR@": "2",
        "@DNNL_VERSION_MINOR@": "6",
        "@DNNL_VERSION_PATCH@": "0",
        "@DNNL_VERSION_HASH@": "N/A",
    },
)

_COPTS_LIST = select({
    "@org_tensorflow//tensorflow:windows": [],
    "//conditions:default": ["-fexceptions"],
}) + [
    "-UUSE_MKL",
    "-UUSE_CBLAS",
    "-DDNNL_ENABLE_MAX_CPU_ISA",
    "-DDNNL_DISABLE_PRIMITIVE_CACHE",
] + tf_openmp_copts()

_INCLUDES_LIST = [
    "include",
    "src",
    "src/common",
    "src/common/ittnotify",
    "src/cpu",
    "src/cpu/gemm",
    "src/cpu/x64/xbyak",
]

_TEXTUAL_HDRS_LIST = glob([
    "include/**/*",
    "src/common/*.hpp",
    "src/common/ittnotify/**/*.h",
    "src/cpu/*.hpp",
    "src/cpu/**/*.hpp",
    "src/cpu/jit_utils/**/*.hpp",
    "src/cpu/x64/xbyak/*.h",
]) + [
    ":dnnl_config_h",
    ":dnnl_version_h",
]

# Large autogenerated files take too long to compile with the usual
# optimization flags. These files just generate binary kernels and are not hot
# spots, so we factor them out into ":onednn_autogen" and compile them with
# lower optimization. -O1 is used to keep enough optimization to reduce stack
# consumption (with -O0 the compiler doesn't clean up the stack from temporary
# objects).
cc_library(
    name = "onednn_autogen",
    srcs = glob(["src/cpu/x64/gemm/**/*_kern_autogen*.cpp"]),
    copts = [
        "-O1",
        "-U_FORTIFY_SOURCE",
    ] + _COPTS_LIST,
    includes = _INCLUDES_LIST,
    textual_hdrs = _TEXTUAL_HDRS_LIST,
    visibility = ["//visibility:public"],
)

cc_library(
    name = "mkl_dnn",
    srcs = glob(
        [
            "src/common/*.cpp",
            "src/cpu/*.cpp",
            "src/cpu/**/*.cpp",
            "src/common/ittnotify/*.c",
            "src/cpu/jit_utils/**/*.cpp",
        ],
        exclude = [
            "src/cpu/aarch64/**",
            # Keep this pattern in sync with the ":onednn_autogen" srcs glob so
            # the autogenerated kernels are not compiled twice.
            "src/cpu/x64/gemm/**/*_kern_autogen*.cpp",
        ],
    ),
    copts = _COPTS_LIST,
    includes = _INCLUDES_LIST,
    # TODO(penpornk): Use lrt_if_needed from tensorflow.bzl instead.
    linkopts = select({
        "@org_tensorflow//tensorflow:linux_aarch64": ["-lrt"],
        "@org_tensorflow//tensorflow:linux_x86_64": ["-lrt"],
        "@org_tensorflow//tensorflow:linux_ppc64le": ["-lrt"],
        "//conditions:default": [],
    }),
    textual_hdrs = _TEXTUAL_HDRS_LIST,
    visibility = ["//visibility:public"],
    deps = [":onednn_autogen"] + if_mkl_ml(
        ["@org_tensorflow//third_party/mkl:intel_binary_blob"],
        [],
    ),
)
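
# Illustrative sketch only: downstream targets consume this package as an
# ordinary cc_library dependency. The target and repository labels below are
# hypothetical; the actual repository name depends on how the TensorFlow
# workspace binds this BUILD file.
#
#   cc_library(
#       name = "uses_onednn",                   # hypothetical target
#       srcs = ["uses_onednn.cc"],              # hypothetical source
#       deps = ["@onednn_repo//:mkl_dnn"],      # hypothetical repository name
#   )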