# Build rules for the oneDNN sources in this package, compiled with
# DNNL_AARCH64_USE_ACL=1 and linked against @compute_library//:arm_compute.
#
# Two threading configurations are supported; the OpenMP one is chosen when
# @org_tensorflow//third_party/mkl_dnn:build_with_mkl_aarch64_openmp matches,
# otherwise the threadpool configuration is used.

load(
    "@org_tensorflow//third_party:common.bzl",
    "template_rule",
)

exports_files(["LICENSE"])

# Compiler flags passed in both threading configurations.
_DNNL_COPTS_COMMON = [
    "-fexceptions",
    "-UUSE_MKL",
    "-UUSE_CBLAS",
]

_DNNL_COPTS_THREADPOOL = ["-fopenmp-simd"] + _DNNL_COPTS_COMMON

_DNNL_COPTS_OMP = ["-fopenmp"] + _DNNL_COPTS_COMMON

# dnnl_config.h.in substitutions that do not depend on the threading runtime.
#
# NOTE(review): keep the entry order. Some keys are prefixes of others (for
# example BUILD_XEHP vs. BUILD_XEHPG / BUILD_XEHPC), and the longer keys are
# listed first -- presumably so they are substituted before the shorter one.
_DNNL_CONFIG_COMMON = {
    "#cmakedefine DNNL_GPU_RUNTIME DNNL_RUNTIME_${DNNL_GPU_RUNTIME}": "#define DNNL_GPU_RUNTIME DNNL_RUNTIME_NONE",
    "#cmakedefine DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE": "#undef DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE",
    "#cmakedefine DNNL_WITH_SYCL": "#undef DNNL_WITH_SYCL",
    "#cmakedefine DNNL_WITH_LEVEL_ZERO": "#undef DNNL_WITH_LEVEL_ZERO",
    "#cmakedefine DNNL_SYCL_CUDA": "#undef DNNL_SYCL_CUDA",
    "#cmakedefine DNNL_SYCL_HIP": "#undef DNNL_SYCL_HIP",
    "#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER",
    "#cmakedefine DNNL_EXPERIMENTAL": "#undef DNNL_EXPERIMENTAL",
    "#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1",
    "#cmakedefine01 BUILD_INFERENCE": "#define BUILD_INFERENCE 0",
    "#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1",
    "#cmakedefine01 BUILD_BATCH_NORMALIZATION": "#define BUILD_BATCH_NORMALIZATION 0",
    "#cmakedefine01 BUILD_BINARY": "#define BUILD_BINARY 0",
    "#cmakedefine01 BUILD_CONCAT": "#define BUILD_CONCAT 0",
    "#cmakedefine01 BUILD_CONVOLUTION": "#define BUILD_CONVOLUTION 0",
    "#cmakedefine01 BUILD_DECONVOLUTION": "#define BUILD_DECONVOLUTION 0",
    "#cmakedefine01 BUILD_ELTWISE": "#define BUILD_ELTWISE 0",
    "#cmakedefine01 BUILD_INNER_PRODUCT": "#define BUILD_INNER_PRODUCT 0",
    "#cmakedefine01 BUILD_LAYER_NORMALIZATION": "#define BUILD_LAYER_NORMALIZATION 0",
    "#cmakedefine01 BUILD_LRN": "#define BUILD_LRN 0",
    "#cmakedefine01 BUILD_MATMUL": "#define BUILD_MATMUL 0",
    "#cmakedefine01 BUILD_POOLING": "#define BUILD_POOLING 0",
    "#cmakedefine01 BUILD_PRELU": "#define BUILD_PRELU 0",
    "#cmakedefine01 BUILD_REDUCTION": "#define BUILD_REDUCTION 0",
    "#cmakedefine01 BUILD_REORDER": "#define BUILD_REORDER 0",
    "#cmakedefine01 BUILD_RESAMPLING": "#define BUILD_RESAMPLING 0",
    "#cmakedefine01 BUILD_RNN": "#define BUILD_RNN 0",
    "#cmakedefine01 BUILD_SHUFFLE": "#define BUILD_SHUFFLE 0",
    "#cmakedefine01 BUILD_SOFTMAX": "#define BUILD_SOFTMAX 0",
    "#cmakedefine01 BUILD_SUM": "#define BUILD_SUM 0",
    "#cmakedefine01 BUILD_PRIMITIVE_CPU_ISA_ALL": "#define BUILD_PRIMITIVE_CPU_ISA_ALL 0",
    "#cmakedefine01 BUILD_SSE41": "#define BUILD_SSE41 0",
    "#cmakedefine01 BUILD_AVX2": "#define BUILD_AVX2 0",
    "#cmakedefine01 BUILD_AVX512": "#define BUILD_AVX512 0",
    "#cmakedefine01 BUILD_AMX": "#define BUILD_AMX 0",
    "#cmakedefine01 BUILD_PRIMITIVE_GPU_ISA_ALL": "#define BUILD_PRIMITIVE_GPU_ISA_ALL 0",
    "#cmakedefine01 BUILD_GEN9": "#define BUILD_GEN9 0",
    "#cmakedefine01 BUILD_GEN11": "#define BUILD_GEN11 0",
    "#cmakedefine01 BUILD_XELP": "#define BUILD_XELP 0",
    "#cmakedefine01 BUILD_XEHPG": "#define BUILD_XEHPG 0",
    "#cmakedefine01 BUILD_XEHPC": "#define BUILD_XEHPC 0",
    "#cmakedefine01 BUILD_XEHP": "#define BUILD_XEHP 0",
}

# Full substitution table for the threadpool configuration: the two CPU
# runtime entries first, followed by the shared entries in their listed order.
_DNNL_RUNTIME_THREADPOOL = dict(
    [
        (
            "#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}",
            "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_THREADPOOL",
        ),
        (
            "#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}",
            "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_THREADPOOL",
        ),
    ] + _DNNL_CONFIG_COMMON.items(),
)

# Full substitution table for the OpenMP configuration; same layout as above.
_DNNL_RUNTIME_OMP = dict(
    [
        (
            "#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}",
            "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_OMP",
        ),
        (
            "#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}",
            "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_OMP",
        ),
    ] + _DNNL_CONFIG_COMMON.items(),
)

# Produces dnnl_config.h from its .in template using the substitution table
# that matches the selected threading configuration.
template_rule(
    name = "dnnl_config_h",
    src = "include/oneapi/dnnl/dnnl_config.h.in",
    out = "include/oneapi/dnnl/dnnl_config.h",
    substitutions = select({
        "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_aarch64_openmp": _DNNL_RUNTIME_OMP,
        "//conditions:default": _DNNL_RUNTIME_THREADPOOL,
    }),
)

# Produces dnnl_version.h from its .in template with a fixed version (2.6.0)
# and a placeholder hash.
template_rule(
    name = "dnnl_version_h",
    src = "include/oneapi/dnnl/dnnl_version.h.in",
    out = "include/oneapi/dnnl/dnnl_version.h",
    substitutions = {
        "@DNNL_VERSION_MAJOR@": "2",
        "@DNNL_VERSION_MINOR@": "6",
        "@DNNL_VERSION_PATCH@": "0",
        "@DNNL_VERSION_HASH@": "N/A",
    },
)

# The library itself: common and CPU sources, with everything under
# src/cpu/x64 excluded. The generated config and version headers are exposed
# together with the checked-in headers via textual_hdrs.
cc_library(
    name = "mkl_dnn_acl",
    srcs = glob(
        [
            "src/common/*.cpp",
            "src/cpu/**/*.cpp",
            "src/cpu/*.cpp",
        ],
        exclude = [
            "src/cpu/x64/**",
        ],
    ),
    copts = select({
        "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_aarch64_openmp": _DNNL_COPTS_OMP,
        "//conditions:default": _DNNL_COPTS_THREADPOOL,
    }),
    defines = ["DNNL_AARCH64_USE_ACL=1"],
    includes = [
        "include",
        "src",
        "src/common",
        "src/cpu",
        "src/cpu/aarch64/xbyak_aarch64/src",
        "src/cpu/aarch64/xbyak_aarch64/xbyak_aarch64",
        "src/cpu/gemm",
    ],
    # NOTE(review): libgomp is linked in both configurations, including the
    # threadpool one -- confirm this is intentional before changing it.
    linkopts = ["-lgomp"],
    textual_hdrs = glob(
        [
            "include/**/*",
            "include/*",
            "src/common/*.hpp",
            "src/cpu/**/*.hpp",
            "src/cpu/*.hpp",
            "src/cpu/aarch64/xbyak_aarch64/**/*.h",
        ],
    ) + [
        ":dnnl_config_h",
        ":dnnl_version_h",
    ],
    visibility = ["//visibility:public"],
    deps = [
        "@compute_library//:arm_compute",
    ],
)