• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Make the LICENSE file referenceable from other packages.
exports_files(
    srcs = ["LICENSE"],
)
2
3load(
4    "@org_tensorflow//third_party/mkl:build_defs.bzl",
5    "if_mkl",
6)
7load(
8    "@org_tensorflow//tensorflow:tensorflow.bzl",
9    "tf_openmp_copts",
10)
11load(
12    "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl",
13    "if_mkldnn_openmp",
14)
15load(
16    "@org_tensorflow//third_party/mkl:build_defs.bzl",
17    "if_mkl_ml",
18)
19load(
20    "@org_tensorflow//third_party:common.bzl",
21    "template_rule",
22)
23
# Substitutions for dnnl_config.h.in that do not depend on the CPU threading
# runtime. Keeping them in one table guarantees that the OpenMP and threadpool
# configurations cannot drift apart when oneDNN is upgraded.
#
# NOTE: entry order is intentional and must be preserved. Some keys are
# prefixes of others (e.g. "BUILD_XEHP" vs. "BUILD_XEHPG"/"BUILD_XEHPC"), and
# the longer keys are listed first -- presumably because template_rule applies
# substitutions in dict order; confirm against template_rule before reordering.
_DNNL_COMMON_SUBSTITUTIONS = {
    "#cmakedefine DNNL_GPU_RUNTIME DNNL_RUNTIME_${DNNL_GPU_RUNTIME}": "#define DNNL_GPU_RUNTIME DNNL_RUNTIME_NONE",
    "#cmakedefine DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE": "#undef DNNL_USE_RT_OBJECTS_IN_PRIMITIVE_CACHE",
    "#cmakedefine DNNL_WITH_SYCL": "#undef DNNL_WITH_SYCL",
    "#cmakedefine DNNL_WITH_LEVEL_ZERO": "#undef DNNL_WITH_LEVEL_ZERO",
    "#cmakedefine DNNL_SYCL_CUDA": "#undef DNNL_SYCL_CUDA",
    "#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER",
    "#cmakedefine DNNL_EXPERIMENTAL": "#undef DNNL_EXPERIMENTAL",
    "#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1",
    "#cmakedefine01 BUILD_INFERENCE": "#define BUILD_INFERENCE 0",
    "#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1",
    "#cmakedefine01 BUILD_BATCH_NORMALIZATION": "#define BUILD_BATCH_NORMALIZATION 0",
    "#cmakedefine01 BUILD_BINARY": "#define BUILD_BINARY 0",
    "#cmakedefine01 BUILD_CONCAT": "#define BUILD_CONCAT 0",
    "#cmakedefine01 BUILD_CONVOLUTION": "#define BUILD_CONVOLUTION 0",
    "#cmakedefine01 BUILD_DECONVOLUTION": "#define BUILD_DECONVOLUTION 0",
    "#cmakedefine01 BUILD_ELTWISE": "#define BUILD_ELTWISE 0",
    "#cmakedefine01 BUILD_INNER_PRODUCT": "#define BUILD_INNER_PRODUCT 0",
    "#cmakedefine01 BUILD_LAYER_NORMALIZATION": "#define BUILD_LAYER_NORMALIZATION 0",
    "#cmakedefine01 BUILD_LRN": "#define BUILD_LRN 0",
    "#cmakedefine01 BUILD_MATMUL": "#define BUILD_MATMUL 0",
    "#cmakedefine01 BUILD_POOLING": "#define BUILD_POOLING 0",
    "#cmakedefine01 BUILD_PRELU": "#define BUILD_PRELU 0",
    "#cmakedefine01 BUILD_REDUCTION": "#define BUILD_REDUCTION 0",
    "#cmakedefine01 BUILD_REORDER": "#define BUILD_REORDER 0",
    "#cmakedefine01 BUILD_RESAMPLING": "#define BUILD_RESAMPLING 0",
    "#cmakedefine01 BUILD_RNN": "#define BUILD_RNN 0",
    "#cmakedefine01 BUILD_SHUFFLE": "#define BUILD_SHUFFLE 0",
    "#cmakedefine01 BUILD_SOFTMAX": "#define BUILD_SOFTMAX 0",
    "#cmakedefine01 BUILD_SUM": "#define BUILD_SUM 0",
    "#cmakedefine01 BUILD_PRIMITIVE_CPU_ISA_ALL": "#define BUILD_PRIMITIVE_CPU_ISA_ALL 1",
    "#cmakedefine01 BUILD_SSE41": "#define BUILD_SSE41 0",
    "#cmakedefine01 BUILD_AVX2": "#define BUILD_AVX2 0",
    "#cmakedefine01 BUILD_AVX512": "#define BUILD_AVX512 0",
    "#cmakedefine01 BUILD_AMX": "#define BUILD_AMX 0",
    "#cmakedefine01 BUILD_PRIMITIVE_GPU_ISA_ALL": "#define BUILD_PRIMITIVE_GPU_ISA_ALL 0",
    "#cmakedefine01 BUILD_GEN9": "#define BUILD_GEN9 0",
    "#cmakedefine01 BUILD_GEN11": "#define BUILD_GEN11 0",
    "#cmakedefine01 BUILD_XELP": "#define BUILD_XELP 0",
    "#cmakedefine01 BUILD_XEHPG": "#define BUILD_XEHPG 0",
    "#cmakedefine01 BUILD_XEHPC": "#define BUILD_XEHPC 0",
    "#cmakedefine01 BUILD_XEHP": "#define BUILD_XEHP 0",
}

# dnnl_config.h substitutions selecting the OpenMP CPU runtime. The two
# runtime-specific entries come first, followed by the shared entries, which is
# the same order the table had when it was written out in full.
_DNNL_RUNTIME_OMP = dict(
    [
        ("#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}", "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_OMP"),
        ("#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}", "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_OMP"),
    ] + list(_DNNL_COMMON_SUBSTITUTIONS.items()),
)

# dnnl_config.h substitutions selecting the threadpool CPU runtime. Identical
# to _DNNL_RUNTIME_OMP except for the two runtime-specific entries.
_DNNL_RUNTIME_THREADPOOL = dict(
    [
        ("#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}", "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_THREADPOOL"),
        ("#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}", "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_THREADPOOL"),
    ] + list(_DNNL_COMMON_SUBSTITUTIONS.items()),
)
115
# Picks the dnnl_config.h substitution table for the active build
# configuration: the OpenMP table when build_with_mkldnn_openmp is set,
# otherwise the threadpool table.
_DNNL_CONFIG_SUBSTITUTIONS = select({
    "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_openmp": _DNNL_RUNTIME_OMP,
    "//conditions:default": _DNNL_RUNTIME_THREADPOOL,
})

# Generates dnnl_config.h from its .in template using the table chosen above.
template_rule(
    name = "dnnl_config_h",
    src = "include/oneapi/dnnl/dnnl_config.h.in",
    out = "include/oneapi/dnnl/dnnl_config.h",
    substitutions = _DNNL_CONFIG_SUBSTITUTIONS,
)
125
# Version placeholders substituted into dnnl_version.h.in.
# The numbers are hard coded, so they must be updated by hand whenever DNNL is
# upgraded. Take them from the PROJECT_VERSION setting in CMakeLists.txt, which
# has the form "version_major.version_minor.version_patch". No git hash is
# recorded; that placeholder is simply filled with "N/A".
# TODO(agramesh1): Automatically get the version numbers from CMakeLists.txt.
_DNNL_VERSION_SUBSTITUTIONS = {
    "@DNNL_VERSION_MAJOR@": "2",
    "@DNNL_VERSION_MINOR@": "6",
    "@DNNL_VERSION_PATCH@": "0",
    "@DNNL_VERSION_HASH@": "N/A",
}

# Generates dnnl_version.h from its .in template.
template_rule(
    name = "dnnl_version_h",
    src = "include/oneapi/dnnl/dnnl_version.h.in",
    out = "include/oneapi/dnnl/dnnl_version.h",
    substitutions = _DNNL_VERSION_SUBSTITUTIONS,
)
144
# -fexceptions is passed everywhere except under the Windows configuration.
_EXCEPTION_COPTS = select({
    "@org_tensorflow//tensorflow:windows": [],
    "//conditions:default": ["-fexceptions"],
})

# Macro settings applied to every oneDNN translation unit: USE_MKL and
# USE_CBLAS are explicitly undefined, while DNNL_ENABLE_MAX_CPU_ISA and
# DNNL_DISABLE_PRIMITIVE_CACHE are defined.
_DNNL_MACRO_COPTS = [
    "-UUSE_MKL",
    "-UUSE_CBLAS",
    "-DDNNL_ENABLE_MAX_CPU_ISA",
    "-DDNNL_DISABLE_PRIMITIVE_CACHE",
]

# Compiler options shared by all oneDNN libraries in this package. The flags
# returned by tf_openmp_copts() (loaded from tensorflow.bzl) are appended last.
_COPTS_LIST = _EXCEPTION_COPTS + _DNNL_MACRO_COPTS + tf_openmp_copts()
154
# Include search roots shared by all oneDNN libraries in this package.
_INCLUDES_LIST = [
    # Public headers.
    "include",
    # Library-internal source roots.
    "src",
    "src/common",
    "src/common/ittnotify",
    "src/cpu",
    "src/cpu/gemm",
    "src/cpu/x64/xbyak",
]
164
# Glob patterns for the checked-in headers of the oneDNN source tree.
_CHECKED_IN_HDR_PATTERNS = [
    "include/**/*",
    "src/common/*.hpp",
    "src/common/ittnotify/**/*.h",
    "src/cpu/*.hpp",
    "src/cpu/**/*.hpp",
    "src/cpu/jit_utils/**/*.hpp",
    "src/cpu/x64/xbyak/*.h",
]

# Headers produced by the template_rule targets in this package.
_GENERATED_HDRS = [
    ":dnnl_config_h",
    ":dnnl_version_h",
]

# Everything the cc_library targets expose as textual headers: the checked-in
# headers followed by the generated ones.
_TEXTUAL_HDRS_LIST = glob(_CHECKED_IN_HDR_PATTERNS) + _GENERATED_HDRS
177
# The large auto-generated GEMM kernel sources take too long to compile with
# the usual optimization flags. They only generate binary kernels and are not
# hot spots themselves, so they are split out into ":onednn_autogen" and built
# with lower compiler optimizations. -O1 rather than -O0 is used to keep stack
# consumption down (with -O0 the compiler doesn't clean up the stack from
# temporary objects).
cc_library(
    name = "onednn_autogen",
    srcs = glob(["src/cpu/x64/gemm/**/*_kern_autogen*.cpp"]),
    copts = ["-O1", "-U_FORTIFY_SOURCE"] + _COPTS_LIST,
    includes = _INCLUDES_LIST,
    textual_hdrs = _TEXTUAL_HDRS_LIST,
    visibility = ["//visibility:public"],
)
194
# Main oneDNN CPU library. It compiles every common and CPU source except the
# aarch64 backend and the auto-generated GEMM kernels; the latter are built by
# ":onednn_autogen" (with reduced optimization) and pulled in through deps.
cc_library(
    name = "mkl_dnn",
    srcs = glob(
        [
            "src/common/*.cpp",
            "src/cpu/*.cpp",
            "src/cpu/**/*.cpp",
            "src/common/ittnotify/*.c",
            "src/cpu/jit_utils/**/*.cpp",
        ],
        exclude = [
            "src/cpu/aarch64/**",
            # Must mirror the srcs glob of ":onednn_autogen" exactly. With a
            # narrower pattern, any file that matches only the autogen glob
            # would be compiled into both libraries and produce duplicate
            # symbol definitions at link time.
            "src/cpu/x64/gemm/**/*_kern_autogen*.cpp",
        ],
    ),
    copts = _COPTS_LIST,
    includes = _INCLUDES_LIST,
    # librt is linked explicitly on the Linux configurations listed below.
    # TODO(penpornk): Use lrt_if_needed from tensorflow.bzl instead.
    linkopts = select({
        "@org_tensorflow//tensorflow:linux_aarch64": ["-lrt"],
        "@org_tensorflow//tensorflow:linux_x86_64": ["-lrt"],
        "@org_tensorflow//tensorflow:linux_ppc64le": ["-lrt"],
        "//conditions:default": [],
    }),
    textual_hdrs = _TEXTUAL_HDRS_LIST,
    visibility = ["//visibility:public"],
    # if_mkl_ml (from mkl/build_defs.bzl) contributes the Intel binary blob
    # dependency in its first branch and nothing in its second.
    deps = [":onednn_autogen"] + if_mkl_ml(
        ["@org_tensorflow//third_party/mkl:intel_binary_blob"],
        [],
    ),
)
226