From a80a695b3f6ce3118cc01c31359762cfac35f02d Mon Sep 17 00:00:00 2001
From: zhangyanhui <zhangyanhui17@huawei.com>
Date: Thu, 13 Jun 2024 14:23:56 +0800
Subject: [PATCH] 0023-support-x86-emulator-build

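Gate the AVX512 compiler flags, defines, and kernel sources behind the
is_emulator build argument, since the x86_64 emulator does not support
AVX512. Also make NNRT support conditional on real arm/arm64 targets,
switch thread_utils.cc to <sys/wait.h>, and rename
ConvolutionDepthwiseSWCPUKernelX86::Execute to DoExecute.

The gating pattern applied across the BUILD.gn files follows the sketch
below (assuming is_emulator is declared as a build argument by the
surrounding OHOS GN build):

    if (target_cpu == "x86_64" && !is_emulator) {
      cflags_c += [ "-mavx512f" ]
      defines += [ "ENABLE_AVX512" ]
    }
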
---
 .../plugin/device/cpu/kernel/nnacl/BUILD.gn   | 92 +++++++++++++++----
 mindspore/lite/BUILD.gn                       | 60 ++++++------
 mindspore/lite/src/common/thread_utils.cc     |  2 +-
 mindspore/lite/src/litert/kernel/cpu/BUILD.gn | 28 +++++-
 .../cpu/fp32/convolution_delegate_fp32.cc     |  2 +
 ...volution_depthwise_slidewindow_x86_fp32.cc |  4 +-
 ...nvolution_depthwise_slidewindow_x86_fp32.h |  2 +-
 7 files changed, 138 insertions(+), 52 deletions(-)

diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn
index d27817be..387a675a 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn
@@ -46,7 +46,6 @@ config("nnacl_public_config") {
     }
   } else if (target_cpu == "x86_64") {
     cflags_c += [
-      "-mavx512f",
       "-mavx",
       "-mavx2",
       "-mfma",
@@ -56,8 +55,16 @@ config("nnacl_public_config") {
     defines += [
       "ENABLE_SSE",
       "ENABLE_AVX",
-      "ENABLE_AVX512",
     ]
+    # emulator does not support avx512
+    if (!is_emulator) {
+      cflags_c += [
+        "-mavx512f",
+      ]
+      defines += [
+        "ENABLE_AVX512",
+      ]
+    }
   }
 }

@@ -102,7 +109,6 @@ c_kernel_sources = [
   "kernel/convolution_depthwise_sw.c",
   "kernel/convolution_im2col_arm32.c",
   "kernel/convolution_im2col_arm64.c",
-  "kernel/convolution_im2col_avx512.c",
   "kernel/convolution_im2col_avx.c",
   "kernel/convolution_im2col_base.c",
   "kernel/convolution_im2col.c",
@@ -136,7 +142,6 @@ c_kernel_sources = [
   "kernel/log_softmax.c",
   "kernel/matmul_arm32.c",
   "kernel/matmul_arm64.c",
-  "kernel/matmul_avx512.c",
   "kernel/matmul_avx.c",
   "kernel/matmul_base.c",
   "kernel/matmul.c",
@@ -169,10 +174,6 @@ c_kernel_sources = [
   "kernel/zeros_like.c",
 ]

-# list of ${NNACL_DIR}/experimental/*.c
-experimental_kernel_sources = [
-]
-
 # list of ${NNACL_DIR}/base/*.c
 base_kernel_sources = [
   "base/arithmetic_base.c",
@@ -221,7 +222,6 @@ fp32_kernel_sources = [
   "fp32/conv_common_fp32.c",
   "fp32/conv_depthwise_avx_fp32.c",
   "fp32/conv_depthwise_fp32.c",
-  "fp32/conv_im2col_avx512_fp32.c",
   "fp32/conv_im2col_fp32.c",
   "fp32/conv_sw_arm64_fp32.c",
   "fp32/conv_sw_avx_fp32.c",
@@ -246,8 +246,6 @@ fp32_kernel_sources = [
   "fp32/local_response_norm_fp32.c",
   "fp32/log_softmax_fp32.c",
   "fp32/lstm_fp32.c",
-  "fp32/matmul_avx512_fp32.c",
-  "fp32/matmul_avx512_mask_fp32.c",
   "fp32/matmul_avx_fp32.c",
   "fp32/matmul_fp32.c",
   "fp32/mul_fp32.c",
@@ -784,6 +782,13 @@ sse_avx_avx512_sources = [
   "assembly/avx/MatmulAvx.S",
 ]

+# only x86_64 real machines support avx512
+if (target_cpu == "x86_64" && !is_emulator) {
+  sse_avx_avx512_sources += [
+    "assembly/avx512/ConvDwFp32RowAVX512.S",
+  ]
+}
+
 gemm_avx512_kernel_sources = [
   "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_10x16_kernel_nhwc_fp32.c",
   "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_10x32_kernel_nhwc_fp32.c",
@@ -834,16 +839,64 @@ gemm_avx512_kernel_sources = [
   "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_9x32_kernel_nhwc_fp32.c",
 ]

+gemm_mask_avx512_kernel_sources = [
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_10x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_10x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_11x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_11x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_12x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_12x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x80_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x96_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x80_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x96_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x80_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x96_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x80_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x96_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x80_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_9x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_9x32_mask_kernel_nhwc_fp32.c",
+]
+
 fp32_kernel_sources -= no_fast_math_fp32_kernel_sources
 fp32_kernel_sources -= avx_fp32_kernel_sources
-fp32_kernel_sources -= avx512_fp32_kernel_sources
 fp32_kernel_sources -= arm64_fp32_kernel_sources

 # source files on all target
 nnacl_sources = common_sources
 nnacl_sources += base_kernel_sources
 nnacl_sources += c_kernel_sources
-nnacl_sources += experimental_kernel_sources
 nnacl_sources += fp32_kernel_sources
 nnacl_sources += fp32_sparse_kernel_sources
 nnacl_sources += fp32_grad_kernel_sources
@@ -854,7 +907,6 @@ nnacl_sources += infer_control_sources

 # source files on arm32
 arm_only_sources = arm32_assembly_sources
-#arm_only_sources += arm32_fp16_assembly_sources
 not_needed(arm32_fp16_assembly_sources)

 # source files on arm64
@@ -868,8 +920,16 @@ arm64_only_sources += arm64_fp32_kernel_sources
 # sources files on x86_64
 x86_64_only_sources = sse_avx_avx512_sources
 x86_64_only_sources += avx_fp32_kernel_sources
-x86_64_only_sources += avx512_fp32_kernel_sources
-x86_64_only_sources += gemm_avx512_kernel_sources
+# emulator does not support avx512
+if (is_emulator) {
+  not_needed(avx512_fp32_kernel_sources)
+  not_needed(gemm_avx512_kernel_sources)
+  not_needed(gemm_mask_avx512_kernel_sources)
+} else {
+  x86_64_only_sources += avx512_fp32_kernel_sources
+  x86_64_only_sources += gemm_avx512_kernel_sources
+  x86_64_only_sources += gemm_mask_avx512_kernel_sources
+}

 if (target_cpu == "arm") {
   nnacl_sources += arm_only_sources
diff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn
index 467cdb6a..124c84c9 100644
--- a/mindspore/lite/BUILD.gn
+++ b/mindspore/lite/BUILD.gn
@@ -118,12 +118,6 @@ control_flow_kernel_sources = [
   "src/control_flow/kernel/identity_kernel.cc",
 ]

-experimental_sources = [
-]
-
-string_kernel_source = [
-]
-
 auto_parallel_source = [
   "src/litert/sub_graph_split.cc"
 ]
@@ -186,19 +180,11 @@ lite_mindrt_sources = [
 all_lite_sources += cxx_api_sources
 all_lite_sources += api_source
 all_lite_sources += control_flow_kernel_sources
-all_lite_sources += experimental_sources
-all_lite_sources += string_kernel_source
 all_lite_sources += auto_parallel_source
 all_lite_sources += custom_registry_sources
 all_lite_sources += weight_decode_source
 all_lite_sources += lite_mindrt_sources

-ops_base_sources = [
-#  "src/common/ops/anf_utils.cc", # disable runtiem convert
-#  "src/common/ops/ops_def.cc", # disable kernel executor
-#  "src/common/ops/ops_utils.cc" # disable kernel executor
-]
-
 basic_populate_sources = [
   "src/common/ops/populate/activation_grad_populate.cc",
   "src/common/ops/populate/activation_populate.cc",
@@ -346,8 +332,7 @@ control_populate_sources = [
   "src/common/ops/populate/control/tensorliststack_populate.cc",
 ]

-all_ops_sources = ops_base_sources
-all_ops_sources += basic_populate_sources
+all_ops_sources = basic_populate_sources
 all_ops_sources += string_populate_sources
 all_ops_sources += control_populate_sources

@@ -360,6 +345,12 @@ missing_sources = [

 all_sources += missing_sources

+SUPPORT_NNRT = false
+# currently, only arm/arm64 real machines support nnrt
+if ((target_cpu == "arm" || target_cpu == "arm64") && !is_emulator) {
+  SUPPORT_NNRT = true
+}
+
 ohos_shared_library("mindspore_lib") {
   deps = [
     "../ccsrc/plugin/device/cpu/kernel/nnacl/:nnacl_obj",
@@ -387,7 +378,6 @@ ohos_shared_library("mindspore_lib") {
     "../ccsrc/",
     "src/litert/kernel/cpu/",
     "../core/mindrt/src/",
-    "//foundation/ai/neural_network_runtime/",
   ]

   defines = [
@@ -418,6 +408,17 @@ ohos_shared_library("mindspore_lib") {
       "CL_HPP_TARGET_OPENCL_VERSION=120",
       "CL_HPP_MINIMUM_OPENCL_VERSION=120",
     ]
+  } else if (target_cpu == "x86_64") {
+    defines += [
+      "ENABLE_SSE",
+      "ENABLE_AVX",
+    ]
+    # emulator does not support avx512
+    if (!is_emulator) {
+      defines += [
+        "ENABLE_AVX512",
+      ]
+    }
   }

   configs = [
@@ -434,10 +435,10 @@ ohos_shared_library("mindspore_lib") {
   output_name = "libmindspore-lite"
   output_extension = "so"
   innerapi_tags = [ "platformsdk" ]
-  SUPPORT_NNRT = true
   if (SUPPORT_NNRT) {
     if (mindspore_feature_nnrt_metagraph) {
       defines += [ "SUPPORT_NNRT_METAGRAPH" ]
+      sources += [ "src/litert/delegate/nnrt/hiai_foundation_wrapper.cc", ]
       print("enabled feature: mindspore_feature_nnrt_metagraph")
     }
     sources += [
@@ -445,7 +446,6 @@ ohos_shared_library("mindspore_lib") {
       "src/litert/delegate/nnrt/nnrt_delegate.cc",
       "src/litert/delegate/nnrt/nnrt_model_kernel.cc",
       "src/litert/delegate/nnrt/nnrt_allocator.cc",
-      "src/litert/delegate/nnrt/hiai_foundation_wrapper.cc",
      "src/litert/delegate/nnrt/extension_options_parser.cc",
     ]
     include_dirs += [
@@ -453,6 +453,7 @@ ohos_shared_library("mindspore_lib") {
       "../../mindspore/core/ir",
       "mindir/include",
       "mindir/inner_headers",
+      "//foundation/ai/neural_network_runtime/",
     ]

     external_deps += [ "neural_network_runtime:nnrt_target" ]
@@ -499,11 +500,9 @@ ohos_shared_library("mindspore_ndk") {
     "../../third_party/",
     "./schema/",
     "../ccsrc/",
-    "//foundation/ai/neural_network_runtime/",
   ]

   defines = [
-    "SUPPORT_NNRT",
     "MS_COMPILE_OHOS",
     "PRIMITIVE_WRITEABLE",
     "RUNTIME_PASS_CLIP",
@@ -512,9 +511,18 @@ ohos_shared_library("mindspore_ndk") {
     "ENABLE_HI_APP_EVENT",
   ]

-  if (mindspore_feature_nnrt_metagraph) {
-    defines += [ "SUPPORT_NNRT_METAGRAPH" ]
-    print("enabled feature: mindspore_feature_nnrt_metagraph")
+  if (SUPPORT_NNRT) {
+    include_dirs += [
+      "//foundation/ai/neural_network_runtime/",
+    ]
+    defines += [
+      "SUPPORT_NNRT",
+    ]
+    if (mindspore_feature_nnrt_metagraph) {
+      defines += [ "SUPPORT_NNRT_METAGRAPH" ]
+      print("enabled feature: mindspore_feature_nnrt_metagraph")
+    }
+    external_deps = [ "neural_network_runtime:nnrt_target" ]
   }

   configs = [
@@ -523,8 +531,6 @@ ohos_shared_library("mindspore_ndk") {
     ":secure_option",
   ]

-  external_deps = [ "neural_network_runtime:nnrt_target" ]
-
   remove_configs = [ "//build/config/compiler:no_rtti" ]

   output_name = "libmindspore_lite_ndk"
@@ -749,4 +755,4 @@ config("secure_option") {

 config("train_kernel_option") {
   cflags_cc = [ "-fno-finite-math-only" ]
-}
+}
\ No newline at end of file
diff --git a/mindspore/lite/src/common/thread_utils.cc b/mindspore/lite/src/common/thread_utils.cc
index 28c8e1cd..28c7acab 100644
--- a/mindspore/lite/src/common/thread_utils.cc
+++ b/mindspore/lite/src/common/thread_utils.cc
@@ -17,7 +17,7 @@
 #if defined(__linux__) && !defined(ENABLE_ARM)
 #include "src/common/thread_utils.h"
 #include <sys/stat.h>
-#include <wait.h>
+#include <sys/wait.h>
 #include "src/common/log_adapter.h"

 namespace mindspore {
diff --git a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn
index 297fc6f6..d51b9f4a 100644
--- a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn
+++ b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn
@@ -52,7 +52,6 @@ cpu_kernel_sources = [
     "fp32/convolution_fp32.cc",
     "fp32/convolution_im2col_arm32_fp32.cc",
     "fp32/convolution_im2col_arm64_fp32.cc",
-    "fp32/convolution_im2col_avx512_fp32.cc",
     "fp32/convolution_im2col_avx_fp32.cc",
     "fp32/convolution_im2col_base_fp32.cc",
     "fp32/convolution_im2col_fp32.cc",
@@ -90,7 +89,6 @@ cpu_kernel_sources = [
     "fp32/lstm_non_mindir_fp32.cc",
     "fp32/matmul_fp32_arm32.cc",
     "fp32/matmul_fp32_arm64.cc",
-    "fp32/matmul_fp32_avx512.cc",
     "fp32/matmul_fp32_avx.cc",
     "fp32/matmul_fp32_base.cc",
     "fp32/matmul_fp32.cc",
@@ -125,7 +123,7 @@ cpu_kernel_sources = [
 ]

 if ((target_cpu != "arm") && (target_cpu != "arm64")) {
-    cpu_kernel_sources += [ "src/runtime/kernel/cpu/fp32/cast_for_x86_fp16.cc" ]
+    cpu_kernel_sources += [ "fp32/cast_for_x86_fp16.cc" ]
 }

 arm64_cpu_kernel_sources = [
@@ -148,8 +146,6 @@ sse_avx_avx512_kernel_sources = [
   "fp32/convolution_im2col_avx_fp32.cc",
   "fp32/matmul_fp32_avx.cc",
   "fp32/convolution_winograd_avx_fp32.cc",
-  "fp32/convolution_im2col_avx512_fp32.cc",
-  "fp32/matmul_fp32_avx512.cc",
 ]

 fp16_kernel_sources = [
@@ -272,6 +268,18 @@ control_kernel_sources = [
     "control/tensorlist_stack.cc",
 ]

+# emulator does not support avx512
+if (!is_emulator) {
+  cpu_kernel_sources += [
+    "fp32/convolution_im2col_avx512_fp32.cc",
+    "fp32/matmul_fp32_avx512.cc",
+  ]
+  sse_avx_avx512_kernel_sources += [
+    "fp32/convolution_im2col_avx512_fp32.cc",
+    "fp32/matmul_fp32_avx512.cc",
+  ]
+}
+
 all_cpu_kernel_sources = cpu_kernel_sources
 all_cpu_kernel_sources += int8_kernel_sources
 all_cpu_kernel_sources += string_kernel_sources
@@ -348,6 +356,16 @@ ohos_source_set("cpu_kernel_obj") {
       "CL_HPP_TARGET_OPENCL_VERSION=120",
       "CL_HPP_MINIMUM_OPENCL_VERSION=120",
     ]
+  } else if (target_cpu == "x86_64") {
+    defines += [
+      "ENABLE_SSE",
+      "ENABLE_AVX",
+    ]
+    if (!is_emulator) {
+      defines += [
+        "ENABLE_AVX512",
+      ]
+    }
   }

   cflags_cc = [
diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc
index f907bbbf..ac693c44 100644
--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc
+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc
@@ -49,7 +49,9 @@ using mindspore::schema::PrimitiveType_Conv2DFusion;

 namespace mindspore::kernel {
 namespace {
+#ifndef ENABLE_AVX
 constexpr int kMaxDwConvSWSize = 32;
+#endif
 }  // namespace

 float *ConvolutionDelegateCPUKernel::CopyData(const lite::Tensor *tensor) {
diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
index 568b9463..d35669ce 100644
--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
@@ -106,7 +106,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::ReSize() {
   return RET_OK;
 }

-int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) {
+int ConvolutionDepthwiseSWCPUKernelX86::DoExecute(int task_id) {
   DepthwiseSWAvxFp32(packed_output_, packed_input_, reinterpret_cast<float *>(packed_weight_),
                      reinterpret_cast<float *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
@@ -114,7 +114,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) {

 int ConvDwSWAvxRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto conv_dw = reinterpret_cast<ConvolutionDepthwiseSWCPUKernelX86 *>(cdata);
-  auto ret = conv_dw->Execute(task_id);
+  auto ret = conv_dw->DoExecute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionDepthwiseSWRun in x86 error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h
index e959fe45..928321e5 100644
--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h
+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h
@@ -35,7 +35,7 @@ class ConvolutionDepthwiseSWCPUKernelX86 : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;

-  int Execute(int task_id);
+  int DoExecute(int task_id);

  private:
   void FreePackedInputOutput();
--
2.25.1
