From a80a695b3f6ce3118cc01c31359762cfac35f02d Mon Sep 17 00:00:00 2001
From: zhangyanhui <zhangyanhui17@huawei.com>
Date: Thu, 13 Jun 2024 14:23:56 +0800
Subject: [PATCH] 0023-support-x86-emulator-build

---
 .../plugin/device/cpu/kernel/nnacl/BUILD.gn   | 92 +++++++++++++++----
 mindspore/lite/BUILD.gn                       | 60 ++++++------
 mindspore/lite/src/common/thread_utils.cc     |  2 +-
 mindspore/lite/src/litert/kernel/cpu/BUILD.gn | 28 +++++-
 .../cpu/fp32/convolution_delegate_fp32.cc     |  2 +
 ...volution_depthwise_slidewindow_x86_fp32.cc |  4 +-
 ...nvolution_depthwise_slidewindow_x86_fp32.h |  2 +-
 7 files changed, 138 insertions(+), 52 deletions(-)

diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn
index d27817be..387a675a 100644
--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn
@@ -46,7 +46,6 @@ config("nnacl_public_config") {
     }
   } else if (target_cpu == "x86_64") {
     cflags_c += [
-      "-mavx512f",
       "-mavx",
       "-mavx2",
       "-mfma",
@@ -56,8 +55,16 @@ config("nnacl_public_config") {
     defines += [
       "ENABLE_SSE",
       "ENABLE_AVX",
-      "ENABLE_AVX512",
     ]
+    # the emulator does not support avx512
+    if (!is_emulator) {
+      cflags_c += [
+        "-mavx512f",
+      ]
+      defines += [
+        "ENABLE_AVX512",
+      ]
+    }
   }
 }
 
@@ -102,7 +109,6 @@ c_kernel_sources = [
   "kernel/convolution_depthwise_sw.c",
   "kernel/convolution_im2col_arm32.c",
   "kernel/convolution_im2col_arm64.c",
-  "kernel/convolution_im2col_avx512.c",
   "kernel/convolution_im2col_avx.c",
   "kernel/convolution_im2col_base.c",
   "kernel/convolution_im2col.c",
@@ -136,7 +142,6 @@ c_kernel_sources = [
   "kernel/log_softmax.c",
   "kernel/matmul_arm32.c",
   "kernel/matmul_arm64.c",
-  "kernel/matmul_avx512.c",
   "kernel/matmul_avx.c",
   "kernel/matmul_base.c",
   "kernel/matmul.c",
@@ -169,10 +174,6 @@ c_kernel_sources = [
   "kernel/zeros_like.c",
 ]
 
-# list of ${NNACL_DIR}/experimental/*.c
-experimental_kernel_sources = [
-]
-
 # list of ${NNACL_DIR}/base/*.c
 base_kernel_sources = [
   "base/arithmetic_base.c",
@@ -221,7 +222,6 @@ fp32_kernel_sources = [
   "fp32/conv_common_fp32.c",
   "fp32/conv_depthwise_avx_fp32.c",
   "fp32/conv_depthwise_fp32.c",
-  "fp32/conv_im2col_avx512_fp32.c",
   "fp32/conv_im2col_fp32.c",
   "fp32/conv_sw_arm64_fp32.c",
   "fp32/conv_sw_avx_fp32.c",
@@ -246,8 +246,6 @@ fp32_kernel_sources = [
   "fp32/local_response_norm_fp32.c",
   "fp32/log_softmax_fp32.c",
   "fp32/lstm_fp32.c",
-  "fp32/matmul_avx512_fp32.c",
-  "fp32/matmul_avx512_mask_fp32.c",
   "fp32/matmul_avx_fp32.c",
   "fp32/matmul_fp32.c",
   "fp32/mul_fp32.c",
@@ -784,6 +782,13 @@ sse_avx_avx512_sources = [
   "assembly/avx/MatmulAvx.S",
 ]
 
+# only x86_64 real machines support avx512
+if (target_cpu == "x86_64" && !is_emulator) {
+  sse_avx_avx512_sources += [
+    "assembly/avx512/ConvDwFp32RowAVX512.S",
+  ]
+}
+
 gemm_avx512_kernel_sources = [
   "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_10x16_kernel_nhwc_fp32.c",
   "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_10x32_kernel_nhwc_fp32.c",
@@ -834,16 +839,64 @@ gemm_avx512_kernel_sources = [
   "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_9x32_kernel_nhwc_fp32.c",
 ]
 
+gemm_mask_avx512_kernel_sources = [
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_10x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_10x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_11x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_11x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_12x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_12x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x80_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x96_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x80_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x96_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x80_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x96_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x80_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x96_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x80_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x64_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x32_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x48_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_9x16_mask_kernel_nhwc_fp32.c",
+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_9x32_mask_kernel_nhwc_fp32.c",
+]
+
 fp32_kernel_sources -= no_fast_math_fp32_kernel_sources
 fp32_kernel_sources -= avx_fp32_kernel_sources
-fp32_kernel_sources -= avx512_fp32_kernel_sources
 fp32_kernel_sources -= arm64_fp32_kernel_sources
 
 # source files on all target
 nnacl_sources = common_sources
 nnacl_sources += base_kernel_sources
 nnacl_sources += c_kernel_sources
-nnacl_sources += experimental_kernel_sources
 nnacl_sources += fp32_kernel_sources
 nnacl_sources += fp32_sparse_kernel_sources
 nnacl_sources += fp32_grad_kernel_sources
@@ -854,7 +907,6 @@ nnacl_sources += infer_control_sources
 
 # source files on arm32
 arm_only_sources = arm32_assembly_sources
-#arm_only_sources += arm32_fp16_assembly_sources
 not_needed(arm32_fp16_assembly_sources)
 
 # source files on arm64
@@ -868,8 +920,16 @@ arm64_only_sources += arm64_fp32_kernel_sources
 # sources files on x86_64
 x86_64_only_sources = sse_avx_avx512_sources
 x86_64_only_sources += avx_fp32_kernel_sources
-x86_64_only_sources += avx512_fp32_kernel_sources
-x86_64_only_sources += gemm_avx512_kernel_sources
+# the emulator does not support avx512
+if (is_emulator) {
+  not_needed(avx512_fp32_kernel_sources)
+  not_needed(gemm_avx512_kernel_sources)
+  not_needed(gemm_mask_avx512_kernel_sources)
+} else {
+  x86_64_only_sources += avx512_fp32_kernel_sources
+  x86_64_only_sources += gemm_avx512_kernel_sources
+  x86_64_only_sources += gemm_mask_avx512_kernel_sources
+}
 
 if (target_cpu == "arm") {
   nnacl_sources += arm_only_sources
diff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn
index 467cdb6a..124c84c9 100644
--- a/mindspore/lite/BUILD.gn
+++ b/mindspore/lite/BUILD.gn
@@ -118,12 +118,6 @@ control_flow_kernel_sources = [
   "src/control_flow/kernel/identity_kernel.cc",
 ]
 
-experimental_sources = [
-]
-
-string_kernel_source = [
-]
-
 auto_parallel_source = [
   "src/litert/sub_graph_split.cc"
 ]
@@ -186,19 +180,11 @@ lite_mindrt_sources = [
 all_lite_sources += cxx_api_sources
 all_lite_sources += api_source
 all_lite_sources += control_flow_kernel_sources
-all_lite_sources += experimental_sources
-all_lite_sources += string_kernel_source
 all_lite_sources += auto_parallel_source
 all_lite_sources += custom_registry_sources
 all_lite_sources += weight_decode_source
 all_lite_sources += lite_mindrt_sources
 
-ops_base_sources = [
-# "src/common/ops/anf_utils.cc", # disable runtiem convert
-# "src/common/ops/ops_def.cc", # disable kernel executor
-# "src/common/ops/ops_utils.cc" # disable kernel executor
-]
-
 basic_populate_sources = [
   "src/common/ops/populate/activation_grad_populate.cc",
   "src/common/ops/populate/activation_populate.cc",
@@ -346,8 +332,7 @@ control_populate_sources = [
   "src/common/ops/populate/control/tensorliststack_populate.cc",
 ]
 
-all_ops_sources = ops_base_sources
-all_ops_sources += basic_populate_sources
+all_ops_sources = basic_populate_sources
 all_ops_sources += string_populate_sources
 all_ops_sources += control_populate_sources
 
@@ -360,6 +345,12 @@ missing_sources = [
 
 all_sources += missing_sources
 
+SUPPORT_NNRT = false
+# currently, only arm/arm64 real machines support nnrt
+if ((target_cpu == "arm" || target_cpu == "arm64") && !is_emulator) {
+  SUPPORT_NNRT = true
+}
+
 ohos_shared_library("mindspore_lib") {
   deps = [
     "../ccsrc/plugin/device/cpu/kernel/nnacl/:nnacl_obj",
@@ -387,7 +378,6 @@ ohos_shared_library("mindspore_lib") {
     "../ccsrc/",
     "src/litert/kernel/cpu/",
     "../core/mindrt/src/",
-    "//foundation/ai/neural_network_runtime/",
   ]
 
   defines = [
@@ -418,6 +408,17 @@ ohos_shared_library("mindspore_lib") {
       "CL_HPP_TARGET_OPENCL_VERSION=120",
       "CL_HPP_MINIMUM_OPENCL_VERSION=120",
     ]
+  } else if (target_cpu == "x86_64") {
+    defines += [
+      "ENABLE_SSE",
+      "ENABLE_AVX",
+    ]
+    # the emulator does not support avx512
+    if (!is_emulator) {
+      defines += [
+        "ENABLE_AVX512",
+      ]
+    }
   }
 
   configs = [
@@ -434,10 +435,10 @@ ohos_shared_library("mindspore_lib") {
   output_name = "libmindspore-lite"
   output_extension = "so"
   innerapi_tags = [ "platformsdk" ]
-  SUPPORT_NNRT = true
   if (SUPPORT_NNRT) {
     if (mindspore_feature_nnrt_metagraph) {
       defines += [ "SUPPORT_NNRT_METAGRAPH" ]
+      sources += [ "src/litert/delegate/nnrt/hiai_foundation_wrapper.cc", ]
       print("enabled feature: mindspore_feature_nnrt_metagraph")
     }
     sources += [
@@ -445,7 +446,6 @@ ohos_shared_library("mindspore_lib") {
       "src/litert/delegate/nnrt/nnrt_delegate.cc",
       "src/litert/delegate/nnrt/nnrt_model_kernel.cc",
       "src/litert/delegate/nnrt/nnrt_allocator.cc",
-      "src/litert/delegate/nnrt/hiai_foundation_wrapper.cc",
       "src/litert/delegate/nnrt/extension_options_parser.cc",
     ]
     include_dirs += [
@@ -453,6 +453,7 @@ ohos_shared_library("mindspore_lib") {
       "../../mindspore/core/ir",
       "mindir/include",
       "mindir/inner_headers",
+      "//foundation/ai/neural_network_runtime/",
     ]
 
     external_deps += [ "neural_network_runtime:nnrt_target" ]
@@ -499,11 +500,9 @@ ohos_shared_library("mindspore_ndk") {
     "../../third_party/",
     "./schema/",
     "../ccsrc/",
-    "//foundation/ai/neural_network_runtime/",
   ]
 
   defines = [
-    "SUPPORT_NNRT",
     "MS_COMPILE_OHOS",
     "PRIMITIVE_WRITEABLE",
     "RUNTIME_PASS_CLIP",
@@ -512,9 +511,18 @@ ohos_shared_library("mindspore_ndk") {
     "ENABLE_HI_APP_EVENT",
   ]
 
-  if (mindspore_feature_nnrt_metagraph) {
-    defines += [ "SUPPORT_NNRT_METAGRAPH" ]
-    print("enabled feature: mindspore_feature_nnrt_metagraph")
+  if (SUPPORT_NNRT) {
+    include_dirs += [
+      "//foundation/ai/neural_network_runtime/",
+    ]
+    defines += [
+      "SUPPORT_NNRT",
+    ]
+    if (mindspore_feature_nnrt_metagraph) {
+      defines += [ "SUPPORT_NNRT_METAGRAPH" ]
+      print("enabled feature: mindspore_feature_nnrt_metagraph")
+    }
+    external_deps = [ "neural_network_runtime:nnrt_target" ]
   }
 
   configs = [
@@ -523,8 +531,6 @@ ohos_shared_library("mindspore_ndk") {
     ":secure_option",
   ]
 
-  external_deps = [ "neural_network_runtime:nnrt_target" ]
-
   remove_configs = [ "//build/config/compiler:no_rtti" ]
 
   output_name = "libmindspore_lite_ndk"
@@ -749,4 +755,4 @@ config("secure_option") {
 
 config("train_kernel_option") {
   cflags_cc = [ "-fno-finite-math-only" ]
-}
+}
\ No newline at end of file
diff --git a/mindspore/lite/src/common/thread_utils.cc b/mindspore/lite/src/common/thread_utils.cc
index 28c8e1cd..28c7acab 100644
--- a/mindspore/lite/src/common/thread_utils.cc
+++ b/mindspore/lite/src/common/thread_utils.cc
@@ -17,7 +17,7 @@
 #if defined(__linux__) && !defined(ENABLE_ARM)
 #include "src/common/thread_utils.h"
 #include <sys/stat.h>
-#include <wait.h>
+#include <sys/wait.h>
 #include "src/common/log_adapter.h"
 
 namespace mindspore {
diff --git a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn
index 297fc6f6..d51b9f4a 100644
--- a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn
+++ b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn
@@ -52,7 +52,6 @@ cpu_kernel_sources = [
   "fp32/convolution_fp32.cc",
   "fp32/convolution_im2col_arm32_fp32.cc",
   "fp32/convolution_im2col_arm64_fp32.cc",
-  "fp32/convolution_im2col_avx512_fp32.cc",
   "fp32/convolution_im2col_avx_fp32.cc",
   "fp32/convolution_im2col_base_fp32.cc",
   "fp32/convolution_im2col_fp32.cc",
@@ -90,7 +89,6 @@ cpu_kernel_sources = [
   "fp32/lstm_non_mindir_fp32.cc",
   "fp32/matmul_fp32_arm32.cc",
   "fp32/matmul_fp32_arm64.cc",
-  "fp32/matmul_fp32_avx512.cc",
   "fp32/matmul_fp32_avx.cc",
   "fp32/matmul_fp32_base.cc",
   "fp32/matmul_fp32.cc",
@@ -125,7 +123,7 @@ cpu_kernel_sources = [
 ]
 
 if ((target_cpu != "arm") && (target_cpu != "arm64")) {
-  cpu_kernel_sources += [ "src/runtime/kernel/cpu/fp32/cast_for_x86_fp16.cc" ]
+  cpu_kernel_sources += [ "fp32/cast_for_x86_fp16.cc" ]
 }
 
 arm64_cpu_kernel_sources = [
@@ -148,8 +146,6 @@ sse_avx_avx512_kernel_sources = [
   "fp32/convolution_im2col_avx_fp32.cc",
   "fp32/matmul_fp32_avx.cc",
   "fp32/convolution_winograd_avx_fp32.cc",
-  "fp32/convolution_im2col_avx512_fp32.cc",
-  "fp32/matmul_fp32_avx512.cc",
 ]
 
 fp16_kernel_sources = [
@@ -272,6 +268,18 @@ control_kernel_sources = [
   "control/tensorlist_stack.cc",
 ]
 
+# the emulator does not support avx512
+if (!is_emulator) {
+  cpu_kernel_sources += [
+    "fp32/convolution_im2col_avx512_fp32.cc",
+    "fp32/matmul_fp32_avx512.cc",
+  ]
+  sse_avx_avx512_kernel_sources += [
+    "fp32/convolution_im2col_avx512_fp32.cc",
+    "fp32/matmul_fp32_avx512.cc",
+  ]
+}
+
 all_cpu_kernel_sources = cpu_kernel_sources
 all_cpu_kernel_sources += int8_kernel_sources
 all_cpu_kernel_sources += string_kernel_sources
@@ -348,6 +356,16 @@ ohos_source_set("cpu_kernel_obj") {
      "CL_HPP_TARGET_OPENCL_VERSION=120",
      "CL_HPP_MINIMUM_OPENCL_VERSION=120",
    ]
+  } else if (target_cpu == "x86_64") {
+    defines += [
+      "ENABLE_SSE",
+      "ENABLE_AVX",
+    ]
+    if (!is_emulator) {
+      defines += [
+        "ENABLE_AVX512",
+      ]
+    }
   }
 
   cflags_cc = [
diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc
index f907bbbf..ac693c44 100644
--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc
+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc
@@ -49,7 +49,9 @@ using mindspore::schema::PrimitiveType_Conv2DFusion;
 
 namespace mindspore::kernel {
 namespace {
+#ifndef ENABLE_AVX
 constexpr int kMaxDwConvSWSize = 32;
+#endif
 }  // namespace
 
 float *ConvolutionDelegateCPUKernel::CopyData(const lite::Tensor *tensor) {
diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
index 568b9463..d35669ce 100644
--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
@@ -106,7 +106,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::ReSize() {
   return RET_OK;
 }
 
-int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) {
+int ConvolutionDepthwiseSWCPUKernelX86::DoExecute(int task_id) {
   DepthwiseSWAvxFp32(packed_output_, packed_input_, reinterpret_cast<float *>(packed_weight_),
                      reinterpret_cast<float *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
@@ -114,7 +114,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) {
 
 int ConvDwSWAvxRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto conv_dw = reinterpret_cast<ConvolutionDepthwiseSWCPUKernelX86 *>(cdata);
-  auto ret = conv_dw->Execute(task_id);
+  auto ret = conv_dw->DoExecute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionDepthwiseSWRun in x86 error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
diff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h
index e959fe45..928321e5 100644
--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h
+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h
@@ -35,7 +35,7 @@ class ConvolutionDepthwiseSWCPUKernelX86 : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
 
-  int Execute(int task_id);
+  int DoExecute(int task_id);
 
  private:
   void FreePackedInputOutput();
-- 
2.25.1