From b79a91593152ec4a57321c9b35646ef847be66ce Mon Sep 17 00:00:00 2001
From: Zhu Guodong <zhuguodong0001@163.com>
Date: Sat, 8 Jul 2023 16:36:20 +0800
Subject: [PATCH] auto-apply 0033-support-fp16-for-arm64-arch.patch

---
Notes (ignored by git-am; not part of the commit message):

  Summary of the changes to mindspore/lite/BUILD.gn:
    * adds "tools/benchmark:benchmark_bin" to the list inside
      ohos_group("mindspore");
    * moves fp32/cast_for_x86_fp16.cc out of the unconditional
      cpu_kernel_sources list and appends it only when target_cpu is
      neither "arm" nor "arm64";
    * defines fp16_kernel_sources (the fp16 CPU kernel .cc/.h files) and
      appends it to all_cpu_kernel_sources when target_cpu is "arm64";
      on every other target the list is passed to not_needed().

  NOTE(review): the leading whitespace of the diff lines below was
  reconstructed using GN's 2-space indentation convention. Confirm it
  against the target BUILD.gn, or apply with
  `git apply --ignore-whitespace`.

 mindspore/lite/BUILD.gn | 117 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 116 insertions(+), 1 deletion(-)

diff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn
index 48b0461d..cf0a74ab 100644
--- a/mindspore/lite/BUILD.gn
+++ b/mindspore/lite/BUILD.gn
@@ -70,6 +70,7 @@ ohos_group("mindspore") {
     ":mindspore_lib",
     ":mindspore_train_lib",
     "mindir:mindir_lib",
+    "tools/benchmark:benchmark_bin",
     "src/runtime/js_api:mindsporelite_napi"
   ]
 }
@@ -238,7 +239,6 @@ cpu_kernel_sources = [
   "src/runtime/kernel/cpu/fp32/batch_to_space_fp32.cc",
   "src/runtime/kernel/cpu/fp32/bias_fp32.cc",
   "src/runtime/kernel/cpu/fp32/broadcast_to_fp32.cc",
-  "src/runtime/kernel/cpu/fp32/cast_for_x86_fp16.cc",
   "src/runtime/kernel/cpu/fp32/cast_fp32.cc",
   "src/runtime/kernel/cpu/fp32/convolution_1x1_fp32.cc",
   "src/runtime/kernel/cpu/fp32/convolution_delegate_fp32.cc",
@@ -324,6 +324,114 @@ cpu_kernel_sources = [
   "src/runtime/kernel/cpu/fp32/custom_gru_fp32.cc",
 ]
 
+if ((target_cpu != "arm") && (target_cpu != "arm64")) {
+  cpu_kernel_sources += [ "src/runtime/kernel/cpu/fp32/cast_for_x86_fp16.cc" ]
+}
+
+fp16_kernel_sources = [
+  "src/runtime/kernel/cpu/fp16/activation_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/activation_fp16.h",
+  "src/runtime/kernel/cpu/fp16/addn_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/addn_fp16.h",
+  "src/runtime/kernel/cpu/fp16/arithmetic_compare_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/arithmetic_compare_fp16.h",
+  "src/runtime/kernel/cpu/fp16/arithmetic_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/arithmetic_fp16.h",
+  "src/runtime/kernel/cpu/fp16/arithmetic_self_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/arithmetic_self_fp16.h",
+  "src/runtime/kernel/cpu/fp16/batchnorm_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/batchnorm_fp16.h",
+  "src/runtime/kernel/cpu/fp16/biasadd_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/biasadd_fp16.h",
+  "src/runtime/kernel/cpu/fp16/cast_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/cast_fp16.h",
+  "src/runtime/kernel/cpu/fp16/common_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/common_fp16.h",
+  "src/runtime/kernel/cpu/fp16/concat_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/concat_fp16.h",
+  "src/runtime/kernel/cpu/fp16/convolution_1x1_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/convolution_1x1_fp16.h",
+  "src/runtime/kernel/cpu/fp16/convolution_delegate_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/convolution_delegate_fp16.h",
+  "src/runtime/kernel/cpu/fp16/convolution_depthwise_3x3_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/convolution_depthwise_3x3_fp16.h",
+  "src/runtime/kernel/cpu/fp16/convolution_depthwise_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/convolution_depthwise_fp16.h",
+  "src/runtime/kernel/cpu/fp16/convolution_depthwise_slidewindow_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/convolution_depthwise_slidewindow_fp16.h",
+  "src/runtime/kernel/cpu/fp16/convolution_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/convolution_fp16.h",
+  "src/runtime/kernel/cpu/fp16/convolution_winograd_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/convolution_winograd_fp16.h",
+  "src/runtime/kernel/cpu/fp16/crop_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/crop_fp16.h",
+  "src/runtime/kernel/cpu/fp16/custom_gru_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/custom_gru_fp16.h",
+  "src/runtime/kernel/cpu/fp16/deconvolution_depthwise_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/deconvolution_depthwise_fp16.h",
+  "src/runtime/kernel/cpu/fp16/deconvolution_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/deconvolution_fp16.h",
+  "src/runtime/kernel/cpu/fp16/deconvolution_winograd_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/deconvolution_winograd_fp16.h",
+  "src/runtime/kernel/cpu/fp16/depth_to_space_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/depth_to_space_fp16.h",
+  "src/runtime/kernel/cpu/fp16/fill_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/fill_fp16.h",
+  "src/runtime/kernel/cpu/fp16/fp16_op_handler.h",
+  "src/runtime/kernel/cpu/fp16/fullconnection_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/fullconnection_fp16.h",
+  "src/runtime/kernel/cpu/fp16/fused_batchnorm_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/fused_batchnorm_fp16.h",
+  "src/runtime/kernel/cpu/fp16/gather_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/gather_fp16.h",
+  "src/runtime/kernel/cpu/fp16/group_convolution_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/group_convolution_fp16.h",
+  "src/runtime/kernel/cpu/fp16/gru_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/gru_fp16.h",
+  "src/runtime/kernel/cpu/fp16/instance_norm_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/instance_norm_fp16.h",
+  "src/runtime/kernel/cpu/fp16/layer_norm_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/layer_norm_fp16.h",
+  "src/runtime/kernel/cpu/fp16/layout_transform_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/layout_transform_fp16.h",
+  "src/runtime/kernel/cpu/fp16/log_softmax_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/log_softmax_fp16.h",
+  "src/runtime/kernel/cpu/fp16/lstm_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/lstm_fp16.h",
+  "src/runtime/kernel/cpu/fp16/matmul_base_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/matmul_base_fp16.h",
+  "src/runtime/kernel/cpu/fp16/matmul_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/matmul_fp16.h",
+  "src/runtime/kernel/cpu/fp16/pad_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/pad_fp16.h",
+  "src/runtime/kernel/cpu/fp16/pooling_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/pooling_fp16.h",
+  "src/runtime/kernel/cpu/fp16/power_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/power_fp16.h",
+  "src/runtime/kernel/cpu/fp16/prelu_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/prelu_fp16.h",
+  "src/runtime/kernel/cpu/fp16/quant_dtype_cast_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/quant_dtype_cast_fp16.h",
+  "src/runtime/kernel/cpu/fp16/ragged_range_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/ragged_range_fp16.h",
+  "src/runtime/kernel/cpu/fp16/reduce_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/reduce_fp16.h",
+  "src/runtime/kernel/cpu/fp16/resize_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/resize_fp16.h",
+  "src/runtime/kernel/cpu/fp16/scale_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/scale_fp16.h",
+  "src/runtime/kernel/cpu/fp16/slice_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/slice_fp16.h",
+  "src/runtime/kernel/cpu/fp16/softmax_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/softmax_fp16.h",
+  "src/runtime/kernel/cpu/fp16/stack_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/stack_fp16.h",
+  "src/runtime/kernel/cpu/fp16/transpose_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/transpose_fp16.h",
+  "src/runtime/kernel/cpu/fp16/where_fp16.cc",
+  "src/runtime/kernel/cpu/fp16/where_fp16.h"
+]
+
 int8_kernel_sources = [
   "src/runtime/kernel/cpu/int8/activation_int8.cc",
   "src/runtime/kernel/cpu/int8/add_int8.cc",
@@ -409,6 +517,13 @@ all_cpu_kernel_sources += int8_kernel_sources
 all_cpu_kernel_sources += string_kernel_sources
 all_cpu_kernel_sources += control_kernel_sources
 
+if (target_cpu == "arm64") {
+  all_cpu_kernel_sources += fp16_kernel_sources
+} else {
+  not_needed(fp16_kernel_sources)
+}
+
+
 ops_base_sources = [
   # "src/common/ops/anf_utils.cc", # disable runtiem convert
   # "src/common/ops/ops_def.cc", # disable kernel executor
-- 
2.34.1
