• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From b79a91593152ec4a57321c9b35646ef847be66ce Mon Sep 17 00:00:00 2001
2From: Zhu Guodong <zhuguodong0001@163.com>
3Date: Sat, 8 Jul 2023 16:36:20 +0800
4Subject: [PATCH] auto-apply 0033-support-fp16-for-arm64-arch.patch
5
6---
7 mindspore/lite/BUILD.gn | 117 +++++++++++++++++++++++++++++++++++++++-
8 1 file changed, 116 insertions(+), 1 deletion(-)
9
10diff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn
11index 48b0461d..cf0a74ab 100644
12--- a/mindspore/lite/BUILD.gn
13+++ b/mindspore/lite/BUILD.gn
14@@ -70,6 +70,7 @@ ohos_group("mindspore") {
15     ":mindspore_lib",
16     ":mindspore_train_lib",
17     "mindir:mindir_lib",
18+    "tools/benchmark:benchmark_bin",
19     "src/runtime/js_api:mindsporelite_napi"
20   ]
21 }
22@@ -238,7 +239,6 @@ cpu_kernel_sources = [
23   "src/runtime/kernel/cpu/fp32/batch_to_space_fp32.cc",
24   "src/runtime/kernel/cpu/fp32/bias_fp32.cc",
25   "src/runtime/kernel/cpu/fp32/broadcast_to_fp32.cc",
26-  "src/runtime/kernel/cpu/fp32/cast_for_x86_fp16.cc",
27   "src/runtime/kernel/cpu/fp32/cast_fp32.cc",
28   "src/runtime/kernel/cpu/fp32/convolution_1x1_fp32.cc",
29   "src/runtime/kernel/cpu/fp32/convolution_delegate_fp32.cc",
30@@ -324,6 +324,114 @@ cpu_kernel_sources = [
31   "src/runtime/kernel/cpu/fp32/custom_gru_fp32.cc",
32 ]
33
34+if ((target_cpu != "arm") && (target_cpu != "arm64")) {
35+    cpu_kernel_sources += [ "src/runtime/kernel/cpu/fp32/cast_for_x86_fp16.cc" ]
36+}
37+
38+fp16_kernel_sources = [
39+    "src/runtime/kernel/cpu/fp16/activation_fp16.cc",
40+    "src/runtime/kernel/cpu/fp16/activation_fp16.h",
41+    "src/runtime/kernel/cpu/fp16/addn_fp16.cc",
42+    "src/runtime/kernel/cpu/fp16/addn_fp16.h",
43+    "src/runtime/kernel/cpu/fp16/arithmetic_compare_fp16.cc",
44+    "src/runtime/kernel/cpu/fp16/arithmetic_compare_fp16.h",
45+    "src/runtime/kernel/cpu/fp16/arithmetic_fp16.cc",
46+    "src/runtime/kernel/cpu/fp16/arithmetic_fp16.h",
47+    "src/runtime/kernel/cpu/fp16/arithmetic_self_fp16.cc",
48+    "src/runtime/kernel/cpu/fp16/arithmetic_self_fp16.h",
49+    "src/runtime/kernel/cpu/fp16/batchnorm_fp16.cc",
50+    "src/runtime/kernel/cpu/fp16/batchnorm_fp16.h",
51+    "src/runtime/kernel/cpu/fp16/biasadd_fp16.cc",
52+    "src/runtime/kernel/cpu/fp16/biasadd_fp16.h",
53+    "src/runtime/kernel/cpu/fp16/cast_fp16.cc",
54+    "src/runtime/kernel/cpu/fp16/cast_fp16.h",
55+    "src/runtime/kernel/cpu/fp16/common_fp16.cc",
56+    "src/runtime/kernel/cpu/fp16/common_fp16.h",
57+    "src/runtime/kernel/cpu/fp16/concat_fp16.cc",
58+    "src/runtime/kernel/cpu/fp16/concat_fp16.h",
59+    "src/runtime/kernel/cpu/fp16/convolution_1x1_fp16.cc",
60+    "src/runtime/kernel/cpu/fp16/convolution_1x1_fp16.h",
61+    "src/runtime/kernel/cpu/fp16/convolution_delegate_fp16.cc",
62+    "src/runtime/kernel/cpu/fp16/convolution_delegate_fp16.h",
63+    "src/runtime/kernel/cpu/fp16/convolution_depthwise_3x3_fp16.cc",
64+    "src/runtime/kernel/cpu/fp16/convolution_depthwise_3x3_fp16.h",
65+    "src/runtime/kernel/cpu/fp16/convolution_depthwise_fp16.cc",
66+    "src/runtime/kernel/cpu/fp16/convolution_depthwise_fp16.h",
67+    "src/runtime/kernel/cpu/fp16/convolution_depthwise_slidewindow_fp16.cc",
68+    "src/runtime/kernel/cpu/fp16/convolution_depthwise_slidewindow_fp16.h",
69+    "src/runtime/kernel/cpu/fp16/convolution_fp16.cc",
70+    "src/runtime/kernel/cpu/fp16/convolution_fp16.h",
71+    "src/runtime/kernel/cpu/fp16/convolution_winograd_fp16.cc",
72+    "src/runtime/kernel/cpu/fp16/convolution_winograd_fp16.h",
73+    "src/runtime/kernel/cpu/fp16/crop_fp16.cc",
74+    "src/runtime/kernel/cpu/fp16/crop_fp16.h",
75+    "src/runtime/kernel/cpu/fp16/custom_gru_fp16.cc",
76+    "src/runtime/kernel/cpu/fp16/custom_gru_fp16.h",
77+    "src/runtime/kernel/cpu/fp16/deconvolution_depthwise_fp16.cc",
78+    "src/runtime/kernel/cpu/fp16/deconvolution_depthwise_fp16.h",
79+    "src/runtime/kernel/cpu/fp16/deconvolution_fp16.cc",
80+    "src/runtime/kernel/cpu/fp16/deconvolution_fp16.h",
81+    "src/runtime/kernel/cpu/fp16/deconvolution_winograd_fp16.cc",
82+    "src/runtime/kernel/cpu/fp16/deconvolution_winograd_fp16.h",
83+    "src/runtime/kernel/cpu/fp16/depth_to_space_fp16.cc",
84+    "src/runtime/kernel/cpu/fp16/depth_to_space_fp16.h",
85+    "src/runtime/kernel/cpu/fp16/fill_fp16.cc",
86+    "src/runtime/kernel/cpu/fp16/fill_fp16.h",
87+    "src/runtime/kernel/cpu/fp16/fp16_op_handler.h",
88+    "src/runtime/kernel/cpu/fp16/fullconnection_fp16.cc",
89+    "src/runtime/kernel/cpu/fp16/fullconnection_fp16.h",
90+    "src/runtime/kernel/cpu/fp16/fused_batchnorm_fp16.cc",
91+    "src/runtime/kernel/cpu/fp16/fused_batchnorm_fp16.h",
92+    "src/runtime/kernel/cpu/fp16/gather_fp16.cc",
93+    "src/runtime/kernel/cpu/fp16/gather_fp16.h",
94+    "src/runtime/kernel/cpu/fp16/group_convolution_fp16.cc",
95+    "src/runtime/kernel/cpu/fp16/group_convolution_fp16.h",
96+    "src/runtime/kernel/cpu/fp16/gru_fp16.cc",
97+    "src/runtime/kernel/cpu/fp16/gru_fp16.h",
98+    "src/runtime/kernel/cpu/fp16/instance_norm_fp16.cc",
99+    "src/runtime/kernel/cpu/fp16/instance_norm_fp16.h",
100+    "src/runtime/kernel/cpu/fp16/layer_norm_fp16.cc",
101+    "src/runtime/kernel/cpu/fp16/layer_norm_fp16.h",
102+    "src/runtime/kernel/cpu/fp16/layout_transform_fp16.cc",
103+    "src/runtime/kernel/cpu/fp16/layout_transform_fp16.h",
104+    "src/runtime/kernel/cpu/fp16/log_softmax_fp16.cc",
105+    "src/runtime/kernel/cpu/fp16/log_softmax_fp16.h",
106+    "src/runtime/kernel/cpu/fp16/lstm_fp16.cc",
107+    "src/runtime/kernel/cpu/fp16/lstm_fp16.h",
108+    "src/runtime/kernel/cpu/fp16/matmul_base_fp16.cc",
109+    "src/runtime/kernel/cpu/fp16/matmul_base_fp16.h",
110+    "src/runtime/kernel/cpu/fp16/matmul_fp16.cc",
111+    "src/runtime/kernel/cpu/fp16/matmul_fp16.h",
112+    "src/runtime/kernel/cpu/fp16/pad_fp16.cc",
113+    "src/runtime/kernel/cpu/fp16/pad_fp16.h",
114+    "src/runtime/kernel/cpu/fp16/pooling_fp16.cc",
115+    "src/runtime/kernel/cpu/fp16/pooling_fp16.h",
116+    "src/runtime/kernel/cpu/fp16/power_fp16.cc",
117+    "src/runtime/kernel/cpu/fp16/power_fp16.h",
118+    "src/runtime/kernel/cpu/fp16/prelu_fp16.cc",
119+    "src/runtime/kernel/cpu/fp16/prelu_fp16.h",
120+    "src/runtime/kernel/cpu/fp16/quant_dtype_cast_fp16.cc",
121+    "src/runtime/kernel/cpu/fp16/quant_dtype_cast_fp16.h",
122+    "src/runtime/kernel/cpu/fp16/ragged_range_fp16.cc",
123+    "src/runtime/kernel/cpu/fp16/ragged_range_fp16.h",
124+    "src/runtime/kernel/cpu/fp16/reduce_fp16.cc",
125+    "src/runtime/kernel/cpu/fp16/reduce_fp16.h",
126+    "src/runtime/kernel/cpu/fp16/resize_fp16.cc",
127+    "src/runtime/kernel/cpu/fp16/resize_fp16.h",
128+    "src/runtime/kernel/cpu/fp16/scale_fp16.cc",
129+    "src/runtime/kernel/cpu/fp16/scale_fp16.h",
130+    "src/runtime/kernel/cpu/fp16/slice_fp16.cc",
131+    "src/runtime/kernel/cpu/fp16/slice_fp16.h",
132+    "src/runtime/kernel/cpu/fp16/softmax_fp16.cc",
133+    "src/runtime/kernel/cpu/fp16/softmax_fp16.h",
134+    "src/runtime/kernel/cpu/fp16/stack_fp16.cc",
135+    "src/runtime/kernel/cpu/fp16/stack_fp16.h",
136+    "src/runtime/kernel/cpu/fp16/transpose_fp16.cc",
137+    "src/runtime/kernel/cpu/fp16/transpose_fp16.h",
138+    "src/runtime/kernel/cpu/fp16/where_fp16.cc",
139+    "src/runtime/kernel/cpu/fp16/where_fp16.h"
140+]
141+
142 int8_kernel_sources = [
143   "src/runtime/kernel/cpu/int8/activation_int8.cc",
144   "src/runtime/kernel/cpu/int8/add_int8.cc",
145@@ -409,6 +517,13 @@ all_cpu_kernel_sources += int8_kernel_sources
146 all_cpu_kernel_sources += string_kernel_sources
147 all_cpu_kernel_sources += control_kernel_sources
148
149+if (target_cpu == "arm64") {
150+    all_cpu_kernel_sources += fp16_kernel_sources
151+} else {
152+    not_needed(fp16_kernel_sources)
153+}
154+
155+
156 ops_base_sources = [
157 #  "src/common/ops/anf_utils.cc", # disable runtiem convert
158 #  "src/common/ops/ops_def.cc", # disable kernel executor
159--
1602.34.1
161
162