1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <xnnpack.h>
#include <xnnpack/log.h>
#include <xnnpack/params.h>
#include <xnnpack/subgraph.h>
15
16
create_add_operator(const struct xnn_node * node,const struct xnn_value * values,size_t num_values,struct xnn_operator_data * opdata)17 static enum xnn_status create_add_operator(
18 const struct xnn_node* node,
19 const struct xnn_value* values,
20 size_t num_values,
21 struct xnn_operator_data* opdata)
22 {
23 assert(node->num_inputs == 2);
24 const uint32_t input1_id = node->inputs[0];
25 assert(input1_id != XNN_INVALID_VALUE_ID);
26 assert(input1_id < num_values);
27 const uint32_t input2_id = node->inputs[1];
28 assert(input2_id != XNN_INVALID_VALUE_ID);
29 assert(input2_id < num_values);
30
31 assert(node->num_outputs == 1);
32 const uint32_t output_id = node->outputs[0];
33 assert(output_id != XNN_INVALID_VALUE_ID);
34 assert(output_id < num_values);
35
36 enum xnn_status status;
37 switch (node->compute_type) {
38 case xnn_compute_type_fp32:
39 status = xnn_create_add_nd_f32(
40 node->activation.output_min,
41 node->activation.output_max,
42 node->flags,
43 &opdata->operator_object);
44 break;
45 #ifndef XNN_NO_F16_OPERATORS
46 case xnn_compute_type_fp16:
47 status = xnn_create_add_nd_f16(
48 node->activation.output_min,
49 node->activation.output_max,
50 node->flags,
51 &opdata->operator_object);
52 break;
53 #endif // !defined(XNN_NO_F16_OPERATORS)
54 #ifndef XNN_NO_QS8_OPERATORS
55 case xnn_compute_type_qs8:
56 {
57 const float output_scale = values[output_id].quantization.scale;
58 const int32_t output_zero_point = values[output_id].quantization.zero_point;
59 const int8_t output_min =
60 (int8_t) lrintf(fminf(fmaxf(node->activation.output_min / output_scale + (float) output_zero_point, -128.0f), 127.0f));
61 const int8_t output_max =
62 (int8_t) lrintf(fminf(fmaxf(node->activation.output_max / output_scale + (float) output_zero_point, -128.0f), 127.0f));
63 status = xnn_create_add_nd_qs8(
64 (int8_t) values[input1_id].quantization.zero_point,
65 values[input1_id].quantization.scale,
66 (int8_t) values[input2_id].quantization.zero_point,
67 values[input2_id].quantization.scale,
68 (int8_t) output_zero_point,
69 output_scale, output_min, output_max, node->flags,
70 &opdata->operator_object);
71 break;
72 }
73 #endif // !defined(XNN_NO_QS8_OPERATORS)
74 #ifndef XNN_NO_QU8_OPERATORS
75 case xnn_compute_type_qu8:
76 {
77 const float output_scale = values[output_id].quantization.scale;
78 const int32_t output_zero_point = values[output_id].quantization.zero_point;
79 const uint8_t output_min =
80 (uint8_t) lrintf(fminf(fmaxf(node->activation.output_min / output_scale + (float) output_zero_point, 0.0f), 255.0f));
81 const uint8_t output_max =
82 (uint8_t) lrintf(fminf(fmaxf(node->activation.output_max / output_scale + (float) output_zero_point, 0.0f), 255.0f));
83 status = xnn_create_add_nd_qu8(
84 (uint8_t) values[input1_id].quantization.zero_point,
85 values[input1_id].quantization.scale,
86 (uint8_t) values[input2_id].quantization.zero_point,
87 values[input2_id].quantization.scale,
88 (uint8_t) output_zero_point,
89 output_scale, output_min, output_max, node->flags,
90 &opdata->operator_object);
91 break;
92 }
93 #endif // !defined(XNN_NO_QU8_OPERATORS)
94 default:
95 XNN_UNREACHABLE;
96 }
97 if (status == xnn_status_success) {
98 opdata->shape1.num_dims = values[input1_id].shape.num_dims;
99 opdata->shape2.num_dims = values[input2_id].shape.num_dims;
100 if (values[output_id].layout == xnn_layout_type_nchw) {
101 assert(values[input1_id].layout == xnn_layout_type_nchw);
102 assert(values[input2_id].layout == xnn_layout_type_nchw);
103 opdata->shape1.dim[0] = values[input1_id].shape.dim[0];
104 opdata->shape1.dim[1] = values[input1_id].shape.dim[values[input1_id].shape.num_dims - 1];
105 if (values[input1_id].shape.num_dims > 2) {
106 memcpy(&opdata->shape1.dim[2], &values[input1_id].shape.dim[1], (values[input1_id].shape.num_dims - 2) * sizeof(size_t));
107 }
108 opdata->shape2.dim[0] = values[input2_id].shape.dim[0];
109 opdata->shape2.dim[1] = values[input2_id].shape.dim[values[input2_id].shape.num_dims - 1];
110 if (values[input1_id].shape.num_dims > 2) {
111 memcpy(&opdata->shape2.dim[2], &values[input2_id].shape.dim[1], (values[input2_id].shape.num_dims - 2) * sizeof(size_t));
112 }
113 } else {
114 assert(values[output_id].layout == xnn_layout_type_nhwc);
115 assert(values[input1_id].layout == xnn_layout_type_nhwc);
116 assert(values[input2_id].layout == xnn_layout_type_nhwc);
117 memcpy(opdata->shape1.dim, values[input1_id].shape.dim, values[input1_id].shape.num_dims * sizeof(size_t));
118 memcpy(opdata->shape2.dim, values[input2_id].shape.dim, values[input2_id].shape.num_dims * sizeof(size_t));
119 }
120 opdata->inputs[0] = input1_id;
121 opdata->inputs[1] = input2_id;
122 opdata->outputs[0] = output_id;
123 }
124 return status;
125 }
126
// Binds the Add2 node's input and output buffers to the previously created
// add-nd operator, dispatching on the operator's concrete type.
static enum xnn_status setup_add_operator(
  const struct xnn_operator_data* opdata,
  const struct xnn_blob* blobs,
  size_t num_blobs,
  pthreadpool_t threadpool)
{
  // Validate the value IDs recorded by create_add_operator.
  const uint32_t input1_id = opdata->inputs[0];
  const uint32_t input2_id = opdata->inputs[1];
  const uint32_t output_id = opdata->outputs[0];
  assert(input1_id != XNN_INVALID_VALUE_ID);
  assert(input1_id < num_blobs);
  assert(input2_id != XNN_INVALID_VALUE_ID);
  assert(input2_id < num_blobs);
  assert(output_id != XNN_INVALID_VALUE_ID);
  assert(output_id < num_blobs);

  // Resolve the backing buffers; all must have been allocated by now.
  const void* input1_data = blobs[input1_id].data;
  const void* input2_data = blobs[input2_id].data;
  void* output_data = blobs[output_id].data;
  assert(input1_data != NULL);
  assert(input2_data != NULL);
  assert(output_data != NULL);

  switch (opdata->operator_object->type) {
    case xnn_operator_type_add_nd_f32:
      return xnn_setup_add_nd_f32(
        opdata->operator_object,
        opdata->shape1.num_dims,
        opdata->shape1.dim,
        opdata->shape2.num_dims,
        opdata->shape2.dim,
        input1_data, input2_data, output_data,
        threadpool);
#ifndef XNN_NO_F16_OPERATORS
    case xnn_operator_type_add_nd_f16:
      return xnn_setup_add_nd_f16(
        opdata->operator_object,
        opdata->shape1.num_dims,
        opdata->shape1.dim,
        opdata->shape2.num_dims,
        opdata->shape2.dim,
        input1_data, input2_data, output_data,
        threadpool);
#endif  // !defined(XNN_NO_F16_OPERATORS)
#ifndef XNN_NO_QS8_OPERATORS
    case xnn_operator_type_add_nd_qs8:
      return xnn_setup_add_nd_qs8(
        opdata->operator_object,
        opdata->shape1.num_dims,
        opdata->shape1.dim,
        opdata->shape2.num_dims,
        opdata->shape2.dim,
        input1_data, input2_data, output_data,
        threadpool);
#endif  // !defined(XNN_NO_QS8_OPERATORS)
#ifndef XNN_NO_QU8_OPERATORS
    case xnn_operator_type_add_nd_qu8:
      return xnn_setup_add_nd_qu8(
        opdata->operator_object,
        opdata->shape1.num_dims,
        opdata->shape1.dim,
        opdata->shape2.num_dims,
        opdata->shape2.dim,
        input1_data, input2_data, output_data,
        threadpool);
#endif  // !defined(XNN_NO_QU8_OPERATORS)
    default:
      XNN_UNREACHABLE;
  }
}
204
// Defines a two-input Add node in the subgraph.
//
// Validates the activation range, both input value IDs, and the output
// value ID (each must be a dense tensor of a supported datatype, and all
// three datatypes must match), then appends a node that records the
// create/setup callbacks above.
//
// Returns xnn_status_success, or an error status describing the first
// validation failure encountered.
enum xnn_status xnn_define_add2(
  xnn_subgraph_t subgraph,
  float output_min,
  float output_max,
  uint32_t input1_id,
  uint32_t input2_id,
  uint32_t output_id,
  uint32_t flags)
{
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to define %s operator: XNNPACK is not initialized",
      xnn_node_type_to_string(xnn_node_type_add2));
    return xnn_status_uninitialized;
  }

  // Activation bounds must form a valid, non-NaN, non-empty range.
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to define %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_node_type_to_string(xnn_node_type_add2));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to define %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_node_type_to_string(xnn_node_type_add2));
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to define %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_node_type_to_string(xnn_node_type_add2), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  // First input: must be an in-range ID for a dense tensor of a supported
  // datatype.
  if (input1_id >= subgraph->num_values) {
    xnn_log_error(
      "failed to define %s operator with the first input ID #%" PRIu32 ": invalid Value ID",
      xnn_node_type_to_string(xnn_node_type_add2), input1_id);
    return xnn_status_invalid_parameter;
  }

  const struct xnn_value* input1_value = &subgraph->values[input1_id];
  if (input1_value->type != xnn_value_type_dense_tensor) {
    xnn_log_error(
      "failed to define %s operator with the first input ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
      xnn_node_type_to_string(xnn_node_type_add2), input1_id, input1_value->type);
    return xnn_status_invalid_parameter;
  }

  switch (input1_value->datatype) {
    case xnn_datatype_fp32:
#ifndef XNN_NO_QS8_OPERATORS
    case xnn_datatype_qint8:
#endif  // !defined(XNN_NO_QS8_OPERATORS)
#ifndef XNN_NO_QU8_OPERATORS
    case xnn_datatype_quint8:
#endif  // !defined(XNN_NO_QU8_OPERATORS)
      break;
    default:
      xnn_log_error(
        "failed to define %s operator with the first input ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
        xnn_node_type_to_string(xnn_node_type_add2), input1_id,
        xnn_datatype_to_string(input1_value->datatype), input1_value->datatype);
      return xnn_status_invalid_parameter;
  }

  // Second input: same checks as the first input.
  if (input2_id >= subgraph->num_values) {
    xnn_log_error(
      "failed to define %s operator with the second input ID #%" PRIu32 ": invalid Value ID",
      xnn_node_type_to_string(xnn_node_type_add2), input2_id);
    return xnn_status_invalid_parameter;
  }

  const struct xnn_value* input2_value = &subgraph->values[input2_id];
  if (input2_value->type != xnn_value_type_dense_tensor) {
    xnn_log_error(
      "failed to define %s operator with the second input ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
      xnn_node_type_to_string(xnn_node_type_add2), input2_id, input2_value->type);
    return xnn_status_invalid_parameter;
  }

  switch (input2_value->datatype) {
    case xnn_datatype_fp32:
#ifndef XNN_NO_QS8_OPERATORS
    case xnn_datatype_qint8:
#endif  // !defined(XNN_NO_QS8_OPERATORS)
#ifndef XNN_NO_QU8_OPERATORS
    case xnn_datatype_quint8:
#endif  // !defined(XNN_NO_QU8_OPERATORS)
      break;
    default:
      xnn_log_error(
        "failed to define %s operator with the second input ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
        xnn_node_type_to_string(xnn_node_type_add2), input2_id,
        xnn_datatype_to_string(input2_value->datatype), input2_value->datatype);
      return xnn_status_invalid_parameter;
  }

  // Output: validate ID and type, and derive the node's compute type from
  // the output datatype. Note fp16 is not selectable here; the fp16 case in
  // create_add_operator is reached through other paths.
  if (output_id >= subgraph->num_values) {
    xnn_log_error(
      "failed to define %s operator with output ID #%" PRIu32 ": invalid Value ID",
      xnn_node_type_to_string(xnn_node_type_add2), output_id);
    return xnn_status_invalid_parameter;
  }

  const struct xnn_value* output_value = &subgraph->values[output_id];
  if (output_value->type != xnn_value_type_dense_tensor) {
    xnn_log_error(
      "failed to define %s operator with output ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
      xnn_node_type_to_string(xnn_node_type_add2), output_id, output_value->type);
    return xnn_status_invalid_parameter;
  }

  enum xnn_compute_type compute_type = xnn_compute_type_invalid;
  switch (output_value->datatype) {
    case xnn_datatype_fp32:
      compute_type = xnn_compute_type_fp32;
      break;
#ifndef XNN_NO_QS8_OPERATORS
    case xnn_datatype_qint8:
      compute_type = xnn_compute_type_qs8;
      break;
#endif  // !defined(XNN_NO_QS8_OPERATORS)
#ifndef XNN_NO_QU8_OPERATORS
    case xnn_datatype_quint8:
      compute_type = xnn_compute_type_qu8;
      break;
#endif  // !defined(XNN_NO_QU8_OPERATORS)
    default:
      xnn_log_error(
        "failed to define %s operator with output ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
        xnn_node_type_to_string(xnn_node_type_add2), output_id,
        xnn_datatype_to_string(output_value->datatype), output_value->datatype);
      return xnn_status_invalid_parameter;
  }

  // All three values must share one datatype (quantization parameters are
  // handled per-value at operator-creation time).
  if (input1_value->datatype != input2_value->datatype ||
      input1_value->datatype != output_value->datatype)
  {
    xnn_log_error(
      "failed to define %s operator with input IDs #%" PRIu32 " and #%" PRIu32 " and output ID #%" PRIu32
      ": mismatching datatypes across the first input (%s), the second input (%s), and output (%s)",
      xnn_node_type_to_string(xnn_node_type_add2), input1_id, input2_id, output_id,
      xnn_datatype_to_string(input1_value->datatype),
      xnn_datatype_to_string(input2_value->datatype),
      xnn_datatype_to_string(output_value->datatype));
    return xnn_status_invalid_parameter;
  }

  struct xnn_node* node = xnn_subgraph_new_node(subgraph);
  if (node == NULL) {
    return xnn_status_out_of_memory;
  }

  // Populate the node; create/setup run later, when the runtime is built.
  node->type = xnn_node_type_add2;
  node->compute_type = compute_type;
  node->activation.output_min = output_min;
  node->activation.output_max = output_max;
  node->num_inputs = 2;
  node->inputs[0] = input1_id;
  node->inputs[1] = input2_id;
  node->num_outputs = 1;
  node->outputs[0] = output_id;
  node->flags = flags;

  node->create = create_add_operator;
  node->setup = setup_add_operator;

  return xnn_status_success;
}
377