// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack.h>
#include <xnnpack/log.h>
#include <xnnpack/params.h>
#include <xnnpack/subgraph.h>
14
15
create_convolution_operator(const struct xnn_node * node,const struct xnn_value * values,size_t num_values,struct xnn_operator_data * opdata)16 static enum xnn_status create_convolution_operator(
17 const struct xnn_node* node,
18 const struct xnn_value* values,
19 size_t num_values,
20 struct xnn_operator_data* opdata)
21 {
22 assert(node->num_inputs >= 2);
23 assert(node->num_inputs <= 3);
24 const uint32_t input_id = node->inputs[0];
25 assert(input_id != XNN_INVALID_VALUE_ID);
26 assert(input_id < num_values);
27 const uint32_t filter_id = node->inputs[1];
28 assert(filter_id != XNN_INVALID_VALUE_ID);
29 assert(filter_id < num_values);
30
31 assert(node->num_outputs == 1);
32 const uint32_t output_id = node->outputs[0];
33 assert(output_id != XNN_INVALID_VALUE_ID);
34 assert(output_id < num_values);
35
36 const void* filter_data = values[filter_id].data;
37 assert(filter_data != NULL);
38
39 const void* bias_data = NULL;
40 if (node->num_inputs > 2) {
41 const uint32_t bias_id = node->inputs[2];
42 assert(bias_id != XNN_INVALID_VALUE_ID);
43 assert(bias_id < num_values);
44
45 bias_data = values[bias_id].data;
46 assert(bias_data != NULL);
47 }
48
49 enum xnn_status status;
50 if (values[output_id].layout == xnn_layout_type_nchw) {
51 assert(values[input_id].layout == xnn_layout_type_nchw);
52 assert(node->compute_type == xnn_compute_type_fp32);
53 status = xnn_create_convolution2d_nchw_f32(
54 node->params.depthwise_convolution_2d.input_padding_top,
55 node->params.depthwise_convolution_2d.input_padding_right,
56 node->params.depthwise_convolution_2d.input_padding_bottom,
57 node->params.depthwise_convolution_2d.input_padding_left,
58 node->params.depthwise_convolution_2d.kernel_height,
59 node->params.depthwise_convolution_2d.kernel_width,
60 node->params.depthwise_convolution_2d.subsampling_height,
61 node->params.depthwise_convolution_2d.subsampling_width,
62 node->params.depthwise_convolution_2d.dilation_height,
63 node->params.depthwise_convolution_2d.dilation_width,
64 node->params.depthwise_convolution_2d.input_channels /* groups */,
65 1 /* group_input_channels */,
66 node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
67 node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
68 node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
69 filter_data,
70 bias_data,
71 node->activation.output_min,
72 node->activation.output_max,
73 node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
74 &opdata->operator_object);
75 } else {
76 assert(values[input_id].layout == xnn_layout_type_nhwc);
77 assert(values[output_id].layout == xnn_layout_type_nhwc);
78 switch (node->compute_type) {
79 case xnn_compute_type_fp32:
80 status = xnn_create_convolution2d_nhwc_f32(
81 node->params.depthwise_convolution_2d.input_padding_top,
82 node->params.depthwise_convolution_2d.input_padding_right,
83 node->params.depthwise_convolution_2d.input_padding_bottom,
84 node->params.depthwise_convolution_2d.input_padding_left,
85 node->params.depthwise_convolution_2d.kernel_height,
86 node->params.depthwise_convolution_2d.kernel_width,
87 node->params.depthwise_convolution_2d.subsampling_height,
88 node->params.depthwise_convolution_2d.subsampling_width,
89 node->params.depthwise_convolution_2d.dilation_height,
90 node->params.depthwise_convolution_2d.dilation_width,
91 node->params.depthwise_convolution_2d.input_channels /* groups */,
92 1 /* group_input_channels */,
93 node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
94 node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
95 node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
96 filter_data,
97 bias_data,
98 node->activation.output_min,
99 node->activation.output_max,
100 node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
101 &opdata->operator_object);
102 break;
103 #ifndef XNN_NO_F16_OPERATORS
104 case xnn_compute_type_fp16:
105 status = xnn_create_convolution2d_nhwc_f16(
106 node->params.depthwise_convolution_2d.input_padding_top,
107 node->params.depthwise_convolution_2d.input_padding_right,
108 node->params.depthwise_convolution_2d.input_padding_bottom,
109 node->params.depthwise_convolution_2d.input_padding_left,
110 node->params.depthwise_convolution_2d.kernel_height,
111 node->params.depthwise_convolution_2d.kernel_width,
112 node->params.depthwise_convolution_2d.subsampling_height,
113 node->params.depthwise_convolution_2d.subsampling_width,
114 node->params.depthwise_convolution_2d.dilation_height,
115 node->params.depthwise_convolution_2d.dilation_width,
116 node->params.depthwise_convolution_2d.input_channels /* groups */,
117 1 /* group_input_channels */,
118 node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
119 node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
120 node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
121 filter_data,
122 bias_data,
123 node->activation.output_min,
124 node->activation.output_max,
125 node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION | XNN_FLAG_FP32_STATIC_WEIGHTS,
126 &opdata->operator_object);
127 break;
128 #endif // XNN_NO_F16_OPERATORS
129 #ifndef XNN_NO_QS8_OPERATORS
130 case xnn_compute_type_qs8:
131 {
132 const float output_scale = values[output_id].quantization.scale;
133 const int32_t output_zero_point = values[output_id].quantization.zero_point;
134 const int8_t output_min =
135 (int8_t) lrintf(fminf(fmaxf(node->activation.output_min / output_scale + (float) output_zero_point, -128.0f), 127.0f));
136 const int8_t output_max =
137 (int8_t) lrintf(fminf(fmaxf(node->activation.output_max / output_scale + (float) output_zero_point, -128.0f), 127.0f));
138 status = xnn_create_convolution2d_nhwc_qs8(
139 node->params.depthwise_convolution_2d.input_padding_top,
140 node->params.depthwise_convolution_2d.input_padding_right,
141 node->params.depthwise_convolution_2d.input_padding_bottom,
142 node->params.depthwise_convolution_2d.input_padding_left,
143 node->params.depthwise_convolution_2d.kernel_height,
144 node->params.depthwise_convolution_2d.kernel_width,
145 node->params.depthwise_convolution_2d.subsampling_height,
146 node->params.depthwise_convolution_2d.subsampling_width,
147 node->params.depthwise_convolution_2d.dilation_height,
148 node->params.depthwise_convolution_2d.dilation_width,
149 node->params.depthwise_convolution_2d.input_channels /* groups */,
150 1 /* group_input_channels */,
151 node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
152 node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
153 node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
154 (int8_t) values[input_id].quantization.zero_point,
155 values[input_id].quantization.scale,
156 values[filter_id].quantization.scale,
157 filter_data,
158 bias_data,
159 (int8_t) output_zero_point,
160 output_scale, output_min, output_max,
161 node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
162 &opdata->operator_object);
163 break;
164 }
165 case xnn_compute_type_qc8:
166 {
167 const float output_scale = values[output_id].quantization.scale;
168 const int32_t output_zero_point = values[output_id].quantization.zero_point;
169 const int8_t output_min =
170 (int8_t) lrintf(fminf(fmaxf(node->activation.output_min / output_scale + (float) output_zero_point, -128.0f), 127.0f));
171 const int8_t output_max =
172 (int8_t) lrintf(fminf(fmaxf(node->activation.output_max / output_scale + (float) output_zero_point, -128.0f), 127.0f));
173 status = xnn_create_convolution2d_nhwc_qc8(
174 node->params.depthwise_convolution_2d.input_padding_top,
175 node->params.depthwise_convolution_2d.input_padding_right,
176 node->params.depthwise_convolution_2d.input_padding_bottom,
177 node->params.depthwise_convolution_2d.input_padding_left,
178 node->params.depthwise_convolution_2d.kernel_height,
179 node->params.depthwise_convolution_2d.kernel_width,
180 node->params.depthwise_convolution_2d.subsampling_height,
181 node->params.depthwise_convolution_2d.subsampling_width,
182 node->params.depthwise_convolution_2d.dilation_height,
183 node->params.depthwise_convolution_2d.dilation_width,
184 node->params.depthwise_convolution_2d.input_channels /* groups */,
185 1 /* group_input_channels */,
186 node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
187 node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
188 node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
189 (int8_t) values[input_id].quantization.zero_point,
190 values[input_id].quantization.scale,
191 values[filter_id].quantization.channelwise_scale,
192 filter_data,
193 bias_data,
194 (int8_t) output_zero_point,
195 output_scale, output_min, output_max,
196 node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
197 &opdata->operator_object);
198 break;
199 }
200 #endif // !defined(XNN_NO_QS8_OPERATORS)
201 #ifndef XNN_NO_QU8_OPERATORS
202 case xnn_compute_type_qu8:
203 {
204 const float output_scale = values[output_id].quantization.scale;
205 const int32_t output_zero_point = values[output_id].quantization.zero_point;
206 const uint8_t output_min =
207 (uint8_t) lrintf(fminf(fmaxf(node->activation.output_min / output_scale + (float) output_zero_point, 0.0f), 255.0f));
208 const uint8_t output_max =
209 (uint8_t) lrintf(fminf(fmaxf(node->activation.output_max / output_scale + (float) output_zero_point, 0.0f), 255.0f));
210 status = xnn_create_convolution2d_nhwc_qu8(
211 node->params.depthwise_convolution_2d.input_padding_top,
212 node->params.depthwise_convolution_2d.input_padding_right,
213 node->params.depthwise_convolution_2d.input_padding_bottom,
214 node->params.depthwise_convolution_2d.input_padding_left,
215 node->params.depthwise_convolution_2d.kernel_height,
216 node->params.depthwise_convolution_2d.kernel_width,
217 node->params.depthwise_convolution_2d.subsampling_height,
218 node->params.depthwise_convolution_2d.subsampling_width,
219 node->params.depthwise_convolution_2d.dilation_height,
220 node->params.depthwise_convolution_2d.dilation_width,
221 node->params.depthwise_convolution_2d.input_channels /* groups */,
222 1 /* group_input_channels */,
223 node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
224 node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
225 node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
226 (uint8_t) values[input_id].quantization.zero_point,
227 values[input_id].quantization.scale,
228 (uint8_t) values[filter_id].quantization.zero_point,
229 values[filter_id].quantization.scale,
230 filter_data,
231 bias_data,
232 (uint8_t) output_zero_point,
233 output_scale, output_min, output_max,
234 node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
235 &opdata->operator_object);
236 break;
237 }
238 #endif // !defined(XNN_NO_QU8_OPERATORS)
239 default:
240 XNN_UNREACHABLE;
241 }
242 }
243 if (status == xnn_status_success) {
244 opdata->batch_size = values[input_id].shape.dim[0];
245 opdata->input_height = values[input_id].shape.dim[1];
246 opdata->input_width = values[input_id].shape.dim[2];
247 opdata->inputs[0] = input_id;
248 opdata->outputs[0] = output_id;
249 }
250 return status;
251 }
252
// Binds the input and output tensor data to the previously created
// convolution operator and readies it for execution on the given threadpool.
// Dispatches on the operator type recorded at creation time.
static enum xnn_status setup_convolution_operator(
  const struct xnn_operator_data* opdata,
  const struct xnn_blob* blobs,
  size_t num_blobs,
  pthreadpool_t threadpool)
{
  const uint32_t input_id = opdata->inputs[0];
  assert(input_id != XNN_INVALID_VALUE_ID);
  assert(input_id < num_blobs);

  const uint32_t output_id = opdata->outputs[0];
  assert(output_id != XNN_INVALID_VALUE_ID);
  assert(output_id < num_blobs);

  const struct xnn_blob* input_blob = blobs + input_id;
  const void* input_data = input_blob->data;
  assert(input_data != NULL);

  const struct xnn_blob* output_blob = blobs + output_id;
  void* output_data = output_blob->data;
  assert(output_data != NULL);

  switch (opdata->operator_object->type) {
    case xnn_operator_type_convolution_nchw_f32:
      return xnn_setup_convolution2d_nchw_f32(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
    case xnn_operator_type_convolution_nhwc_f32:
      return xnn_setup_convolution2d_nhwc_f32(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
#ifndef XNN_NO_F16_OPERATORS
    case xnn_operator_type_convolution_nhwc_f16:
      return xnn_setup_convolution2d_nhwc_f16(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
#endif  // !defined(XNN_NO_F16_OPERATORS)
#ifndef XNN_NO_QS8_OPERATORS
    case xnn_operator_type_convolution_nhwc_qc8:
      return xnn_setup_convolution2d_nhwc_qc8(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
    case xnn_operator_type_convolution_nhwc_qs8:
      return xnn_setup_convolution2d_nhwc_qs8(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
#endif  // !defined(XNN_NO_QS8_OPERATORS)
#ifndef XNN_NO_QU8_OPERATORS
    case xnn_operator_type_convolution_nhwc_qu8:
      return xnn_setup_convolution2d_nhwc_qu8(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
#endif  // !defined(XNN_NO_QU8_OPERATORS)
    default:
      XNN_UNREACHABLE;
      // Keep an explicit return on this path: XNN_UNREACHABLE may expand to
      // a no-op in some build configurations, and a non-void function must
      // not fall off the end.
      return xnn_status_invalid_parameter;
  }
}
346
validate_datatypes_with_bias(enum xnn_datatype input_datatype,enum xnn_datatype filter_datatype,enum xnn_datatype bias_datatype,enum xnn_datatype output_datatype)347 static inline enum xnn_compute_type validate_datatypes_with_bias(
348 enum xnn_datatype input_datatype,
349 enum xnn_datatype filter_datatype,
350 enum xnn_datatype bias_datatype,
351 enum xnn_datatype output_datatype)
352 {
353 switch (filter_datatype) {
354 case xnn_datatype_fp32:
355 if (input_datatype == xnn_datatype_fp32 &&
356 bias_datatype == xnn_datatype_fp32 &&
357 output_datatype == xnn_datatype_fp32)
358 {
359 return xnn_compute_type_fp32;
360 }
361 break;
362 #ifndef XNN_NO_QS8_OPERATORS
363 case xnn_datatype_qint8:
364 if (input_datatype == xnn_datatype_qint8 &&
365 bias_datatype == xnn_datatype_qint32 &&
366 output_datatype == xnn_datatype_qint8)
367 {
368 return xnn_compute_type_qs8;
369 }
370 break;
371 case xnn_datatype_qcint8:
372 if (input_datatype == xnn_datatype_qint8 &&
373 bias_datatype == xnn_datatype_qcint32 &&
374 output_datatype == xnn_datatype_qint8)
375 {
376 return xnn_compute_type_qc8;
377 }
378 break;
379 #endif // !defined(XNN_NO_QS8_OPERATORS)
380 #ifndef XNN_NO_QU8_OPERATORS
381 case xnn_datatype_quint8:
382 if (input_datatype == xnn_datatype_quint8 &&
383 bias_datatype == xnn_datatype_qint32 &&
384 output_datatype == xnn_datatype_quint8)
385 {
386 return xnn_compute_type_qu8;
387 }
388 break;
389 #endif // !defined(XNN_NO_QU8_OPERATORS)
390 default:
391 XNN_UNREACHABLE;
392 }
393 return xnn_compute_type_invalid;
394 }
395
validate_datatypes_without_bias(enum xnn_datatype input_datatype,enum xnn_datatype filter_datatype,enum xnn_datatype output_datatype)396 static inline enum xnn_compute_type validate_datatypes_without_bias(
397 enum xnn_datatype input_datatype,
398 enum xnn_datatype filter_datatype,
399 enum xnn_datatype output_datatype)
400 {
401 switch (filter_datatype) {
402 case xnn_datatype_fp32:
403 if (input_datatype == xnn_datatype_fp32 && output_datatype == xnn_datatype_fp32) {
404 return xnn_compute_type_fp32;
405 }
406 break;
407 #ifndef XNN_NO_QS8_OPERATORS
408 case xnn_datatype_qint8:
409 if (input_datatype == xnn_datatype_qint8 && output_datatype == xnn_datatype_qint8) {
410 return xnn_compute_type_qs8;
411 }
412 break;
413 case xnn_datatype_qcint8:
414 if (input_datatype == xnn_datatype_qint8 && output_datatype == xnn_datatype_qint8) {
415 return xnn_compute_type_qc8;
416 }
417 break;
418 #endif // !defined(XNN_NO_QS8_OPERATORS)
419 #ifndef XNN_NO_QU8_OPERATORS
420 case xnn_datatype_quint8:
421 if (input_datatype == xnn_datatype_quint8 && output_datatype == xnn_datatype_quint8) {
422 return xnn_compute_type_qu8;
423 }
424 break;
425 #endif // !defined(XNN_NO_QU8_OPERATORS)
426 default:
427 XNN_UNREACHABLE;
428 }
429 return xnn_compute_type_invalid;
430 }
431
// Defines a 2D depthwise convolution node in the subgraph. Validates the
// convolution parameters, the input/filter/bias/output value IDs and
// datatypes, converts TensorFlow SAME padding to explicit padding when the
// subsampling is 1x1, and appends a node configured with the matching
// compute type. Returns xnn_status_success, or a descriptive error status
// after logging the failure.
enum xnn_status xnn_define_depthwise_convolution_2d(
  xnn_subgraph_t subgraph,
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t kernel_height,
  uint32_t kernel_width,
  uint32_t subsampling_height,
  uint32_t subsampling_width,
  uint32_t dilation_height,
  uint32_t dilation_width,
  uint32_t depth_multiplier,
  size_t input_channels,
  float output_min,
  float output_max,
  uint32_t input_id,
  uint32_t filter_id,
  uint32_t bias_id,
  uint32_t output_id,
  uint32_t flags)
{
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to define %s operator: XNNPACK is not initialized",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d));
    return xnn_status_uninitialized;
  }

  if (kernel_width == 0 || kernel_height == 0) {
    xnn_log_error(
      "failed to define %s operator with %" PRIu32 "x%" PRIu32 " kernel: kernel dimensions must be non-zero",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), kernel_width, kernel_height);
    return xnn_status_invalid_parameter;
  }

  if (subsampling_width == 0 || subsampling_height == 0) {
    xnn_log_error(
      "failed to define %s operator with %" PRIu32 "x%" PRIu32 " subsampling: subsampling dimensions must be non-zero",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), subsampling_width, subsampling_height);
    return xnn_status_invalid_parameter;
  }

  if (dilation_width == 0 || dilation_height == 0) {
    xnn_log_error(
      "failed to define %s operator with %" PRIu32 "x%" PRIu32 " dilation: dilation dimensions must be non-zero",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), dilation_width, dilation_height);
    return xnn_status_invalid_parameter;
  }

  if (depth_multiplier == 0) {
    xnn_log_error(
      "failed to define %s operator with %" PRIu32 " depth multiplier: depth multiplier must be non-zero",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), depth_multiplier);
    return xnn_status_invalid_parameter;
  }

  if (input_channels == 0) {
    xnn_log_error(
      "failed to define %s operator with %zu input channels: number of channels must be non-zero",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_channels);
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_min)) {
    xnn_log_error(
      "failed to define %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to define %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d));
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to define %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  const uint32_t supported_flags = XNN_FLAG_TENSORFLOW_SAME_PADDING;
  const uint32_t invalid_flags = flags & ~supported_flags;
  if (invalid_flags != 0) {
    xnn_log_error(
      "failed to define %s operator with 0x%08" PRIx32 " flags: invalid flags 0x%08" PRIx32,
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), flags, invalid_flags);
    return xnn_status_invalid_parameter;
  }

  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0 && any_padding) {
    xnn_log_error(
      "failed to define %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
      "TensorFlow SAME padding can't be combined with explicit padding specification",
      // Report the correct node type: this is a depthwise convolution.
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d),
      input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
    return xnn_status_invalid_parameter;
  }

  // Convert TensorFlow SAME padding to explicit padding specification whenever possible
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0 && (subsampling_height | subsampling_width) == 1) {
    flags &= ~XNN_FLAG_TENSORFLOW_SAME_PADDING;
    const uint32_t padding_height = (kernel_height - 1) * dilation_height;
    const uint32_t padding_width = (kernel_width - 1) * dilation_width;
    input_padding_left = padding_width / 2;
    input_padding_top = padding_height / 2;
    input_padding_right = padding_width - input_padding_left;
    input_padding_bottom = padding_height - input_padding_top;
  }

  if (input_id >= subgraph->num_values) {
    xnn_log_error(
      "failed to define %s operator with input ID #%" PRIu32 ": invalid Value ID",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_id);
    return xnn_status_invalid_parameter;
  }

  const struct xnn_value* input_value = &subgraph->values[input_id];
  if (input_value->type != xnn_value_type_dense_tensor) {
    xnn_log_error(
      "failed to define %s operator with input ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_id, input_value->type);
    return xnn_status_invalid_parameter;
  }

  switch (input_value->datatype) {
    case xnn_datatype_fp32:
#ifndef XNN_NO_QS8_OPERATORS
    case xnn_datatype_qint8:
#endif  // !defined(XNN_NO_QS8_OPERATORS)
#ifndef XNN_NO_QU8_OPERATORS
    case xnn_datatype_quint8:
#endif  // !defined(XNN_NO_QU8_OPERATORS)
      break;
    default:
      xnn_log_error(
        "failed to define %s operator with input ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
        xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_id,
        xnn_datatype_to_string(input_value->datatype), input_value->datatype);
      return xnn_status_invalid_parameter;
  }

  if (filter_id >= subgraph->num_values) {
    xnn_log_error(
      "failed to define %s operator with filter ID #%" PRIu32 ": invalid Value ID",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id);
    return xnn_status_invalid_parameter;
  }

  const struct xnn_value* filter_value = &subgraph->values[filter_id];
  if (filter_value->type != xnn_value_type_dense_tensor) {
    xnn_log_error(
      "failed to define %s operator with filter ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id, filter_value->type);
    return xnn_status_invalid_parameter;
  }

  if (filter_value->data == NULL) {
    xnn_log_error(
      "failed to define %s operator with filter ID #%" PRIu32 ": non-static Value",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id);
    return xnn_status_invalid_parameter;
  }

  switch (filter_value->datatype) {
    case xnn_datatype_fp32:
      break;
#ifndef XNN_NO_QS8_OPERATORS
    case xnn_datatype_qint8:
      if (filter_value->quantization.zero_point != 0) {
        xnn_log_error(
          "failed to define %s operator with filter ID #%" PRIu32 ": unsupported quantization zero point %" PRId32 " for datatype %s",
          xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id,
          filter_value->quantization.zero_point, xnn_datatype_to_string(filter_value->datatype));
        // A non-zero filter zero point is an error: reject the definition
        // instead of only logging and continuing.
        return xnn_status_invalid_parameter;
      }
      break;
    case xnn_datatype_qcint8:
      break;
#endif  // !defined(XNN_NO_QS8_OPERATORS)
#ifndef XNN_NO_QU8_OPERATORS
    case xnn_datatype_quint8:
      break;
#endif  // !defined(XNN_NO_QU8_OPERATORS)
    default:
      xnn_log_error(
        "failed to define %s operator with filter ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
        xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id,
        xnn_datatype_to_string(filter_value->datatype), filter_value->datatype);
      return xnn_status_invalid_parameter;
  }

  const struct xnn_value* bias_value = NULL;
  if (bias_id != XNN_INVALID_VALUE_ID) {
    if (bias_id >= subgraph->num_values) {
      xnn_log_error(
        "failed to define %s operator with bias ID #%" PRIu32 ": invalid Value ID",
        xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), bias_id);
      return xnn_status_invalid_parameter;
    }

    bias_value = &subgraph->values[bias_id];
    if (bias_value->type != xnn_value_type_dense_tensor) {
      xnn_log_error(
        "failed to define %s operator with bias ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
        xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), bias_id, bias_value->type);
      return xnn_status_invalid_parameter;
    }

    if (bias_value->data == NULL) {
      xnn_log_error(
        "failed to define %s operator with bias ID #%" PRIu32 ": non-static Value",
        xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), bias_id);
      return xnn_status_invalid_parameter;
    }

    switch (bias_value->datatype) {
      case xnn_datatype_fp32:
#if !defined(XNN_NO_QS8_OPERATORS) || !defined(XNN_NO_QU8_OPERATORS)
      case xnn_datatype_qint32:
#endif  // !defined(XNN_NO_QS8_OPERATORS) || !defined(XNN_NO_QU8_OPERATORS)
#ifndef XNN_NO_QS8_OPERATORS
      case xnn_datatype_qcint32:
#endif  // !defined(XNN_NO_QS8_OPERATORS)
        break;
      default:
        xnn_log_error(
          "failed to define %s operator with bias ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
          xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), bias_id,
          xnn_datatype_to_string(bias_value->datatype), bias_value->datatype);
        return xnn_status_invalid_parameter;
    }
  }

  if (output_id >= subgraph->num_values) {
    xnn_log_error(
      "failed to define %s operator with output ID #%" PRIu32 ": invalid Value ID",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), output_id);
    return xnn_status_invalid_parameter;
  }

  const struct xnn_value* output_value = &subgraph->values[output_id];
  if (output_value->type != xnn_value_type_dense_tensor) {
    xnn_log_error(
      "failed to define %s operator with output ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
      xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), output_id, output_value->type);
    return xnn_status_invalid_parameter;
  }

  switch (output_value->datatype) {
    case xnn_datatype_fp32:
#ifndef XNN_NO_QS8_OPERATORS
    case xnn_datatype_qint8:
#endif  // !defined(XNN_NO_QS8_OPERATORS)
#ifndef XNN_NO_QU8_OPERATORS
    case xnn_datatype_quint8:
#endif  // !defined(XNN_NO_QU8_OPERATORS)
      break;
    default:
      xnn_log_error(
        "failed to define %s operator with output ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
        xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), output_id,
        xnn_datatype_to_string(output_value->datatype), output_value->datatype);
      return xnn_status_invalid_parameter;
  }

  // Cross-check the input/filter/bias/output datatype combination and derive
  // the compute type used to select the operator constructor.
  enum xnn_compute_type compute_type = xnn_compute_type_invalid;
  if (bias_value != NULL) {
    compute_type = validate_datatypes_with_bias(
      input_value->datatype, filter_value->datatype, bias_value->datatype, output_value->datatype);
    if (compute_type == xnn_compute_type_invalid) {
      xnn_log_error(
        "failed to define %s operator with input ID #%" PRIu32 ", filter ID #%" PRIu32 ", bias ID #%" PRIu32 ", and output ID #%" PRIu32
        ": mismatching datatypes across input (%s), filter (%s), bias (%s), and output (%s)",
        xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_id, filter_id, bias_id, output_id,
        xnn_datatype_to_string(input_value->datatype),
        xnn_datatype_to_string(filter_value->datatype),
        xnn_datatype_to_string(bias_value->datatype),
        xnn_datatype_to_string(output_value->datatype));
      return xnn_status_invalid_parameter;
    }
  } else {
    compute_type = validate_datatypes_without_bias(input_value->datatype, filter_value->datatype, output_value->datatype);
    if (compute_type == xnn_compute_type_invalid) {
      xnn_log_error(
        "failed to define %s operator with input ID #%" PRIu32 ", filter ID #%" PRIu32 ", and output ID #%" PRIu32
        ": mismatching datatypes across input (%s), filter (%s), and output (%s)",
        xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_id, filter_id, output_id,
        xnn_datatype_to_string(input_value->datatype),
        xnn_datatype_to_string(filter_value->datatype),
        xnn_datatype_to_string(output_value->datatype));
      return xnn_status_invalid_parameter;
    }
  }

#ifndef XNN_NO_QS8_OPERATORS
  if (filter_value->datatype == xnn_datatype_qcint8) {
    // Channelwise-quantized filters must be quantized along the last
    // (output channel) dimension; channelwise biases along dimension 0.
    if (filter_value->quantization.channel_dimension != filter_value->shape.num_dims - 1) {
      xnn_log_error(
        "failed to define %s operator with filter ID #%" PRIu32 ": invalid channel dimension %zu",
        // Report the correct node type and the filter's own ID (the original
        // message logged input_id for a filter error).
        xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id,
        filter_value->quantization.channel_dimension);
      return xnn_status_invalid_parameter;
    }

    if (bias_value != NULL) {
      assert(bias_value->datatype == xnn_datatype_qcint32);
      if (bias_value->quantization.channel_dimension != 0) {
        xnn_log_error(
          "failed to define %s operator with bias ID #%" PRIu32 ": invalid channel dimension %zu",
          xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), bias_id,
          bias_value->quantization.channel_dimension);
        return xnn_status_invalid_parameter;
      }
    }
  }
#endif  // !defined(XNN_NO_QS8_OPERATORS)

  struct xnn_node* node = xnn_subgraph_new_node(subgraph);
  if (node == NULL) {
    return xnn_status_out_of_memory;
  }

  node->type = xnn_node_type_depthwise_convolution_2d;
  node->compute_type = compute_type;
  node->params.depthwise_convolution_2d.input_padding_top = input_padding_top;
  node->params.depthwise_convolution_2d.input_padding_right = input_padding_right;
  node->params.depthwise_convolution_2d.input_padding_bottom = input_padding_bottom;
  node->params.depthwise_convolution_2d.input_padding_left = input_padding_left;
  node->params.depthwise_convolution_2d.kernel_height = kernel_height;
  node->params.depthwise_convolution_2d.kernel_width = kernel_width;
  node->params.depthwise_convolution_2d.subsampling_height = subsampling_height;
  node->params.depthwise_convolution_2d.subsampling_width = subsampling_width;
  node->params.depthwise_convolution_2d.dilation_height = dilation_height;
  node->params.depthwise_convolution_2d.dilation_width = dilation_width;
  node->params.depthwise_convolution_2d.depth_multiplier = depth_multiplier;
  node->params.depthwise_convolution_2d.input_channels = input_channels;
  node->activation.output_min = output_min;
  node->activation.output_max = output_max;
  // The bias input slot is occupied only when a valid bias ID was supplied.
  node->num_inputs = 2 + (size_t) (bias_id != XNN_INVALID_VALUE_ID);
  node->inputs[0] = input_id;
  node->inputs[1] = filter_id;
  node->inputs[2] = bias_id;
  node->num_outputs = 1;
  node->outputs[0] = output_id;
  node->flags = flags;

  node->create = create_convolution_operator;
  node->setup = setup_convolution_operator;

  return xnn_status_success;
}
785