• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack.h>
#include <xnnpack/log.h>
#include <xnnpack/params.h>
#include <xnnpack/subgraph.h>
14 
15 
// Creates the underlying XNNPACK convolution operator for a depthwise
// convolution 2D subgraph node.
//
// A depthwise convolution is expressed here as a grouped convolution with
// groups = input_channels, group_input_channels = 1, and
// group_output_channels = depth_multiplier (see the arguments passed to the
// xnn_create_convolution2d_* calls below).
//
// node       - the subgraph node; has 2 inputs (input, filter) or
//              3 inputs (input, filter, bias), and exactly 1 output.
// values     - the subgraph's value table, indexed by the node's input/output IDs.
// num_values - number of entries in values (used only in assertions).
// opdata     - receives the created operator object and, on success, the
//              cached batch size and input height/width for later setup.
//
// Returns the status of the xnn_create_convolution2d_* call that was made.
static enum xnn_status create_convolution_operator(
  const struct xnn_node* node,
  const struct xnn_value* values,
  size_t num_values,
  struct xnn_operator_data* opdata)
{
  assert(node->num_inputs >= 2);
  assert(node->num_inputs <= 3);
  const uint32_t input_id = node->inputs[0];
  assert(input_id != XNN_INVALID_VALUE_ID);
  assert(input_id < num_values);
  const uint32_t filter_id = node->inputs[1];
  assert(filter_id != XNN_INVALID_VALUE_ID);
  assert(filter_id < num_values);

  assert(node->num_outputs == 1);
  const uint32_t output_id = node->outputs[0];
  assert(output_id != XNN_INVALID_VALUE_ID);
  assert(output_id < num_values);

  // The filter must be a static value; a non-NULL data pointer was validated
  // when the node was defined.
  const void* filter_data = values[filter_id].data;
  assert(filter_data != NULL);

  const void* bias_data = NULL;
  if (node->num_inputs > 2) {
    // The optional third input is the static bias tensor.
    const uint32_t bias_id = node->inputs[2];
    assert(bias_id != XNN_INVALID_VALUE_ID);
    assert(bias_id < num_values);

    bias_data = values[bias_id].data;
    assert(bias_data != NULL);
  }

  enum xnn_status status;
  if (values[output_id].layout == xnn_layout_type_nchw) {
    // NCHW layout is supported only for FP32 compute; input and output must
    // both be NCHW.
    assert(values[input_id].layout == xnn_layout_type_nchw);
    assert(node->compute_type == xnn_compute_type_fp32);
    status = xnn_create_convolution2d_nchw_f32(
      node->params.depthwise_convolution_2d.input_padding_top,
      node->params.depthwise_convolution_2d.input_padding_right,
      node->params.depthwise_convolution_2d.input_padding_bottom,
      node->params.depthwise_convolution_2d.input_padding_left,
      node->params.depthwise_convolution_2d.kernel_height,
      node->params.depthwise_convolution_2d.kernel_width,
      node->params.depthwise_convolution_2d.subsampling_height,
      node->params.depthwise_convolution_2d.subsampling_width,
      node->params.depthwise_convolution_2d.dilation_height,
      node->params.depthwise_convolution_2d.dilation_width,
      node->params.depthwise_convolution_2d.input_channels /* groups */,
      1 /* group_input_channels */,
      node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
      node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
      node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
      filter_data,
      bias_data,
      node->activation.output_min,
      node->activation.output_max,
      node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
      &opdata->operator_object);
  } else {
    // NHWC layout: dispatch on the node's compute type.
    assert(values[input_id].layout == xnn_layout_type_nhwc);
    assert(values[output_id].layout == xnn_layout_type_nhwc);
    switch (node->compute_type) {
      case xnn_compute_type_fp32:
        status = xnn_create_convolution2d_nhwc_f32(
          node->params.depthwise_convolution_2d.input_padding_top,
          node->params.depthwise_convolution_2d.input_padding_right,
          node->params.depthwise_convolution_2d.input_padding_bottom,
          node->params.depthwise_convolution_2d.input_padding_left,
          node->params.depthwise_convolution_2d.kernel_height,
          node->params.depthwise_convolution_2d.kernel_width,
          node->params.depthwise_convolution_2d.subsampling_height,
          node->params.depthwise_convolution_2d.subsampling_width,
          node->params.depthwise_convolution_2d.dilation_height,
          node->params.depthwise_convolution_2d.dilation_width,
          node->params.depthwise_convolution_2d.input_channels /* groups */,
          1 /* group_input_channels */,
          node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
          node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
          node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
          filter_data,
          bias_data,
          node->activation.output_min,
          node->activation.output_max,
          node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
          &opdata->operator_object);
        break;
#ifndef XNN_NO_F16_OPERATORS
      case xnn_compute_type_fp16:
        // Note the extra XNN_FLAG_FP32_STATIC_WEIGHTS flag: the static
        // filter/bias data here is FP32 (the flag name indicates the operator
        // converts it internally).
        status = xnn_create_convolution2d_nhwc_f16(
          node->params.depthwise_convolution_2d.input_padding_top,
          node->params.depthwise_convolution_2d.input_padding_right,
          node->params.depthwise_convolution_2d.input_padding_bottom,
          node->params.depthwise_convolution_2d.input_padding_left,
          node->params.depthwise_convolution_2d.kernel_height,
          node->params.depthwise_convolution_2d.kernel_width,
          node->params.depthwise_convolution_2d.subsampling_height,
          node->params.depthwise_convolution_2d.subsampling_width,
          node->params.depthwise_convolution_2d.dilation_height,
          node->params.depthwise_convolution_2d.dilation_width,
          node->params.depthwise_convolution_2d.input_channels /* groups */,
          1 /* group_input_channels */,
          node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
          node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
          node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
          filter_data,
          bias_data,
          node->activation.output_min,
          node->activation.output_max,
          node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION | XNN_FLAG_FP32_STATIC_WEIGHTS,
          &opdata->operator_object);
        break;
#endif  // XNN_NO_F16_OPERATORS
#ifndef XNN_NO_QS8_OPERATORS
      case xnn_compute_type_qs8:
      {
        // Convert the float activation bounds into quantized int8 clamping
        // limits: scale by 1/output_scale, shift by the zero point, and clamp
        // to the int8 range [-128, 127].
        const float output_scale = values[output_id].quantization.scale;
        const int32_t output_zero_point = values[output_id].quantization.zero_point;
        const int8_t output_min =
          (int8_t) lrintf(fminf(fmaxf(node->activation.output_min / output_scale + (float) output_zero_point, -128.0f), 127.0f));
        const int8_t output_max =
          (int8_t) lrintf(fminf(fmaxf(node->activation.output_max / output_scale + (float) output_zero_point, -128.0f), 127.0f));
        status = xnn_create_convolution2d_nhwc_qs8(
          node->params.depthwise_convolution_2d.input_padding_top,
          node->params.depthwise_convolution_2d.input_padding_right,
          node->params.depthwise_convolution_2d.input_padding_bottom,
          node->params.depthwise_convolution_2d.input_padding_left,
          node->params.depthwise_convolution_2d.kernel_height,
          node->params.depthwise_convolution_2d.kernel_width,
          node->params.depthwise_convolution_2d.subsampling_height,
          node->params.depthwise_convolution_2d.subsampling_width,
          node->params.depthwise_convolution_2d.dilation_height,
          node->params.depthwise_convolution_2d.dilation_width,
          node->params.depthwise_convolution_2d.input_channels /* groups */,
          1 /* group_input_channels */,
          node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
          node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
          node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
          (int8_t) values[input_id].quantization.zero_point,
          values[input_id].quantization.scale,
          values[filter_id].quantization.scale,
          filter_data,
          bias_data,
          (int8_t) output_zero_point,
          output_scale, output_min, output_max,
          node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
          &opdata->operator_object);
        break;
      }
      case xnn_compute_type_qc8:
      {
        // Same output clamping as the QS8 case; the difference is the filter
        // uses per-channel scales (quantization.channelwise_scale) below.
        const float output_scale = values[output_id].quantization.scale;
        const int32_t output_zero_point = values[output_id].quantization.zero_point;
        const int8_t output_min =
          (int8_t) lrintf(fminf(fmaxf(node->activation.output_min / output_scale + (float) output_zero_point, -128.0f), 127.0f));
        const int8_t output_max =
          (int8_t) lrintf(fminf(fmaxf(node->activation.output_max / output_scale + (float) output_zero_point, -128.0f), 127.0f));
        status = xnn_create_convolution2d_nhwc_qc8(
          node->params.depthwise_convolution_2d.input_padding_top,
          node->params.depthwise_convolution_2d.input_padding_right,
          node->params.depthwise_convolution_2d.input_padding_bottom,
          node->params.depthwise_convolution_2d.input_padding_left,
          node->params.depthwise_convolution_2d.kernel_height,
          node->params.depthwise_convolution_2d.kernel_width,
          node->params.depthwise_convolution_2d.subsampling_height,
          node->params.depthwise_convolution_2d.subsampling_width,
          node->params.depthwise_convolution_2d.dilation_height,
          node->params.depthwise_convolution_2d.dilation_width,
          node->params.depthwise_convolution_2d.input_channels /* groups */,
          1 /* group_input_channels */,
          node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
          node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
          node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
          (int8_t) values[input_id].quantization.zero_point,
          values[input_id].quantization.scale,
          values[filter_id].quantization.channelwise_scale,
          filter_data,
          bias_data,
          (int8_t) output_zero_point,
          output_scale, output_min, output_max,
          node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
          &opdata->operator_object);
        break;
      }
#endif  // !defined(XNN_NO_QS8_OPERATORS)
#ifndef XNN_NO_QU8_OPERATORS
      case xnn_compute_type_qu8:
      {
        // Unsigned variant: clamp the converted activation bounds to the
        // uint8 range [0, 255], and pass the filter zero point as well.
        const float output_scale = values[output_id].quantization.scale;
        const int32_t output_zero_point = values[output_id].quantization.zero_point;
        const uint8_t output_min =
          (uint8_t) lrintf(fminf(fmaxf(node->activation.output_min / output_scale + (float) output_zero_point, 0.0f), 255.0f));
        const uint8_t output_max =
          (uint8_t) lrintf(fminf(fmaxf(node->activation.output_max / output_scale + (float) output_zero_point, 0.0f), 255.0f));
        status = xnn_create_convolution2d_nhwc_qu8(
          node->params.depthwise_convolution_2d.input_padding_top,
          node->params.depthwise_convolution_2d.input_padding_right,
          node->params.depthwise_convolution_2d.input_padding_bottom,
          node->params.depthwise_convolution_2d.input_padding_left,
          node->params.depthwise_convolution_2d.kernel_height,
          node->params.depthwise_convolution_2d.kernel_width,
          node->params.depthwise_convolution_2d.subsampling_height,
          node->params.depthwise_convolution_2d.subsampling_width,
          node->params.depthwise_convolution_2d.dilation_height,
          node->params.depthwise_convolution_2d.dilation_width,
          node->params.depthwise_convolution_2d.input_channels /* groups */,
          1 /* group_input_channels */,
          node->params.depthwise_convolution_2d.depth_multiplier /* group_output_channels */,
          node->params.depthwise_convolution_2d.input_channels /* input_channel_stride */,
          node->params.depthwise_convolution_2d.input_channels * node->params.depthwise_convolution_2d.depth_multiplier /* output_channel_stride */,
          (uint8_t) values[input_id].quantization.zero_point,
          values[input_id].quantization.scale,
          (uint8_t) values[filter_id].quantization.zero_point,
          values[filter_id].quantization.scale,
          filter_data,
          bias_data,
          (uint8_t) output_zero_point,
          output_scale, output_min, output_max,
          node->flags | XNN_FLAG_DEPTHWISE_CONVOLUTION,
          &opdata->operator_object);
        break;
      }
#endif  // !defined(XNN_NO_QU8_OPERATORS)
      default:
        XNN_UNREACHABLE;
    }
  }
  if (status == xnn_status_success) {
    // Cache the input dimensions for setup_convolution_operator.
    // Assumes shape dims are [batch, height, width, ...] here — the subgraph
    // appears to keep logical NHWC shapes regardless of physical layout;
    // TODO(review): confirm for the NCHW path.
    opdata->batch_size = values[input_id].shape.dim[0];
    opdata->input_height = values[input_id].shape.dim[1];
    opdata->input_width = values[input_id].shape.dim[2];
    opdata->inputs[0] = input_id;
    opdata->outputs[0] = output_id;
  }
  return status;
}
252 
// Binds runtime data pointers and cached input dimensions to a previously
// created depthwise convolution operator.
//
// opdata     - operator data produced by create_convolution_operator; holds
//              the operator object, batch size, input height/width, and the
//              input/output value IDs.
// blobs      - the runtime's blob table, indexed by value ID.
// num_blobs  - number of entries in blobs (used only in assertions).
// threadpool - threadpool forwarded to the xnn_setup_convolution2d_* call.
//
// Returns the status of the matching xnn_setup_convolution2d_* call.
// Note: each case returns directly; the original trailing `break` statements
// were unreachable and have been removed.
static enum xnn_status setup_convolution_operator(
  const struct xnn_operator_data* opdata,
  const struct xnn_blob* blobs,
  size_t num_blobs,
  pthreadpool_t threadpool)
{
  const uint32_t input_id = opdata->inputs[0];
  assert(input_id != XNN_INVALID_VALUE_ID);
  assert(input_id < num_blobs);

  const uint32_t output_id = opdata->outputs[0];
  assert(output_id != XNN_INVALID_VALUE_ID);
  assert(output_id < num_blobs);

  const struct xnn_blob* input_blob = blobs + input_id;
  const void* input_data = input_blob->data;
  assert(input_data != NULL);

  const struct xnn_blob* output_blob = blobs + output_id;
  void* output_data = output_blob->data;
  assert(output_data != NULL);

  // Dispatch on the concrete operator type chosen at creation time.
  switch (opdata->operator_object->type) {
    case xnn_operator_type_convolution_nchw_f32:
      return xnn_setup_convolution2d_nchw_f32(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
    case xnn_operator_type_convolution_nhwc_f32:
      return xnn_setup_convolution2d_nhwc_f32(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
#ifndef XNN_NO_F16_OPERATORS
    case xnn_operator_type_convolution_nhwc_f16:
      return xnn_setup_convolution2d_nhwc_f16(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
#endif  // !defined(XNN_NO_F16_OPERATORS)
#ifndef XNN_NO_QS8_OPERATORS
    case xnn_operator_type_convolution_nhwc_qc8:
      return xnn_setup_convolution2d_nhwc_qc8(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
    case xnn_operator_type_convolution_nhwc_qs8:
      return xnn_setup_convolution2d_nhwc_qs8(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
#endif  // !defined(XNN_NO_QS8_OPERATORS)
#ifndef XNN_NO_QU8_OPERATORS
    case xnn_operator_type_convolution_nhwc_qu8:
      return xnn_setup_convolution2d_nhwc_qu8(
        opdata->operator_object,
        opdata->batch_size,
        opdata->input_height,
        opdata->input_width,
        input_data,
        output_data,
        threadpool);
#endif  // !defined(XNN_NO_QU8_OPERATORS)
    default:
      // Operator type was set by create_convolution_operator; anything else
      // indicates internal corruption.
      XNN_UNREACHABLE;
  }
}
346 
validate_datatypes_with_bias(enum xnn_datatype input_datatype,enum xnn_datatype filter_datatype,enum xnn_datatype bias_datatype,enum xnn_datatype output_datatype)347 static inline enum xnn_compute_type validate_datatypes_with_bias(
348   enum xnn_datatype input_datatype,
349   enum xnn_datatype filter_datatype,
350   enum xnn_datatype bias_datatype,
351   enum xnn_datatype output_datatype)
352 {
353   switch (filter_datatype) {
354     case xnn_datatype_fp32:
355       if (input_datatype == xnn_datatype_fp32 &&
356           bias_datatype == xnn_datatype_fp32 &&
357           output_datatype == xnn_datatype_fp32)
358       {
359         return xnn_compute_type_fp32;
360       }
361       break;
362 #ifndef XNN_NO_QS8_OPERATORS
363     case xnn_datatype_qint8:
364       if (input_datatype == xnn_datatype_qint8 &&
365           bias_datatype == xnn_datatype_qint32 &&
366           output_datatype == xnn_datatype_qint8)
367       {
368         return xnn_compute_type_qs8;
369       }
370       break;
371     case xnn_datatype_qcint8:
372       if (input_datatype == xnn_datatype_qint8 &&
373           bias_datatype == xnn_datatype_qcint32 &&
374           output_datatype == xnn_datatype_qint8)
375       {
376         return xnn_compute_type_qc8;
377       }
378       break;
379 #endif  // !defined(XNN_NO_QS8_OPERATORS)
380 #ifndef XNN_NO_QU8_OPERATORS
381     case xnn_datatype_quint8:
382       if (input_datatype == xnn_datatype_quint8 &&
383           bias_datatype == xnn_datatype_qint32 &&
384           output_datatype == xnn_datatype_quint8)
385       {
386         return xnn_compute_type_qu8;
387       }
388       break;
389 #endif  // !defined(XNN_NO_QU8_OPERATORS)
390     default:
391       XNN_UNREACHABLE;
392   }
393   return xnn_compute_type_invalid;
394 }
395 
validate_datatypes_without_bias(enum xnn_datatype input_datatype,enum xnn_datatype filter_datatype,enum xnn_datatype output_datatype)396 static inline enum xnn_compute_type validate_datatypes_without_bias(
397   enum xnn_datatype input_datatype,
398   enum xnn_datatype filter_datatype,
399   enum xnn_datatype output_datatype)
400 {
401   switch (filter_datatype) {
402     case xnn_datatype_fp32:
403       if (input_datatype == xnn_datatype_fp32 && output_datatype == xnn_datatype_fp32) {
404         return xnn_compute_type_fp32;
405       }
406       break;
407 #ifndef XNN_NO_QS8_OPERATORS
408     case xnn_datatype_qint8:
409       if (input_datatype == xnn_datatype_qint8 && output_datatype == xnn_datatype_qint8) {
410         return xnn_compute_type_qs8;
411       }
412       break;
413     case xnn_datatype_qcint8:
414       if (input_datatype == xnn_datatype_qint8 && output_datatype == xnn_datatype_qint8) {
415         return xnn_compute_type_qc8;
416       }
417       break;
418 #endif  // !defined(XNN_NO_QS8_OPERATORS)
419 #ifndef XNN_NO_QU8_OPERATORS
420     case xnn_datatype_quint8:
421       if (input_datatype == xnn_datatype_quint8 && output_datatype == xnn_datatype_quint8) {
422         return xnn_compute_type_qu8;
423       }
424       break;
425 #endif  // !defined(XNN_NO_QU8_OPERATORS)
426     default:
427       XNN_UNREACHABLE;
428   }
429   return xnn_compute_type_invalid;
430 }
431 
xnn_define_depthwise_convolution_2d(xnn_subgraph_t subgraph,uint32_t input_padding_top,uint32_t input_padding_right,uint32_t input_padding_bottom,uint32_t input_padding_left,uint32_t kernel_height,uint32_t kernel_width,uint32_t subsampling_height,uint32_t subsampling_width,uint32_t dilation_height,uint32_t dilation_width,uint32_t depth_multiplier,size_t input_channels,float output_min,float output_max,uint32_t input_id,uint32_t filter_id,uint32_t bias_id,uint32_t output_id,uint32_t flags)432 enum xnn_status xnn_define_depthwise_convolution_2d(
433   xnn_subgraph_t subgraph,
434   uint32_t input_padding_top,
435   uint32_t input_padding_right,
436   uint32_t input_padding_bottom,
437   uint32_t input_padding_left,
438   uint32_t kernel_height,
439   uint32_t kernel_width,
440   uint32_t subsampling_height,
441   uint32_t subsampling_width,
442   uint32_t dilation_height,
443   uint32_t dilation_width,
444   uint32_t depth_multiplier,
445   size_t input_channels,
446   float output_min,
447   float output_max,
448   uint32_t input_id,
449   uint32_t filter_id,
450   uint32_t bias_id,
451   uint32_t output_id,
452   uint32_t flags)
453 {
454   if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
455     xnn_log_error("failed to define %s operator: XNNPACK is not initialized",
456       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d));
457     return xnn_status_uninitialized;
458   }
459 
460   if (kernel_width == 0 || kernel_height == 0) {
461     xnn_log_error(
462       "failed to define %s operator with %" PRIu32 "x%" PRIu32 " kernel: kernel dimensions must be non-zero",
463       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), kernel_width, kernel_height);
464     return xnn_status_invalid_parameter;
465   }
466 
467   if (subsampling_width == 0 || subsampling_height == 0) {
468     xnn_log_error(
469       "failed to define %s operator with %" PRIu32 "x%" PRIu32 " subsampling: subsampling dimensions must be non-zero",
470       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), subsampling_width, subsampling_height);
471     return xnn_status_invalid_parameter;
472   }
473 
474   if (dilation_width == 0 || dilation_height == 0) {
475     xnn_log_error(
476       "failed to define %s operator with %" PRIu32 "x%" PRIu32 " dilation: dilation dimensions must be non-zero",
477       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), dilation_width, dilation_height);
478     return xnn_status_invalid_parameter;
479   }
480 
481   if (depth_multiplier == 0) {
482     xnn_log_error(
483       "failed to define %s operator with %" PRIu32 " depth multiplier: depth multiplier must be non-zero",
484       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), depth_multiplier);
485     return xnn_status_invalid_parameter;
486   }
487 
488   if (input_channels == 0) {
489     xnn_log_error(
490       "failed to define %s operator with %zu input channels: number of channels must be non-zero",
491       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_channels);
492     return xnn_status_invalid_parameter;
493   }
494 
495   if (isnan(output_min)) {
496     xnn_log_error(
497       "failed to define %s operator with NaN output lower bound: lower bound must be non-NaN",
498       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d));
499     return xnn_status_invalid_parameter;
500   }
501 
502   if (isnan(output_max)) {
503     xnn_log_error(
504       "failed to define %s operator with NaN output upper bound: upper bound must be non-NaN",
505       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d));
506     return xnn_status_invalid_parameter;
507   }
508 
509   if (output_min >= output_max) {
510     xnn_log_error(
511       "failed to define %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
512       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), output_min, output_max);
513     return xnn_status_invalid_parameter;
514   }
515 
516   const uint32_t supported_flags = XNN_FLAG_TENSORFLOW_SAME_PADDING;
517   const uint32_t invalid_flags = flags & ~supported_flags;
518   if (invalid_flags != 0) {
519     xnn_log_error(
520       "failed to define %s operator with 0x%08" PRIx32 " flags: invalid flags 0x%08" PRIx32,
521       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), flags, invalid_flags);
522     return xnn_status_invalid_parameter;
523   }
524 
525   const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
526   if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0 && any_padding) {
527     xnn_log_error(
528       "failed to define %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
529       "TensorFlow SAME padding can't be combined with explicit padding specification",
530       xnn_node_type_to_string(xnn_node_type_convolution_2d),
531       input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
532     return xnn_status_invalid_parameter;
533   }
534 
535   // Convert TensorFlow SAME padding to explicit padding specification whenever possible
536   if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0 && (subsampling_height | subsampling_width) == 1) {
537     flags &= ~XNN_FLAG_TENSORFLOW_SAME_PADDING;
538     const uint32_t padding_height = (kernel_height - 1) * dilation_height;
539     const uint32_t padding_width = (kernel_width - 1) * dilation_width;
540     input_padding_left = padding_width / 2;
541     input_padding_top = padding_height / 2;
542     input_padding_right = padding_width - input_padding_left;
543     input_padding_bottom = padding_height - input_padding_top;
544   }
545 
546   if (input_id >= subgraph->num_values) {
547     xnn_log_error(
548       "failed to define %s operator with input ID #%" PRIu32 ": invalid Value ID",
549       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_id);
550     return xnn_status_invalid_parameter;
551   }
552 
553   const struct xnn_value* input_value = &subgraph->values[input_id];
554   if (input_value->type != xnn_value_type_dense_tensor) {
555     xnn_log_error(
556       "failed to define %s operator with input ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
557       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_id, input_value->type);
558     return xnn_status_invalid_parameter;
559   }
560 
561   switch (input_value->datatype) {
562     case xnn_datatype_fp32:
563 #ifndef XNN_NO_QS8_OPERATORS
564     case xnn_datatype_qint8:
565 #endif  // !defined(XNN_NO_QS8_OPERATORS)
566 #ifndef XNN_NO_QU8_OPERATORS
567     case xnn_datatype_quint8:
568 #endif  // !defined(XNN_NO_QU8_OPERATORS)
569       break;
570     default:
571       xnn_log_error(
572         "failed to define %s operator with input ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
573         xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_id,
574         xnn_datatype_to_string(input_value->datatype), input_value->datatype);
575       return xnn_status_invalid_parameter;
576   }
577 
578   if (filter_id >= subgraph->num_values) {
579     xnn_log_error(
580       "failed to define %s operator with filter ID #%" PRIu32 ": invalid Value ID",
581       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id);
582     return xnn_status_invalid_parameter;
583   }
584 
585   const struct xnn_value* filter_value = &subgraph->values[filter_id];
586   if (filter_value->type != xnn_value_type_dense_tensor) {
587     xnn_log_error(
588       "failed to define %s operator with filter ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
589       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id, filter_value->type);
590     return xnn_status_invalid_parameter;
591   }
592 
593   if (filter_value->data == NULL) {
594     xnn_log_error(
595       "failed to define %s operator with filter ID #%" PRIu32 ": non-static Value",
596       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id);
597     return xnn_status_invalid_parameter;
598   }
599 
600   switch (filter_value->datatype) {
601     case xnn_datatype_fp32:
602       break;
603 #ifndef XNN_NO_QS8_OPERATORS
604     case xnn_datatype_qint8:
605       if (filter_value->quantization.zero_point != 0) {
606         xnn_log_error(
607           "failed to define %s operator with filter ID #%" PRIu32 ": unsupported quantization zero point %" PRId32 " for datatype %s",
608           xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id,
609           filter_value->quantization.zero_point, xnn_datatype_to_string(filter_value->datatype));
610       }
611       break;
612     case xnn_datatype_qcint8:
613       break;
614 #endif  // !defined(XNN_NO_QS8_OPERATORS)
615 #ifndef XNN_NO_QU8_OPERATORS
616     case xnn_datatype_quint8:
617       break;
618 #endif  // !defined(XNN_NO_QU8_OPERATORS)
619     default:
620       xnn_log_error(
621         "failed to define %s operator with filter ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
622         xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), filter_id,
623         xnn_datatype_to_string(filter_value->datatype), filter_value->datatype);
624       return xnn_status_invalid_parameter;
625   }
626 
627   const struct xnn_value* bias_value = NULL;
628   if (bias_id != XNN_INVALID_VALUE_ID) {
629     if (bias_id >= subgraph->num_values) {
630       xnn_log_error(
631         "failed to define %s operator with bias ID #%" PRIu32 ": invalid Value ID",
632         xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), bias_id);
633       return xnn_status_invalid_parameter;
634     }
635 
636     bias_value = &subgraph->values[bias_id];
637     if (bias_value->type != xnn_value_type_dense_tensor) {
638       xnn_log_error(
639         "failed to define %s operator with bias ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
640         xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), bias_id, bias_value->type);
641       return xnn_status_invalid_parameter;
642     }
643 
644     if (bias_value->data == NULL) {
645       xnn_log_error(
646         "failed to define %s operator with bias ID #%" PRIu32 ": non-static Value",
647         xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), bias_id);
648       return xnn_status_invalid_parameter;
649     }
650 
651     switch (bias_value->datatype) {
652       case xnn_datatype_fp32:
653 #if !defined(XNN_NO_QS8_OPERATORS) || !defined(XNN_NO_QU8_OPERATORS)
654       case xnn_datatype_qint32:
655 #endif  // !defined(XNN_NO_QS8_OPERATORS) || !defined(XNN_NO_QU8_OPERATORS)
656 #ifndef XNN_NO_QS8_OPERATORS
657       case xnn_datatype_qcint32:
658 #endif  // !defined(XNN_NO_QS8_OPERATORS)
659         break;
660       default:
661         xnn_log_error(
662           "failed to define %s operator with bias ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
663           xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), bias_id,
664           xnn_datatype_to_string(bias_value->datatype), bias_value->datatype);
665         return xnn_status_invalid_parameter;
666     }
667   }
668 
669   if (output_id >= subgraph->num_values) {
670     xnn_log_error(
671       "failed to define %s operator with output ID #%" PRIu32 ": invalid Value ID",
672       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), output_id);
673     return xnn_status_invalid_parameter;
674   }
675 
676   const struct xnn_value* output_value = &subgraph->values[output_id];
677   if (output_value->type != xnn_value_type_dense_tensor) {
678     xnn_log_error(
679       "failed to define %s operator with output ID #%" PRIu32 ": unsupported Value type %d (expected dense tensor)",
680       xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), output_id, output_value->type);
681     return xnn_status_invalid_parameter;
682   }
683 
684   switch (output_value->datatype) {
685     case xnn_datatype_fp32:
686 #ifndef XNN_NO_QS8_OPERATORS
687     case xnn_datatype_qint8:
688 #endif  // !defined(XNN_NO_QS8_OPERATORS)
689 #ifndef XNN_NO_QU8_OPERATORS
690     case xnn_datatype_quint8:
691 #endif  // !defined(XNN_NO_QU8_OPERATORS)
692       break;
693     default:
694       xnn_log_error(
695         "failed to define %s operator with output ID #%" PRIu32 ": unsupported Value datatype %s (%d)",
696         xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), output_id,
697         xnn_datatype_to_string(output_value->datatype), output_value->datatype);
698       return xnn_status_invalid_parameter;
699   }
700 
701   enum xnn_compute_type compute_type = xnn_compute_type_invalid;
702   if (bias_value != NULL) {
703     compute_type = validate_datatypes_with_bias(
704       input_value->datatype, filter_value->datatype, bias_value->datatype, output_value->datatype);
705     if (compute_type == xnn_compute_type_invalid) {
706       xnn_log_error(
707         "failed to define %s operator with input ID #%" PRIu32 ", filter ID #%" PRIu32 ", bias ID #%" PRIu32 ", and output ID #%" PRIu32
708         ": mismatching datatypes across input (%s), filter (%s), bias (%s), and output (%s)",
709         xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_id, filter_id, bias_id, output_id,
710         xnn_datatype_to_string(input_value->datatype),
711         xnn_datatype_to_string(filter_value->datatype),
712         xnn_datatype_to_string(bias_value->datatype),
713         xnn_datatype_to_string(output_value->datatype));
714       return xnn_status_invalid_parameter;
715     }
716   } else {
717     compute_type = validate_datatypes_without_bias(input_value->datatype, filter_value->datatype, output_value->datatype);
718     if (compute_type == xnn_compute_type_invalid) {
719       xnn_log_error(
720         "failed to define %s operator with input ID #%" PRIu32 ", filter ID #%" PRIu32 ", and output ID #%" PRIu32
721         ": mismatching datatypes across input (%s), filter (%s), and output (%s)",
722         xnn_node_type_to_string(xnn_node_type_depthwise_convolution_2d), input_id, filter_id, output_id,
723         xnn_datatype_to_string(input_value->datatype),
724         xnn_datatype_to_string(filter_value->datatype),
725         xnn_datatype_to_string(output_value->datatype));
726       return xnn_status_invalid_parameter;
727     }
728   }
729 
730 #ifndef XNN_NO_QS8_OPERATORS
731   if (filter_value->datatype == xnn_datatype_qcint8) {
732     if (filter_value->quantization.channel_dimension != filter_value->shape.num_dims - 1) {
733       xnn_log_error(
734         "failed to define %s operator with filter ID #%" PRIu32 ": invalid channel dimension %zu",
735         xnn_node_type_to_string(xnn_node_type_convolution_2d), input_id, filter_value->quantization.channel_dimension);
736       return xnn_status_invalid_parameter;
737     }
738 
739     if (bias_value != NULL) {
740       assert(bias_value->datatype == xnn_datatype_qcint32);
741       if (bias_value->quantization.channel_dimension != 0) {
742         xnn_log_error(
743           "failed to define %s operator with bias ID #%" PRIu32 ": invalid channel dimension %zu",
744           xnn_node_type_to_string(xnn_node_type_convolution_2d), bias_id, bias_value->quantization.channel_dimension);
745         return xnn_status_invalid_parameter;
746       }
747     }
748   }
749 #endif  // !defined(XNN_NO_QS8_OPERATORS)
750 
751   struct xnn_node* node = xnn_subgraph_new_node(subgraph);
752   if (node == NULL) {
753     return xnn_status_out_of_memory;
754   }
755 
756   node->type = xnn_node_type_depthwise_convolution_2d;
757   node->compute_type = compute_type;
758   node->params.depthwise_convolution_2d.input_padding_top = input_padding_top;
759   node->params.depthwise_convolution_2d.input_padding_right = input_padding_right;
760   node->params.depthwise_convolution_2d.input_padding_bottom = input_padding_bottom;
761   node->params.depthwise_convolution_2d.input_padding_left = input_padding_left;
762   node->params.depthwise_convolution_2d.kernel_height = kernel_height;
763   node->params.depthwise_convolution_2d.kernel_width = kernel_width;
764   node->params.depthwise_convolution_2d.subsampling_height = subsampling_height;
765   node->params.depthwise_convolution_2d.subsampling_width = subsampling_width;
766   node->params.depthwise_convolution_2d.dilation_height = dilation_height;
767   node->params.depthwise_convolution_2d.dilation_width = dilation_width;
768   node->params.depthwise_convolution_2d.depth_multiplier = depth_multiplier;
769   node->params.depthwise_convolution_2d.input_channels = input_channels;
770   node->activation.output_min = output_min;
771   node->activation.output_max = output_max;
772   node->num_inputs = 2 + (size_t) (bias_id != XNN_INVALID_VALUE_ID);
773   node->inputs[0] = input_id;
774   node->inputs[1] = filter_id;
775   node->inputs[2] = bias_id;
776   node->num_outputs = 1;
777   node->outputs[0] = output_id;
778   node->flags = flags;
779 
780   node->create = create_convolution_operator;
781   node->setup = setup_convolution_operator;
782 
783   return xnn_status_success;
784 };
785