// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <inttypes.h>  // for the PRIu32 format macros used in error messages
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/log.h>
#include <xnnpack/math.h>
#include <xnnpack/normalization.h>
#include <xnnpack/operator.h>

/// Reorder the data in array using the indices in loop_order.
///
/// Changing the loop order can have dramatic performance implications.
static void reorder_array(
  size_t num_dims,
  const size_t loop_order[restrict XNN_MIN_ELEMENTS(1)],
  size_t array[restrict XNN_MIN_ELEMENTS(1)])
{
  size_t tmp[XNN_MAX_TENSOR_DIMS];
  memcpy(tmp, array, sizeof(size_t) * num_dims);
  for (size_t i = 0; i < num_dims; ++i) {
    array[i] = tmp[loop_order[i]];
  }
}
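
// Illustrative example (hypothetical values, not part of the library): each
// element moves as array[i] = old[loop_order[i]], so
//
//   size_t array[3] = {10, 20, 30};
//   const size_t loop_order[3] = {2, 0, 1};
//   reorder_array(3, loop_order, array);  // array is now {30, 10, 20}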

static enum xnn_status init_transpose_nd(
  uint32_t flags,
  uint32_t datatype_init_flags,
  enum xnn_operator_type operator_type,
  xnn_operator_t transpose_op)
{
  enum xnn_status status = xnn_status_unsupported_hardware;

  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error(
      "failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }
  transpose_op->flags = flags;
  transpose_op->type = operator_type;

  return xnn_status_success;

error:
  return status;
}

static enum xnn_status create_transpose_nd(
  uint32_t flags,
  uint32_t datatype_init_flags,
  enum xnn_operator_type operator_type,
  xnn_operator_t* transpose_op_out)
{
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    return status;
  }

  status = xnn_status_out_of_memory;
  xnn_operator_t transpose_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (transpose_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = init_transpose_nd(flags, datatype_init_flags, operator_type, transpose_op);
  if (status != xnn_status_success) {
    goto error;
  }
  *transpose_op_out = transpose_op;

  return xnn_status_success;

error:
  xnn_delete_operator(transpose_op);
  return status;
}

/// input_stride and output_stride are the number of elements between each
/// dimension, not the size of the dimension. This is because depth-to-space
/// splits the input channel dimension into three dimensions (block_size *
/// block_size * output_channels) but gives input_channel_stride as the stride
/// over all three dimensions. This must be multiplied by the product of the
/// previous dimensions to get the stride in elements. input_channel_stride is
/// not required to be a multiple of block_size * block_size * output_channels,
/// so the stride in number of elements must be supplied.
/// An interface for sub-tensors can easily be built on top of this.
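///
/// Illustrative example (hypothetical values): with block_size = 2,
/// output_channels = 3, spatial extents H and W, and input_channel_stride = 16,
/// the channel dimension is viewed as 2 * 2 * 3 = 12 elements even though
/// consecutive batch entries are 16 * H * W elements apart, so per-dimension
/// strides in elements must be spelled out:
///
///   input_shape[]  = {batch, 2, 2, 3, H, W};
///   input_stride[] = {16 * H * W,      // batch (uses input_channel_stride)
///                     2 * 3 * H * W,   // first block dimension
///                     3 * H * W,       // second block dimension
///                     H * W,           // output channels
///                     W,               // height
///                     1};              // width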
static enum xnn_status setup_transpose_nd(
  xnn_operator_t transpose_op,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* perm,
  const size_t* input_stride,
  const size_t* output_stride,
  size_t element_size)
{
  enum xnn_status status = xnn_status_invalid_parameter;
  transpose_op->state = xnn_run_state_invalid;

  if (num_dims == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu num_dims: num_dims must be non-zero",
      xnn_operator_type_to_string(transpose_op->type), num_dims);
    goto error;
  }

  if (num_dims > XNN_MAX_TENSOR_DIMS) {
    xnn_log_error(
      "failed to create %s operator with %zu num_dims: num_dims must be <= %d",
      xnn_operator_type_to_string(transpose_op->type), num_dims, XNN_MAX_TENSOR_DIMS);
    goto error;
  }

  for (size_t i = 0; i < num_dims; ++i) {
    if (perm[i] >= num_dims) {
      xnn_log_error(
        "failed to create %s operator with %zu perm and %zu num_dims: 0 <= perm < num_dims",
        xnn_operator_type_to_string(transpose_op->type), perm[i], num_dims);
      goto error;
    }
  }

  for (size_t i = 0; i < num_dims - 1; ++i) {
    for (size_t j = i + 1; j < num_dims; ++j) {
      if (perm[i] == perm[j]) {
        xnn_log_error(
          "failed to create %s operator with duplicate entries in perm",
          xnn_operator_type_to_string(transpose_op->type));
        goto error;
      }
    }
  }

  if (input_stride != NULL) {
    if (input_stride[num_dims - 1] != 1) {
      xnn_log_error(
        "failed to create %s operator with %zu input_stride[num_dims - 1]: input_stride[num_dims - 1] == 1",
        xnn_operator_type_to_string(transpose_op->type), input_stride[num_dims - 1]);
      goto error;
    }
    size_t current_stride = 1;
    for (size_t i = num_dims - 1; i > 0; --i) {
      if ((input_stride[i - 1] < input_stride[i] * input_shape[i]) || (input_stride[i - 1] < current_stride)) {
        xnn_log_error(
          "failed to create %s operator with %zu input_shape and %zu input_stride: input_stride >= input_shape",
          xnn_operator_type_to_string(transpose_op->type), input_shape[i], input_stride[i]);
        goto error;
      }
      current_stride *= input_shape[i];
    }
  }

  if (output_stride != NULL) {
    if (output_stride[num_dims - 1] != 1) {
      xnn_log_error(
        "failed to create %s operator with %zu output_stride[num_dims - 1]: output_stride[num_dims - 1] == 1",
        xnn_operator_type_to_string(transpose_op->type), output_stride[num_dims - 1]);
      goto error;
    }
    size_t current_stride = 1;
    for (size_t i = num_dims - 1; i > 0; --i) {
      if ((output_stride[i - 1] < output_stride[i] * input_shape[perm[i]]) || (output_stride[i - 1] < current_stride)) {
        xnn_log_error(
          "failed to create %s operator with %zu output_shape and %zu output_stride: output_stride >= output_shape",
          xnn_operator_type_to_string(transpose_op->type), input_shape[perm[i]], output_stride[i]);
        goto error;
      }
      current_stride *= input_shape[perm[i]];
    }
  }

  transpose_op->channels = num_dims;

  struct transpose_context* context = &transpose_op->context.transpose;
  size_t normalized_dims;
  size_t normalized_shape[XNN_MAX_TENSOR_DIMS];
  size_t normalized_perm[XNN_MAX_TENSOR_DIMS];
  size_t normalized_element_size;
  xnn_normalize_transpose_permutation(num_dims, element_size, perm, input_shape, input_stride, output_stride,
    &normalized_dims, &normalized_element_size, normalized_perm, normalized_shape, context->input_stride,
    context->output_stride);

  size_t loop_order[XNN_MAX_TENSOR_DIMS];
  memcpy(loop_order, normalized_perm, sizeof(size_t) * normalized_dims);

  /// The innermost loop must iterate over the contiguous input dimension and the
  /// second-innermost loop over the contiguous output dimension.
  if (normalized_dims > 1) {
    for (size_t i = 0; i < normalized_dims - 2; ++i) {
      if (loop_order[i] == normalized_dims - 1) {
        size_t tmp = loop_order[i];
        loop_order[i] = loop_order[normalized_dims - 2];
        loop_order[normalized_dims - 2] = tmp;
        tmp = context->output_stride[i];
        context->output_stride[i] = context->output_stride[normalized_dims - 2];
        context->output_stride[normalized_dims - 2] = tmp;
        break;
      }
    }
  }
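
  // Illustrative example (hypothetical values): with normalized_perm = {2, 0, 1},
  // loop_order starts as {2, 0, 1} and the contiguous input dimension (2) would
  // be traversed by the outermost loop. The swap above moves it into the
  // second-innermost position, giving loop_order = {0, 2, 1}, so both contiguous
  // dimensions are handled by the inner 2-D tile.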

  for (size_t i = 0; i < normalized_dims; ++i) {
    transpose_op->compute.range[i] = normalized_shape[i];
  }
  reorder_array(normalized_dims, loop_order, context->input_stride);
  reorder_array(normalized_dims, loop_order, transpose_op->compute.range);

  bool variable_size_ukernel = false;
  switch (normalized_element_size) {
    case 1:
      context->log2_element_size = 0;
      context->const_size_ukernel = xnn_params.x8.transpose.const_size_ukernel;
      transpose_op->compute.tile[0] = xnn_params.x8.transpose.tile_size;
      transpose_op->compute.tile[1] = xnn_params.x8.transpose.tile_size;
      break;
    case 2:
      context->log2_element_size = 1;
      transpose_op->compute.tile[0] = xnn_params.x16.transpose.tile_size;
      transpose_op->compute.tile[1] = xnn_params.x16.transpose.tile_size;
      context->const_size_ukernel = xnn_params.x16.transpose.const_size_ukernel;
      break;
    case 4:
      context->log2_element_size = 2;
      transpose_op->compute.tile[0] = xnn_params.x32.transpose.tile_size;
      transpose_op->compute.tile[1] = xnn_params.x32.transpose.tile_size;
      context->const_size_ukernel = xnn_params.x32.transpose.const_size_ukernel;
      break;
    default:
      context->element_size = normalized_element_size;
      transpose_op->compute.tile[0] = xnn_params.xx.transpose.tile_size;
      transpose_op->compute.tile[1] = xnn_params.xx.transpose.tile_size;
      context->variable_size_ukernel = xnn_params.xx.transpose.variable_size_ukernel;
      variable_size_ukernel = true;
  }

  struct univector_contiguous_context* univector_context = &transpose_op->context.univector_contiguous;
  switch (normalized_dims) {
    case 1:
      transpose_op->compute.type = xnn_parallelization_type_1d_tile_1d;
      transpose_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_univector_contiguous;
      transpose_op->compute.range[0] = normalized_element_size;
      univector_context->ukernel = xnn_params.xx.copy;
      univector_context->log2_xsize = 0;
      univector_context->log2_ysize = 0;
      break;
    case 2:
      transpose_op->compute.type = xnn_parallelization_type_2d_tile_2d;
      if (variable_size_ukernel) {
        transpose_op->compute.task_2d_tile_2d = (pthreadpool_task_2d_tile_2d_t) xnn_compute_transposev_2d;
      } else {
        transpose_op->compute.task_2d_tile_2d = (pthreadpool_task_2d_tile_2d_t) xnn_compute_transposec_2d;
      }
      break;
    case 3:
      transpose_op->compute.type = xnn_parallelization_type_3d_tile_2d;
      if (variable_size_ukernel) {
        transpose_op->compute.task_3d_tile_2d = (pthreadpool_task_3d_tile_2d_t) xnn_compute_transposev_3d;
      } else {
        transpose_op->compute.task_3d_tile_2d = (pthreadpool_task_3d_tile_2d_t) xnn_compute_transposec_3d;
      }
      break;
    case 4:
      transpose_op->compute.type = xnn_parallelization_type_4d_tile_2d;
      if (variable_size_ukernel) {
        transpose_op->compute.task_4d_tile_2d = (pthreadpool_task_4d_tile_2d_t) xnn_compute_transposev_4d;
      } else {
        transpose_op->compute.task_4d_tile_2d = (pthreadpool_task_4d_tile_2d_t) xnn_compute_transposec_4d;
      }
      break;
    case 5:
      transpose_op->compute.type = xnn_parallelization_type_5d_tile_2d;
      if (variable_size_ukernel) {
        transpose_op->compute.task_5d_tile_2d = (pthreadpool_task_5d_tile_2d_t) xnn_compute_transposev_5d;
      } else {
        transpose_op->compute.task_5d_tile_2d = (pthreadpool_task_5d_tile_2d_t) xnn_compute_transposec_5d;
      }
      break;
    case 6:
      transpose_op->compute.type = xnn_parallelization_type_6d_tile_2d;
      if (variable_size_ukernel) {
        transpose_op->compute.task_6d_tile_2d = (pthreadpool_task_6d_tile_2d_t) xnn_compute_transposev_6d;
      } else {
        transpose_op->compute.task_6d_tile_2d = (pthreadpool_task_6d_tile_2d_t) xnn_compute_transposec_6d;
      }
      break;
    default:
      XNN_UNREACHABLE;
  }

  if (transpose_op->channels == 1) {
    transpose_op->context.univector_contiguous.x = input;
    transpose_op->context.univector_contiguous.y = output;
  } else {
    transpose_op->context.transpose.x = input;
    transpose_op->context.transpose.y = output;
  }
  transpose_op->state = xnn_run_state_ready;

  return xnn_status_success;

error:
  xnn_delete_operator(transpose_op);
  return status;
}

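// A minimal usage sketch of the create/setup/run flow (error handling elided;
// `in`, `out`, `shape`, `perm`, and `threadpool` are caller-provided):
//
//   xnn_operator_t op = NULL;
//   xnn_create_transpose_nd_x32(0 /* flags */, &op);
//   xnn_setup_transpose_nd_x32(op, in, out, num_dims, shape, perm, threadpool);
//   xnn_run_operator(op, threadpool);
//   xnn_delete_operator(op);
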
enum xnn_status xnn_create_transpose_nd_x32(
  uint32_t flags,
  xnn_operator_t* transpose_op_out)
{
  return create_transpose_nd(
    flags,
    XNN_INIT_FLAG_X32,
    xnn_operator_type_transpose_nd_x32,
    transpose_op_out);
}

enum xnn_status xnn_create_transpose_nd_x16(
  uint32_t flags,
  xnn_operator_t* transpose_op_out)
{
  return create_transpose_nd(
    flags,
    XNN_INIT_FLAG_X16,
    xnn_operator_type_transpose_nd_x16,
    transpose_op_out);
}

enum xnn_status xnn_create_transpose_nd_x8(
  uint32_t flags,
  xnn_operator_t* transpose_op_out)
{
  return create_transpose_nd(
    flags,
    XNN_INIT_FLAG_X8,
    xnn_operator_type_transpose_nd_x8,
    transpose_op_out);
}

enum xnn_status xnn_setup_transpose_nd_x32(
  xnn_operator_t transpose_op,
  const void* input,
  void* output,
  size_t num_dims,
  const size_t* shape,
  const size_t* perm,
  pthreadpool_t threadpool)
{
  if (transpose_op->type != xnn_operator_type_transpose_nd_x32) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_transpose_nd_x32),
      xnn_operator_type_to_string(transpose_op->type));
    return xnn_status_invalid_parameter;
  }

  return setup_transpose_nd(
    transpose_op,
    input, output,
    num_dims, shape, perm, NULL, NULL,
    sizeof(uint32_t));
}

enum xnn_status xnn_setup_transpose_nd_x16(
  xnn_operator_t transpose_op,
  const void* input,
  void* output,
  size_t num_dims,
  const size_t* shape,
  const size_t* perm,
  pthreadpool_t threadpool)
{
  if (transpose_op->type != xnn_operator_type_transpose_nd_x16) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_transpose_nd_x16),
      xnn_operator_type_to_string(transpose_op->type));
    return xnn_status_invalid_parameter;
  }

  return setup_transpose_nd(
    transpose_op,
    input, output,
    num_dims, shape, perm, NULL, NULL,
    sizeof(uint16_t));
}

enum xnn_status xnn_setup_transpose_nd_x8(
  xnn_operator_t transpose_op,
  const void* input,
  void* output,
  size_t num_dims,
  const size_t* shape,
  const size_t* perm,
  pthreadpool_t threadpool)
{
  if (transpose_op->type != xnn_operator_type_transpose_nd_x8) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_transpose_nd_x8),
      xnn_operator_type_to_string(transpose_op->type));
    return xnn_status_invalid_parameter;
  }

  return setup_transpose_nd(
    transpose_op,
    input, output,
    num_dims, shape, perm, NULL, NULL,
    sizeof(uint8_t));
}

static enum xnn_status run_transpose_nd(
  uint32_t flags,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* output_perm,
  size_t element_size,
  uint32_t datatype_init_flags,
  enum xnn_operator_type operator_type,
  pthreadpool_t threadpool)
{
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    return status;
  }

  struct xnn_operator transpose_op;
  memset(&transpose_op, 0, sizeof(transpose_op));

  status = init_transpose_nd(
    flags,
    datatype_init_flags,
    operator_type,
    &transpose_op);
  if (status != xnn_status_success) {
    return status;
  }

  status = setup_transpose_nd(
    &transpose_op,
    input,
    output,
    num_dims,
    input_shape,
    output_perm,
    NULL,
    NULL,
    element_size);
  if (status != xnn_status_success) {
    return status;
  }

  return xnn_run_operator(&transpose_op, threadpool);
}

enum xnn_status xnn_run_transpose_nd_x32(
  uint32_t flags,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* output_perm,
  pthreadpool_t threadpool)
{
  return run_transpose_nd(
    flags,
    input,
    output,
    num_dims,
    input_shape,
    output_perm,
    sizeof(uint32_t),
    XNN_INIT_FLAG_X32,
    xnn_operator_type_transpose_nd_x32,
    threadpool);
}

enum xnn_status xnn_run_transpose_nd_x16(
  uint32_t flags,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* output_perm,
  pthreadpool_t threadpool)
{
  return run_transpose_nd(
    flags,
    input,
    output,
    num_dims,
    input_shape,
    output_perm,
    sizeof(uint16_t),
    XNN_INIT_FLAG_X16,
    xnn_operator_type_transpose_nd_x16,
    threadpool);
}

enum xnn_status xnn_run_transpose_nd_x8(
  uint32_t flags,
  const void* input,
  void* output,
  const size_t num_dims,
  const size_t* input_shape,
  const size_t* output_perm,
  pthreadpool_t threadpool)
{
  return run_transpose_nd(
    flags,
    input,
    output,
    num_dims,
    input_shape,
    output_perm,
    sizeof(uint8_t),
    XNN_INIT_FLAG_X8,
    xnn_operator_type_transpose_nd_x8,
    threadpool);
}
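
// The xnn_run_* entry points are one-shot equivalents of the create/setup/run
// sequence above; a minimal sketch (caller-provided buffers, error handling
// elided):
//
//   const size_t shape[4] = {1, 2, 4, 8};
//   const size_t perm[4] = {0, 3, 1, 2};
//   xnn_run_transpose_nd_x32(0 /* flags */, in, out, 4, shape, perm, threadpool);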

enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out)
{
  xnn_operator_t depth_to_space_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nchw2nhwc_x32));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (output_channels == 0) {
    xnn_log_error("failed to create %s operator with %zu output channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nchw2nhwc_x32), output_channels);
    goto error;
  }

  if (output_channel_stride < output_channels) {
    xnn_log_error(
      "failed to create %s operator with output channel stride of %zu: "
      "stride must be at least as large as the number of output channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nchw2nhwc_x32),
      output_channel_stride, output_channels);
    goto error;
  }

  if (block_size <= 1) {
    xnn_log_error("failed to create %s operator with %u block size: block size must be greater than 1",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nchw2nhwc_x32),
      block_size);
    goto error;
  }

  const size_t input_channels = output_channels * block_size * block_size;
  if (input_channel_stride < input_channels) {
    xnn_log_error(
      "failed to create %s operator with input channel stride of %zu: "
      "stride must be at least as large as the number of input channels (%" PRIu32 "x%" PRIu32 "x%zu)",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nchw2nhwc_x32),
      input_channel_stride, block_size, block_size, output_channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  depth_to_space_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (depth_to_space_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nchw2nhwc_x32));
    goto error;
  }

  depth_to_space_op->channels = output_channels;
  depth_to_space_op->input_pixel_stride = input_channel_stride;
  depth_to_space_op->output_pixel_stride = output_channel_stride;
  depth_to_space_op->block_size = block_size;

  depth_to_space_op->type = xnn_operator_type_depth_to_space_nchw2nhwc_x32;
  depth_to_space_op->flags = flags;

  depth_to_space_op->state = xnn_run_state_invalid;

  *depth_to_space_op_out = depth_to_space_op;
  return xnn_status_success;

error:
  xnn_delete_operator(depth_to_space_op);
  return status;
}

enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  if (depth_to_space_op->type != xnn_operator_type_depth_to_space_nchw2nhwc_x32) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nchw2nhwc_x32),
      xnn_operator_type_to_string(depth_to_space_op->type));
    return xnn_status_invalid_parameter;
  }
  depth_to_space_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nchw2nhwc_x32));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error("failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nchw2nhwc_x32), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (batch_size == 0) {
    depth_to_space_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const uint32_t block_size = depth_to_space_op->block_size;
  const size_t channels = depth_to_space_op->channels;

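  // Depth-to-space is expressed as a 6-D transpose: the NCHW input is viewed
  // as [N][block][block][C_out][H][W] and permuted to
  // [N][H][block][W][block][C_out], which is the NHWC output with height and
  // width upscaled by block_size.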
  const size_t input_shape[6] = {batch_size, block_size, block_size, channels, input_height, input_width};
  const size_t perm[6] = {0, 4, 1, 5, 2, 3};
  const size_t area = input_height * input_width;
  const size_t elements_per_batch = area * channels;
  const size_t input_stride[6] = {
    depth_to_space_op->input_pixel_stride * area,
    block_size * elements_per_batch,
    elements_per_batch,
    area,
    input_width,
    1};
  const size_t output_stride[6] = {
    input_height * block_size * input_width * block_size * depth_to_space_op->output_pixel_stride,
    block_size * input_width * block_size * depth_to_space_op->output_pixel_stride,
    input_width * block_size * depth_to_space_op->output_pixel_stride,
    block_size * depth_to_space_op->output_pixel_stride,
    depth_to_space_op->output_pixel_stride,
    1};

  return setup_transpose_nd(
    depth_to_space_op,
    input,
    output,
    6,
    input_shape,
    perm,
    input_stride,
    output_stride,
    sizeof(uint32_t));
}

static enum xnn_status create_depth_to_space_nhwc(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  enum xnn_operator_type operator_type,
  xnn_operator_t* depth_to_space_op_out)
{
  xnn_operator_t depth_to_space_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (output_channels == 0) {
    xnn_log_error("failed to create %s operator with %zu output channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), output_channels);
    goto error;
  }

  if (output_channel_stride < output_channels) {
    xnn_log_error(
      "failed to create %s operator with output channel stride of %zu: "
      "stride must be at least as large as the number of output channels (%zu)",
      xnn_operator_type_to_string(operator_type),
      output_channel_stride, output_channels);
    goto error;
  }

  if (block_size <= 1) {
    xnn_log_error("failed to create %s operator with %u block size: block size must be greater than 1",
      xnn_operator_type_to_string(operator_type),
      block_size);
    goto error;
  }

  const size_t input_channels = output_channels * block_size * block_size;
  if (input_channel_stride < input_channels) {
    xnn_log_error(
      "failed to create %s operator with input channel stride of %zu: "
      "stride must be at least as large as the number of input channels (%" PRIu32 "x%" PRIu32 "x%zu)",
      xnn_operator_type_to_string(operator_type),
      input_channel_stride, block_size, block_size, output_channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  depth_to_space_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (depth_to_space_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  depth_to_space_op->channels = output_channels;
  depth_to_space_op->input_pixel_stride = input_channel_stride;
  depth_to_space_op->output_pixel_stride = output_channel_stride;
  depth_to_space_op->block_size = block_size;

  depth_to_space_op->type = operator_type;
  depth_to_space_op->flags = flags;

  depth_to_space_op->state = xnn_run_state_invalid;

  *depth_to_space_op_out = depth_to_space_op;
  return xnn_status_success;

error:
  xnn_delete_operator(depth_to_space_op);
  return status;
}

enum xnn_status xnn_create_depth_to_space_nhwc_x8(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out)
{
  return create_depth_to_space_nhwc(
    output_channels,
    input_channel_stride,
    output_channel_stride,
    block_size,
    flags,
    xnn_operator_type_depth_to_space_nhwc_x8,
    depth_to_space_op_out);
}

enum xnn_status xnn_create_depth_to_space_nhwc_x16(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out)
{
  return create_depth_to_space_nhwc(
    output_channels,
    input_channel_stride,
    output_channel_stride,
    block_size,
    flags,
    xnn_operator_type_depth_to_space_nhwc_x16,
    depth_to_space_op_out);
}

enum xnn_status xnn_create_depth_to_space_nhwc_x32(
  size_t output_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* depth_to_space_op_out)
{
  return create_depth_to_space_nhwc(
    output_channels,
    input_channel_stride,
    output_channel_stride,
    block_size,
    flags,
    xnn_operator_type_depth_to_space_nhwc_x32,
    depth_to_space_op_out);
}

static enum xnn_status setup_depth_to_space_nhwc(
  xnn_operator_t depth_to_space_op,
  enum xnn_operator_type expected_operator_type,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  uint32_t element_size)
{
  if (depth_to_space_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(depth_to_space_op->type));
    return xnn_status_invalid_parameter;
  }
  depth_to_space_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(expected_operator_type));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error("failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(expected_operator_type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (batch_size == 0) {
    depth_to_space_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const uint32_t block_size = depth_to_space_op->block_size;
  const size_t channels = depth_to_space_op->channels;
  const size_t input_pixel_stride = depth_to_space_op->input_pixel_stride;
  const size_t output_pixel_stride = depth_to_space_op->output_pixel_stride;
  const size_t block_output_pixel_stride = block_size * output_pixel_stride;

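  // Depth-to-space in NHWC as a 5-D transpose: the input is viewed as
  // [N*H][W][block][block][C_out] and permuted to [N*H][block][W][block][C_out],
  // i.e. the first block dimension is hoisted above the width dimension, which
  // flattens back to the [N][H*block][W*block][C_out] output.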
  const size_t input_shape[5] = {batch_size * input_height, input_width, block_size, block_size, channels};
  const size_t perm[5] = {0, 2, 1, 3, 4};
  const size_t input_stride[5] = {
    input_width * input_pixel_stride,
    input_pixel_stride,
    block_size * channels,
    channels,
    1};
  const size_t output_stride[5] = {
    block_size * input_width * block_output_pixel_stride,
    input_width * block_output_pixel_stride,
    block_output_pixel_stride,
    output_pixel_stride,
    1};

  return setup_transpose_nd(
    depth_to_space_op,
    input,
    output,
    5,
    input_shape,
    perm,
    input_stride,
    output_stride,
    element_size);
}

enum xnn_status xnn_setup_depth_to_space_nhwc_x8(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  return setup_depth_to_space_nhwc(
    depth_to_space_op,
    xnn_operator_type_depth_to_space_nhwc_x8,
    batch_size, input_height, input_width,
    input, output, sizeof(uint8_t));
}

enum xnn_status xnn_setup_depth_to_space_nhwc_x16(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  return setup_depth_to_space_nhwc(
    depth_to_space_op,
    xnn_operator_type_depth_to_space_nhwc_x16,
    batch_size, input_height, input_width,
    input, output, sizeof(uint16_t));
}

enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
  xnn_operator_t depth_to_space_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  return setup_depth_to_space_nhwc(
    depth_to_space_op,
    xnn_operator_type_depth_to_space_nhwc_x32,
    batch_size, input_height, input_width,
    input, output, sizeof(uint32_t));
}

static enum xnn_status create_space_to_depth_nhwc(
  size_t input_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  enum xnn_operator_type operator_type,
  xnn_operator_t* space_to_depth_op_out)
{
  xnn_operator_t space_to_depth_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (input_channels == 0) {
    xnn_log_error("failed to create %s operator with %zu input channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), input_channels);
    goto error;
  }

  if (input_channel_stride < input_channels) {
    xnn_log_error(
      "failed to create %s operator with input channel stride of %zu: "
      "stride must be at least as large as the number of input channels (%zu)",
      xnn_operator_type_to_string(operator_type),
      input_channel_stride, input_channels);
    goto error;
  }

  if (block_size <= 1) {
    xnn_log_error("failed to create %s operator with %u block size: block size must be greater than 1",
      xnn_operator_type_to_string(operator_type),
      block_size);
    goto error;
  }

  const size_t output_channels = input_channels * block_size * block_size;
  if (output_channel_stride < output_channels) {
    xnn_log_error(
      "failed to create %s operator with output channel stride of %zu: "
      "stride must be at least as large as the number of output channels (%" PRIu32 "x%" PRIu32 "x%zu)",
      xnn_operator_type_to_string(operator_type),
      output_channel_stride, block_size, block_size, input_channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  space_to_depth_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (space_to_depth_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  space_to_depth_op->channels = input_channels;
  space_to_depth_op->input_pixel_stride = input_channel_stride;
  space_to_depth_op->output_pixel_stride = output_channel_stride;
  space_to_depth_op->block_size = block_size;

  space_to_depth_op->type = operator_type;
  space_to_depth_op->flags = flags;

  space_to_depth_op->state = xnn_run_state_invalid;

  *space_to_depth_op_out = space_to_depth_op;
  return xnn_status_success;

error:
  xnn_delete_operator(space_to_depth_op);
  return status;
}

enum xnn_status xnn_create_space_to_depth_nhwc_x8(
  size_t input_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* space_to_depth_op_out)
{
  return create_space_to_depth_nhwc(
    input_channels,
    input_channel_stride,
    output_channel_stride,
    block_size,
    flags,
    xnn_operator_type_space_to_depth_nhwc_x8,
    space_to_depth_op_out);
}

enum xnn_status xnn_create_space_to_depth_nhwc_x16(
  size_t input_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* space_to_depth_op_out)
{
  return create_space_to_depth_nhwc(
    input_channels,
    input_channel_stride,
    output_channel_stride,
    block_size,
    flags,
    xnn_operator_type_space_to_depth_nhwc_x16,
    space_to_depth_op_out);
}

enum xnn_status xnn_create_space_to_depth_nhwc_x32(
  size_t input_channels,
  size_t input_channel_stride,
  size_t output_channel_stride,
  uint32_t block_size,
  uint32_t flags,
  xnn_operator_t* space_to_depth_op_out)
{
  return create_space_to_depth_nhwc(
    input_channels,
    input_channel_stride,
    output_channel_stride,
    block_size,
    flags,
    xnn_operator_type_space_to_depth_nhwc_x32,
    space_to_depth_op_out);
}

static enum xnn_status setup_space_to_depth_nhwc(
  xnn_operator_t space_to_depth_op,
  enum xnn_operator_type expected_operator_type,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  uint32_t element_size)
{
  if (space_to_depth_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(space_to_depth_op->type));
    return xnn_status_invalid_parameter;
  }
  space_to_depth_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(expected_operator_type));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error("failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(expected_operator_type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (batch_size == 0) {
    space_to_depth_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const uint32_t block_size = space_to_depth_op->block_size;

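  // Space-to-depth in NHWC as a 5-D transpose: the input is viewed as
  // [N*(H/block)][block][W/block][block][C_in] and permuted to
  // [N*(H/block)][W/block][block][block][C_in], which flattens to the
  // [N][H/block][W/block][block*block*C_in] output.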
  const size_t input_shape[5] = {
    batch_size * (input_height / block_size),
    block_size,
    input_width / block_size,
    block_size,
    space_to_depth_op->channels};
  const size_t perm[5] = {0, 2, 1, 3, 4};

  const size_t input_stride[5] = {
    block_size * input_width * space_to_depth_op->input_pixel_stride,
    input_width * space_to_depth_op->input_pixel_stride,
    block_size * space_to_depth_op->input_pixel_stride,
    space_to_depth_op->input_pixel_stride,
    1};
  const size_t output_stride[5] = {
    (input_width / block_size) * space_to_depth_op->output_pixel_stride,
    space_to_depth_op->output_pixel_stride,
    block_size * space_to_depth_op->channels,
    space_to_depth_op->channels,
    1};

  return setup_transpose_nd(
    space_to_depth_op,
    input,
    output,
    5,
    input_shape,
    perm,
    input_stride,
    output_stride,
    element_size);
}

enum xnn_status xnn_setup_space_to_depth_nhwc_x8(
  xnn_operator_t space_to_depth_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  return setup_space_to_depth_nhwc(
    space_to_depth_op,
    xnn_operator_type_space_to_depth_nhwc_x8,
    batch_size, input_height, input_width,
    input, output, sizeof(uint8_t));
}

enum xnn_status xnn_setup_space_to_depth_nhwc_x16(
  xnn_operator_t space_to_depth_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  return setup_space_to_depth_nhwc(
    space_to_depth_op,
    xnn_operator_type_space_to_depth_nhwc_x16,
    batch_size, input_height, input_width,
    input, output, sizeof(uint16_t));
}

enum xnn_status xnn_setup_space_to_depth_nhwc_x32(
  xnn_operator_t space_to_depth_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  return setup_space_to_depth_nhwc(
    space_to_depth_op,
    xnn_operator_type_space_to_depth_nhwc_x32,
    batch_size, input_height, input_width,
    input, output, sizeof(uint32_t));
}