// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/operator.h>
#include <xnnpack/log.h>
#include <xnnpack/params.h>


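// Creates a Depth-To-Space (NHWC, 32-bit data) operator: validates the channel counts,
// strides, and block size, then allocates and initializes the operator structure.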
enum xnn_status xnn_create_depth_to_space_nhwc_x32(
    size_t output_channels,
    size_t input_channel_stride,
    size_t output_channel_stride,
    uint32_t block_size,
    uint32_t flags,
    xnn_operator_t* depth_to_space_op_out)
{
  xnn_operator_t depth_to_space_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (output_channels == 0) {
    xnn_log_error("failed to create %s operator with %zu output channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32), output_channels);
    goto error;
  }

  if (output_channel_stride < output_channels) {
    xnn_log_error(
      "failed to create %s operator with output channel stride of %zu: "
      "stride must be at least as large as the number of output channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32),
      output_channel_stride, output_channels);
    goto error;
  }

  if (block_size <= 1) {
    xnn_log_error("failed to create %s operator with %u block size: block size must be greater than 1",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32),
      block_size);
    goto error;
  }

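  // Depth-to-space rearranges each block_size x block_size group of channels into a spatial block,
  // so each input pixel must carry output_channels * block_size * block_size channels.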
  const size_t input_channels = output_channels * block_size * block_size;
  if (input_channel_stride < input_channels) {
    xnn_log_error(
      "failed to create %s operator with input channel stride of %zu: "
      "stride must be at least as large as the number of input channels (%" PRIu32 "x%" PRIu32 "x%zu)",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32),
      input_channel_stride, block_size, block_size, output_channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  depth_to_space_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (depth_to_space_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32));
    goto error;
  }

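  // Record the configuration; channel strides are stored as pixel strides (in elements)
  // and converted to byte strides when the operator is set up.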
  depth_to_space_op->channels = output_channels;
  depth_to_space_op->input_pixel_stride = input_channel_stride;
  depth_to_space_op->output_pixel_stride = output_channel_stride;
  depth_to_space_op->block_size = block_size;

  depth_to_space_op->type = xnn_operator_type_depth_to_space_nhwc_x32;
  depth_to_space_op->flags = flags;

  depth_to_space_op->state = xnn_run_state_invalid;

  *depth_to_space_op_out = depth_to_space_op;
  return xnn_status_success;

error:
  xnn_delete_operator(depth_to_space_op);
  return status;
}

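// Sets up the Depth-To-Space (NHWC, 32-bit data) operator for a given batch size and input shape:
// validates the arguments, fills the compute context, and selects a contiguous or strided compute path.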
enum xnn_status xnn_setup_depth_to_space_nhwc_x32(
    xnn_operator_t depth_to_space_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  if (depth_to_space_op->type != xnn_operator_type_depth_to_space_nhwc_x32) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32),
      xnn_operator_type_to_string(depth_to_space_op->type));
    return xnn_status_invalid_parameter;
  }
  depth_to_space_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error("failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_depth_to_space_nhwc_x32), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (batch_size == 0) {
    depth_to_space_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const uint32_t block_size = depth_to_space_op->block_size;
  const size_t output_channels = depth_to_space_op->channels;
  const size_t output_width = input_width * block_size;

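  // All strides in the compute context are in bytes; each x32 element is sizeof(float) == 4 bytes.
  // 'elements' is the number of bytes the copy microkernel moves per inner iteration.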
  depth_to_space_op->context.depthtospace2d_hwc = (struct depthtospace2d_hwc_context) {
    .elements = output_channels * sizeof(float),
    .input_width = input_width,
    .block_size = (size_t) block_size,
    .input = input,
    .output = output,
    .input_height_stride = input_width * depth_to_space_op->input_pixel_stride * sizeof(float),
    .input_width_stride = depth_to_space_op->input_pixel_stride * sizeof(float),
    .output_height_stride = output_width * depth_to_space_op->output_pixel_stride * sizeof(float),
    .output_width_stride = depth_to_space_op->output_pixel_stride * sizeof(float),
    .ukernel = xnn_params.xx.copy,
  };
  if (depth_to_space_op->output_pixel_stride == output_channels) {
    // Transpose (N, Hi, Wi, Hb, Wb, Cout) -> (N, Hi, Hb, Wi, Wb, Cout) with Wb, Cout contiguous in memory.
    // Optimization: copy Wb * Cout elements at once.
    depth_to_space_op->context.depthtospace2d_hwc.elements *= block_size;

    depth_to_space_op->compute.type = xnn_parallelization_type_3d;
    depth_to_space_op->compute.task_3d = (pthreadpool_task_3d_t) xnn_compute_depthtospace2d_hwc_contiguous;
    depth_to_space_op->compute.range[0] = batch_size * input_height;
    depth_to_space_op->compute.range[1] = input_width;
    depth_to_space_op->compute.range[2] = block_size;
  } else {
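    // Output pixels are not densely packed (output_pixel_stride > Cout), so copy Cout elements
    // at a time and parallelize over a fourth dimension (Wb) as well.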
    depth_to_space_op->compute.type = xnn_parallelization_type_4d;
    depth_to_space_op->compute.task_4d = (pthreadpool_task_4d_t) xnn_compute_depthtospace2d_hwc_strided;
    depth_to_space_op->compute.range[0] = batch_size * input_height;
    depth_to_space_op->compute.range[1] = input_width;
    depth_to_space_op->compute.range[2] = block_size;
    depth_to_space_op->compute.range[3] = block_size;
  }
  depth_to_space_op->state = xnn_run_state_ready;

  return xnn_status_success;
}