1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <assert.h>
7 #include <math.h>
8 #include <stdbool.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/log.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/math.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/indirection.h>
22
23
// Shared constructor behind the f32/s8/u8 resize-bilinear NHWC creators.
// Validates the channel/stride configuration, allocates a zero-initialized
// operator structure, and records the parameters; on any failure the partially
// constructed operator (possibly NULL) is released and an error status returned.
static enum xnn_status create_resize_bilinear2d_nhwc(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* resize_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  // The library must be initialized before any operator can be created.
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  // Parameter validation: channels must be non-zero and each pixel stride
  // must cover at least one pixel's worth of channels.
  if (channels == 0) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    goto error;
  }
  if (input_pixel_stride < channels) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_pixel_stride, channels);
    goto error;
  }
  if (output_pixel_stride < channels) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_pixel_stride, channels);
    goto error;
  }

  // Allocate the operator descriptor itself (SIMD-aligned, zero-filled).
  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    status = xnn_status_out_of_memory;
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  // Record configuration; the operator is not runnable until setup is called.
  op->channels = channels;
  op->input_pixel_stride = input_pixel_stride;
  op->output_pixel_stride = output_pixel_stride;
  op->type = operator_type;
  op->flags = flags;
  op->state = xnn_run_state_invalid;

  *resize_op_out = op;
  return xnn_status_success;

error:
  xnn_delete_operator(op);
  return status;
}
92
// Public creator for the F32 NHWC resize-bilinear operator; delegates to the
// type-agnostic constructor with the F32 operator-type tag.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    xnn_operator_type_resize_bilinear_nhwc_f32, resize_op_out);
}
108
// Public creator for the signed 8-bit NHWC resize-bilinear operator; delegates
// to the type-agnostic constructor with the S8 operator-type tag.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    xnn_operator_type_resize_bilinear_nhwc_s8, resize_op_out);
}
124
// Public creator for the unsigned 8-bit NHWC resize-bilinear operator;
// delegates to the type-agnostic constructor with the U8 operator-type tag.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    xnn_operator_type_resize_bilinear_nhwc_u8, resize_op_out);
}
140
// Type-agnostic setup for an NHWC resize-bilinear operator.
//
// Validates the operator type and the image dimensions, (re)builds the cached
// indirection buffer and packed interpolation weights when the output geometry
// changed since the previous setup, fills the compute context, and configures
// 2D-tiled parallelization over (batch, output pixels).
//
// Parameters:
//   resize_op                - operator created by create_resize_bilinear2d_nhwc.
//   expected_operator_type   - type tag the caller expects; mismatch is an error.
//   batch_size               - number of images; 0 makes setup a no-op (skip state).
//   input_height/width       - source image size; must be non-zero and < 2**24.
//   output_height/width      - destination size; same constraints as the input.
//   input/output             - typed pixel buffers (type erased to void*).
//   log2_element_size        - log2 of the pixel element size in bytes.
//   log2_weight_element_size - log2 of one packed interpolation weight's size.
//   indirection_init         - type-specific initializer for the indirection
//                              buffer and packed weights.
//   ibilinear                - microkernel parameters (ukernel + pixel tile).
//   num_threads              - threadpool size used to pick the tile size.
//
// Returns xnn_status_success and leaves the operator in the ready (or skip)
// state, or an error status with the operator left in the invalid state.
static enum xnn_status setup_resize_bilinear2d_nhwc(
  xnn_operator_t resize_op,
  enum xnn_operator_type expected_operator_type,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  void* output,
  uint32_t log2_element_size,
  uint32_t log2_weight_element_size,
  xnn_indirection_init_resize_bilinear2d_hwc_fn indirection_init,
  const struct ibilinear_parameters ibilinear[restrict XNN_MIN_ELEMENTS(1)],
  size_t num_threads)
{
  if (resize_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_invalid_parameter;
  }
  // Invalidate up front: any early return below leaves the operator unrunnable.
  resize_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(resize_op->type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // Dimensions must fit in 24 bits (16777216 == 2**24); the indirection/weight
  // initialization relies on this bound.
  if (max(input_width, input_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be below 2**24",
      xnn_operator_type_to_string(resize_op->type), input_width, input_height);
    return xnn_status_unsupported_parameter;
  }

  if (output_width == 0 || output_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be non-zero",
      xnn_operator_type_to_string(resize_op->type), output_width, output_height);
    return xnn_status_invalid_parameter;
  }

  if (max(output_width, output_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be below 2**24",
      xnn_operator_type_to_string(resize_op->type), output_width, output_height);
    return xnn_status_unsupported_parameter;
  }

  // An empty batch is valid: record the skip state so run is a no-op.
  if (batch_size == 0) {
    resize_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // Reallocate the cached buffers only when the number of output pixels
  // changed; each output pixel needs 4 input-pixel pointers and 2 weights.
  if (output_height * output_width != resize_op->last_output_height * resize_op->last_output_width) {
    const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
    const size_t packed_weights_size = (output_height * output_width * 2) << log2_weight_element_size;

    const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator indirection buffer",
        indirection_buffer_size, xnn_operator_type_to_string(resize_op->type));
      return xnn_status_out_of_memory;
    }
    resize_op->indirection_buffer = indirection_buffer;

    // Note: packed weights must be SIMD-aligned, so we can't use xnn_reallocate_memory
    xnn_release_simd_memory(resize_op->packed_weights);
    resize_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
    if (resize_op->packed_weights == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator packed weights",
        packed_weights_size, xnn_operator_type_to_string(resize_op->type));
      return xnn_status_out_of_memory;
    }
  }

  const size_t input_pixel_stride_in_bytes = resize_op->input_pixel_stride << log2_element_size;
  // Rebuild indirection pointers and weights only when the geometry changed;
  // an input-pointer-only change is handled cheaply via input_offset below.
  if (input_height != resize_op->last_input_height ||
      input_width != resize_op->last_input_width ||
      output_height != resize_op->last_output_height ||
      output_width != resize_op->last_output_width)
  {
    const uint32_t flags = resize_op->flags;
    indirection_init(
      input_pixel_stride_in_bytes,
      input_height, input_width,
      output_height, output_width,
      input, resize_op->indirection_buffer, resize_op->packed_weights,
      !!(flags & XNN_FLAG_ALIGN_CORNERS),
      !!(flags & XNN_FLAG_TENSORFLOW_LEGACY_MODE));

    // Remember the input pointer the indirection buffer was built against,
    // plus the geometry, so later setups can reuse the cached buffers.
    resize_op->last_input = input;
    resize_op->last_input_height = input_height;
    resize_op->last_input_width = input_width;
    resize_op->last_output_height = output_height;
    resize_op->last_output_width = output_width;
  }

  const size_t output_pixel_stride_in_bytes = resize_op->output_pixel_stride << log2_element_size;
  resize_op->context.resize_bilinear = (struct resize_bilinear_context) {
    .scaled_channels = resize_op->channels << log2_element_size,
    .indirect_input = resize_op->indirection_buffer,
    // Delta between the current input and the one the indirection buffer was
    // built for; lets the kernel run without rebuilding the buffer.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) resize_op->last_input),
    .input_batch_stride = input_pixel_stride_in_bytes * input_height * input_width,
    .packed_weights = resize_op->packed_weights,
    .output = output,
    .output_pixel_stride = output_pixel_stride_in_bytes,
    .output_batch_stride = output_pixel_stride_in_bytes * output_height * output_width,
    .log2_wsize = 1 + log2_weight_element_size /* log2(2 * sizeof(weight)) */,
    .ukernel = ibilinear->ukernel,
  };

  // Pick a tile size targeting ~5 tiles per thread, rounded to a multiple of
  // the microkernel's pixel tile, and never larger than the whole output.
  const size_t output_size = output_height * output_width;
  size_t output_size_tile = output_size;
  if (num_threads > 1) {
    const size_t target_tiles_per_thread = 5;
    const size_t max_output_size_tile = divide_round_up(output_size, num_threads * target_tiles_per_thread);
    if (max_output_size_tile < output_size_tile) {
      const uint32_t output_size_subtile = ibilinear->pixel_tile;
      output_size_tile =
        min(output_size_tile,
          divide_round_up(output_size_tile, max_output_size_tile * output_size_subtile) * output_size_subtile);
    }
  }
  // Parallelize over (batch, output pixel range) with 1D tiling of pixels.
  resize_op->compute.type = xnn_parallelization_type_2d_tile_1d;
  resize_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_resize_bilinear;
  resize_op->compute.range[0] = batch_size;
  resize_op->compute.range[1] = output_size;
  resize_op->compute.tile[0] = output_size_tile;
  resize_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
285
// Public setup for the F32 NHWC resize-bilinear operator. Delegates to the
// type-agnostic setup with float-sized elements and float-sized packed weights.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_f32,
    batch_size,
    input_height, input_width,
    output_height, output_width,
    input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) -- weights are also float */,
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_f32,
    &xnn_params.f32.ibilinear,
    num_threads);
}
313
// Public setup for the signed 8-bit NHWC resize-bilinear operator. Delegates
// to the type-agnostic setup with byte-sized elements and Q1.11 fixed-point
// weights stored as int16_t.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const int8_t* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_s8,
    batch_size,
    input_height, input_width,
    output_height, output_width,
    input, output,
    0 /* log2(sizeof(int8_t)) */,
    1 /* log2(sizeof(int16_t)) -- Q1.11 fixed-point weights */,
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_q11,
    &xnn_params.s8.ibilinear,
    num_threads);
}
341
// Public setup for the unsigned 8-bit NHWC resize-bilinear operator. Delegates
// to the type-agnostic setup with byte-sized elements and Q1.11 fixed-point
// weights stored as int16_t (shared with the S8 path).
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_u8,
    batch_size,
    input_height, input_width,
    output_height, output_width,
    input, output,
    0 /* log2(sizeof(uint8_t)) */,
    1 /* log2(sizeof(int16_t)) -- Q1.11 fixed-point weights */,
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_q11,
    &xnn_params.u8.ibilinear,
    num_threads);
}
369