• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 #include <math.h>
8 #include <stdbool.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/log.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/math.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/indirection.h>
22 
23 
static enum xnn_status create_resize_bilinear2d_nhwc(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* resize_op_out)
{
  // Shared constructor for every NHWC resize-bilinear datatype variant:
  // validates the channel/stride configuration, then allocates and tags a
  // zero-initialized operator structure.
  enum xnn_status status = xnn_status_uninitialized;
  xnn_operator_t op = NULL;

  // XNNPACK must have been initialized before any operator can be created.
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  // Guard clause: a zero-channel operator is meaningless.
  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    goto error;
  }

  // Pixel strides are expressed in elements and must cover all channels.
  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_pixel_stride, channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  op->channels = channels;
  op->input_pixel_stride = input_pixel_stride;
  op->output_pixel_stride = output_pixel_stride;
  op->type = operator_type;
  op->flags = flags;
  // The operator becomes runnable only after a successful setup() call.
  op->state = xnn_run_state_invalid;

  *resize_op_out = op;
  return xnn_status_success;

error:
  xnn_delete_operator(op);
  return status;
}
92 
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  // Delegate to the shared NHWC resize-bilinear constructor, tagging the
  // operator as the F32 variant.
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    xnn_operator_type_resize_bilinear_nhwc_f32, resize_op_out);
}
108 
enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  // Delegate to the shared NHWC resize-bilinear constructor, tagging the
  // operator as the signed 8-bit variant.
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    xnn_operator_type_resize_bilinear_nhwc_s8, resize_op_out);
}
124 
enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  // Delegate to the shared NHWC resize-bilinear constructor, tagging the
  // operator as the unsigned 8-bit variant.
  return create_resize_bilinear2d_nhwc(
    channels, input_pixel_stride, output_pixel_stride, flags,
    xnn_operator_type_resize_bilinear_nhwc_u8, resize_op_out);
}
140 
// Shared setup for every NHWC resize-bilinear datatype variant.
//
// Validates dimensions, (re)allocates the indirection buffer and packed
// interpolation weights when the output pixel count changes, re-runs the
// datatype-specific indirection initializer when any spatial dimension
// changes, then fills in the compute context and parallelization plan.
//
// Parameters:
//   resize_op                - operator created by create_resize_bilinear2d_nhwc.
//   expected_operator_type   - guards against setup being called on the wrong
//                              operator variant.
//   log2_element_size        - log2 of the input/output element size in bytes.
//   log2_weight_element_size - log2 of the packed-weight element size in bytes.
//   indirection_init         - fills indirection buffer + packed weights.
//   ibilinear                - supplies the ukernel pointer and its pixel tile.
//   num_threads              - used only to choose the output tile size.
//
// Returns xnn_status_success, or an error status (invalid/unsupported
// parameter, uninitialized, out of memory) with the operator left in the
// invalid state.
static enum xnn_status setup_resize_bilinear2d_nhwc(
    xnn_operator_t resize_op,
    enum xnn_operator_type expected_operator_type,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const void* input,
    void* output,
    uint32_t log2_element_size,
    uint32_t log2_weight_element_size,
    xnn_indirection_init_resize_bilinear2d_hwc_fn indirection_init,
    const struct ibilinear_parameters ibilinear[restrict XNN_MIN_ELEMENTS(1)],
    size_t num_threads)
{
  if (resize_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_invalid_parameter;
  }
  // Invalidate up front; the state becomes ready/skip only on success below.
  resize_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(resize_op->type));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(resize_op->type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // 16777216 == 2**24: dimensions at or above this limit are rejected (see
  // the error message below); presumably this protects fixed-point coordinate
  // math in the indirection initializer -- TODO confirm.
  if (max(input_width, input_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be below 2**24",
      xnn_operator_type_to_string(resize_op->type), input_width, input_height);
    return xnn_status_unsupported_parameter;
  }

  if (output_width == 0 || output_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be non-zero",
      xnn_operator_type_to_string(resize_op->type), output_width, output_height);
    return xnn_status_invalid_parameter;
  }

  if (max(output_width, output_height) >= 16777216) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu output: output dimensions must be below 2**24",
      xnn_operator_type_to_string(resize_op->type), output_width, output_height);
    return xnn_status_unsupported_parameter;
  }

  // An empty batch is valid: mark the operator as a no-op for xnn_run_operator.
  if (batch_size == 0) {
    resize_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // Buffer sizes depend only on the output pixel count, so reallocation is
  // needed only when that product changes (the buffers are reused otherwise).
  if (output_height * output_width != resize_op->last_output_height * resize_op->last_output_width) {
    // 4 input pointers per output pixel (bilinear taps), 2 weights per pixel.
    const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
    const size_t packed_weights_size = (output_height * output_width * 2) << log2_weight_element_size;

    const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator indirection buffer",
        indirection_buffer_size, xnn_operator_type_to_string(resize_op->type));
      return xnn_status_out_of_memory;
    }
    resize_op->indirection_buffer = indirection_buffer;

    // Note: packed weights must be SIMD-aligned, so we can't use xnn_reallocate_memory
    xnn_release_simd_memory(resize_op->packed_weights);
    resize_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
    if (resize_op->packed_weights == NULL) {
      xnn_log_error(
        "failed to allocate %zu bytes for %s operator packed weights",
        packed_weights_size, xnn_operator_type_to_string(resize_op->type));
      return xnn_status_out_of_memory;
    }
  }

  const size_t input_pixel_stride_in_bytes = resize_op->input_pixel_stride << log2_element_size;
  // The indirection buffer / packed weights contents depend on all four
  // spatial dimensions (and the input base pointer); re-initialize only when
  // a dimension changed since the last setup.  If only the input pointer
  // moved, the cached buffers remain valid and the delta is absorbed by
  // input_offset below.
  if (input_height != resize_op->last_input_height ||
      input_width != resize_op->last_input_width ||
      output_height != resize_op->last_output_height ||
      output_width != resize_op->last_output_width)
  {
    const uint32_t flags = resize_op->flags;
    indirection_init(
      input_pixel_stride_in_bytes,
      input_height, input_width,
      output_height, output_width,
      input, resize_op->indirection_buffer, resize_op->packed_weights,
      !!(flags & XNN_FLAG_ALIGN_CORNERS),
      !!(flags & XNN_FLAG_TENSORFLOW_LEGACY_MODE));

    // Remember the configuration the buffers were built for.
    resize_op->last_input = input;
    resize_op->last_input_height = input_height;
    resize_op->last_input_width = input_width;
    resize_op->last_output_height = output_height;
    resize_op->last_output_width = output_width;
  }

  const size_t output_pixel_stride_in_bytes = resize_op->output_pixel_stride << log2_element_size;
  resize_op->context.resize_bilinear = (struct resize_bilinear_context) {
    .scaled_channels = resize_op->channels << log2_element_size,
    .indirect_input = resize_op->indirection_buffer,
    // Indirection pointers were built against last_input; this offset adjusts
    // them to the current input pointer without rebuilding the buffer.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) resize_op->last_input),
    .input_batch_stride = input_pixel_stride_in_bytes * input_height * input_width,
    .packed_weights = resize_op->packed_weights,
    .output = output,
    .output_pixel_stride = output_pixel_stride_in_bytes,
    .output_batch_stride = output_pixel_stride_in_bytes * output_height * output_width,
    .log2_wsize = 1 + log2_weight_element_size /* log2(2 * sizeof(weight)) */,
    .ukernel = ibilinear->ukernel,
  };

  // Choose a tile size that yields roughly 5 tiles per thread (for load
  // balancing), rounded to a multiple of the ukernel's pixel tile.
  const size_t output_size = output_height * output_width;
  size_t output_size_tile = output_size;
  if (num_threads > 1) {
    const size_t target_tiles_per_thread = 5;
    const size_t max_output_size_tile = divide_round_up(output_size, num_threads * target_tiles_per_thread);
    if (max_output_size_tile < output_size_tile) {
      const uint32_t output_size_subtile = ibilinear->pixel_tile;
      output_size_tile =
        min(output_size_tile,
          divide_round_up(output_size_tile, max_output_size_tile * output_size_subtile) * output_size_subtile);
    }
  }
  // Parallelize over (batch, output pixel range) pairs.
  resize_op->compute.type = xnn_parallelization_type_2d_tile_1d;
  resize_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_resize_bilinear;
  resize_op->compute.range[0] = batch_size;
  resize_op->compute.range[1] = output_size;
  resize_op->compute.tile[0] = output_size_tile;
  resize_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
285 
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  // F32 variant: 4-byte elements and 4-byte packed weights.
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_f32,
    batch_size,
    input_height, input_width,
    output_height, output_width,
    input, output,
    2 /* log2(element size) == log2(sizeof(float)) */,
    2 /* log2(weight element size) == log2(sizeof(float)) */,
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_f32,
    &xnn_params.f32.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
313 
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const int8_t* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  // S8 variant: 1-byte elements with 2-byte (Q11 fixed-point) packed weights.
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_s8,
    batch_size,
    input_height, input_width,
    output_height, output_width,
    input, output,
    0 /* log2(element size) == log2(sizeof(int8_t)) */,
    1 /* log2(weight element size) == log2(sizeof(int16_t)) */,
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_q11,
    &xnn_params.s8.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
341 
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  // U8 variant: 1-byte elements with 2-byte (Q11 fixed-point) packed weights.
  return setup_resize_bilinear2d_nhwc(
    resize_op, xnn_operator_type_resize_bilinear_nhwc_u8,
    batch_size,
    input_height, input_width,
    output_height, output_width,
    input, output,
    0 /* log2(element size) == log2(sizeof(uint8_t)) */,
    1 /* log2(weight element size) == log2(sizeof(int16_t)) */,
    (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_q11,
    &xnn_params.u8.ibilinear,
    pthreadpool_get_threads_count(threadpool));
}
369