• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 #include <math.h>
8 #include <stdbool.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/log.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/math.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/indirection.h>
22 
// Creates a Resize Bilinear (NHWC, F32) operator descriptor.
//
// Parameters:
//   channels            - number of channels per pixel; must be non-zero.
//   input_pixel_stride  - input stride between pixels, in elements; must be >= channels.
//   output_pixel_stride - output stride between pixels, in elements; must be >= channels.
//   flags               - stored verbatim in the operator and consulted at setup time.
//   resize_op_out       - receives the newly created operator on success; not written on failure.
//
// Returns:
//   xnn_status_success            - the operator was created and stored in *resize_op_out.
//   xnn_status_uninitialized      - xnn_params is not marked as initialized.
//   xnn_status_invalid_parameter  - channels is zero or a pixel stride is smaller than channels.
//   xnn_status_out_of_memory      - the operator descriptor could not be allocated.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status failure;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Resize Bilinear operator: XNNPACK is not initialized");
    failure = xnn_status_uninitialized;
  } else if (channels == 0) {
    xnn_log_error(
      "failed to create Resize Bilinear operator with %zu channels: number of channels must be non-zero",
      channels);
    failure = xnn_status_invalid_parameter;
  } else if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Resize Bilinear operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_pixel_stride, channels);
    failure = xnn_status_invalid_parameter;
  } else if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Resize Bilinear operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_pixel_stride, channels);
    failure = xnn_status_invalid_parameter;
  } else {
    // All arguments validated: allocate a zero-filled descriptor and populate it.
    op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
    if (op != NULL) {
      op->type = xnn_operator_type_resize_bilinear_nhwc_f32;
      // NOTE(review): the micro-kernel type is tagged as "unpooling" here - confirm this is intentional.
      op->ukernel.type = xnn_ukernel_type_unpooling;
      op->flags = flags;

      op->channels = channels;
      op->input_pixel_stride = input_pixel_stride;
      op->output_pixel_stride = output_pixel_stride;

      // The operator cannot be run until it has been set up.
      op->state = xnn_run_state_invalid;

      *resize_op_out = op;
      return xnn_status_success;
    }

    xnn_log_error("failed to allocate %zu bytes for Resize Bilinear operator descriptor", sizeof(struct xnn_operator));
    failure = xnn_status_out_of_memory;
  }

  // Shared failure exit: `op` is still NULL on every path that reaches this point.
  xnn_delete_operator(op);
  return failure;
}
88 
// Sets up a previously created Resize Bilinear (NHWC, F32) operator for a
// particular input/output geometry and pair of data pointers.
//
// Parameters:
//   resize_op                    - operator created as Resize Bilinear (NHWC, F32).
//   batch_size                   - number of images; zero marks the operator as "skip".
//   input_height, input_width    - input spatial size; each must be in [1, 2**24).
//   output_height, output_width  - output spatial size; each must be in [1, 2**24).
//   input                        - input tensor data.
//   output                       - output tensor data.
//   threadpool                   - thread pool whose thread count is used to pick the tile size.
//
// Returns:
//   xnn_status_success                - the operator is ready to run (or will be skipped).
//   xnn_status_invalid_parameter      - wrong operator type, or a zero spatial dimension.
//   xnn_status_uninitialized          - xnn_params is not marked as initialized.
//   xnn_status_unsupported_parameter  - a spatial dimension is 2**24 or larger.
//   xnn_status_out_of_memory          - the indirection buffer or packed weights could not be (re)allocated.
//
// The indirection buffer and packed weights are cached in the operator and
// are only rebuilt when the input or output spatial size changes between calls.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (resize_op->type != xnn_operator_type_resize_bilinear_nhwc_f32) {
    xnn_log_error("failed to setup Resize Bilinear (NHWC, F32) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }
  // Until setup completes successfully the operator must not be runnable.
  resize_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Resize Bilinear operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup Resize Bilinear operator with %zux%zu input: input dimensions must be non-zero",
      input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (max(input_width, input_height) >= 16777216) {
    xnn_log_error(
      "failed to setup Resize Bilinear operator with %zux%zu input: "
      "input dimensions must be below 2**24",
      input_width, input_height);
    return xnn_status_unsupported_parameter;
  }

  if (output_width == 0 || output_height == 0) {
    xnn_log_error(
      "failed to setup Resize Bilinear operator with %zux%zu output: output dimensions must be non-zero",
      output_width, output_height);
    return xnn_status_invalid_parameter;
  }

  if (max(output_width, output_height) >= 16777216) {
    xnn_log_error(
      "failed to setup Resize Bilinear operator with %zux%zu output: "
      "output dimensions must be below 2**24",
      output_width, output_height);
    return xnn_status_unsupported_parameter;
  }

  if (batch_size == 0) {
    resize_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // The buffers are sized by the number of output pixels, so they only need
  // to be reallocated when that count changes.
  if (output_height * output_width != resize_op->last_output_height * resize_op->last_output_width) {
    // Invalidate the cached geometry BEFORE touching the buffers. If either
    // reallocation below fails, the buffers may no longer match the previously
    // cached geometry (e.g. the indirection buffer was already resized), so a
    // later setup call must not take the "nothing changed" fast path.
    resize_op->last_input_height = 0;
    resize_op->last_input_width = 0;
    resize_op->last_output_height = 0;
    resize_op->last_output_width = 0;

    // 4 pointers and 2 float weights are stored per output pixel.
    const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
    const size_t packed_weights_size = sizeof(float) * (output_height * output_width * 2);

    const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
      return xnn_status_out_of_memory;
    }
    resize_op->indirection_buffer = indirection_buffer;

    float* packed_weights = (float*) xnn_reallocate_memory(resize_op->packed_weights, packed_weights_size);
    if (packed_weights == NULL) {
      xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
      return xnn_status_out_of_memory;
    }
    resize_op->packed_weights = packed_weights;
  }

  const size_t input_pixel_stride_in_bytes = resize_op->input_pixel_stride * sizeof(float);
  // Rebuild the indirection buffer and weights only when the geometry differs
  // from the one they were last built for.
  if (input_height != resize_op->last_input_height ||
      input_width != resize_op->last_input_width ||
      output_height != resize_op->last_output_height ||
      output_width != resize_op->last_output_width)
  {
    const uint32_t flags = resize_op->flags;
    xnn_indirection_init_resize_bilinear2d_f32(
      input_pixel_stride_in_bytes,
      input_height, input_width,
      output_height, output_width,
      input, resize_op->indirection_buffer, resize_op->packed_weights,
      !!(flags & XNN_FLAG_ALIGN_CORNERS),
      !!(flags & XNN_FLAG_TENSORFLOW_LEGACY_MODE));

    // Remember which input pointer and geometry the indirection buffer was built for.
    resize_op->last_input = input;
    resize_op->last_input_height = input_height;
    resize_op->last_input_width = input_width;
    resize_op->last_output_height = output_height;
    resize_op->last_output_width = output_width;
  }

  const size_t output_pixel_stride_in_bytes = resize_op->output_pixel_stride * sizeof(float);
  resize_op->context.resize_bilinear = (struct resize_bilinear_context) {
    .scaled_channels = resize_op->channels * sizeof(float),
    .indirect_input = resize_op->indirection_buffer,
    // Byte distance from the input the indirection buffer was built for to the
    // current input, so a cached buffer can be reused with a different pointer.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) resize_op->last_input),
    .input_batch_stride = input_pixel_stride_in_bytes * input_height * input_width,
    .packed_weights = resize_op->packed_weights,
    .output = output,
    .output_pixel_stride = output_pixel_stride_in_bytes,
    .output_batch_stride = output_pixel_stride_in_bytes * output_height * output_width,
    .log2_wsize = 3 /* log2(2 * sizeof(float)) */,
    .ukernel = xnn_params.f32.bilinear.ukernel,
  };

  // Choose how many output pixels each task processes. With a single thread
  // the whole image is one tile; with more threads the tile is shrunk based on
  // a target of 5 tiles per thread and kept a multiple of the kernel's pixel tile.
  const size_t output_size = output_height * output_width;
  size_t output_size_tile = output_size;
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  if (num_threads > 1) {
    const size_t target_tiles_per_thread = 5;
    const size_t max_output_size_tile = divide_round_up(output_size, num_threads * target_tiles_per_thread);
    if (max_output_size_tile < output_size_tile) {
      const uint32_t output_size_subtile = xnn_params.f32.bilinear.pixel_tile;
      output_size_tile =
        min(output_size_tile,
          divide_round_up(output_size_tile, max_output_size_tile * output_size_subtile) * output_size_subtile);
    }
  }
  // Parallelize over (batch index, output pixel) with tiling along the pixel dimension.
  resize_op->compute.type = xnn_parallelization_type_2d_tile_1d;
  resize_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_resize_bilinear;
  resize_op->compute.range[0] = batch_size;
  resize_op->compute.range[1] = output_size;
  resize_op->compute.tile[0] = output_size_tile;
  resize_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
223