1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <assert.h>
7 #include <math.h>
8 #include <stdbool.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13
14 #include <xnnpack.h>
15 #include <xnnpack/allocator.h>
16 #include <xnnpack/operator.h>
17 #include <xnnpack/log.h>
18 #include <xnnpack/common.h>
19 #include <xnnpack/math.h>
20 #include <xnnpack/params.h>
21 #include <xnnpack/indirection.h>
22
// Creates a Resize Bilinear (NHWC, F32) operator descriptor.
//
// Checks, in order, that XNNPACK is initialized, that `channels` is non-zero,
// and that both pixel strides are at least `channels`. It then allocates the
// descriptor with xnn_allocate_zero_simd_memory, records the parameters in it,
// marks its run state as invalid (it has not been set up yet), and stores it
// in `*resize_op_out`.
//
// Returns:
//   xnn_status_success            - descriptor stored in `*resize_op_out`.
//   xnn_status_uninitialized      - xnn_params.initialized is false.
//   xnn_status_invalid_parameter  - zero channels, or a stride below `channels`.
//   xnn_status_out_of_memory      - descriptor allocation failed.
// On any failure `*resize_op_out` is left untouched.
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    xnn_operator_t* resize_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status status;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Resize Bilinear operator: XNNPACK is not initialized");
    status = xnn_status_uninitialized;
    goto fail;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Resize Bilinear operator with %zu channels: number of channels must be non-zero",
      channels);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Resize Bilinear operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_pixel_stride, channels);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Resize Bilinear operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_pixel_stride, channels);
    status = xnn_status_invalid_parameter;
    goto fail;
  }

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Resize Bilinear operator descriptor", sizeof(struct xnn_operator));
    status = xnn_status_out_of_memory;
    goto fail;
  }

  // Operator identity and behaviour flags.
  op->type = xnn_operator_type_resize_bilinear_nhwc_f32;
  // NOTE(review): the micro-kernel type is tagged as "unpooling"; confirm this
  // is the intended tag for a resize operator.
  op->ukernel.type = xnn_ukernel_type_unpooling;
  op->flags = flags;

  // Tensor layout captured at creation time.
  op->channels = channels;
  op->input_pixel_stride = input_pixel_stride;
  op->output_pixel_stride = output_pixel_stride;

  // Not runnable until the setup function has been called.
  op->state = xnn_run_state_invalid;

  *resize_op_out = op;
  return xnn_status_success;

fail:
  // `op` is NULL on every path that reaches here; the call is kept so the
  // failure path behaves exactly as before.
  xnn_delete_operator(op);
  return status;
}
88
// Configures a Resize Bilinear (NHWC, F32) operator for a concrete batch size,
// input/output spatial shape, and input/output buffers.
//
// The operator caches the shape it was last set up with:
//   - the indirection buffer and packed weights are reallocated only when the
//     number of output pixels changes;
//   - they are re-initialized only when any of the four spatial dimensions
//     changes.
// When only the `input` pointer changes, the existing indirection buffer is
// reused and the difference from the pointer it was built with is passed to
// the compute context as `input_offset`.
//
// Returns:
//   xnn_status_success                - operator is ready to run (or marked as
//                                       "skip" when `batch_size` is 0).
//   xnn_status_invalid_parameter      - wrong operator type, or a zero input or
//                                       output dimension.
//   xnn_status_uninitialized          - xnn_params.initialized is false.
//   xnn_status_unsupported_parameter  - a spatial dimension is >= 2**24.
//   xnn_status_out_of_memory          - buffer (re)allocation failed, or the
//                                       required buffer size does not fit in
//                                       size_t.
// On every failure the operator's run state is left as invalid.
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
    xnn_operator_t resize_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    size_t output_height,
    size_t output_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (resize_op->type != xnn_operator_type_resize_bilinear_nhwc_f32) {
    xnn_log_error("failed to setup Resize Bilinear (NHWC, F32) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }
  // Stay invalid until every step below has succeeded.
  resize_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Resize Bilinear operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup Resize Bilinear operator with %zux%zu input: input dimensions must be non-zero",
      input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (max(input_width, input_height) >= 16777216) {
    xnn_log_error(
      "failed to setup Resize Bilinear operator with %zux%zu input: "
      "input dimensions must be below 2**24",
      input_width, input_height);
    return xnn_status_unsupported_parameter;
  }

  if (output_width == 0 || output_height == 0) {
    xnn_log_error(
      "failed to setup Resize Bilinear operator with %zux%zu output: output dimensions must be non-zero",
      output_width, output_height);
    return xnn_status_invalid_parameter;
  }

  if (max(output_width, output_height) >= 16777216) {
    xnn_log_error(
      "failed to setup Resize Bilinear operator with %zux%zu output: "
      "output dimensions must be below 2**24",
      output_width, output_height);
    return xnn_status_unsupported_parameter;
  }

  if (batch_size == 0) {
    resize_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // Each output pixel needs 4 pointers in the indirection buffer and 2 floats
  // of packed weights. With a 32-bit size_t the byte counts below can wrap even
  // though both dimensions are below 2**24, so reject such shapes up front.
  // Reported as out-of-memory because a buffer that large cannot be allocated.
  // output_width is known to be non-zero here.
  const size_t max_bytes_per_output_pixel = max(sizeof(void*) * 4, sizeof(float) * 2);
  if (output_height > SIZE_MAX / max_bytes_per_output_pixel / output_width) {
    xnn_log_error(
      "failed to setup Resize Bilinear operator with %zux%zu output: "
      "size of internal buffers exceeds the addressable range",
      output_width, output_height);
    return xnn_status_out_of_memory;
  }

  const size_t output_size = output_height * output_width;
  if (output_size != resize_op->last_output_height * resize_op->last_output_width) {
    const size_t indirection_buffer_size = sizeof(void*) * (output_size * 4);
    const size_t packed_weights_size = sizeof(float) * (output_size * 2);

    const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
    if (indirection_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
      return xnn_status_out_of_memory;
    }
    resize_op->indirection_buffer = indirection_buffer;

    // The indirection buffer no longer matches the cached output shape. Forget
    // that shape now, so that if the next allocation fails a later setup with
    // the old shape cannot skip reallocation and re-initialization and run
    // over an undersized buffer.
    resize_op->last_output_height = 0;
    resize_op->last_output_width = 0;

    float* packed_weights = (float*) xnn_reallocate_memory(resize_op->packed_weights, packed_weights_size);
    if (packed_weights == NULL) {
      xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
      return xnn_status_out_of_memory;
    }
    resize_op->packed_weights = packed_weights;
  }

  const size_t input_pixel_stride_in_bytes = resize_op->input_pixel_stride * sizeof(float);
  if (input_height != resize_op->last_input_height ||
      input_width != resize_op->last_input_width ||
      output_height != resize_op->last_output_height ||
      output_width != resize_op->last_output_width)
  {
    // Spatial shape changed: rebuild the indirection pointers and the
    // interpolation weights relative to the current `input` pointer.
    const uint32_t flags = resize_op->flags;
    xnn_indirection_init_resize_bilinear2d_f32(
      input_pixel_stride_in_bytes,
      input_height, input_width,
      output_height, output_width,
      input, resize_op->indirection_buffer, resize_op->packed_weights,
      !!(flags & XNN_FLAG_ALIGN_CORNERS),
      !!(flags & XNN_FLAG_TENSORFLOW_LEGACY_MODE));

    resize_op->last_input = input;
    resize_op->last_input_height = input_height;
    resize_op->last_input_width = input_width;
    resize_op->last_output_height = output_height;
    resize_op->last_output_width = output_width;
  }

  const size_t output_pixel_stride_in_bytes = resize_op->output_pixel_stride * sizeof(float);
  resize_op->context.resize_bilinear = (struct resize_bilinear_context) {
    .scaled_channels = resize_op->channels * sizeof(float),
    .indirect_input = resize_op->indirection_buffer,
    // Byte distance from the pointer the indirection buffer was built with to
    // the current input; zero when the buffer was just rebuilt above.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) resize_op->last_input),
    .input_batch_stride = input_pixel_stride_in_bytes * input_height * input_width,
    .packed_weights = resize_op->packed_weights,
    .output = output,
    .output_pixel_stride = output_pixel_stride_in_bytes,
    .output_batch_stride = output_pixel_stride_in_bytes * output_height * output_width,
    .log2_wsize = 3 /* log2(2 * sizeof(float)) */,
    .ukernel = xnn_params.f32.bilinear.ukernel,
  };

  // Choose the tile size along the output-pixel dimension. With one thread the
  // whole range is a single tile; with more threads the tile is shrunk using
  // the target tiles-per-thread count and the micro-kernel's pixel tile.
  size_t output_size_tile = output_size;
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  if (num_threads > 1) {
    const size_t target_tiles_per_thread = 5;
    const size_t max_output_size_tile = divide_round_up(output_size, num_threads * target_tiles_per_thread);
    if (max_output_size_tile < output_size_tile) {
      const uint32_t output_size_subtile = xnn_params.f32.bilinear.pixel_tile;
      output_size_tile =
        min(output_size_tile,
            divide_round_up(output_size_tile, max_output_size_tile * output_size_subtile) * output_size_subtile);
    }
  }

  // Parallelize over (batch index, output pixel), tiled along output pixels.
  resize_op->compute.type = xnn_parallelization_type_2d_tile_1d;
  resize_op->compute.task_2d_tile_1d = (pthreadpool_task_2d_tile_1d_t) xnn_compute_resize_bilinear;
  resize_op->compute.range[0] = batch_size;
  resize_op->compute.range[1] = output_size;
  resize_op->compute.tile[0] = output_size_tile;
  resize_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
223