1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <math.h>
17 #include "nnacl/fp32/resize_fp32.h"
18 #include "nnacl/common_func.h"
19 #include "nnacl/errorcode.h"
20
CalculateCoordinate(float out,int in,int * bottom,int * top,float * bottom_weight)21 void CalculateCoordinate(float out, int in, int *bottom, int *top, float *bottom_weight) {
22 *bottom = (int)(floorf(out));
23 *bottom = *bottom >= 0 ? *bottom : 0; // extrapolate may generate neg value
24 *top = *bottom + 1 < in ? (*bottom + 1) : (in - 1);
25 float top_weight = (float)out - (float)(*bottom);
26 *bottom_weight = 1.0f - top_weight;
27 }
28
// Evaluates the piecewise cubic kernel at offset x and stores the result in *weight.
// Uses the inner polynomial for |x| <= 1, the outer polynomial for 1 < |x| <= 2,
// and 0 everywhere else. 'a' is the kernel coefficient.
static void BicubicBaseFunc(float a, const float x, float *weight) {
  const float t = fabsf(x);
  if (t <= 1) {
    *weight = ((a + 2) * t - (a + 3)) * t * t + 1;
    return;
  }
  if (t <= 2) {
    *weight = a * t * t * t - 5 * a * t * t + 8 * a * t - 4 * a;
    return;
  }
  *weight = 0;
}
39
40 // a is a coefficient
41 // W(x) = { (a + 2) * |x| * |x| * |x| - (a + 3) * |x| * |x| + 1, for |x| <= 1
42 // { a * |x| * |x| * |x| - 5 * a * |x| * |x| + 8 * a *|x| - 4 * a, for 1 < |x| < 2
43 // { 0, otherwise
// The value of 'a' depends on whether half-pixel centers are used (the scheme is the same as TF).
// In half-pixel mode, a equals -0.5; otherwise it equals -0.75.
/**
 * Computes the four source sample indices and kernel weights that contribute to
 * one output coordinate of a cubic resize.
 *
 * @param out      fractional coordinate along the source axis.
 * @param in       length of the source axis.
 * @param index    receives 4 indices, floor(out) - 1 .. floor(out) + 2, each
 *                 clamped to [0, in - 1] so border samples are replicated.
 * @param weights  receives 4 kernel weights, one per index.
 * @param a        kernel coefficient forwarded to BicubicBaseFunc.
 */
void CalculateWeightForBicubic(float out, int in, int *index, float *weights, float a) {
  const int floor_index = (int)(floorf(out));
  const float frac = out - (float)floor_index;

  for (int i = 0; i < 4; ++i) {
    const int tap = i - 1;  // taps are -1, 0, 1, 2 relative to floor(out)

    // Clamp every tap on both sides; an unclamped tap becomes a negative or
    // past-the-end row/column offset in the interpolation kernels.
    int pos = floor_index + tap;
    if (pos > in - 1) {
      pos = in - 1;
    }
    if (pos < 0) {
      pos = 0;
    }
    index[i] = pos;

    // Signed distance from the sample point to this tap; the kernel takes |x| itself.
    BicubicBaseFunc(a, (float)tap - frac, &weights[i]);
  }
}
65
/**
 * Precomputes the per-row and per-column lookup tables for bilinear resize.
 *
 * input_shape[1]/[2] are read as source height/width and output_shape[1]/[2]
 * as target height/width. For every output row h (column w) the source
 * coordinate is obtained from `calculate` and split into two neighbouring
 * indices plus the weight of the lower one.
 *
 * @param calculate         maps (output index, source length, output length) to a source coordinate.
 * @param y_bottoms, y_tops, y_bottom_weights  outputs, one entry per output row.
 * @param x_lefts, x_rights, x_left_weights    outputs, one entry per output column.
 * @return NNACL_NULL_PTR if any pointer (including `calculate`) is NULL, otherwise NNACL_OK.
 */
int PrepareResizeBilinear(const int *input_shape, const int *output_shape, CalculateOriginalCoordinate calculate,
                          int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights,
                          float *x_left_weights) {
  // `calculate` is invoked unconditionally below, so it is validated like every other pointer.
  if (input_shape == NULL || output_shape == NULL || calculate == NULL || y_bottoms == NULL || y_tops == NULL ||
      x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) {
    return NNACL_NULL_PTR;
  }

  const int in_h = input_shape[1];
  const int in_w = input_shape[2];
  const int new_height = output_shape[1];
  const int new_width = output_shape[2];

  for (int h = 0; h < new_height; h++) {
    const float actual_y = calculate(h, in_h, new_height);
    CalculateCoordinate(actual_y, in_h, y_bottoms + h, y_tops + h, y_bottom_weights + h);
  }
  for (int w = 0; w < new_width; w++) {
    const float actual_x = calculate(w, in_w, new_width);
    CalculateCoordinate(actual_x, in_w, x_lefts + w, x_rights + w, x_left_weights + w);
  }
  return NNACL_OK;
}
90
/**
 * Precomputes the per-row and per-column lookup tables for bicubic resize.
 *
 * input_shape[1]/[2] are read as source height/width and output_shape[1]/[2]
 * as target height/width. Each output row (column) gets 4 source indices and
 * 4 weights, stored contiguously at offset 4 * h (4 * w).
 *
 * @param calculate    maps (output index, source length, output length) to a source coordinate.
 * @param y_tops       output, 4 source row indices per output row.
 * @param x_lefts      output, 4 source column indices per output column.
 * @param y_weights    output, 4 weights per output row.
 * @param x_weights    output, 4 weights per output column.
 * @param cubic_coeff  kernel coefficient passed through to CalculateWeightForBicubic.
 * @return NNACL_NULL_PTR if any pointer (including `calculate`) is NULL, otherwise NNACL_OK.
 */
int PrepareResizeBicubic(const int *input_shape, const int *output_shape, CalculateOriginalCoordinate calculate,
                         int *y_tops, int *x_lefts, float *y_weights, float *x_weights, float cubic_coeff) {
  // `calculate` is invoked unconditionally below, so it is validated like every other pointer.
  if (input_shape == NULL || output_shape == NULL || calculate == NULL || y_tops == NULL || x_lefts == NULL ||
      y_weights == NULL || x_weights == NULL) {
    return NNACL_NULL_PTR;
  }

  const int in_h = input_shape[1];
  const int in_w = input_shape[2];
  const int new_height = output_shape[1];
  const int new_width = output_shape[2];

  for (int h = 0; h < new_height; h++) {
    const float actual_y = calculate(h, in_h, new_height);
    CalculateWeightForBicubic(actual_y, in_h, y_tops + 4 * h, y_weights + 4 * h, cubic_coeff);
  }
  for (int w = 0; w < new_width; w++) {
    const float actual_x = calculate(w, in_w, new_width);
    CalculateWeightForBicubic(actual_x, in_w, x_lefts + 4 * w, x_weights + 4 * w, cubic_coeff);
  }
  return NNACL_OK;
}
113
PrepareCropAndResizeBilinear(const int * input_shape,const float * boxes,const int * box_idx,const int * output_shape,int * y_bottoms,int * y_tops,int * x_lefts,int * x_rights,float * y_bottom_weights,float * x_left_weights)114 int PrepareCropAndResizeBilinear(const int *input_shape, const float *boxes, const int *box_idx,
115 const int *output_shape, int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights,
116 float *y_bottom_weights, float *x_left_weights) {
117 if (input_shape == NULL || output_shape == NULL || y_bottoms == NULL || y_tops == NULL || x_lefts == NULL ||
118 x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) {
119 return NNACL_NULL_PTR;
120 }
121 int in_h = input_shape[1];
122 int in_w = input_shape[2];
123 int batch = output_shape[0];
124 int new_height = output_shape[1];
125 int new_width = output_shape[2];
126 float actual_x;
127 float actual_y;
128
129 for (int b = 0; b < batch; b++) {
130 const float *box = boxes + b * 4;
131 float start_h = box[0];
132 float end_h = box[2];
133 float start_w = box[1];
134 float end_w = box[3];
135
136 int *y_bottom = y_bottoms + b * new_height;
137 int *y_top = y_tops + b * new_height;
138 float *y_bottom_weight = y_bottom_weights + b * new_height;
139 int *x_left = x_lefts + b * new_width;
140 int *x_right = x_rights + b * new_width;
141 float *x_left_weight = x_left_weights + b * new_width;
142 for (int h = 0; h < new_height; h++) {
143 if (new_height > 1) {
144 actual_y = start_h * (in_h - 1) + h * (end_h - start_h) * (in_h - 1) / (new_height - 1);
145 } else {
146 actual_y = 0.5 * (end_h + start_h) * (in_h - 1);
147 }
148 CalculateCoordinate(actual_y, in_h, y_bottom + h, y_top + h, y_bottom_weight + h);
149 }
150 for (int w = 0; w < new_width; w++) {
151 if (new_width > 1) {
152 actual_x = start_w * (in_w - 1) + w * (end_w - start_w) * (in_w - 1) / (new_width - 1);
153 } else {
154 actual_x = 0.5 * (end_w + start_w) * (in_w - 1);
155 }
156 CalculateCoordinate(actual_x, in_w, x_left + w, x_right + w, x_left_weight + w);
157 }
158 }
159 return NNACL_OK;
160 }
161
// Horizontally interpolates one source row into `linear_output`.
// For each of the new_width output pixels, blends the source pixels at
// x_lefts[w] and x_rights[w] (each in_c floats wide) using x_left_weights[w]
// for the left pixel and 1 - x_left_weights[w] for the right one.
// Always returns 0.
int InterpRow(const float *src_line, float *linear_output, int new_width, const float *x_left_weights,
              const int *x_lefts, const int *x_rights, int in_c) {
  for (int col = 0; col < new_width; col++) {
    const float w_left = x_left_weights[col];
    const float w_right = 1.0f - w_left;
    const float *left_px = src_line + x_lefts[col] * in_c;
    const float *right_px = src_line + x_rights[col] * in_c;
    float *dst_px = linear_output + col * in_c;
    int ch = 0;
#if defined(ENABLE_AVX)
    MS_FLOAT32X8 w_left_8 = MS_MOV256_F32(w_left);
    MS_FLOAT32X8 w_right_8 = MS_MOV256_F32(w_right);
    for (; ch <= in_c - C8NUM; ch += C8NUM) {
      MS_FLOAT32X8 lhs_8 = MS_LD256_F32(left_px + ch);
      MS_FLOAT32X8 rhs_8 = MS_LD256_F32(right_px + ch);
      MS_FLOAT32X8 mixed_8 = MS_ADD256_F32(MS_MUL256_F32(lhs_8, w_left_8), MS_MUL256_F32(rhs_8, w_right_8));
      MS_ST256_F32(dst_px + ch, mixed_8);
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    MS_FLOAT32X4 w_left_4 = MS_MOVQ_F32(w_left);
    MS_FLOAT32X4 w_right_4 = MS_MOVQ_F32(w_right);
    for (; ch <= in_c - C4NUM; ch += C4NUM) {
      MS_FLOAT32X4 lhs_4 = MS_LDQ_F32(left_px + ch);
      MS_FLOAT32X4 rhs_4 = MS_LDQ_F32(right_px + ch);
      MS_FLOAT32X4 mixed_4 = MS_ADDQ_F32(MS_MULQ_F32(lhs_4, w_left_4), MS_MULQ_F32(rhs_4, w_right_4));
      MS_STQ_F32(dst_px + ch, mixed_4);
    }
#endif
    // Scalar tail (or the whole pixel when no SIMD path is compiled in).
    for (; ch < in_c; ch++) {
      dst_px[ch] = left_px[ch] * w_left + right_px[ch] * w_right;
    }
  }
  return 0;
}
197
// Vertically blends two already row-interpolated lines into `output`.
// Every element is bottom * y_bottom_weight + top * (1 - y_bottom_weight);
// the lines hold new_width pixels of in_c floats each.
// Always returns 0.
int InterpCol(const float *bottom_line, const float *top_line, float *output, int new_width, float y_bottom_weight,
              int in_c) {
  const float y_top_weight = 1.0f - y_bottom_weight;
  // The weights are the same for every pixel, so broadcast them once.
#if defined(ENABLE_AVX)
  MS_FLOAT32X8 w_bottom_8 = MS_MOV256_F32(y_bottom_weight);
  MS_FLOAT32X8 w_top_8 = MS_MOV256_F32(y_top_weight);
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
  MS_FLOAT32X4 w_bottom_4 = MS_MOVQ_F32(y_bottom_weight);
  MS_FLOAT32X4 w_top_4 = MS_MOVQ_F32(y_top_weight);
#endif

  for (int col = 0; col < new_width; col++) {
    const float *bottom_px = bottom_line + col * in_c;
    const float *top_px = top_line + col * in_c;
    float *dst_px = output + col * in_c;
    int ch = 0;
#if defined(ENABLE_AVX)
    for (; ch <= in_c - C8NUM; ch += C8NUM) {
      MS_FLOAT32X8 lo_8 = MS_LD256_F32(bottom_px + ch);
      MS_FLOAT32X8 hi_8 = MS_LD256_F32(top_px + ch);
      MS_FLOAT32X8 mixed_8 = MS_ADD256_F32(MS_MUL256_F32(lo_8, w_bottom_8), MS_MUL256_F32(hi_8, w_top_8));
      MS_ST256_F32(dst_px + ch, mixed_8);
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    for (; ch <= in_c - C4NUM; ch += C4NUM) {
      MS_FLOAT32X4 lo_4 = MS_LDQ_F32(bottom_px + ch);
      MS_FLOAT32X4 hi_4 = MS_LDQ_F32(top_px + ch);
      MS_FLOAT32X4 mixed_4 = MS_ADDQ_F32(MS_MULQ_F32(lo_4, w_bottom_4), MS_MULQ_F32(hi_4, w_top_4));
      MS_STQ_F32(dst_px + ch, mixed_4);
    }
#endif
    // Scalar tail (or the whole pixel when no SIMD path is compiled in).
    for (; ch < in_c; ch++) {
      dst_px[ch] = bottom_px[ch] * y_bottom_weight + top_px[ch] * y_top_weight;
    }
  }
  return 0;
}
231
// Bilinear resize of output rows [h_begin, h_end) for a single image.
//
// Each output row needs two source rows (y_bottom[h], y_top[h]) interpolated
// horizontally, then blended vertically. line0 and line1 are scratch buffers
// that each hold one horizontally interpolated row (new_width * in_c floats).
// They act as a two-entry cache keyed by source row number, so a source row
// shared by consecutive output rows is not interpolated again.
void Bilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
              const int *y_bottom, const int *y_top, const int *x_left, const int *x_right,
              const float *y_bottom_weight, const float *x_left_weight, float *line0, float *line1, const int h_begin,
              const int h_end) {
  const int in_w = input_shape[2];
  const int in_c = input_shape[3];
  const int new_width = output_shape[2];
  const int out_row_size = new_width * in_c;

  float *const slot_buf[2] = {line0, line1};
  int slot_row[2] = {-1, -1};       // source row currently held by each slot, -1 = empty
  float *picked[2] = {line0, line1};  // buffers holding the bottom/top rows for the current h

  for (int h = h_begin; h < h_end; h++) {
    const int wanted[2] = {y_bottom[h], y_top[h]};
    bool slot_taken[2] = {false, false};

    for (int j = 0; j < 2; j++) {
      // Look for the wanted source row in the cache.
      int hit = -1;
      for (int k = 0; k < 2; k++) {
        if (wanted[j] == slot_row[k]) {
          hit = k;
          break;
        }
      }
      if (hit >= 0) {
        slot_taken[hit] = true;
        picked[j] = slot_buf[hit];
        continue;
      }

      // Miss: overwrite the first slot not already claimed for this output row.
      int victim = -1;
      for (int k = 0; k < 2; k++) {
        if (!slot_taken[k]) {
          victim = k;
          break;
        }
      }
      if (victim < 0) {
        continue;
      }
      slot_row[victim] = wanted[j];
      slot_taken[victim] = true;
      picked[j] = slot_buf[victim];
      InterpRow(input_data + wanted[j] * in_w * in_c, picked[j], new_width, x_left_weight, x_left, x_right, in_c);
    }

    // Vertical blend of the two interpolated rows.
    InterpCol(picked[0], picked[1], output_data + h * out_row_size, new_width, y_bottom_weight[h], in_c);
  }
}
284
ResizeBilinear(const float * input_data,float * output_data,const int * input_shape,const int * output_shape,const int * y_bottoms,const int * y_tops,const int * x_lefts,const int * x_rights,const float * y_bottom_weights,const float * x_left_weights,float * line0,float * line1,const int h_begin,const int h_end)285 int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
286 const int *y_bottoms, const int *y_tops, const int *x_lefts, const int *x_rights,
287 const float *y_bottom_weights, const float *x_left_weights, float *line0, float *line1,
288 const int h_begin, const int h_end) {
289 if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL ||
290 y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) {
291 return NNACL_NULL_PTR;
292 }
293
294 int in_b = input_shape[0];
295 int in_h = input_shape[1];
296 int in_w = input_shape[2];
297 int in_c = input_shape[3];
298 int new_height = output_shape[1];
299 int new_width = output_shape[2];
300
301 for (int b = 0; b < in_b; b++) {
302 const float *input = input_data + b * in_h * in_w * in_c;
303 float *output = output_data + b * new_height * new_width * in_c;
304 Bilinear(input, output, input_shape, output_shape, y_bottoms, y_tops, x_lefts, x_rights, y_bottom_weights,
305 x_left_weights, line0, line1, h_begin, h_end);
306 }
307 return NNACL_OK;
308 }
309
// Horizontal pass of bicubic resize for one source row.
// For each of the `width` output pixels, combines the 4 source pixels at
// lefts[4 * w .. 4 * w + 3] with weights[4 * w .. 4 * w + 3]; every pixel is
// `channel` floats wide. The result is written to dst.
void BicubicInterpRow(const float *src, float *dst, const float *weights, const int *lefts, int width, int channel) {
  for (int col = 0; col < width; col++) {
    const float *k = weights + 4 * col;
    const int *taps = lefts + 4 * col;
    const float *p0 = src + taps[0] * channel;
    const float *p1 = src + taps[1] * channel;
    const float *p2 = src + taps[2] * channel;
    const float *p3 = src + taps[3] * channel;
    float *out_px = dst + col * channel;
    int ch = 0;
#if defined(ENABLE_AVX)
    MS_FLOAT32X8 k0_8 = MS_MOV256_F32(k[0]);
    MS_FLOAT32X8 k1_8 = MS_MOV256_F32(k[1]);
    MS_FLOAT32X8 k2_8 = MS_MOV256_F32(k[2]);
    MS_FLOAT32X8 k3_8 = MS_MOV256_F32(k[3]);
    for (; ch <= channel - C8NUM; ch += C8NUM) {
      MS_FLOAT32X8 v0_8 = MS_LD256_F32(p0 + ch);
      MS_FLOAT32X8 v1_8 = MS_LD256_F32(p1 + ch);
      MS_FLOAT32X8 v2_8 = MS_LD256_F32(p2 + ch);
      MS_FLOAT32X8 v3_8 = MS_LD256_F32(p3 + ch);
      MS_FLOAT32X8 t0_8 = MS_MUL256_F32(v0_8, k0_8);
      MS_FLOAT32X8 t1_8 = MS_MUL256_F32(v1_8, k1_8);
      MS_FLOAT32X8 t2_8 = MS_MUL256_F32(v2_8, k2_8);
      MS_FLOAT32X8 t3_8 = MS_MUL256_F32(v3_8, k3_8);
      MS_FLOAT32X8 sum_8 = MS_ADD256_F32(t3_8, MS_ADD256_F32(t2_8, MS_ADD256_F32(t1_8, t0_8)));
      MS_ST256_F32(out_px + ch, sum_8);
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    MS_FLOAT32X4 k0_4 = MS_MOVQ_F32(k[0]);
    MS_FLOAT32X4 k1_4 = MS_MOVQ_F32(k[1]);
    MS_FLOAT32X4 k2_4 = MS_MOVQ_F32(k[2]);
    MS_FLOAT32X4 k3_4 = MS_MOVQ_F32(k[3]);
    for (; ch <= channel - C4NUM; ch += C4NUM) {
      MS_FLOAT32X4 v0_4 = MS_LDQ_F32(p0 + ch);
      MS_FLOAT32X4 v1_4 = MS_LDQ_F32(p1 + ch);
      MS_FLOAT32X4 v2_4 = MS_LDQ_F32(p2 + ch);
      MS_FLOAT32X4 v3_4 = MS_LDQ_F32(p3 + ch);
      MS_FLOAT32X4 t0_4 = MS_MULQ_F32(v0_4, k0_4);
      MS_FLOAT32X4 t1_4 = MS_MULQ_F32(v1_4, k1_4);
      MS_FLOAT32X4 t2_4 = MS_MULQ_F32(v2_4, k2_4);
      MS_FLOAT32X4 t3_4 = MS_MULQ_F32(v3_4, k3_4);
      MS_FLOAT32X4 sum_4 = MS_ADDQ_F32(t3_4, MS_ADDQ_F32(t2_4, MS_ADDQ_F32(t1_4, t0_4)));
      MS_STQ_F32(out_px + ch, sum_4);
    }
#endif
    // Scalar tail (or the whole pixel when no SIMD path is compiled in).
    for (; ch < channel; ch++) {
      out_px[ch] = p0[ch] * k[0] + p1[ch] * k[1] + p2[ch] * k[2] + p3[ch] * k[3];
    }
  }
}
360
// Vertical pass of bicubic resize for one output row.
// `src` holds 4 consecutive horizontally interpolated rows, each
// width * channel floats long; they are combined element-wise with
// weights[0..3] and written to dst.
void BicubicInterpCol(const float *src, float *dst, const float *weights, int width, int channel) {
  const int row_size = width * channel;
  const float *row0 = src;
  const float *row1 = src + row_size;
  const float *row2 = src + 2 * width * channel;
  const float *row3 = src + 3 * width * channel;
  // The four weights apply to every pixel of the row, so broadcast them once.
#if defined(ENABLE_AVX)
  MS_FLOAT32X8 k0_8 = MS_MOV256_F32(weights[0]);
  MS_FLOAT32X8 k1_8 = MS_MOV256_F32(weights[1]);
  MS_FLOAT32X8 k2_8 = MS_MOV256_F32(weights[2]);
  MS_FLOAT32X8 k3_8 = MS_MOV256_F32(weights[3]);
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
  MS_FLOAT32X4 k0_4 = MS_MOVQ_F32(weights[0]);
  MS_FLOAT32X4 k1_4 = MS_MOVQ_F32(weights[1]);
  MS_FLOAT32X4 k2_4 = MS_MOVQ_F32(weights[2]);
  MS_FLOAT32X4 k3_4 = MS_MOVQ_F32(weights[3]);
#endif

  for (int col = 0; col < width; col++) {
    const int base = col * channel;
    const float *p0 = row0 + base;
    const float *p1 = row1 + base;
    const float *p2 = row2 + base;
    const float *p3 = row3 + base;
    float *out_px = dst + base;
    int ch = 0;
#if defined(ENABLE_AVX)
    for (; ch <= channel - C8NUM; ch += C8NUM) {
      MS_FLOAT32X8 v0_8 = MS_LD256_F32(p0 + ch);
      MS_FLOAT32X8 v1_8 = MS_LD256_F32(p1 + ch);
      MS_FLOAT32X8 v2_8 = MS_LD256_F32(p2 + ch);
      MS_FLOAT32X8 v3_8 = MS_LD256_F32(p3 + ch);
      MS_FLOAT32X8 t0_8 = MS_MUL256_F32(v0_8, k0_8);
      MS_FLOAT32X8 t1_8 = MS_MUL256_F32(v1_8, k1_8);
      MS_FLOAT32X8 t2_8 = MS_MUL256_F32(v2_8, k2_8);
      MS_FLOAT32X8 t3_8 = MS_MUL256_F32(v3_8, k3_8);
      MS_FLOAT32X8 sum_8 = MS_ADD256_F32(t3_8, MS_ADD256_F32(t2_8, MS_ADD256_F32(t0_8, t1_8)));
      MS_ST256_F32(out_px + ch, sum_8);
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    for (; ch <= channel - C4NUM; ch += C4NUM) {
      MS_FLOAT32X4 v0_4 = MS_LDQ_F32(p0 + ch);
      MS_FLOAT32X4 v1_4 = MS_LDQ_F32(p1 + ch);
      MS_FLOAT32X4 v2_4 = MS_LDQ_F32(p2 + ch);
      MS_FLOAT32X4 v3_4 = MS_LDQ_F32(p3 + ch);
      MS_FLOAT32X4 t0_4 = MS_MULQ_F32(v0_4, k0_4);
      MS_FLOAT32X4 t1_4 = MS_MULQ_F32(v1_4, k1_4);
      MS_FLOAT32X4 t2_4 = MS_MULQ_F32(v2_4, k2_4);
      MS_FLOAT32X4 t3_4 = MS_MULQ_F32(v3_4, k3_4);
      MS_FLOAT32X4 sum_4 = MS_ADDQ_F32(t3_4, MS_ADDQ_F32(t2_4, MS_ADDQ_F32(t0_4, t1_4)));
      MS_STQ_F32(out_px + ch, sum_4);
    }
#endif
    // Scalar tail (or the whole pixel when no SIMD path is compiled in).
    for (; ch < channel; ch++) {
      out_px[ch] = p0[ch] * weights[0] + p1[ch] * weights[1] + p2[ch] * weights[2] + p3[ch] * weights[3];
    }
  }
}
414
// Bicubic resize of output rows [h_begin, h_end) for a single image.
// For every output row the 4 contributing source rows (y_tops[4 * h + i]) are
// interpolated horizontally into consecutive slices of line_buffer, which must
// hold 4 * new_width * in_c floats, and then combined vertically.
void Bicubic(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
             const int *y_tops, const int *x_lefts, const float *y_weights, const float *x_weights, float *line_buffer,
             const int h_begin, const int h_end) {
  const int in_w = input_shape[2];
  const int channel = input_shape[3];
  const int out_w = output_shape[2];
  const int out_row_size = out_w * channel;

  for (int h = h_begin; h < h_end; h++) {
    const int *src_rows = y_tops + 4 * h;
    float *scratch = line_buffer;
    for (int tap = 0; tap < 4; ++tap) {
      const float *src_row = input_data + src_rows[tap] * in_w * channel;
      BicubicInterpRow(src_row, scratch, x_weights, x_lefts, out_w, channel);
      scratch += out_row_size;
    }
    BicubicInterpCol(line_buffer, output_data + h * out_row_size, y_weights + 4 * h, out_w, channel);
  }
}
431
ResizeBicubic(const float * input_data,float * output_data,const int * input_shape,const int * output_shape,const int * y_tops,const int * x_lefts,const float * y_weights,const float * x_weights,float * line_buffer,const int h_begin,const int h_end)432 int ResizeBicubic(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
433 const int *y_tops, const int *x_lefts, const float *y_weights, const float *x_weights,
434 float *line_buffer, const int h_begin, const int h_end) {
435 if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_tops == NULL ||
436 x_lefts == NULL || y_weights == NULL || x_weights == NULL) {
437 return NNACL_NULL_PTR;
438 }
439 int input_cube_per_batch = input_shape[1] * input_shape[2] * input_shape[3];
440 int output_cube_per_batch = output_shape[1] * output_shape[2] * input_shape[3];
441 for (int b = 0; b < input_shape[0]; b++) {
442 const float *input = input_data + b * input_cube_per_batch;
443 float *output = output_data + b * output_cube_per_batch;
444 Bicubic(input, output, input_shape, output_shape, y_tops, x_lefts, y_weights, x_weights, line_buffer, h_begin,
445 h_end);
446 }
447 return NNACL_OK;
448 }
449
RewriteExtrapolationValue(const float * input_data,float * output_data,const int * box_idx,const float * boxes,const CropAndResizeParameter * param,const int * input_shape,const int * output_shape,const int * y_tops,const int h_begin,const int h_end)450 int RewriteExtrapolationValue(const float *input_data, float *output_data, const int *box_idx, const float *boxes,
451 const CropAndResizeParameter *param, const int *input_shape, const int *output_shape,
452 const int *y_tops, const int h_begin, const int h_end) {
453 if (input_data == NULL || output_data == NULL || box_idx == NULL || param == NULL || input_shape == NULL ||
454 output_shape == NULL) {
455 return NNACL_NULL_PTR;
456 }
457 int batch = output_shape[0];
458 int new_height = output_shape[1];
459 int new_width = output_shape[2];
460 int new_channel = output_shape[3];
461 int input_h = input_shape[1];
462 int input_w = input_shape[2];
463
464 for (int b = 0; b < batch; b++) {
465 float *output = output_data + b * new_height * new_width * new_channel;
466 const float extrapolation_value = param->extrapolation_value_;
467 const float *box = boxes + 4 * b;
468 float start_h = box[0];
469 float end_h = box[2];
470 float start_w = box[1];
471 float end_w = box[3];
472 float actual_y, actual_x;
473 for (int h = h_begin; h < h_end; ++h) {
474 if (new_height > 1) {
475 actual_y = start_h * (input_h - 1) + h * (end_h - start_h) * (input_h - 1) / (new_height - 1);
476 } else {
477 actual_y = 0.5 * (end_h + start_h) * (input_h - 1);
478 }
479 if (actual_y < 0 || actual_y > input_h - 1) {
480 float *output_data_base = output + h * new_width * new_channel;
481 for (int x = 0; x < new_width; ++x) {
482 for (int d = 0; d < new_channel; ++d) {
483 *output_data_base = extrapolation_value;
484 output_data_base++;
485 }
486 }
487 }
488 for (int w = 0; w < new_width; ++w) {
489 if (new_width > 1) {
490 actual_x = start_w * (input_w - 1) + w * (end_w - start_w) * (input_w - 1) / (new_width - 1);
491 } else {
492 actual_x = 0.5 * (end_w + start_w) * (input_w - 1);
493 }
494 if (actual_x < 0 || actual_x > input_w - 1) {
495 float *output_data_base = output + h * new_width * new_channel + w * new_channel;
496 for (int d = 0; d < new_channel; ++d) {
497 output_data_base[d] = extrapolation_value;
498 }
499 }
500 }
501 }
502 }
503 return NNACL_OK;
504 }
505
CropAndResizeBilinear(const float * input_data,float * output_data,const int * box_idx,const float * boxes,const CropAndResizeParameter * param,const int * input_shape,const int * output_shape,const int * y_bottoms,const int * y_tops,const int * x_lefts,const int * x_rights,const float * y_bottom_weights,const float * x_left_weights,float * line0,float * line1,const int h_begin,const int h_end)506 int CropAndResizeBilinear(const float *input_data, float *output_data, const int *box_idx, const float *boxes,
507 const CropAndResizeParameter *param, const int *input_shape, const int *output_shape,
508 const int *y_bottoms, const int *y_tops, const int *x_lefts, const int *x_rights,
509 const float *y_bottom_weights, const float *x_left_weights, float *line0, float *line1,
510 const int h_begin, const int h_end) {
511 if (input_data == NULL || output_data == NULL || box_idx == NULL || param == NULL || input_shape == NULL ||
512 output_shape == NULL || y_bottoms == NULL || y_tops == NULL || x_lefts == NULL || x_rights == NULL ||
513 y_bottom_weights == NULL || x_left_weights == NULL) {
514 return NNACL_NULL_PTR;
515 }
516 int batch = output_shape[0];
517 int new_height = output_shape[1];
518 int new_width = output_shape[2];
519 int new_channel = output_shape[3];
520 int input_h = input_shape[1];
521 int input_w = input_shape[2];
522
523 for (int b = 0; b < batch; b++) {
524 const float *cur_img = input_data + box_idx[b] * input_h * input_w * new_channel;
525 const int *y_bottom = y_bottoms + b * new_height;
526 const int *y_top = y_tops + b * new_height;
527 const float *y_bottom_weight = y_bottom_weights + b * new_height;
528 const int *x_left = x_lefts + b * new_width;
529 const int *x_right = x_rights + b * new_width;
530 const float *x_left_weight = x_left_weights + b * new_width;
531 float *output = output_data + b * new_height * new_width * new_channel;
532
533 Bilinear(cur_img, output, input_shape, output_shape, y_bottom, y_top, x_left, x_right, y_bottom_weight,
534 x_left_weight, line0, line1, h_begin, h_end);
535 }
536 RewriteExtrapolationValue(input_data, output_data, box_idx, boxes, param, input_shape, output_shape, y_tops, h_begin,
537 h_end);
538 return NNACL_OK;
539 }
540
/**
 * Nearest-neighbour resize. Thread `tid` of `thread_num` handles output rows
 * tid, tid + thread_num, tid + 2 * thread_num, ...
 *
 * The source coordinate of each output row/column comes from `calculate`; it
 * is rounded to nearest when coordinate_transform_mode == 1 and floored
 * otherwise, then clamped to the valid source range. input_shape[3] floats
 * are copied per pixel.
 *
 * @return NNACL_PARAM_INVALID if thread_num <= 0 or tid < 0, NNACL_NULL_PTR if
 *         any pointer is NULL, otherwise NNACL_OK.
 */
int ResizeNearestNeighbor(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
                          CalculateOriginalCoordinate calculate, int coordinate_transform_mode, int tid,
                          int thread_num) {
  // A non-positive stride would never terminate the row loop, and a negative
  // tid would address rows before the start of the output.
  if (thread_num <= 0 || tid < 0) {
    return NNACL_PARAM_INVALID;
  }
  if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || calculate == NULL) {
    return NNACL_NULL_PTR;
  }

  const int in_h = input_shape[1];
  const int in_w = input_shape[2];
  const int c = input_shape[3];
  const int out_h = output_shape[1];
  const int out_w = output_shape[2];
  const bool align_corners = coordinate_transform_mode == 1;

  for (int batch = 0; batch < output_shape[0]; batch++) {
    for (int y = tid; y < out_h; y += thread_num) {
      const float actual_y = calculate(y, in_h, out_h);
      int input_y = align_corners ? (int)(roundf(actual_y)) : (int)(floorf(actual_y));
      // Keep the source row inside the image so the copy below stays in bounds.
      if (input_y > in_h - 1) {
        input_y = in_h - 1;
      }
      if (input_y < 0) {
        input_y = 0;
      }
      for (int x = 0; x < out_w; x++) {
        const float actual_x = calculate(x, in_w, out_w);
        int input_x = align_corners ? (int)(roundf(actual_x)) : (int)(floorf(actual_x));
        // Same clamp for the source column.
        if (input_x > in_w - 1) {
          input_x = in_w - 1;
        }
        if (input_x < 0) {
          input_x = 0;
        }
        const int in_offset = Offset(input_shape, batch, input_y, input_x, 0);
        const int out_offset = Offset(output_shape, batch, y, x, 0);
        memcpy(output_data + out_offset, input_data + in_offset, c * sizeof(float));
      }
    }
  }
  return NNACL_OK;
}
574
// "Asymmetric" coordinate transform: divides the output index by the
// resize ratio length_resized / length_original.
float CalculateAsymmetric(int x_resized, int length_original, int length_resized) {
  const float ratio = (float)length_resized / (float)length_original;
  const float source_pos = (float)x_resized / ratio;
  return source_pos;
}
579
/**
 * "Align corners" coordinate transform: the first and last output samples map
 * onto the first and last source samples, i.e. the output index is divided by
 * (length_resized - 1) / (length_original - 1).
 *
 * When either axis has at most one sample the ratio is undefined (division by
 * zero, giving NaN or infinity); 0 is returned so the single sample maps to
 * source position 0.
 */
float CalculateAlignCorners(int x_resized, int length_original, int length_resized) {
  if (length_resized <= 1 || length_original <= 1) {
    return 0.0f;
  }
  float scale = (float)(length_resized - 1) / (float)(length_original - 1);
  return (float)(x_resized) / scale;
}
584
// "Half pixel" coordinate transform: shifts the output index to the pixel
// centre, divides by the resize ratio, shifts back, and never returns a
// value below 0.
float CalculateHalfPixel(int x_resized, int length_original, int length_resized) {
  const float ratio = (float)(length_resized) / (float)(length_original);
  const float source_pos = (float)(x_resized + 0.5) / ratio - 0.5;
  if (source_pos > 0) {
    return source_pos;
  }
  return 0;
}
590