• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 #include <math.h>
17 #include "nnacl/fp32/resize_fp32.h"
18 #include "nnacl/common_func.h"
19 #include "nnacl/errorcode.h"
20 
CalculateCoordinate(float out,int in,int * bottom,int * top,float * bottom_weight)21 void CalculateCoordinate(float out, int in, int *bottom, int *top, float *bottom_weight) {
22   *bottom = (int)(floorf(out));
23   *bottom = *bottom >= 0 ? *bottom : 0;  // extrapolate may generate neg value
24   *top = *bottom + 1 < in ? (*bottom + 1) : (in - 1);
25   float top_weight = (float)out - (float)(*bottom);
26   *bottom_weight = 1.0f - top_weight;
27 }
28 
// Evaluates the piecewise cubic interpolation kernel with coefficient `a` at offset `x`
// and stores the result in *weight. Only |x| matters; offsets with |x| > 2 (and NaN, for
// which every comparison below is false) produce a weight of 0.
static void BicubicBaseFunc(float a, const float x, float *weight) {
  const float dist = fabsf(x);
  if (dist <= 1) {
    // inner lobe of the kernel
    *weight = ((a + 2) * dist - (a + 3)) * dist * dist + 1;
    return;
  }
  if (dist <= 2) {
    // outer lobe; evaluates to 0 at dist == 2
    *weight = a * dist * dist * dist - 5 * a * dist * dist + 8 * a * dist - 4 * a;
    return;
  }
  *weight = 0;
}
39 
// 'a' is the coefficient of the cubic interpolation kernel:
// W(x) = { (a + 2) * |x| * |x| * |x| - (a + 3) * |x| * |x| + 1,           for |x| <= 1
//        { a * |x| * |x| * |x| - 5 * a * |x| * |x| + 8 * a * |x| - 4 * a, for 1 < |x| < 2
//        { 0,                                                             otherwise
// The value of 'a' depends on whether half-pixel centers are used (the scheme is the same as
// TensorFlow's): in half-pixel mode 'a' is -0.5, otherwise it is -0.75.
//
// Computes the four source sample indices around the fractional coordinate `out` and the
// kernel weight of each.
//
//   out      fractional coordinate along the source axis
//   in       length of the source axis
//   index    receives 4 sample indices (floor(out) - 1 .. floor(out) + 2), each clamped to
//            [0, in - 1] so that no tap can address a sample outside the source
//   weights  receives the 4 kernel weights, one per tap
//   a        kernel coefficient passed through to BicubicBaseFunc
void CalculateWeightForBicubic(float out, int in, int *index, float *weights, float a) {
  const int last = in > 0 ? in - 1 : 0;
  const int floor_index = (int)(floorf(out));
  // fractional part of the coordinate, measured from the tap at floor_index
  const float frac = out - (float)floor_index;

  for (int i = 0; i < 4; ++i) {
    int tap = floor_index - 1 + i;
    if (tap < 0) {
      tap = 0;
    }
    if (tap > last) {
      tap = last;
    }
    index[i] = tap;
    // Signed offset of tap i from the sampling position; the weight is computed from the
    // unclamped offset, BicubicBaseFunc takes the absolute value itself.
    BicubicBaseFunc(a, (float)(i - 1) - frac, &weights[i]);
  }
}
65 
// Precomputes, for every output row and every output column, the two neighbouring source
// indices and the weight of the lower/left one used by bilinear resize.
//
//   input_shape       source shape; elements [1] and [2] are read as height and width
//   output_shape      target shape; elements [1] and [2] are read as height and width
//   calculate         maps an output index to a fractional source coordinate
//   y_bottoms/y_tops  receive output_shape[1] lower/upper source row indices
//   x_lefts/x_rights  receive output_shape[2] left/right source column indices
//   y_bottom_weights  receives output_shape[1] weights of the lower row
//   x_left_weights    receives output_shape[2] weights of the left column
//
// Returns NNACL_NULL_PTR if any pointer argument (including `calculate`) is NULL,
// NNACL_OK otherwise.
int PrepareResizeBilinear(const int *input_shape, const int *output_shape, CalculateOriginalCoordinate calculate,
                          int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights,
                          float *x_left_weights) {
  if (input_shape == NULL || output_shape == NULL || calculate == NULL || y_bottoms == NULL || y_tops == NULL ||
      x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) {
    return NNACL_NULL_PTR;
  }

  int in_h = input_shape[1];
  int in_w = input_shape[2];

  int new_height = output_shape[1];
  int new_width = output_shape[2];

  // vertical axis: one entry per output row
  for (int h = 0; h < new_height; h++) {
    float actual_y = calculate(h, in_h, new_height);
    CalculateCoordinate(actual_y, in_h, y_bottoms + h, y_tops + h, y_bottom_weights + h);
  }
  // horizontal axis: one entry per output column
  for (int w = 0; w < new_width; w++) {
    float actual_x = calculate(w, in_w, new_width);
    CalculateCoordinate(actual_x, in_w, x_lefts + w, x_rights + w, x_left_weights + w);
  }
  return NNACL_OK;
}
90 
// Precomputes, for every output row and every output column, the four source sample indices
// and four kernel weights used by bicubic resize.
//
//   input_shape   source shape; elements [1] and [2] are read as height and width
//   output_shape  target shape; elements [1] and [2] are read as height and width
//   calculate     maps an output index to a fractional source coordinate
//   y_tops        receives 4 source row indices per output row (4 * output_shape[1] ints)
//   x_lefts       receives 4 source column indices per output column (4 * output_shape[2] ints)
//   y_weights     receives 4 weights per output row
//   x_weights     receives 4 weights per output column
//   cubic_coeff   kernel coefficient forwarded to CalculateWeightForBicubic
//
// Returns NNACL_NULL_PTR if any pointer argument (including `calculate`) is NULL,
// NNACL_OK otherwise.
int PrepareResizeBicubic(const int *input_shape, const int *output_shape, CalculateOriginalCoordinate calculate,
                         int *y_tops, int *x_lefts, float *y_weights, float *x_weights, float cubic_coeff) {
  if (input_shape == NULL || output_shape == NULL || calculate == NULL || y_tops == NULL || x_lefts == NULL ||
      y_weights == NULL || x_weights == NULL) {
    return NNACL_NULL_PTR;
  }

  int in_h = input_shape[1];
  int in_w = input_shape[2];
  int new_height = output_shape[1];
  int new_width = output_shape[2];

  // each output position owns a group of 4 consecutive indices and 4 consecutive weights
  for (int h = 0; h < new_height; h++) {
    float actual_y = calculate(h, in_h, new_height);
    CalculateWeightForBicubic(actual_y, in_h, y_tops + 4 * h, y_weights + 4 * h, cubic_coeff);
  }
  for (int w = 0; w < new_width; w++) {
    float actual_x = calculate(w, in_w, new_width);
    CalculateWeightForBicubic(actual_x, in_w, x_lefts + 4 * w, x_weights + 4 * w, cubic_coeff);
  }
  return NNACL_OK;
}
113 
PrepareCropAndResizeBilinear(const int * input_shape,const float * boxes,const int * box_idx,const int * output_shape,int * y_bottoms,int * y_tops,int * x_lefts,int * x_rights,float * y_bottom_weights,float * x_left_weights)114 int PrepareCropAndResizeBilinear(const int *input_shape, const float *boxes, const int *box_idx,
115                                  const int *output_shape, int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights,
116                                  float *y_bottom_weights, float *x_left_weights) {
117   if (input_shape == NULL || output_shape == NULL || y_bottoms == NULL || y_tops == NULL || x_lefts == NULL ||
118       x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) {
119     return NNACL_NULL_PTR;
120   }
121   int in_h = input_shape[1];
122   int in_w = input_shape[2];
123   int batch = output_shape[0];
124   int new_height = output_shape[1];
125   int new_width = output_shape[2];
126   float actual_x;
127   float actual_y;
128 
129   for (int b = 0; b < batch; b++) {
130     const float *box = boxes + b * 4;
131     float start_h = box[0];
132     float end_h = box[2];
133     float start_w = box[1];
134     float end_w = box[3];
135 
136     int *y_bottom = y_bottoms + b * new_height;
137     int *y_top = y_tops + b * new_height;
138     float *y_bottom_weight = y_bottom_weights + b * new_height;
139     int *x_left = x_lefts + b * new_width;
140     int *x_right = x_rights + b * new_width;
141     float *x_left_weight = x_left_weights + b * new_width;
142     for (int h = 0; h < new_height; h++) {
143       if (new_height > 1) {
144         actual_y = start_h * (in_h - 1) + h * (end_h - start_h) * (in_h - 1) / (new_height - 1);
145       } else {
146         actual_y = 0.5 * (end_h + start_h) * (in_h - 1);
147       }
148       CalculateCoordinate(actual_y, in_h, y_bottom + h, y_top + h, y_bottom_weight + h);
149     }
150     for (int w = 0; w < new_width; w++) {
151       if (new_width > 1) {
152         actual_x = start_w * (in_w - 1) + w * (end_w - start_w) * (in_w - 1) / (new_width - 1);
153       } else {
154         actual_x = 0.5 * (end_w + start_w) * (in_w - 1);
155       }
156       CalculateCoordinate(actual_x, in_w, x_left + w, x_right + w, x_left_weight + w);
157     }
158   }
159   return NNACL_OK;
160 }
161 
// Horizontally interpolates one source row into `new_width` output pixels of `in_c` channels.
// For output pixel i the source pixels x_lefts[i] and x_rights[i] are blended with weights
// x_left_weights[i] and 1 - x_left_weights[i]. Pixels are stored channel-interleaved.
// Vector paths (AVX, then NEON/SSE) handle groups of channels; the scalar loop finishes the
// remainder. Always returns 0.
int InterpRow(const float *src_line, float *linear_output, int new_width, const float *x_left_weights,
              const int *x_lefts, const int *x_rights, int in_c) {
  for (int col = 0; col < new_width; col++) {
    const float weight_l = x_left_weights[col];
    const float weight_r = 1.0f - weight_l;
    const float *src_l = src_line + x_lefts[col] * in_c;
    const float *src_r = src_line + x_rights[col] * in_c;
    float *dst = linear_output + col * in_c;
    int ch = 0;
#if defined(ENABLE_AVX)
    MS_FLOAT32X8 weight_l_8 = MS_MOV256_F32(weight_l);
    MS_FLOAT32X8 weight_r_8 = MS_MOV256_F32(weight_r);
    while (ch <= in_c - C8NUM) {
      MS_FLOAT32X8 lhs_8 = MS_LD256_F32(src_l + ch);
      MS_FLOAT32X8 rhs_8 = MS_LD256_F32(src_r + ch);
      MS_FLOAT32X8 mixed_8 = MS_ADD256_F32(MS_MUL256_F32(lhs_8, weight_l_8), MS_MUL256_F32(rhs_8, weight_r_8));
      MS_ST256_F32(dst + ch, mixed_8);
      ch += C8NUM;
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    MS_FLOAT32X4 weight_l_4 = MS_MOVQ_F32(weight_l);
    MS_FLOAT32X4 weight_r_4 = MS_MOVQ_F32(weight_r);
    while (ch <= in_c - C4NUM) {
      MS_FLOAT32X4 lhs_4 = MS_LDQ_F32(src_l + ch);
      MS_FLOAT32X4 rhs_4 = MS_LDQ_F32(src_r + ch);
      MS_FLOAT32X4 mixed_4 = MS_ADDQ_F32(MS_MULQ_F32(lhs_4, weight_l_4), MS_MULQ_F32(rhs_4, weight_r_4));
      MS_STQ_F32(dst + ch, mixed_4);
      ch += C4NUM;
    }
#endif
    // scalar tail (or the whole pixel when no vector path is compiled in)
    while (ch < in_c) {
      dst[ch] = src_l[ch] * weight_l + src_r[ch] * weight_r;
      ch++;
    }
  }
  return 0;
}
197 
// Vertically blends two already row-interpolated lines into one output line:
// output = bottom_line * y_bottom_weight + top_line * (1 - y_bottom_weight), element by element,
// for `new_width` pixels of `in_c` interleaved channels. Vector paths (AVX, then NEON/SSE)
// handle groups of channels; the scalar loop finishes the remainder. Always returns 0.
int InterpCol(const float *bottom_line, const float *top_line, float *output, int new_width, float y_bottom_weight,
              int in_c) {
  const float y_top_weight = 1.0f - y_bottom_weight;
#if defined(ENABLE_AVX)
  MS_FLOAT32X8 weight_b_8 = MS_MOV256_F32(y_bottom_weight);
  MS_FLOAT32X8 weight_t_8 = MS_MOV256_F32(y_top_weight);
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
  MS_FLOAT32X4 weight_b_4 = MS_MOVQ_F32(y_bottom_weight);
  MS_FLOAT32X4 weight_t_4 = MS_MOVQ_F32(y_top_weight);
#endif
  for (int col = 0; col < new_width; col++) {
    const float *px_b = bottom_line + col * in_c;
    const float *px_t = top_line + col * in_c;
    float *px_out = output + col * in_c;
    int ch = 0;
#if defined(ENABLE_AVX)
    while (ch <= in_c - C8NUM) {
      MS_FLOAT32X8 lower_8 = MS_LD256_F32(px_b + ch);
      MS_FLOAT32X8 upper_8 = MS_LD256_F32(px_t + ch);
      MS_FLOAT32X8 mixed_8 = MS_ADD256_F32(MS_MUL256_F32(lower_8, weight_b_8), MS_MUL256_F32(upper_8, weight_t_8));
      MS_ST256_F32(px_out + ch, mixed_8);
      ch += C8NUM;
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    while (ch <= in_c - C4NUM) {
      MS_FLOAT32X4 lower_4 = MS_LDQ_F32(px_b + ch);
      MS_FLOAT32X4 upper_4 = MS_LDQ_F32(px_t + ch);
      MS_FLOAT32X4 mixed_4 = MS_ADDQ_F32(MS_MULQ_F32(lower_4, weight_b_4), MS_MULQ_F32(upper_4, weight_t_4));
      MS_STQ_F32(px_out + ch, mixed_4);
      ch += C4NUM;
    }
#endif
    // scalar tail (or the whole pixel when no vector path is compiled in)
    while (ch < in_c) {
      px_out[ch] = px_b[ch] * y_bottom_weight + px_t[ch] * y_top_weight;
      ch++;
    }
  }
  return 0;
}
231 
// Bilinear interpolation of output rows [h_begin, h_end) for a single image.
//
// For each output row the two source rows y_bottom[h] and y_top[h] are first interpolated
// horizontally (InterpRow) into the scratch lines line0/line1 and then blended vertically
// (InterpCol) with y_bottom_weight[h]. The two scratch lines act as a two-entry cache keyed
// by source row number: a source row that is still held from the previous output row is
// reused instead of being interpolated again.
//
// input_shape[2] and input_shape[3] are read as source width and channel count,
// output_shape[2] as output width. Each scratch line must hold new_width * in_c floats.
void Bilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
              const int *y_bottom, const int *y_top, const int *x_left, const int *x_right,
              const float *y_bottom_weight, const float *x_left_weight, float *line0, float *line1, const int h_begin,
              const int h_end) {
  const int in_w = input_shape[2];
  const int in_c = input_shape[3];
  const int new_width = output_shape[2];
  const int out_row_len = new_width * in_c;

  float *const slot_buf[2] = {line0, line1};  // the two scratch lines
  int slot_row[2] = {-1, -1};                 // source row currently held by each slot
  bool slot_busy[2] = {false, false};         // slot already claimed for the current output row
  float *picked[2] = {line0, line1};          // slot serving the bottom / top row

  for (int h = h_begin; h < h_end; h++) {
    const int wanted[2] = {y_bottom[h], y_top[h]};
    slot_busy[0] = false;
    slot_busy[1] = false;

    for (int j = 0; j < 2; j++) {
      // look for a slot that already holds the wanted source row
      int hit = -1;
      for (int k = 0; k < 2; k++) {
        if (slot_row[k] == wanted[j]) {
          hit = k;
          break;
        }
      }
      if (hit >= 0) {
        slot_busy[hit] = true;
        picked[j] = slot_buf[hit];
        continue;
      }

      // miss: fill the first slot not claimed for this output row
      for (int k = 0; k < 2; k++) {
        if (slot_busy[k]) {
          continue;
        }
        slot_row[k] = wanted[j];
        slot_busy[k] = true;
        picked[j] = slot_buf[k];
        InterpRow(input_data + wanted[j] * in_w * in_c, picked[j], new_width, x_left_weight, x_left, x_right, in_c);
        break;
      }
    }
    // vertical blend of the two horizontally interpolated lines
    InterpCol(picked[0], picked[1], output_data + h * out_row_len, new_width, y_bottom_weight[h], in_c);
  }
}
284 
ResizeBilinear(const float * input_data,float * output_data,const int * input_shape,const int * output_shape,const int * y_bottoms,const int * y_tops,const int * x_lefts,const int * x_rights,const float * y_bottom_weights,const float * x_left_weights,float * line0,float * line1,const int h_begin,const int h_end)285 int ResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
286                    const int *y_bottoms, const int *y_tops, const int *x_lefts, const int *x_rights,
287                    const float *y_bottom_weights, const float *x_left_weights, float *line0, float *line1,
288                    const int h_begin, const int h_end) {
289   if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL ||
290       y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_bottom_weights == NULL || x_left_weights == NULL) {
291     return NNACL_NULL_PTR;
292   }
293 
294   int in_b = input_shape[0];
295   int in_h = input_shape[1];
296   int in_w = input_shape[2];
297   int in_c = input_shape[3];
298   int new_height = output_shape[1];
299   int new_width = output_shape[2];
300 
301   for (int b = 0; b < in_b; b++) {
302     const float *input = input_data + b * in_h * in_w * in_c;
303     float *output = output_data + b * new_height * new_width * in_c;
304     Bilinear(input, output, input_shape, output_shape, y_bottoms, y_tops, x_lefts, x_rights, y_bottom_weights,
305              x_left_weights, line0, line1, h_begin, h_end);
306   }
307   return NNACL_OK;
308 }
309 
// Horizontal pass of bicubic interpolation for one source row.
// Output pixel i is the weighted sum of the four source pixels lefts[4 * i .. 4 * i + 3],
// using weights[4 * i .. 4 * i + 3]; pixels hold `channel` interleaved floats.
// Vector paths (AVX, then NEON/SSE) handle groups of channels; the scalar loop finishes
// the remainder.
void BicubicInterpRow(const float *src, float *dst, const float *weights, const int *lefts, int width, int channel) {
  for (int col = 0; col < width; col++) {
    const float *tap_w = weights + 4 * col;
    const int *tap_idx = lefts + 4 * col;
    float *out_px = dst + col * channel;
    const float *px0 = src + tap_idx[0] * channel;
    const float *px1 = src + tap_idx[1] * channel;
    const float *px2 = src + tap_idx[2] * channel;
    const float *px3 = src + tap_idx[3] * channel;
    int ch = 0;
#if defined(ENABLE_AVX)
    MS_FLOAT32X8 w0_8 = MS_MOV256_F32(tap_w[0]);
    MS_FLOAT32X8 w1_8 = MS_MOV256_F32(tap_w[1]);
    MS_FLOAT32X8 w2_8 = MS_MOV256_F32(tap_w[2]);
    MS_FLOAT32X8 w3_8 = MS_MOV256_F32(tap_w[3]);
    while (ch <= channel - C8NUM) {
      MS_FLOAT32X8 term0_8 = MS_MUL256_F32(MS_LD256_F32(px0 + ch), w0_8);
      MS_FLOAT32X8 term1_8 = MS_MUL256_F32(MS_LD256_F32(px1 + ch), w1_8);
      MS_FLOAT32X8 term2_8 = MS_MUL256_F32(MS_LD256_F32(px2 + ch), w2_8);
      MS_FLOAT32X8 term3_8 = MS_MUL256_F32(MS_LD256_F32(px3 + ch), w3_8);
      MS_FLOAT32X8 sum_8 = MS_ADD256_F32(term3_8, MS_ADD256_F32(term2_8, MS_ADD256_F32(term1_8, term0_8)));
      MS_ST256_F32(out_px + ch, sum_8);
      ch += C8NUM;
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    MS_FLOAT32X4 w0_4 = MS_MOVQ_F32(tap_w[0]);
    MS_FLOAT32X4 w1_4 = MS_MOVQ_F32(tap_w[1]);
    MS_FLOAT32X4 w2_4 = MS_MOVQ_F32(tap_w[2]);
    MS_FLOAT32X4 w3_4 = MS_MOVQ_F32(tap_w[3]);
    while (ch <= channel - C4NUM) {
      MS_FLOAT32X4 term0_4 = MS_MULQ_F32(MS_LDQ_F32(px0 + ch), w0_4);
      MS_FLOAT32X4 term1_4 = MS_MULQ_F32(MS_LDQ_F32(px1 + ch), w1_4);
      MS_FLOAT32X4 term2_4 = MS_MULQ_F32(MS_LDQ_F32(px2 + ch), w2_4);
      MS_FLOAT32X4 term3_4 = MS_MULQ_F32(MS_LDQ_F32(px3 + ch), w3_4);
      MS_FLOAT32X4 sum_4 = MS_ADDQ_F32(term3_4, MS_ADDQ_F32(term2_4, MS_ADDQ_F32(term1_4, term0_4)));
      MS_STQ_F32(out_px + ch, sum_4);
      ch += C4NUM;
    }
#endif
    // scalar tail (or the whole pixel when no vector path is compiled in)
    while (ch < channel) {
      out_px[ch] = px0[ch] * tap_w[0] + px1[ch] * tap_w[1] + px2[ch] * tap_w[2] + px3[ch] * tap_w[3];
      ch++;
    }
  }
}
360 
// Vertical pass of bicubic interpolation.
// `src` holds four consecutive lines of width * channel floats each; every output element is
// the weighted sum of the elements at the same position in the four lines, using
// weights[0..3]. Vector paths (AVX, then NEON/SSE) handle groups of channels; the scalar
// loop finishes the remainder.
void BicubicInterpCol(const float *src, float *dst, const float *weights, int width, int channel) {
  const int line_len = width * channel;
  const float *line0 = src;
  const float *line1 = src + line_len;
  const float *line2 = src + 2 * width * channel;
  const float *line3 = src + 3 * width * channel;
  for (int col = 0; col < width; col++) {
    const int px_off = col * channel;
    float *out_px = dst + px_off;
    const float *px0 = line0 + px_off;
    const float *px1 = line1 + px_off;
    const float *px2 = line2 + px_off;
    const float *px3 = line3 + px_off;
    int ch = 0;
#if defined(ENABLE_AVX)
    MS_FLOAT32X8 w0_8 = MS_MOV256_F32(weights[0]);
    MS_FLOAT32X8 w1_8 = MS_MOV256_F32(weights[1]);
    MS_FLOAT32X8 w2_8 = MS_MOV256_F32(weights[2]);
    MS_FLOAT32X8 w3_8 = MS_MOV256_F32(weights[3]);
    while (ch <= channel - C8NUM) {
      MS_FLOAT32X8 term0_8 = MS_MUL256_F32(MS_LD256_F32(px0 + ch), w0_8);
      MS_FLOAT32X8 term1_8 = MS_MUL256_F32(MS_LD256_F32(px1 + ch), w1_8);
      MS_FLOAT32X8 term2_8 = MS_MUL256_F32(MS_LD256_F32(px2 + ch), w2_8);
      MS_FLOAT32X8 term3_8 = MS_MUL256_F32(MS_LD256_F32(px3 + ch), w3_8);
      MS_FLOAT32X8 sum_8 = MS_ADD256_F32(term3_8, MS_ADD256_F32(term2_8, MS_ADD256_F32(term0_8, term1_8)));
      MS_ST256_F32(out_px + ch, sum_8);
      ch += C8NUM;
    }
#endif
#if defined(ENABLE_NEON) || defined(ENABLE_SSE)
    MS_FLOAT32X4 w0_4 = MS_MOVQ_F32(weights[0]);
    MS_FLOAT32X4 w1_4 = MS_MOVQ_F32(weights[1]);
    MS_FLOAT32X4 w2_4 = MS_MOVQ_F32(weights[2]);
    MS_FLOAT32X4 w3_4 = MS_MOVQ_F32(weights[3]);
    while (ch <= channel - C4NUM) {
      MS_FLOAT32X4 term0_4 = MS_MULQ_F32(MS_LDQ_F32(px0 + ch), w0_4);
      MS_FLOAT32X4 term1_4 = MS_MULQ_F32(MS_LDQ_F32(px1 + ch), w1_4);
      MS_FLOAT32X4 term2_4 = MS_MULQ_F32(MS_LDQ_F32(px2 + ch), w2_4);
      MS_FLOAT32X4 term3_4 = MS_MULQ_F32(MS_LDQ_F32(px3 + ch), w3_4);
      MS_FLOAT32X4 sum_4 = MS_ADDQ_F32(term3_4, MS_ADDQ_F32(term2_4, MS_ADDQ_F32(term0_4, term1_4)));
      MS_STQ_F32(out_px + ch, sum_4);
      ch += C4NUM;
    }
#endif
    // scalar tail (or the whole pixel when no vector path is compiled in)
    while (ch < channel) {
      out_px[ch] = px0[ch] * weights[0] + px1[ch] * weights[1] + px2[ch] * weights[2] + px3[ch] * weights[3];
      ch++;
    }
  }
}
414 
// Bicubic interpolation of output rows [h_begin, h_end) for a single image.
//
// For each output row the four source rows y_tops[4 * h .. 4 * h + 3] are interpolated
// horizontally into four consecutive lines of `line_buffer` (new_width * in_c floats each)
// and then combined vertically with y_weights[4 * h .. 4 * h + 3].
// input_shape[2] and input_shape[3] are read as source width and channel count,
// output_shape[2] as output width.
void Bicubic(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
             const int *y_tops, const int *x_lefts, const float *y_weights, const float *x_weights, float *line_buffer,
             const int h_begin, const int h_end) {
  const int in_w = input_shape[2];
  const int in_c = input_shape[3];
  const int new_width = output_shape[2];
  const int out_row_len = new_width * in_c;

  for (int h = h_begin; h < h_end; h++) {
    const int *row_taps = y_tops + 4 * h;
    // horizontal pass: one scratch line per vertical tap
    for (int tap = 0; tap < 4; ++tap) {
      const float *src_row = input_data + row_taps[tap] * in_w * in_c;
      float *scratch = line_buffer + tap * out_row_len;
      BicubicInterpRow(src_row, scratch, x_weights, x_lefts, new_width, in_c);
    }
    // vertical pass over the four scratch lines
    BicubicInterpCol(line_buffer, output_data + h * out_row_len, y_weights + 4 * h, new_width, in_c);
  }
}
431 
ResizeBicubic(const float * input_data,float * output_data,const int * input_shape,const int * output_shape,const int * y_tops,const int * x_lefts,const float * y_weights,const float * x_weights,float * line_buffer,const int h_begin,const int h_end)432 int ResizeBicubic(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
433                   const int *y_tops, const int *x_lefts, const float *y_weights, const float *x_weights,
434                   float *line_buffer, const int h_begin, const int h_end) {
435   if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_tops == NULL ||
436       x_lefts == NULL || y_weights == NULL || x_weights == NULL) {
437     return NNACL_NULL_PTR;
438   }
439   int input_cube_per_batch = input_shape[1] * input_shape[2] * input_shape[3];
440   int output_cube_per_batch = output_shape[1] * output_shape[2] * input_shape[3];
441   for (int b = 0; b < input_shape[0]; b++) {
442     const float *input = input_data + b * input_cube_per_batch;
443     float *output = output_data + b * output_cube_per_batch;
444     Bicubic(input, output, input_shape, output_shape, y_tops, x_lefts, y_weights, x_weights, line_buffer, h_begin,
445             h_end);
446   }
447   return NNACL_OK;
448 }
449 
RewriteExtrapolationValue(const float * input_data,float * output_data,const int * box_idx,const float * boxes,const CropAndResizeParameter * param,const int * input_shape,const int * output_shape,const int * y_tops,const int h_begin,const int h_end)450 int RewriteExtrapolationValue(const float *input_data, float *output_data, const int *box_idx, const float *boxes,
451                               const CropAndResizeParameter *param, const int *input_shape, const int *output_shape,
452                               const int *y_tops, const int h_begin, const int h_end) {
453   if (input_data == NULL || output_data == NULL || box_idx == NULL || param == NULL || input_shape == NULL ||
454       output_shape == NULL) {
455     return NNACL_NULL_PTR;
456   }
457   int batch = output_shape[0];
458   int new_height = output_shape[1];
459   int new_width = output_shape[2];
460   int new_channel = output_shape[3];
461   int input_h = input_shape[1];
462   int input_w = input_shape[2];
463 
464   for (int b = 0; b < batch; b++) {
465     float *output = output_data + b * new_height * new_width * new_channel;
466     const float extrapolation_value = param->extrapolation_value_;
467     const float *box = boxes + 4 * b;
468     float start_h = box[0];
469     float end_h = box[2];
470     float start_w = box[1];
471     float end_w = box[3];
472     float actual_y, actual_x;
473     for (int h = h_begin; h < h_end; ++h) {
474       if (new_height > 1) {
475         actual_y = start_h * (input_h - 1) + h * (end_h - start_h) * (input_h - 1) / (new_height - 1);
476       } else {
477         actual_y = 0.5 * (end_h + start_h) * (input_h - 1);
478       }
479       if (actual_y < 0 || actual_y > input_h - 1) {
480         float *output_data_base = output + h * new_width * new_channel;
481         for (int x = 0; x < new_width; ++x) {
482           for (int d = 0; d < new_channel; ++d) {
483             *output_data_base = extrapolation_value;
484             output_data_base++;
485           }
486         }
487       }
488       for (int w = 0; w < new_width; ++w) {
489         if (new_width > 1) {
490           actual_x = start_w * (input_w - 1) + w * (end_w - start_w) * (input_w - 1) / (new_width - 1);
491         } else {
492           actual_x = 0.5 * (end_w + start_w) * (input_w - 1);
493         }
494         if (actual_x < 0 || actual_x > input_w - 1) {
495           float *output_data_base = output + h * new_width * new_channel + w * new_channel;
496           for (int d = 0; d < new_channel; ++d) {
497             output_data_base[d] = extrapolation_value;
498           }
499         }
500       }
501     }
502   }
503   return NNACL_OK;
504 }
505 
CropAndResizeBilinear(const float * input_data,float * output_data,const int * box_idx,const float * boxes,const CropAndResizeParameter * param,const int * input_shape,const int * output_shape,const int * y_bottoms,const int * y_tops,const int * x_lefts,const int * x_rights,const float * y_bottom_weights,const float * x_left_weights,float * line0,float * line1,const int h_begin,const int h_end)506 int CropAndResizeBilinear(const float *input_data, float *output_data, const int *box_idx, const float *boxes,
507                           const CropAndResizeParameter *param, const int *input_shape, const int *output_shape,
508                           const int *y_bottoms, const int *y_tops, const int *x_lefts, const int *x_rights,
509                           const float *y_bottom_weights, const float *x_left_weights, float *line0, float *line1,
510                           const int h_begin, const int h_end) {
511   if (input_data == NULL || output_data == NULL || box_idx == NULL || param == NULL || input_shape == NULL ||
512       output_shape == NULL || y_bottoms == NULL || y_tops == NULL || x_lefts == NULL || x_rights == NULL ||
513       y_bottom_weights == NULL || x_left_weights == NULL) {
514     return NNACL_NULL_PTR;
515   }
516   int batch = output_shape[0];
517   int new_height = output_shape[1];
518   int new_width = output_shape[2];
519   int new_channel = output_shape[3];
520   int input_h = input_shape[1];
521   int input_w = input_shape[2];
522 
523   for (int b = 0; b < batch; b++) {
524     const float *cur_img = input_data + box_idx[b] * input_h * input_w * new_channel;
525     const int *y_bottom = y_bottoms + b * new_height;
526     const int *y_top = y_tops + b * new_height;
527     const float *y_bottom_weight = y_bottom_weights + b * new_height;
528     const int *x_left = x_lefts + b * new_width;
529     const int *x_right = x_rights + b * new_width;
530     const float *x_left_weight = x_left_weights + b * new_width;
531     float *output = output_data + b * new_height * new_width * new_channel;
532 
533     Bilinear(cur_img, output, input_shape, output_shape, y_bottom, y_top, x_left, x_right, y_bottom_weight,
534              x_left_weight, line0, line1, h_begin, h_end);
535   }
536   RewriteExtrapolationValue(input_data, output_data, box_idx, boxes, param, input_shape, output_shape, y_tops, h_begin,
537                             h_end);
538   return NNACL_OK;
539 }
540 
// Nearest-neighbour resize. The calling thread `tid` handles output rows tid, tid + thread_num,
// tid + 2 * thread_num, ... of every image in the batch.
//
//   input_data / output_data   source and destination tensors, addressed through Offset()
//   input_shape                elements [1], [2], [3] are read as height, width, channels
//   output_shape               elements [0], [1], [2] are read as batch, height, width
//   calculate                  maps an output index to a fractional source coordinate
//   coordinate_transform_mode  the value 1 selects rounding to the nearest source index;
//                              any other value selects floor
//   tid / thread_num           index of this worker and total number of workers
//
// The selected source row/column is clamped to [0, size - 1] so that a coordinate that lands
// slightly outside the image cannot address memory outside the source tensor.
// NOTE(review): assumes input height and width are at least 1 — confirm against callers.
//
// Returns NNACL_NULL_PTR if a pointer argument is NULL, NNACL_PARAM_INVALID if thread_num is
// not positive or tid is negative (either would make the row loop misbehave), NNACL_OK otherwise.
int ResizeNearestNeighbor(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
                          CalculateOriginalCoordinate calculate, int coordinate_transform_mode, int tid,
                          int thread_num) {
  if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || calculate == NULL) {
    return NNACL_NULL_PTR;
  }
  if (thread_num <= 0 || tid < 0) {
    return NNACL_PARAM_INVALID;
  }
  const int in_h = input_shape[1];
  const int in_w = input_shape[2];
  const int c = input_shape[3];
  const bool align_corners = coordinate_transform_mode == 1;
  for (int batch = 0; batch < output_shape[0]; batch++) {
    for (int y = tid; y < output_shape[1]; y += thread_num) {
      float actual_y = calculate(y, in_h, output_shape[1]);
      int input_y = align_corners ? (int)(roundf(actual_y)) : (int)(floorf(actual_y));
      if (input_y > in_h - 1) {
        input_y = in_h - 1;
      }
      if (input_y < 0) {
        input_y = 0;
      }
      for (int x = 0; x < output_shape[2]; x++) {
        float actual_x = calculate(x, in_w, output_shape[2]);
        int input_x = align_corners ? (int)(roundf(actual_x)) : (int)(floorf(actual_x));
        if (input_x > in_w - 1) {
          input_x = in_w - 1;
        }
        if (input_x < 0) {
          input_x = 0;
        }
        int in_offset = Offset(input_shape, batch, input_y, input_x, 0);
        int out_offset = Offset(output_shape, batch, y, x, 0);
        // copy all channels of the selected source pixel
        memcpy(output_data + out_offset, input_data + in_offset, c * sizeof(float));
      }
    }
  }
  return NNACL_OK;
}
574 
// Asymmetric coordinate mapping: converts an index on the resized axis to a fractional
// coordinate on the original axis by dividing by the ratio length_resized / length_original.
float CalculateAsymmetric(int x_resized, int length_original, int length_resized) {
  const float resized_len = (float)(length_resized);
  const float original_len = (float)(length_original);
  const float ratio = resized_len / original_len;
  const float position = (float)(x_resized);
  return position / ratio;
}
579 
// Align-corners coordinate mapping: converts an index on the resized axis to a fractional
// coordinate on the original axis so that the first and last samples of both axes coincide,
// i.e. divides by the ratio (length_resized - 1) / (length_original - 1).
//
// When either axis has at most one sample the ratio degenerates (0 / n, n / 0 or 0 / 0);
// in particular a resized axis of length 1 would turn index 0 into 0 / 0 = NaN. The only
// meaningful source position in those cases is the first sample, so 0 is returned.
float CalculateAlignCorners(int x_resized, int length_original, int length_resized) {
  if (length_resized <= 1 || length_original <= 1) {
    return 0.0f;
  }
  float scale = (float)(length_resized - 1) / (float)(length_original - 1);
  return (float)(x_resized) / scale;
}
584 
// Half-pixel coordinate mapping: treats samples as pixel centres, i.e. maps index x to
// (x + 0.5) / (length_resized / length_original) - 0.5, and never returns less than 0.
float CalculateHalfPixel(int x_resized, int length_original, int length_resized) {
  const float ratio = (float)(length_resized) / (float)(length_original);
  const float centred = (float)(x_resized + 0.5) / ratio - 0.5;
  if (centred > 0) {
    return centred;
  }
  // negative positions (and NaN) collapse onto the first sample
  return 0;
}
590