• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "nnacl/fp32/arithmetic_fp32.h"
18 #include <math.h>
19 #include "nnacl/arithmetic_fp32_simd.h"
20 
21 #define ACCURACY_DATA 0.00000001
22 
ElementFloorMod(const float * in0,const float * in1,float * out,int size)23 int ElementFloorMod(const float *in0, const float *in1, float *out, int size) {
24   int i = 0;
25 
26   SIMD_RUN_X86_NO_SCALAR(ElementFloorMod, i, in0, in1, out, size);  // neon no floor instruction
27 
28   for (; i < size; i++) {
29     out[i] = in0[i] - floorf(in0[i] / in1[i]) * in1[i];
30   }
31   return NNACL_OK;
32 }
33 
ElementOptFloorMod(const float * in0,const float * in1,float * out,int size,bool first_scalar)34 int ElementOptFloorMod(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
35   int i = 0;
36 
37   if (first_scalar) {
38     SIMD_RUN_X86_NO_SCALAR(ElementOptFloorModNum0, i, in0, in1, out, size);  // neon no floor instruction
39     for (; i < size; i++) {
40       out[i] = in0[0] - floorf(in0[0] / in1[i]) * in1[i];
41     }
42   } else {
43     SIMD_RUN_X86_NO_SCALAR(ElementOptFloorModNum1, i, in0, in1, out, size);  // neon no floor instruction
44     for (; i < size; i++) {
45       out[i] = in0[i] - floorf(in0[i] / in1[0]) * in1[0];
46     }
47   }
48 
49   return NNACL_OK;
50 }
51 
ElementFloorModInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size)52 int ElementFloorModInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size) {
53   for (int i = 0; i < size; i++) {
54     NNACL_CHECK_ZERO_RETURN_ERR(in1[i]);
55     int remainder = in0[i] - (in0[i] / in1[i]) * in1[i];
56     out[i] = (remainder != 0) && ((in0[i] > 0) != (in1[i] > 0)) ? remainder + in1[i] : remainder;
57   }
58   return NNACL_OK;
59 }
60 
ElementOptFloorModInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size,bool first_scalar)61 int ElementOptFloorModInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size, bool first_scalar) {
62   int i = 0;
63   if (first_scalar) {
64     for (; i < size; i++) {
65       NNACL_CHECK_ZERO_RETURN_ERR(in1[i]);
66       int remainder = in0[0] - (in0[0] / in1[i]) * in1[i];
67       out[i] = (remainder != 0) && ((in0[0] > 0) != (in1[i] > 0)) ? remainder + in1[i] : remainder;
68     }
69   } else {
70     NNACL_CHECK_ZERO_RETURN_ERR(in1[0]);
71     for (; i < size; i++) {
72       int remainder = in0[i] - (in0[i] / in1[0]) * in1[0];
73       out[i] = (remainder != 0) && ((in0[i] > 0) != (in1[0] > 0)) ? remainder + in1[0] : remainder;
74     }
75   }
76 
77   return NNACL_OK;
78 }
79 
ElementMod(const float * in0,const float * in1,float * out,int size)80 int ElementMod(const float *in0, const float *in1, float *out, int size) {
81   for (int i = 0; i < size; i++) {
82     out[i] = fmodf(in0[i], in1[i]);
83   }
84   return NNACL_OK;
85 }
86 
ElementOptMod(const float * in0,const float * in1,float * out,int size,bool first_scalar)87 int ElementOptMod(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
88   int index = 0;
89   if (first_scalar) {
90     for (; index < size; index++) {
91       out[index] = fmodf(in0[0], in1[index]);
92     }
93   } else {
94     for (; index < size; index++) {
95       out[index] = fmodf(in0[index], in1[0]);
96     }
97   }
98   return NNACL_OK;
99 }
100 
ElementModInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size)101 int ElementModInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size) {
102   for (int i = 0; i < size; i++) {
103     NNACL_CHECK_ZERO_RETURN_ERR(in1[i]);
104     out[i] = in0[i] % in1[i];
105   }
106   return NNACL_OK;
107 }
108 
ElementOptModInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size,bool first_scalar)109 int ElementOptModInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size, bool first_scalar) {
110   if (first_scalar) {
111     for (int index = 0; index < size; index++) {
112       NNACL_CHECK_ZERO_RETURN_ERR(in1[index]);
113       out[index] = in0[0] % in1[index];
114     }
115   } else {
116     NNACL_CHECK_ZERO_RETURN_ERR(in1[0]);
117     for (int index = 0; index < size; index++) {
118       out[index] = in0[index] % in1[0];
119     }
120   }
121   return NNACL_OK;
122 }
123 
ElementFloorDiv(const float * in0,const float * in1,float * out,int size)124 int ElementFloorDiv(const float *in0, const float *in1, float *out, int size) {
125   int i = 0;
126 
127   SIMD_RUN_X86_NO_SCALAR(ElementFloorDiv, i, in0, in1, out, size);  // neon no floor instruction
128 
129   for (; i < size; i++) {
130     out[i] = floorf(in0[i] / in1[i]);
131   }
132   return NNACL_OK;
133 }
134 
ElementOptFloorDiv(const float * in0,const float * in1,float * out,int size,bool first_scalar)135 int ElementOptFloorDiv(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
136   int i = 0;
137 
138   if (first_scalar) {
139     SIMD_RUN_X86_NO_SCALAR(ElementOptFloorDivNum0, i, in0, in1, out, size);  // neon no floor instruction
140 
141     for (; i < size; i++) {
142       out[i] = floorf(in0[0] / in1[i]);
143     }
144   } else {
145     SIMD_RUN_X86_NO_SCALAR(ElementOptFloorDivNum1, i, in0, in1, out, size);  // neon no floor instruction
146 
147     for (; i < size; i++) {
148       out[i] = floorf(in0[i] / in1[0]);
149     }
150   }
151 
152   return NNACL_OK;
153 }
154 
ElementFloorDivInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size)155 int ElementFloorDivInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size) {
156   int i = 0;
157 
158   SIMD_RUN_NO_SCALAR(ElementFloorDivInt, i, in0, in1, out, size);
159 
160   for (; i < size; i++) {
161     NNACL_CHECK_ZERO_RETURN_ERR(in1[i]);
162     out[i] = in0[i] / in1[i];
163   }
164   return NNACL_OK;
165 }
166 
ElementOptFloorDivInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size,bool first_scalar)167 int ElementOptFloorDivInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size, bool first_scalar) {
168   int i = 0;
169   if (first_scalar) {
170     SIMD_RUN_NO_SCALAR(ElementOptFloorDivIntNum0, i, in0, in1, out, size);
171 
172     for (; i < size; i++) {
173       NNACL_CHECK_ZERO_RETURN_ERR(in1[i]);
174       out[i] = in0[0] / in1[i];
175     }
176   } else {
177     NNACL_CHECK_ZERO_RETURN_ERR(in1[0]);
178 
179     SIMD_RUN_NO_SCALAR(ElementOptFloorDivIntNum1, i, in0, in1, out, size);
180 
181     for (; i < size; i++) {
182       out[i] = in0[i] / in1[0];
183     }
184   }
185 
186   return NNACL_OK;
187 }
188 
ElementLogicalAnd(const float * in0,const float * in1,float * out,int size)189 int ElementLogicalAnd(const float *in0, const float *in1, float *out, int size) {
190   int index = 0;
191 
192   SIMD_RUN_NO_SCALAR(ElementLogicalAnd, index, in0, in1, out, size);
193   for (; index < size; index++) {
194     out[index] = (float)((bool)(in0[index]) & (bool)(in1[index]));
195   }
196   return NNACL_OK;
197 }
198 
ElementOptLogicalAnd(const float * in0,const float * in1,float * out,int size,bool first_scalar)199 int ElementOptLogicalAnd(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
200   int index = 0;
201   SIMD_RUN_NO_SCALAR(ElementOptLogicalAnd, index, in0, in1, out, size, first_scalar);
202   if (first_scalar) {
203     for (; index < size; index++) {
204       out[index] = (float)((bool)(in0[0]) & (bool)(in1[index]));
205     }
206   } else {
207     for (; index < size; index++) {
208       out[index] = (float)((bool)(in0[index]) & (bool)(in1[0]));
209     }
210   }
211 
212   return NNACL_OK;
213 }
214 
ElementLogicalAndInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size)215 int ElementLogicalAndInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size) {
216   int index = 0;
217   for (; index < size; index++) {
218     out[index] = (int)((unsigned int)(in0[index]) & (unsigned int)(in1[index]));
219   }
220   return NNACL_OK;
221 }
222 
ElementOptLogicalAndInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size,bool first_scalar)223 int ElementOptLogicalAndInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size, bool first_scalar) {
224   int index = 0;
225   if (first_scalar) {
226     for (; index < size; index++) {
227       out[index] = (int)((unsigned int)(in0[0]) & (unsigned int)(in1[index]));
228     }
229   } else {
230     for (; index < size; index++) {
231       out[index] = (int)((unsigned int)(in0[index]) & (unsigned int)(in1[0]));
232     }
233   }
234 
235   return NNACL_OK;
236 }
237 
ElementLogicalAndBool(const bool * in0,const bool * in1,bool * out,int size)238 int ElementLogicalAndBool(const bool *in0, const bool *in1, bool *out, int size) {
239   int index = 0;
240   for (; index < size; index++) {
241     out[index] = (bool)((unsigned int)(in0[index]) & (unsigned int)(in1[index]));
242   }
243 
244   return NNACL_OK;
245 }
246 
ElementOptLogicalAndBool(const bool * in0,const bool * in1,bool * out,int size,bool first_scalar)247 int ElementOptLogicalAndBool(const bool *in0, const bool *in1, bool *out, int size, bool first_scalar) {
248   int index = 0;
249   if (first_scalar) {
250     for (; index < size; index++) {
251       out[index] = (bool)((unsigned int)(in0[0]) & (unsigned int)(in1[index]));
252     }
253   } else {
254     for (; index < size; index++) {
255       out[index] = (bool)((unsigned int)(in0[index]) & (unsigned int)(in1[0]));
256     }
257   }
258 
259   return NNACL_OK;
260 }
261 
ElementLogicalOr(const float * in0,const float * in1,float * out,int size)262 int ElementLogicalOr(const float *in0, const float *in1, float *out, int size) {
263   int index = 0;
264 #ifdef ENABLE_NEON
265   float32x4_t vtrue = vdupq_n_f32(1);
266   float32x4_t vfalse = vdupq_n_f32(0);
267   uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1));
268   uint32x4_t zeros = vdupq_n_u32(0);
269   for (; index <= size - 4; index += C4NUM) {
270     uint32x4_t vin0 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in0 + index)), mask);
271     uint32x4_t vin1 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in1 + index)), mask);
272     float32x4_t vout = vbslq_f32(vceqq_u32(vorrq_u32(vin0, vin1), zeros), vfalse, vtrue);
273     vst1q_f32(out + index, vout);
274   }
275 #endif
276   for (; index < size; index++) {
277     out[index] = (float)((bool)(in0[index]) | (bool)(in1[index]));
278   }
279   return NNACL_OK;
280 }
281 
ElementOptLogicalOr(const float * in0,const float * in1,float * out,int size,bool first_scalar)282 int ElementOptLogicalOr(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
283   int index = 0;
284   if (first_scalar) {
285     for (; index < size; index++) {
286       out[index] = (float)((bool)(in0[0]) | (bool)(in1[index]));
287     }
288   } else {
289     for (; index < size; index++) {
290       out[index] = (float)((bool)(in0[index]) | (bool)(in1[0]));
291     }
292   }
293 
294   return NNACL_OK;
295 }
296 
ElementLogicalOrBool(const bool * in0,const bool * in1,bool * out,int size)297 int ElementLogicalOrBool(const bool *in0, const bool *in1, bool *out, int size) {
298   int index = 0;
299   for (; index < size; index++) {
300     out[index] = (bool)(in0[index] | in1[index]);
301   }
302   return NNACL_OK;
303 }
304 
ElementOptLogicalOrBool(const bool * in0,const bool * in1,bool * out,int size,bool first_scalar)305 int ElementOptLogicalOrBool(const bool *in0, const bool *in1, bool *out, int size, bool first_scalar) {
306   int index = 0;
307   if (first_scalar) {
308     for (; index < size; index++) {
309       out[index] = (bool)(in0[0] | in1[index]);
310     }
311   } else {
312     for (; index < size; index++) {
313       out[index] = (bool)(in0[index] | in1[0]);
314     }
315   }
316 
317   return NNACL_OK;
318 }
319 
ElementMaximum(const float * in0,const float * in1,float * out,int size)320 int ElementMaximum(const float *in0, const float *in1, float *out, int size) {
321   int index = 0;
322 
323   SIMD_RUN_NO_SCALAR(ElementMaximum, index, in0, in1, out, size);
324 
325   for (; index < size; index++) {
326     out[index] = in0[index] > in1[index] ? in0[index] : in1[index];
327   }
328   return NNACL_OK;
329 }
330 
ElementOptMaximum(const float * in0,const float * in1,float * out,int size,bool first_scalar)331 int ElementOptMaximum(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
332   int index = 0;
333 
334   if (first_scalar) {
335     SIMD_RUN_NO_SCALAR(ElementOptMaximumNum0, index, in0, in1, out, size);
336 
337     for (; index < size; index++) {
338       out[index] = in0[0] > in1[index] ? in0[0] : in1[index];
339     }
340   } else {
341     SIMD_RUN_NO_SCALAR(ElementOptMaximumNum1, index, in0, in1, out, size);
342 
343     for (; index < size; index++) {
344       out[index] = in0[index] > in1[0] ? in0[index] : in1[0];
345     }
346   }
347 
348   return NNACL_OK;
349 }
350 
ElementMaximumInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size)351 int ElementMaximumInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size) {
352   int index = 0;
353 
354   SIMD_RUN_NO_SCALAR(ElementMaximumInt, index, in0, in1, out, size);
355 
356   for (; index < size; index++) {
357     out[index] = in0[index] > in1[index] ? in0[index] : in1[index];
358   }
359   return NNACL_OK;
360 }
361 
ElementOptMaximumInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size,bool first_scalar)362 int ElementOptMaximumInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size, bool first_scalar) {
363   int index = 0;
364   if (first_scalar) {
365     SIMD_RUN_NO_SCALAR(ElementOptMaximumIntNum0, index, in0, in1, out, size);
366 
367     for (; index < size; index++) {
368       out[index] = in0[0] > in1[index] ? in0[0] : in1[index];
369     }
370   } else {
371     SIMD_RUN_NO_SCALAR(ElementOptMaximumIntNum1, index, in0, in1, out, size);
372 
373     for (; index < size; index++) {
374       out[index] = in0[index] > in1[0] ? in0[index] : in1[0];
375     }
376   }
377 
378   return NNACL_OK;
379 }
380 
ElementMinimumInt(const int32_t * input0,const int32_t * input1,int32_t * output,int size)381 int ElementMinimumInt(const int32_t *input0, const int32_t *input1, int32_t *output, int size) {
382   int index = 0;
383 
384   SIMD_RUN_NO_SCALAR(ElementMinimumInt, index, input0, input1, output, size);
385 
386   for (; index < size; index++) {
387     output[index] = input0[index] > input1[index] ? input1[index] : input0[index];
388   }
389   return NNACL_OK;
390 }
391 
ElementOptMinimumInt(const int32_t * input0,const int32_t * input1,int32_t * output,int size,bool first_scalar)392 int ElementOptMinimumInt(const int32_t *input0, const int32_t *input1, int32_t *output, int size, bool first_scalar) {
393   int index = 0;
394   if (first_scalar) {
395     SIMD_RUN_NO_SCALAR(ElementOptMinimumIntNum0, index, input0, input1, output, size);
396 
397     for (; index < size; index++) {
398       output[index] = input0[0] > input1[index] ? input1[index] : input0[0];
399     }
400   } else {
401     SIMD_RUN_NO_SCALAR(ElementOptMinimumIntNum1, index, input0, input1, output, size);
402 
403     for (; index < size; index++) {
404       output[index] = input0[index] > input1[0] ? input1[0] : input0[index];
405     }
406   }
407 
408   return NNACL_OK;
409 }
410 
ElementMinimum(const float * in0,const float * in1,float * out,int size)411 int ElementMinimum(const float *in0, const float *in1, float *out, int size) {
412   int index = 0;
413 
414   SIMD_RUN_NO_SCALAR(ElementMinimum, index, in0, in1, out, size);
415 
416   for (; index < size; index++) {
417     out[index] = in0[index] > in1[index] ? in1[index] : in0[index];
418   }
419   return NNACL_OK;
420 }
421 
ElementOptMinimum(const float * in0,const float * in1,float * out,int size,bool first_scalar)422 int ElementOptMinimum(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
423   int index = 0;
424   if (first_scalar) {
425     SIMD_RUN_NO_SCALAR(ElementOptMinimumNum0, index, in0, in1, out, size);
426 
427     for (; index < size; index++) {
428       out[index] = in0[0] > in1[index] ? in1[index] : in0[0];
429     }
430   } else {
431     SIMD_RUN_NO_SCALAR(ElementOptMinimumNum1, index, in0, in1, out, size);
432 
433     for (; index < size; index++) {
434       out[index] = in0[index] > in1[0] ? in1[0] : in0[index];
435     }
436   }
437 
438   return NNACL_OK;
439 }
440 
441 #undef ACCURACY_DATA
442 
// Recursively tiles float data along dimension `dim` and every dimension after it.
//   inPtr / outPtr : source and destination buffers, accessed as float arrays
//   dim            : dimension handled by this call (callers start at 0)
//   ndim           : total number of dimensions
//   inShape        : source extent per dimension
//   inStrides      : source element stride per dimension
//   outStrides     : destination element stride per dimension
//   multiple       : repeat count per dimension
// On the last dimension the source run of inShape[dim] floats is copied multiple[dim] times
// back to back. On earlier dimensions, source slice i is written to destination slices
// i + j * inShape[dim] for every repeat j.
// The call is a no-op when dim is outside [0, ndim) or when the extent or repeat count of
// this dimension is not positive. Those values would otherwise turn into enormous unsigned
// loop bounds or copy lengths, and ndim == 0 would make the recursion never terminate.
void TileOneDimensionFp32(const void *inPtr, void *outPtr, int dim, size_t ndim, const int32_t *inShape,
                          const int32_t *inStrides, const int32_t *outStrides, const int32_t *multiple) {
  if (dim < 0 || (size_t)dim >= ndim) {
    return;
  }

  const int32_t srcDimSize = inShape[dim];
  const int32_t repeat = multiple[dim];
  if (srcDimSize <= 0 || repeat <= 0) {
    return;
  }

  const float *inData = (const float *)inPtr;
  float *outData = (float *)outPtr;
  const size_t extent = (size_t)srcDimSize;
  const size_t copies = (size_t)repeat;

  if ((size_t)dim == ndim - 1) {
    // Innermost dimension: emit the contiguous run `copies` times in a row.
    for (size_t r = 0; r < copies; r++) {
      memcpy(outData, inData, extent * sizeof(float));
      outData += extent;
    }
    return;
  }

  const size_t inStep = (size_t)inStrides[dim];
  const size_t outStep = (size_t)outStrides[dim];
  for (size_t i = 0; i < extent; i++) {
    for (size_t j = 0; j < copies; j++) {
      TileOneDimensionFp32(inData + inStep * i, outData + outStep * (i + j * extent), dim + 1, ndim, inShape,
                           inStrides, outStrides, multiple);
    }
  }
}
463 
// Tiles both operands into their output buffers.
// CalcMultiplesAndStrides(param) (defined elsewhere) is called first; afterwards data0 is
// tiled into tile_data0 using param's shape/strides/multiples for input 0, and data1 into
// tile_data1 using those for input 1. Both use param->ndim_ and param->out_strides_.
void TileDimensionsFp32(const float *data0, const float *data1, float *tile_data0, float *tile_data1,
                        ArithmeticParameter *param) {
  CalcMultiplesAndStrides(param);

  // First operand.
  TileOneDimensionFp32(data0, tile_data0, 0, param->ndim_,
                       param->in_shape0_, param->in_strides0_,
                       param->out_strides_, param->multiples0_);

  // Second operand.
  TileOneDimensionFp32(data1, tile_data1, 0, param->ndim_,
                       param->in_shape1_, param->in_strides1_,
                       param->out_strides_, param->multiples1_);
}
472 
// In-place element-wise subtraction: in0[k] = in0[k] - in1[k] for k in [0, size).
// The SIMD_RUN_NO_SCALAR macro (defined elsewhere) runs first; the scalar loop handles the
// elements from the resulting position onward.
void AssignSubOpt(float *in0, const float *in1, size_t size) {
  int pos = 0;

  SIMD_RUN_NO_SCALAR(AssignSubOpt, pos, in0, in1, size);

  while (pos < size) {
    in0[pos] -= in1[pos];
    ++pos;
  }
}
483