1 /**
2 * Copyright 2020-2022 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "nnacl/fp32/arithmetic_fp32.h"
18 #include <math.h>
19 #include "nnacl/arithmetic_fp32_simd.h"
20
21 #define ACCURACY_DATA 0.00000001
22
ElementFloorMod(const float * in0,const float * in1,float * out,int size)23 int ElementFloorMod(const float *in0, const float *in1, float *out, int size) {
24 int i = 0;
25
26 SIMD_RUN_X86_NO_SCALAR(ElementFloorMod, i, in0, in1, out, size); // neon no floor instruction
27
28 for (; i < size; i++) {
29 out[i] = in0[i] - floorf(in0[i] / in1[i]) * in1[i];
30 }
31 return NNACL_OK;
32 }
33
ElementOptFloorMod(const float * in0,const float * in1,float * out,int size,bool first_scalar)34 int ElementOptFloorMod(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
35 int i = 0;
36
37 if (first_scalar) {
38 SIMD_RUN_X86_NO_SCALAR(ElementOptFloorModNum0, i, in0, in1, out, size); // neon no floor instruction
39 for (; i < size; i++) {
40 out[i] = in0[0] - floorf(in0[0] / in1[i]) * in1[i];
41 }
42 } else {
43 SIMD_RUN_X86_NO_SCALAR(ElementOptFloorModNum1, i, in0, in1, out, size); // neon no floor instruction
44 for (; i < size; i++) {
45 out[i] = in0[i] - floorf(in0[i] / in1[0]) * in1[0];
46 }
47 }
48
49 return NNACL_OK;
50 }
51
ElementFloorModInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size)52 int ElementFloorModInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size) {
53 for (int i = 0; i < size; i++) {
54 NNACL_CHECK_ZERO_RETURN_ERR(in1[i]);
55 int remainder = in0[i] - (in0[i] / in1[i]) * in1[i];
56 out[i] = (remainder != 0) && ((in0[i] > 0) != (in1[i] > 0)) ? remainder + in1[i] : remainder;
57 }
58 return NNACL_OK;
59 }
60
ElementOptFloorModInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size,bool first_scalar)61 int ElementOptFloorModInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size, bool first_scalar) {
62 int i = 0;
63 if (first_scalar) {
64 for (; i < size; i++) {
65 NNACL_CHECK_ZERO_RETURN_ERR(in1[i]);
66 int remainder = in0[0] - (in0[0] / in1[i]) * in1[i];
67 out[i] = (remainder != 0) && ((in0[0] > 0) != (in1[i] > 0)) ? remainder + in1[i] : remainder;
68 }
69 } else {
70 NNACL_CHECK_ZERO_RETURN_ERR(in1[0]);
71 for (; i < size; i++) {
72 int remainder = in0[i] - (in0[i] / in1[0]) * in1[0];
73 out[i] = (remainder != 0) && ((in0[i] > 0) != (in1[0] > 0)) ? remainder + in1[0] : remainder;
74 }
75 }
76
77 return NNACL_OK;
78 }
79
ElementMod(const float * in0,const float * in1,float * out,int size)80 int ElementMod(const float *in0, const float *in1, float *out, int size) {
81 for (int i = 0; i < size; i++) {
82 out[i] = fmodf(in0[i], in1[i]);
83 }
84 return NNACL_OK;
85 }
86
ElementOptMod(const float * in0,const float * in1,float * out,int size,bool first_scalar)87 int ElementOptMod(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
88 int index = 0;
89 if (first_scalar) {
90 for (; index < size; index++) {
91 out[index] = fmodf(in0[0], in1[index]);
92 }
93 } else {
94 for (; index < size; index++) {
95 out[index] = fmodf(in0[index], in1[0]);
96 }
97 }
98 return NNACL_OK;
99 }
100
ElementModInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size)101 int ElementModInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size) {
102 for (int i = 0; i < size; i++) {
103 NNACL_CHECK_ZERO_RETURN_ERR(in1[i]);
104 out[i] = in0[i] % in1[i];
105 }
106 return NNACL_OK;
107 }
108
ElementOptModInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size,bool first_scalar)109 int ElementOptModInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size, bool first_scalar) {
110 if (first_scalar) {
111 for (int index = 0; index < size; index++) {
112 NNACL_CHECK_ZERO_RETURN_ERR(in1[index]);
113 out[index] = in0[0] % in1[index];
114 }
115 } else {
116 NNACL_CHECK_ZERO_RETURN_ERR(in1[0]);
117 for (int index = 0; index < size; index++) {
118 out[index] = in0[index] % in1[0];
119 }
120 }
121 return NNACL_OK;
122 }
123
ElementFloorDiv(const float * in0,const float * in1,float * out,int size)124 int ElementFloorDiv(const float *in0, const float *in1, float *out, int size) {
125 int i = 0;
126
127 SIMD_RUN_X86_NO_SCALAR(ElementFloorDiv, i, in0, in1, out, size); // neon no floor instruction
128
129 for (; i < size; i++) {
130 out[i] = floorf(in0[i] / in1[i]);
131 }
132 return NNACL_OK;
133 }
134
ElementOptFloorDiv(const float * in0,const float * in1,float * out,int size,bool first_scalar)135 int ElementOptFloorDiv(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
136 int i = 0;
137
138 if (first_scalar) {
139 SIMD_RUN_X86_NO_SCALAR(ElementOptFloorDivNum0, i, in0, in1, out, size); // neon no floor instruction
140
141 for (; i < size; i++) {
142 out[i] = floorf(in0[0] / in1[i]);
143 }
144 } else {
145 SIMD_RUN_X86_NO_SCALAR(ElementOptFloorDivNum1, i, in0, in1, out, size); // neon no floor instruction
146
147 for (; i < size; i++) {
148 out[i] = floorf(in0[i] / in1[0]);
149 }
150 }
151
152 return NNACL_OK;
153 }
154
ElementFloorDivInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size)155 int ElementFloorDivInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size) {
156 int i = 0;
157
158 SIMD_RUN_NO_SCALAR(ElementFloorDivInt, i, in0, in1, out, size);
159
160 for (; i < size; i++) {
161 NNACL_CHECK_ZERO_RETURN_ERR(in1[i]);
162 out[i] = in0[i] / in1[i];
163 }
164 return NNACL_OK;
165 }
166
ElementOptFloorDivInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size,bool first_scalar)167 int ElementOptFloorDivInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size, bool first_scalar) {
168 int i = 0;
169 if (first_scalar) {
170 SIMD_RUN_NO_SCALAR(ElementOptFloorDivIntNum0, i, in0, in1, out, size);
171
172 for (; i < size; i++) {
173 NNACL_CHECK_ZERO_RETURN_ERR(in1[i]);
174 out[i] = in0[0] / in1[i];
175 }
176 } else {
177 NNACL_CHECK_ZERO_RETURN_ERR(in1[0]);
178
179 SIMD_RUN_NO_SCALAR(ElementOptFloorDivIntNum1, i, in0, in1, out, size);
180
181 for (; i < size; i++) {
182 out[i] = in0[i] / in1[0];
183 }
184 }
185
186 return NNACL_OK;
187 }
188
ElementLogicalAnd(const float * in0,const float * in1,float * out,int size)189 int ElementLogicalAnd(const float *in0, const float *in1, float *out, int size) {
190 int index = 0;
191
192 SIMD_RUN_NO_SCALAR(ElementLogicalAnd, index, in0, in1, out, size);
193 for (; index < size; index++) {
194 out[index] = (float)((bool)(in0[index]) & (bool)(in1[index]));
195 }
196 return NNACL_OK;
197 }
198
ElementOptLogicalAnd(const float * in0,const float * in1,float * out,int size,bool first_scalar)199 int ElementOptLogicalAnd(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
200 int index = 0;
201 SIMD_RUN_NO_SCALAR(ElementOptLogicalAnd, index, in0, in1, out, size, first_scalar);
202 if (first_scalar) {
203 for (; index < size; index++) {
204 out[index] = (float)((bool)(in0[0]) & (bool)(in1[index]));
205 }
206 } else {
207 for (; index < size; index++) {
208 out[index] = (float)((bool)(in0[index]) & (bool)(in1[0]));
209 }
210 }
211
212 return NNACL_OK;
213 }
214
ElementLogicalAndInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size)215 int ElementLogicalAndInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size) {
216 int index = 0;
217 for (; index < size; index++) {
218 out[index] = (int)((unsigned int)(in0[index]) & (unsigned int)(in1[index]));
219 }
220 return NNACL_OK;
221 }
222
ElementOptLogicalAndInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size,bool first_scalar)223 int ElementOptLogicalAndInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size, bool first_scalar) {
224 int index = 0;
225 if (first_scalar) {
226 for (; index < size; index++) {
227 out[index] = (int)((unsigned int)(in0[0]) & (unsigned int)(in1[index]));
228 }
229 } else {
230 for (; index < size; index++) {
231 out[index] = (int)((unsigned int)(in0[index]) & (unsigned int)(in1[0]));
232 }
233 }
234
235 return NNACL_OK;
236 }
237
ElementLogicalAndBool(const bool * in0,const bool * in1,bool * out,int size)238 int ElementLogicalAndBool(const bool *in0, const bool *in1, bool *out, int size) {
239 int index = 0;
240 for (; index < size; index++) {
241 out[index] = (bool)((unsigned int)(in0[index]) & (unsigned int)(in1[index]));
242 }
243
244 return NNACL_OK;
245 }
246
ElementOptLogicalAndBool(const bool * in0,const bool * in1,bool * out,int size,bool first_scalar)247 int ElementOptLogicalAndBool(const bool *in0, const bool *in1, bool *out, int size, bool first_scalar) {
248 int index = 0;
249 if (first_scalar) {
250 for (; index < size; index++) {
251 out[index] = (bool)((unsigned int)(in0[0]) & (unsigned int)(in1[index]));
252 }
253 } else {
254 for (; index < size; index++) {
255 out[index] = (bool)((unsigned int)(in0[index]) & (unsigned int)(in1[0]));
256 }
257 }
258
259 return NNACL_OK;
260 }
261
ElementLogicalOr(const float * in0,const float * in1,float * out,int size)262 int ElementLogicalOr(const float *in0, const float *in1, float *out, int size) {
263 int index = 0;
264 #ifdef ENABLE_NEON
265 float32x4_t vtrue = vdupq_n_f32(1);
266 float32x4_t vfalse = vdupq_n_f32(0);
267 uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1));
268 uint32x4_t zeros = vdupq_n_u32(0);
269 for (; index <= size - 4; index += C4NUM) {
270 uint32x4_t vin0 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in0 + index)), mask);
271 uint32x4_t vin1 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in1 + index)), mask);
272 float32x4_t vout = vbslq_f32(vceqq_u32(vorrq_u32(vin0, vin1), zeros), vfalse, vtrue);
273 vst1q_f32(out + index, vout);
274 }
275 #endif
276 for (; index < size; index++) {
277 out[index] = (float)((bool)(in0[index]) | (bool)(in1[index]));
278 }
279 return NNACL_OK;
280 }
281
ElementOptLogicalOr(const float * in0,const float * in1,float * out,int size,bool first_scalar)282 int ElementOptLogicalOr(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
283 int index = 0;
284 if (first_scalar) {
285 for (; index < size; index++) {
286 out[index] = (float)((bool)(in0[0]) | (bool)(in1[index]));
287 }
288 } else {
289 for (; index < size; index++) {
290 out[index] = (float)((bool)(in0[index]) | (bool)(in1[0]));
291 }
292 }
293
294 return NNACL_OK;
295 }
296
ElementLogicalOrBool(const bool * in0,const bool * in1,bool * out,int size)297 int ElementLogicalOrBool(const bool *in0, const bool *in1, bool *out, int size) {
298 int index = 0;
299 for (; index < size; index++) {
300 out[index] = (bool)(in0[index] | in1[index]);
301 }
302 return NNACL_OK;
303 }
304
ElementOptLogicalOrBool(const bool * in0,const bool * in1,bool * out,int size,bool first_scalar)305 int ElementOptLogicalOrBool(const bool *in0, const bool *in1, bool *out, int size, bool first_scalar) {
306 int index = 0;
307 if (first_scalar) {
308 for (; index < size; index++) {
309 out[index] = (bool)(in0[0] | in1[index]);
310 }
311 } else {
312 for (; index < size; index++) {
313 out[index] = (bool)(in0[index] | in1[0]);
314 }
315 }
316
317 return NNACL_OK;
318 }
319
ElementMaximum(const float * in0,const float * in1,float * out,int size)320 int ElementMaximum(const float *in0, const float *in1, float *out, int size) {
321 int index = 0;
322
323 SIMD_RUN_NO_SCALAR(ElementMaximum, index, in0, in1, out, size);
324
325 for (; index < size; index++) {
326 out[index] = in0[index] > in1[index] ? in0[index] : in1[index];
327 }
328 return NNACL_OK;
329 }
330
ElementOptMaximum(const float * in0,const float * in1,float * out,int size,bool first_scalar)331 int ElementOptMaximum(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
332 int index = 0;
333
334 if (first_scalar) {
335 SIMD_RUN_NO_SCALAR(ElementOptMaximumNum0, index, in0, in1, out, size);
336
337 for (; index < size; index++) {
338 out[index] = in0[0] > in1[index] ? in0[0] : in1[index];
339 }
340 } else {
341 SIMD_RUN_NO_SCALAR(ElementOptMaximumNum1, index, in0, in1, out, size);
342
343 for (; index < size; index++) {
344 out[index] = in0[index] > in1[0] ? in0[index] : in1[0];
345 }
346 }
347
348 return NNACL_OK;
349 }
350
ElementMaximumInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size)351 int ElementMaximumInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size) {
352 int index = 0;
353
354 SIMD_RUN_NO_SCALAR(ElementMaximumInt, index, in0, in1, out, size);
355
356 for (; index < size; index++) {
357 out[index] = in0[index] > in1[index] ? in0[index] : in1[index];
358 }
359 return NNACL_OK;
360 }
361
ElementOptMaximumInt(const int32_t * in0,const int32_t * in1,int32_t * out,int size,bool first_scalar)362 int ElementOptMaximumInt(const int32_t *in0, const int32_t *in1, int32_t *out, int size, bool first_scalar) {
363 int index = 0;
364 if (first_scalar) {
365 SIMD_RUN_NO_SCALAR(ElementOptMaximumIntNum0, index, in0, in1, out, size);
366
367 for (; index < size; index++) {
368 out[index] = in0[0] > in1[index] ? in0[0] : in1[index];
369 }
370 } else {
371 SIMD_RUN_NO_SCALAR(ElementOptMaximumIntNum1, index, in0, in1, out, size);
372
373 for (; index < size; index++) {
374 out[index] = in0[index] > in1[0] ? in0[index] : in1[0];
375 }
376 }
377
378 return NNACL_OK;
379 }
380
ElementMinimumInt(const int32_t * input0,const int32_t * input1,int32_t * output,int size)381 int ElementMinimumInt(const int32_t *input0, const int32_t *input1, int32_t *output, int size) {
382 int index = 0;
383
384 SIMD_RUN_NO_SCALAR(ElementMinimumInt, index, input0, input1, output, size);
385
386 for (; index < size; index++) {
387 output[index] = input0[index] > input1[index] ? input1[index] : input0[index];
388 }
389 return NNACL_OK;
390 }
391
ElementOptMinimumInt(const int32_t * input0,const int32_t * input1,int32_t * output,int size,bool first_scalar)392 int ElementOptMinimumInt(const int32_t *input0, const int32_t *input1, int32_t *output, int size, bool first_scalar) {
393 int index = 0;
394 if (first_scalar) {
395 SIMD_RUN_NO_SCALAR(ElementOptMinimumIntNum0, index, input0, input1, output, size);
396
397 for (; index < size; index++) {
398 output[index] = input0[0] > input1[index] ? input1[index] : input0[0];
399 }
400 } else {
401 SIMD_RUN_NO_SCALAR(ElementOptMinimumIntNum1, index, input0, input1, output, size);
402
403 for (; index < size; index++) {
404 output[index] = input0[index] > input1[0] ? input1[0] : input0[index];
405 }
406 }
407
408 return NNACL_OK;
409 }
410
ElementMinimum(const float * in0,const float * in1,float * out,int size)411 int ElementMinimum(const float *in0, const float *in1, float *out, int size) {
412 int index = 0;
413
414 SIMD_RUN_NO_SCALAR(ElementMinimum, index, in0, in1, out, size);
415
416 for (; index < size; index++) {
417 out[index] = in0[index] > in1[index] ? in1[index] : in0[index];
418 }
419 return NNACL_OK;
420 }
421
ElementOptMinimum(const float * in0,const float * in1,float * out,int size,bool first_scalar)422 int ElementOptMinimum(const float *in0, const float *in1, float *out, int size, bool first_scalar) {
423 int index = 0;
424 if (first_scalar) {
425 SIMD_RUN_NO_SCALAR(ElementOptMinimumNum0, index, in0, in1, out, size);
426
427 for (; index < size; index++) {
428 out[index] = in0[0] > in1[index] ? in1[index] : in0[0];
429 }
430 } else {
431 SIMD_RUN_NO_SCALAR(ElementOptMinimumNum1, index, in0, in1, out, size);
432
433 for (; index < size; index++) {
434 out[index] = in0[index] > in1[0] ? in1[0] : in0[index];
435 }
436 }
437
438 return NNACL_OK;
439 }
440
441 #undef ACCURACY_DATA
442
// Recursively tiles a float tensor along axis `dim` and every axis after it.
//   inPtr / outPtr : source and destination buffers, interpreted as float arrays
//   dim            : axis handled by this call (the top-level caller passes 0)
//   ndim           : number of axes
//   inShape        : source extent per axis
//   inStrides      : source element stride per axis
//   outStrides     : destination element stride per axis
//   multiple       : repeat count per axis
// On the last axis the source run of inShape[dim] floats is copied multiple[dim] times
// back to back. On earlier axes, source slice i is written to destination slices
// i + j * inShape[dim] for j in [0, multiple[dim]).
// The call is a no-op when `dim` is outside [0, ndim) or when the extent or repeat count
// of the current axis is not positive.
void TileOneDimensionFp32(const void *inPtr, void *outPtr, int dim, size_t ndim, const int32_t *inShape,
                          const int32_t *inStrides, const int32_t *outStrides, const int32_t *multiple) {
  // Guard the axis first: with ndim == 0 the expression `ndim - 1` wraps around, the
  // last-axis test never matches and the recursion would not terminate.
  if (dim < 0 || (size_t)dim >= ndim) {
    return;
  }

  const float *inData = (const float *)inPtr;
  float *outData = (float *)outPtr;
  const int32_t srcDimSize = inShape[dim];
  const int32_t repeat = multiple[dim];
  // Negative values would turn into huge unsigned loop bounds / memcpy lengths.
  if (srcDimSize <= 0 || repeat <= 0) {
    return;
  }

  if ((size_t)dim == ndim - 1) {
    for (int32_t r = 0; r < repeat; r++) {
      memcpy(outData, inData, (size_t)srcDimSize * sizeof(float));
      outData += srcDimSize;
    }
    return;
  }

  const size_t inStride = (size_t)inStrides[dim];
  const size_t outStride = (size_t)outStrides[dim];
  for (int32_t i = 0; i < srcDimSize; i++) {
    for (int32_t r = 0; r < repeat; r++) {
      // Destination slice index of the r-th copy of source slice i.
      const size_t outSlice = (size_t)i + (size_t)r * (size_t)srcDimSize;
      TileOneDimensionFp32(inData + inStride * (size_t)i, outData + outStride * outSlice, dim + 1, ndim, inShape,
                           inStrides, outStrides, multiple);
    }
  }
}
463
// Tiles both operands into tile_data0 / tile_data1 using the shapes, strides and multiples
// stored in `param`. CalcMultiplesAndStrides(param) is called first (defined elsewhere;
// by its name it fills the multiples and strides read below - confirm). Both tilings use
// param->out_strides_ as the destination strides.
void TileDimensionsFp32(const float *data0, const float *data1, float *tile_data0, float *tile_data1,
                        ArithmeticParameter *param) {
  CalcMultiplesAndStrides(param);

  // First operand.
  TileOneDimensionFp32(data0, tile_data0, 0, param->ndim_,
                       param->in_shape0_, param->in_strides0_, param->out_strides_, param->multiples0_);

  // Second operand.
  TileOneDimensionFp32(data1, tile_data1, 0, param->ndim_,
                       param->in_shape1_, param->in_strides1_, param->out_strides_, param->multiples1_);
}
472
// In-place element-wise subtraction: in0[k] = in0[k] - in1[k] for k in [0, size).
void AssignSubOpt(float *in0, const float *in1, size_t size) {
  int pos = 0;

  // Vector path; presumably advances `pos` past the elements it handled - confirm.
  SIMD_RUN_NO_SCALAR(AssignSubOpt, pos, in0, in1, size);

  // Scalar loop for whatever the vector path left over.
  while (pos < size) {
    in0[pos] -= in1[pos];
    pos++;
  }
}
483