/external/XNNPACK/src/s16-rmaxabs/gen/
neon-x24.c  (xnn_s16_rmaxabs_ukernel__neon_x24)
    30  uint16x8_t vmax0 = vzero;  (local)
    43  vmax0 = vmaxq_u16(vmax0, vabs0);
    48  vmax0 = vmaxq_u16(vmax0, vmax1);
    49  vmax0 = vmaxq_u16(vmax0, vmax2);
    55  vmax0 = vmaxq_u16(vmax0, vabs);
    63  vmax0 = vmaxq_u16(vmax0, vabs);
    68  *output = vmaxvq_u16(vmax0);
    70  uint16x4_t vmax_lo = vmax_u16(vget_low_u16(vmax0), vget_high_u16(vmax0));
neon-x32.c  (xnn_s16_rmaxabs_ukernel__neon_x32)
    30  uint16x8_t vmax0 = vzero;  (local)
    46  vmax0 = vmaxq_u16(vmax0, vabs0);
    52  vmax0 = vmaxq_u16(vmax0, vmax1);
    54  vmax0 = vmaxq_u16(vmax0, vmax2);
    60  vmax0 = vmaxq_u16(vmax0, vabs);
    68  vmax0 = vmaxq_u16(vmax0, vabs);
    73  *output = vmaxvq_u16(vmax0);
    75  uint16x4_t vmax_lo = vmax_u16(vget_low_u16(vmax0), vget_high_u16(vmax0));
neon-x16.c  (xnn_s16_rmaxabs_ukernel__neon_x16)
    30  uint16x8_t vmax0 = vzero;  (local)
    40  vmax0 = vmaxq_u16(vmax0, vabs0);
    44  vmax0 = vmaxq_u16(vmax0, vmax1);
    50  vmax0 = vmaxq_u16(vmax0, vabs);
    58  vmax0 = vmaxq_u16(vmax0, vabs);
    63  *output = vmaxvq_u16(vmax0);
    65  uint16x4_t vmax_lo = vmax_u16(vget_low_u16(vmax0), vget_high_u16(vmax0));
scalar-x3.c  (xnn_s16_rmaxabs_ukernel__scalar_x3)
    27  uint32_t vmax0 = 0;  (local)
    41  vmax0 = math_max_u32(vmax0, vabs0);
    46  vmax0 = math_max_u32(vmax0, vmax1);
    47  vmax0 = math_max_u32(vmax0, vmax2);
    53  vmax0 = math_max_u32(vmax0, vabs);
    56  *output = (uint16_t) vmax0;
scalar-x4.c  (xnn_s16_rmaxabs_ukernel__scalar_x4)
    27  uint32_t vmax0 = 0;  (local)
    44  vmax0 = math_max_u32(vmax0, vabs0);
    50  vmax0 = math_max_u32(vmax0, vmax1);
    52  vmax0 = math_max_u32(vmax0, vmax2);
    58  vmax0 = math_max_u32(vmax0, vabs);
    61  *output = (uint16_t) vmax0;
scalar-x2.c  (xnn_s16_rmaxabs_ukernel__scalar_x2)
    27  uint32_t vmax0 = 0;  (local)
    38  vmax0 = math_max_u32(vmax0, vabs0);
    42  vmax0 = math_max_u32(vmax0, vmax1);
    48  vmax0 = math_max_u32(vmax0, vabs);
    51  *output = (uint16_t) vmax0;
neon-x8.c  (xnn_s16_rmaxabs_ukernel__neon_x8)
    30  uint16x8_t vmax0 = vzero;  (local)
    37  vmax0 = vmaxq_u16(vmax0, vabs);
    45  vmax0 = vmaxq_u16(vmax0, vabs);
    50  *output = vmaxvq_u16(vmax0);
    52  uint16x4_t vmax_lo = vmax_u16(vget_low_u16(vmax0), vget_high_u16(vmax0));
scalar-x1.c  (xnn_s16_rmaxabs_ukernel__scalar_x1)
    27  uint32_t vmax0 = 0;  (local)
    34  vmax0 = math_max_u32(vmax0, vabs);
    37  *output = (uint16_t) vmax0;
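The fragments above only show the lines that touch vmax0. For reference, a minimal single-accumulator sketch of the s16 rmaxabs pattern (the scalar-x1 shape) follows; it is an illustration in plain C, not the XNNPACK source, and the x2/x3/x4 variants simply keep several such accumulators and merge them at the end.

#include <stddef.h>
#include <stdint.h>

/* Sketch only: running maximum of |x| over int16 input, result stored as uint16. */
static void rmaxabs_s16_sketch(size_t batch, const int16_t* input, uint16_t* output) {
  uint32_t vmax0 = 0;
  for (size_t i = 0; i < batch; i++) {
    /* Widen before negating so that |INT16_MIN| == 32768 is representable. */
    const int32_t vi = (int32_t) input[i];
    const uint32_t vabs = (uint32_t) (vi < 0 ? -vi : vi);
    vmax0 = vabs > vmax0 ? vabs : vmax0;  /* mirrors math_max_u32 */
  }
  *output = (uint16_t) vmax0;  /* at most 32768, so it fits in uint16_t */
}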
/external/XNNPACK/src/f32-rmax/
sse.c  (xnn_f32_rmax_ukernel__sse)
    21  __m128 vmax0 = _mm_load_ss(x);  (local)
    22  vmax0 = _mm_shuffle_ps(vmax0, vmax0, _MM_SHUFFLE(0, 0, 0, 0));
    23  __m128 vmax1 = vmax0;
    24  __m128 vmax2 = vmax0;
    25  __m128 vmax3 = vmax0;
    33  vmax0 = _mm_max_ps(vmax0, vx0);
    38  __m128 vmax = _mm_max_ps(_mm_max_ps(vmax0, vmax1), _mm_max_ps(vmax2, vmax3));
scalar.c  (xnn_f32_rmax_ukernel__scalar)
    20  float vmax0 = *x;  (local)
    21  float vmax1 = vmax0;
    22  float vmax2 = vmax0;
    23  float vmax3 = vmax0;
    31  vmax0 = math_max_f32(vx0, vmax0);
    36  const float vmax01 = math_max_f32(vmax0, vmax1);
wasmsimd-arm.c  (xnn_f32_rmax_ukernel__wasmsimd_arm)
    21  v128_t vmax0 = wasm_v128_load32_splat(x);  (local)
    22  v128_t vmax1 = vmax0;
    23  v128_t vmax2 = vmax0;
    24  v128_t vmax3 = vmax0;
    32  vmax0 = wasm_f32x4_max(vmax0, vx0);
    37  v128_t vmax0123 = wasm_f32x4_max(wasm_f32x4_max(vmax0, vmax1), wasm_f32x4_max(vmax2, vmax3));
wasmsimd-x86.c  (xnn_f32_rmax_ukernel__wasmsimd_x86)
    22  v128_t vmax0 = wasm_v128_load32_splat(x);  (local)
    23  v128_t vmax1 = vmax0;
    24  v128_t vmax2 = vmax0;
    25  v128_t vmax3 = vmax0;
    33  vmax0 = wasm_f32x4_pmax(vx0, vmax0);
    38  const v128_t vmax01 = wasm_f32x4_pmax(vmax1, vmax0);
avx.c  (xnn_f32_rmax_ukernel__avx)
    21  __m256 vmax0 = _mm256_broadcast_ss(x);  (local)
    22  __m256 vmax1 = vmax0;
    23  __m256 vmax2 = vmax0;
    24  __m256 vmax3 = vmax0;
    32  vmax0 = _mm256_max_ps(vmax0, vx0);
    37  __m256 vmax = _mm256_max_ps(_mm256_max_ps(vmax0, vmax1), _mm256_max_ps(vmax2, vmax3));
neon.c  (xnn_f32_rmax_ukernel__neon)
    22  float32x4_t vmax0 = vld1q_dup_f32(x);  (local)
    23  float32x4_t vmax1 = vmax0;
    24  float32x4_t vmax2 = vmax0;
    25  float32x4_t vmax3 = vmax0;
    32  vmax0 = vmaxq_f32(vmax0, vx0);
    37  float32x4_t vmax = vmaxq_f32(vmaxq_f32(vmax0, vmax1), vmaxq_f32(vmax2, vmax3));
avx512f.c  (xnn_f32_rmax_ukernel__avx512f)
    22  __m512 vmax0 = _mm512_broadcastss_ps(_mm_load_ss(x));  (local)
    23  __m512 vmax1 = vmax0;
    24  __m512 vmax2 = vmax0;
    25  __m512 vmax3 = vmax0;
    33  vmax0 = _mm512_max_ps(vmax0, vx0);
    38  __m512 vmax = _mm512_max_ps(_mm512_max_ps(vmax0, vmax1), _mm512_max_ps(vmax2, vmax3));
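Every f32-rmax variant above shares the same shape: seed the accumulators with the first element, keep four independent running maxima in the main loop, combine them, then handle the remainder. A plain-C sketch of that shape, modelled on scalar.c with the math_max_f32 helper written out as a ternary (an illustration, not the actual XNNPACK file):

#include <stddef.h>

static void rmax_f32_sketch(size_t n, const float* x, float* y) {
  /* Seed all four accumulators with the first element (n is assumed > 0). */
  float vmax0 = *x;
  float vmax1 = vmax0;
  float vmax2 = vmax0;
  float vmax3 = vmax0;
  size_t i = 0;
  for (; i + 4 <= n; i += 4) {
    /* One element per accumulator keeps four independent dependency chains. */
    vmax0 = x[i + 0] > vmax0 ? x[i + 0] : vmax0;
    vmax1 = x[i + 1] > vmax1 ? x[i + 1] : vmax1;
    vmax2 = x[i + 2] > vmax2 ? x[i + 2] : vmax2;
    vmax3 = x[i + 3] > vmax3 ? x[i + 3] : vmax3;
  }
  const float vmax01 = vmax0 > vmax1 ? vmax0 : vmax1;
  const float vmax23 = vmax2 > vmax3 ? vmax2 : vmax3;
  float vmax = vmax01 > vmax23 ? vmax01 : vmax23;
  for (; i < n; i++) {
    vmax = x[i] > vmax ? x[i] : vmax;  /* scalar tail */
  }
  *y = vmax;
}

The SIMD files follow the same outline with vector registers (vmaxq_f32, _mm_max_ps, _mm256_max_ps, _mm512_max_ps, wasm_f32x4_max/pmax) plus a final horizontal reduction.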
/external/XNNPACK/src/u8-rmax/
scalar.c  (xnn_u8_rmax_ukernel__scalar)
    18  uint8_t vmax0 = 0;  (local)
    25  vmax0 = vt0 > vmax0 ? vt0 : vmax0;
    28  uint8_t vmax = vmax0 > vmax1 ? vmax0 : vmax1;
/external/XNNPACK/src/f16-rmax/
neonfp16arith.c  (xnn_f16_rmax_ukernel__neonfp16arith)
    24  float16x8_t vmax0 = vld1q_dup_f16(i);  (local)
    25  float16x8_t vmax1 = vmax0;
    26  float16x8_t vmax2 = vmax0;
    27  float16x8_t vmax3 = vmax0;
    34  vmax0 = vmaxq_f16(vmax0, vx0);
    39  float16x8_t vmax = vmaxq_f16(vmaxq_f16(vmax0, vmax1), vmaxq_f16(vmax2, vmax3));
f16c.c  (xnn_f16_rmax_ukernel__f16c)
    25  __m256 vmax0 = _mm256_cvtph_ps(vmax_init);  (local)
    26  __m256 vmax1 = vmax0;
    27  __m256 vmax2 = vmax0;
    28  __m256 vmax3 = vmax0;
    36  vmax0 = _mm256_max_ps(vmax0, vx0);
    41  __m256 vmax = _mm256_max_ps(_mm256_max_ps(vmax0, vmax1), _mm256_max_ps(vmax2, vmax3));
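Both f16-rmax kernels keep the running maxima in a wider register: neonfp16arith compares float16x8_t lanes directly, while f16c converts each group of eight fp16 values to fp32 with _mm256_cvtph_ps and compares in fp32. A hedged sketch of that conversion pattern follows; the helper name is hypothetical, the batch is assumed to be a non-zero multiple of eight, and the real kernel additionally handles tails.

#include <immintrin.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch only: maximum of n IEEE fp16 values (stored as uint16_t), compared in fp32. */
static uint16_t rmax_f16_sketch(size_t n, const uint16_t* input) {
  /* Broadcast the first element as the initial maximum (cf. vmax_init above). */
  __m256 vmax = _mm256_cvtph_ps(_mm_set1_epi16((short) input[0]));
  for (size_t i = 0; i < n; i += 8) {
    const __m256 vx = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i*) (input + i)));
    vmax = _mm256_max_ps(vmax, vx);
  }
  /* Horizontal reduction in fp32: 8 -> 4 -> 2 -> 1 lanes. */
  __m128 v = _mm_max_ps(_mm256_castps256_ps128(vmax), _mm256_extractf128_ps(vmax, 1));
  v = _mm_max_ps(v, _mm_movehl_ps(v, v));
  v = _mm_max_ss(v, _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)));
  /* Convert the single fp32 maximum back to fp16. */
  const __m128i vh = _mm_cvtps_ph(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  return (uint16_t) _mm_extract_epi16(vh, 0);
}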
/external/XNNPACK/src/s16-rmaxabs/
neon.c.in  (template for the generated neon-x* kernels)
    55  vmax0 = vmaxq_u16(vmax0, vabs);
    63  vmax0 = vmaxq_u16(vmax0, vabs);
    68  *output = vmaxvq_u16(vmax0);
    70  uint16x4_t vmax_lo = vmax_u16(vget_low_u16(vmax0), vget_high_u16(vmax0));
scalar.c.in  (template for the generated scalar-x* kernels)
    51  vmax0 = math_max_u32(vmax0, vabs);
    54  *output = (uint16_t) vmax0;
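The vmaxvq_u16 and vmax_u16 lines in neon.c.in above correspond to the two final-reduction paths: on AArch64 a single vmaxvq_u16 collapses the eight-lane accumulator, while on AArch32 the reduction has to be done pairwise. A plausible completion of the AArch32 path is sketched below (an illustration, not necessarily the exact generated code).

#include <arm_neon.h>
#include <stdint.h>

static uint16_t reduce_max_u16x8(uint16x8_t vmax) {
#if defined(__aarch64__)
  return vmaxvq_u16(vmax);                 /* single horizontal-max instruction */
#else
  uint16x4_t vmax_lo = vmax_u16(vget_low_u16(vmax), vget_high_u16(vmax));  /* 8 -> 4 lanes */
  vmax_lo = vpmax_u16(vmax_lo, vmax_lo);   /* 4 -> 2 lanes */
  vmax_lo = vpmax_u16(vmax_lo, vmax_lo);   /* 2 -> 1 lane  */
  return vget_lane_u16(vmax_lo, 0);
#endif
}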
/external/XNNPACK/src/amalgam/
f16c.c  (xnn_f16_rmax_ukernel__f16c)
    1279  __m256 vmax0 = _mm256_cvtph_ps(vmax_init);  (local)
    1280  __m256 vmax1 = vmax0;
    1281  __m256 vmax2 = vmax0;
    1282  __m256 vmax3 = vmax0;
    1290  vmax0 = _mm256_max_ps(vmax0, vx0);
    1295  __m256 vmax = _mm256_max_ps(_mm256_max_ps(vmax0, vmax1), _mm256_max_ps(vmax2, vmax3));
sse.c  (xnn_f32_rmax_ukernel__sse)
    6509  __m128 vmax0 = _mm_load_ss(x);  (local)
    6510  vmax0 = _mm_shuffle_ps(vmax0, vmax0, _MM_SHUFFLE(0, 0, 0, 0));
    6511  __m128 vmax1 = vmax0;
    6512  __m128 vmax2 = vmax0;
    6513  __m128 vmax3 = vmax0;
    6521  vmax0 = _mm_max_ps(vmax0, vx0);
    6526  __m128 vmax = _mm_max_ps(_mm_max_ps(vmax0, vmax1), _mm_max_ps(vmax2, vmax3));