/external/XNNPACK/src/f32-argmaxpool/
D | 9p8x-wasmsimd-c4.c |
    75  v128_t vmax = vi0;                            in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local
    78  const v128_t vm1 = wasm_f32x4_gt(vi1, vmax);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    79  vmax = wasm_v128_bitselect(vi1, vmax, vm1);   in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    82  const v128_t vm2 = wasm_f32x4_gt(vi2, vmax);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    83  vmax = wasm_v128_bitselect(vi2, vmax, vm2);   in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    86  const v128_t vm3 = wasm_f32x4_gt(vi3, vmax);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    87  vmax = wasm_v128_bitselect(vi3, vmax, vm3);   in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    90  const v128_t vm4 = wasm_f32x4_gt(vi4, vmax);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    91  vmax = wasm_v128_bitselect(vi4, vmax, vm4);   in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
    94  const v128_t vm5 = wasm_f32x4_gt(vi5, vmax);  in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
  [all …]
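Every entry in this directory implements the same running-argmax step: compare the new pooling row against the current maxima, then use the resulting lane mask to select values. A minimal C sketch of the wasmsimd idiom, extended with the index-tracking half that these truncated listings omit; `vk` and `vidx` are illustrative names for the row-index splat and the per-lane argmax, not identifiers quoted from the file:

```c
#include <wasm_simd128.h>

// One pooling step: update the per-lane maximum and remember which row won.
static inline void argmax_step(v128_t vi, v128_t vk, v128_t* vmax, v128_t* vidx) {
  const v128_t vm = wasm_f32x4_gt(vi, *vmax);  // lanes where the new row is larger
  *vmax = wasm_v128_bitselect(vi, *vmax, vm);  // take vi where the mask is set
  *vidx = wasm_v128_bitselect(vk, *vidx, vm);  // record the winning row index
}
```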
D | 9p8x-neon-c4.c |
    66  float32x4_t vmax = vi0;                       in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local
    69  const uint32x4_t vm1 = vcgtq_f32(vi1, vmax);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    70  vmax = vbslq_f32(vm1, vi1, vmax);             in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    73  const uint32x4_t vm2 = vcgtq_f32(vi2, vmax);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    74  vmax = vbslq_f32(vm2, vi2, vmax);             in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    77  const uint32x4_t vm3 = vcgtq_f32(vi3, vmax);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    78  vmax = vbslq_f32(vm3, vi3, vmax);             in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    81  const uint32x4_t vm4 = vcgtq_f32(vi4, vmax);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    82  vmax = vbslq_f32(vm4, vi4, vmax);             in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
    85  const uint32x4_t vm5 = vcgtq_f32(vi5, vmax);  in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
  [all …]
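The NEON variant is the same step with one trap for the reader: `vbslq_f32` takes the mask as its first operand (mask, value-if-set, value-if-clear), whereas `wasm_v128_bitselect` takes it last. A sketch under the same illustrative naming as above:

```c
#include <arm_neon.h>

// NEON flavor of the running-argmax step; note the mask-first operand order.
static inline void argmax_step_neon(float32x4_t vi, uint32x4_t vk,
                                    float32x4_t* vmax, uint32x4_t* vidx) {
  const uint32x4_t vm = vcgtq_f32(vi, *vmax);  // lanes where vi > vmax
  *vmax = vbslq_f32(vm, vi, *vmax);            // select the larger value
  *vidx = vbslq_u32(vm, vk, *vidx);            // and the matching row index
}
```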
D | 9p8x-scalar-c1.c |
    66  float vmax = vi0;     in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1() local
    69  if (vi1 > vmax) {     in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
    70  vmax = vi1;           in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
    74  if (vi2 > vmax) {     in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
    75  vmax = vi2;           in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
    79  if (vi3 > vmax) {     in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
    80  vmax = vi3;           in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
    84  if (vi4 > vmax) {     in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
    85  vmax = vi4;           in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
    89  if (vi5 > vmax) {     in xnn_f32_argmaxpool_ukernel_9p8x__scalar_c1()
  [all …]
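The scalar kernel spells the same step out as a branch; the truncated listing hides the index assignment inside each `if`, so here is the complete step as a hedged sketch (again with illustrative names, not the elided file contents):

```c
#include <stdint.h>

// Scalar running-argmax step: the body of each if-block also records the row.
static inline void argmax_step_scalar(float vi, uint32_t k,
                                      float* vmax, uint32_t* vidx) {
  if (vi > *vmax) {
    *vmax = vi;
    *vidx = k;
  }
}
```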
D | 9x-wasmsimd-c4.c |
    95  v128_t vmax = vi0;                            in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4() local
    98  const v128_t vm1 = wasm_f32x4_gt(vi1, vmax);  in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4()
    99  vmax = wasm_v128_bitselect(vi1, vmax, vm1);   in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4()
   102  const v128_t vm2 = wasm_f32x4_gt(vi2, vmax);  in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4()
   103  vmax = wasm_v128_bitselect(vi2, vmax, vm2);   in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4()
   106  const v128_t vm3 = wasm_f32x4_gt(vi3, vmax);  in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4()
   107  vmax = wasm_v128_bitselect(vi3, vmax, vm3);   in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4()
   110  const v128_t vm4 = wasm_f32x4_gt(vi4, vmax);  in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4()
   111  vmax = wasm_v128_bitselect(vi4, vmax, vm4);   in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4()
   114  const v128_t vm5 = wasm_f32x4_gt(vi5, vmax);  in xnn_f32_argmaxpool_ukernel_9x__wasmsimd_c4()
  [all …]
D | 9p8x-sse2-c4.c |
    75  __m128 vmax = vi0;                                             in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
    78  const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax));  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    79  vmax = _mm_max_ps(vi1, vmax);                                   in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    82  const __m128i vm2 = _mm_castps_si128(_mm_cmpgt_ps(vi2, vmax));  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    83  vmax = _mm_max_ps(vi2, vmax);                                   in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    86  const __m128i vm3 = _mm_castps_si128(_mm_cmpgt_ps(vi3, vmax));  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    87  vmax = _mm_max_ps(vi3, vmax);                                   in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    90  const __m128i vm4 = _mm_castps_si128(_mm_cmpgt_ps(vi4, vmax));  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    91  vmax = _mm_max_ps(vi4, vmax);                                   in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
    94  const __m128i vm5 = _mm_castps_si128(_mm_cmpgt_ps(vi5, vmax));  in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
  [all …]
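SSE2 has `_mm_max_ps` but no float blend instruction, so this kernel captures the greater-than mask first, updates the values with the native max, and (in the lines the listing truncates) selects index lanes with and/andnot. A sketch of that pattern, with `vk`/`vidx` again illustrative:

```c
#include <emmintrin.h>

// SSE2 running-argmax step: the mask must be taken before _mm_max_ps
// overwrites vmax, and index selection is composed from and/andnot.
static inline void argmax_step_sse2(__m128 vi, __m128i vk,
                                    __m128* vmax, __m128i* vidx) {
  const __m128i vm = _mm_castps_si128(_mm_cmpgt_ps(vi, *vmax));
  *vmax = _mm_max_ps(vi, *vmax);
  *vidx = _mm_or_si128(_mm_and_si128(vm, vk),        // winning lanes take vk
                       _mm_andnot_si128(vm, *vidx)); // others keep their index
}
```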
D | 9x-neon-c4.c |
    85  float32x4_t vmax = vi0;                       in xnn_f32_argmaxpool_ukernel_9x__neon_c4() local
    88  const uint32x4_t vm1 = vcgtq_f32(vi1, vmax);  in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
    89  vmax = vbslq_f32(vm1, vi1, vmax);             in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
    92  const uint32x4_t vm2 = vcgtq_f32(vi2, vmax);  in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
    93  vmax = vbslq_f32(vm2, vi2, vmax);             in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
    96  const uint32x4_t vm3 = vcgtq_f32(vi3, vmax);  in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
    97  vmax = vbslq_f32(vm3, vi3, vmax);             in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
   100  const uint32x4_t vm4 = vcgtq_f32(vi4, vmax);  in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
   101  vmax = vbslq_f32(vm4, vi4, vmax);             in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
   104  const uint32x4_t vm5 = vcgtq_f32(vi5, vmax);  in xnn_f32_argmaxpool_ukernel_9x__neon_c4()
  [all …]
D | 9x-sse2-c4.c |
    94  __m128 vmax = vi0;                                              in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() local
    97  const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax));  in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
    98  vmax = _mm_max_ps(vi1, vmax);                                   in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
   101  const __m128i vm2 = _mm_castps_si128(_mm_cmpgt_ps(vi2, vmax));  in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
   102  vmax = _mm_max_ps(vi2, vmax);                                   in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
   105  const __m128i vm3 = _mm_castps_si128(_mm_cmpgt_ps(vi3, vmax));  in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
   106  vmax = _mm_max_ps(vi3, vmax);                                   in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
   109  const __m128i vm4 = _mm_castps_si128(_mm_cmpgt_ps(vi4, vmax));  in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
   110  vmax = _mm_max_ps(vi4, vmax);                                   in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
   113  const __m128i vm5 = _mm_castps_si128(_mm_cmpgt_ps(vi5, vmax));  in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
  [all …]
D | 4x-wasmsimd-c4.c |
    60  v128_t vmax = vi0;                            in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4() local
    63  const v128_t vm1 = wasm_f32x4_gt(vi1, vmax);  in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4()
    64  vmax = wasm_v128_bitselect(vi1, vmax, vm1);   in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4()
    67  const v128_t vm2 = wasm_f32x4_gt(vi2, vmax);  in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4()
    68  vmax = wasm_v128_bitselect(vi2, vmax, vm2);   in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4()
    71  const v128_t vm3 = wasm_f32x4_gt(vi3, vmax);  in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4()
    72  vmax = wasm_v128_bitselect(vi3, vmax, vm3);   in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4()
    75  wasm_v128_store(output, vmax);                in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4()
    86  v128_t vmax = vi0;                            in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4() local
    89  const v128_t vm1 = wasm_f32x4_gt(vi1, vmax);  in xnn_f32_argmaxpool_ukernel_4x__wasmsimd_c4()
  [all …]
D | 4x-sse2-c4.c |
    59  __m128 vmax = vi0;                                              in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() local
    62  const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax));  in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
    63  vmax = _mm_max_ps(vi1, vmax);                                   in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
    66  const __m128i vm2 = _mm_castps_si128(_mm_cmpgt_ps(vi2, vmax));  in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
    67  vmax = _mm_max_ps(vi2, vmax);                                   in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
    70  const __m128i vm3 = _mm_castps_si128(_mm_cmpgt_ps(vi3, vmax));  in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
    71  vmax = _mm_max_ps(vi3, vmax);                                   in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
    74  _mm_storeu_ps(output, vmax);                                    in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
    85  __m128 vmax = vi0;                                              in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() local
    88  const __m128i vm1 = _mm_castps_si128(_mm_cmpgt_ps(vi1, vmax));  in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
  [all …]
D | 4x-neon-c4.c |
    55  float32x4_t vmax = vi0;                       in xnn_f32_argmaxpool_ukernel_4x__neon_c4() local
    58  const uint32x4_t vm1 = vcgtq_f32(vi1, vmax);  in xnn_f32_argmaxpool_ukernel_4x__neon_c4()
    59  vmax = vbslq_f32(vm1, vi1, vmax);             in xnn_f32_argmaxpool_ukernel_4x__neon_c4()
    62  const uint32x4_t vm2 = vcgtq_f32(vi2, vmax);  in xnn_f32_argmaxpool_ukernel_4x__neon_c4()
    63  vmax = vbslq_f32(vm2, vi2, vmax);             in xnn_f32_argmaxpool_ukernel_4x__neon_c4()
    66  const uint32x4_t vm3 = vcgtq_f32(vi3, vmax);  in xnn_f32_argmaxpool_ukernel_4x__neon_c4()
    67  vmax = vbslq_f32(vm3, vi3, vmax);             in xnn_f32_argmaxpool_ukernel_4x__neon_c4()
    70  vst1q_f32(output, vmax); output += 4;         in xnn_f32_argmaxpool_ukernel_4x__neon_c4()
    79  float32x4_t vmax = vi0;                       in xnn_f32_argmaxpool_ukernel_4x__neon_c4() local
    82  const uint32x4_t vm1 = vcgtq_f32(vi1, vmax);  in xnn_f32_argmaxpool_ukernel_4x__neon_c4()
  [all …]
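Note that the 4x kernels each declare `vmax` twice (lines 60 and 86 in the wasmsimd file, 59 and 85 in SSE2, 55 and 79 in NEON): the first belongs to the full-width main loop that stores four channels per iteration, and the second to the tail path that reruns the same compare/select sequence for a remainder of fewer than four channels.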
D | 9x-scalar-c1.c |
    84  float vmax = vi0;     in xnn_f32_argmaxpool_ukernel_9x__scalar_c1() local
    87  if (vi1 > vmax) {     in xnn_f32_argmaxpool_ukernel_9x__scalar_c1()
    88  vmax = vi1;           in xnn_f32_argmaxpool_ukernel_9x__scalar_c1()
    92  if (vi2 > vmax) {     in xnn_f32_argmaxpool_ukernel_9x__scalar_c1()
    93  vmax = vi2;           in xnn_f32_argmaxpool_ukernel_9x__scalar_c1()
    97  if (vi3 > vmax) {     in xnn_f32_argmaxpool_ukernel_9x__scalar_c1()
    98  vmax = vi3;           in xnn_f32_argmaxpool_ukernel_9x__scalar_c1()
   102  if (vi4 > vmax) {     in xnn_f32_argmaxpool_ukernel_9x__scalar_c1()
   103  vmax = vi4;           in xnn_f32_argmaxpool_ukernel_9x__scalar_c1()
   107  if (vi5 > vmax) {     in xnn_f32_argmaxpool_ukernel_9x__scalar_c1()
  [all …]
/external/XNNPACK/src/u8-rmax/
D | sse2.c |
    24  __m128i vmax = _mm_setzero_si128();                         in xnn_u8_rmax_ukernel__sse2() local
    28  vmax = _mm_max_epu8(vmax, vx);                              in xnn_u8_rmax_ukernel__sse2()
    35  vmax = _mm_max_epu8(vmax, vx);                              in xnn_u8_rmax_ukernel__sse2()
    37  vmax = _mm_max_epu8(vmax, _mm_unpackhi_epi64(vmax, vmax));  in xnn_u8_rmax_ukernel__sse2()
    38  vmax = _mm_max_epu8(vmax, _mm_srli_epi64(vmax, 32));        in xnn_u8_rmax_ukernel__sse2()
    39  vmax = _mm_max_epu8(vmax, _mm_srli_epi32(vmax, 16));        in xnn_u8_rmax_ukernel__sse2()
    40  vmax = _mm_max_epu8(vmax, _mm_srli_epi16(vmax, 8));         in xnn_u8_rmax_ukernel__sse2()
    41  *y = (uint8_t) _mm_cvtsi128_si32(vmax);                     in xnn_u8_rmax_ukernel__sse2()
    43  uint8_t vmax = 0;                                           in xnn_u8_rmax_ukernel__sse2() local
    46  vmax = vx > vmax ? vx : vmax;                               in xnn_u8_rmax_ukernel__sse2()
  [all …]
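After the bulk loop, the SSE2 reducer at lines 37-40 performs a log2-time horizontal max: each step halves the active span until the maximum sits in the low byte. The same reduction, isolated as a self-contained helper:

```c
#include <emmintrin.h>
#include <stdint.h>

// Horizontal max of 16 unsigned bytes; the zero-fill from the shifts is
// harmless because zero never beats an existing maximum.
static inline uint8_t hmax_epu8(__m128i v) {
  v = _mm_max_epu8(v, _mm_unpackhi_epi64(v, v));  // 16 lanes -> 8
  v = _mm_max_epu8(v, _mm_srli_epi64(v, 32));     //  8 lanes -> 4
  v = _mm_max_epu8(v, _mm_srli_epi32(v, 16));     //  4 lanes -> 2
  v = _mm_max_epu8(v, _mm_srli_epi16(v, 8));      //  2 lanes -> 1
  return (uint8_t) _mm_cvtsi128_si32(v);          // low byte holds the max
}
```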
D | neon.c |
    24  uint8x16_t vmax = vmovq_n_u8(0);                                 in xnn_u8_rmax_ukernel__neon() local
    27  vmax = vmaxq_u8(vmax, vx);                                       in xnn_u8_rmax_ukernel__neon()
    34  vmax = vmaxq_u8(vmax, vx);                                       in xnn_u8_rmax_ukernel__neon()
    36  uint8x8_t vmax8 = vmax_u8(vget_low_u8(vmax), vget_high_u8(vmax));  in xnn_u8_rmax_ukernel__neon()
    42  uint8x8_t vmax = vmov_n_u8(0);                                   in xnn_u8_rmax_ukernel__neon() local
    45  vmax = vmax_u8(vmax, vx);                                        in xnn_u8_rmax_ukernel__neon()
    47  vst1_lane_u8(y, vmax, 0);                                        in xnn_u8_rmax_ukernel__neon()
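The NEON hits stop right after folding the 16-byte vector to 8 lanes (`vmax8` at line 36). A plausible completion of the reduction, sketched here rather than quoted from the lines between hits, finishes with pairwise maxima:

```c
#include <arm_neon.h>
#include <stdint.h>

// Fold 16 unsigned bytes to one maximum: halve once across the 128-bit
// vector, then three pairwise-max steps collapse the remaining 8 lanes.
static inline uint8_t hmax_u8(uint8x16_t v) {
  uint8x8_t m = vmax_u8(vget_low_u8(v), vget_high_u8(v));
  m = vpmax_u8(m, m);  // 8 -> 4
  m = vpmax_u8(m, m);  // 4 -> 2
  m = vpmax_u8(m, m);  // 2 -> 1
  return vget_lane_u8(m, 0);
}
```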
/external/llvm-project/llvm/test/MC/ARM/
D | neont2-minmax-encoding.s |
     5  vmax.s8 d1, d2, d3
     6  vmax.s16 d4, d5, d6
     7  vmax.s32 d7, d8, d9
     8  vmax.u8 d10, d11, d12
     9  vmax.u16 d13, d14, d15
    10  vmax.u32 d16, d17, d18
    11  vmax.f32 d19, d20, d21
    13  vmax.s8 d2, d3
    14  vmax.s16 d5, d6
    15  vmax.s32 d8, d9
  [all …]
D | neon-minmax-encoding.s |
     3  vmax.s8 d1, d2, d3
     4  vmax.s16 d4, d5, d6
     5  vmax.s32 d7, d8, d9
     6  vmax.u8 d10, d11, d12
     7  vmax.u16 d13, d14, d15
     8  vmax.u32 d16, d17, d18
     9  vmax.f32 d19, d20, d21
    11  vmax.s8 d2, d3
    12  vmax.s16 d5, d6
    13  vmax.s32 d8, d9
  [all …]
/external/llvm/test/MC/ARM/
D | neon-minmax-encoding.s |
     3  vmax.s8 d1, d2, d3
     4  vmax.s16 d4, d5, d6
     5  vmax.s32 d7, d8, d9
     6  vmax.u8 d10, d11, d12
     7  vmax.u16 d13, d14, d15
     8  vmax.u32 d16, d17, d18
     9  vmax.f32 d19, d20, d21
    11  vmax.s8 d2, d3
    12  vmax.s16 d5, d6
    13  vmax.s32 d8, d9
  [all …]
D | neont2-minmax-encoding.s |
     5  vmax.s8 d1, d2, d3
     6  vmax.s16 d4, d5, d6
     7  vmax.s32 d7, d8, d9
     8  vmax.u8 d10, d11, d12
     9  vmax.u16 d13, d14, d15
    10  vmax.u32 d16, d17, d18
    11  vmax.f32 d19, d20, d21
    13  vmax.s8 d2, d3
    14  vmax.s16 d5, d6
    15  vmax.s32 d8, d9
  [all …]
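These assembler tests appear in identical pairs for ARM (`neon-`) and Thumb2 (`neont2-`) modes, and each exercises the two-operand shorthand alongside the full three-operand form: `vmax.s8 d2, d3` is an alias that assembles as `vmax.s8 d2, d2, d3`, with the destination register doubling as the first source, which is exactly how the capstone expectations below print it.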
/external/capstone/suite/MC/ARM/
D | neon-minmax-encoding.s.cs |
     2  0x03,0x16,0x02,0xf2 = vmax.s8 d1, d2, d3
     3  0x06,0x46,0x15,0xf2 = vmax.s16 d4, d5, d6
     4  0x09,0x76,0x28,0xf2 = vmax.s32 d7, d8, d9
     5  0x0c,0xa6,0x0b,0xf3 = vmax.u8 d10, d11, d12
     6  0x0f,0xd6,0x1e,0xf3 = vmax.u16 d13, d14, d15
     7  0xa2,0x06,0x61,0xf3 = vmax.u32 d16, d17, d18
     8  0xa5,0x3f,0x44,0xf2 = vmax.f32 d19, d20, d21
     9  0x03,0x26,0x02,0xf2 = vmax.s8 d2, d2, d3
    10  0x06,0x56,0x15,0xf2 = vmax.s16 d5, d5, d6
    11  0x09,0x86,0x28,0xf2 = vmax.s32 d8, d8, d9
  [all …]
D | neont2-minmax-encoding.s.cs |
     2  0x02,0xef,0x03,0x16 = vmax.s8 d1, d2, d3
     3  0x15,0xef,0x06,0x46 = vmax.s16 d4, d5, d6
     4  0x28,0xef,0x09,0x76 = vmax.s32 d7, d8, d9
     5  0x0b,0xff,0x0c,0xa6 = vmax.u8 d10, d11, d12
     6  0x1e,0xff,0x0f,0xd6 = vmax.u16 d13, d14, d15
     7  0x61,0xff,0xa2,0x06 = vmax.u32 d16, d17, d18
     8  0x44,0xef,0xa5,0x3f = vmax.f32 d19, d20, d21
     9  0x02,0xef,0x03,0x26 = vmax.s8 d2, d2, d3
    10  0x15,0xef,0x06,0x56 = vmax.s16 d5, d5, d6
    11  0x28,0xef,0x09,0x86 = vmax.s32 d8, d8, d9
  [all …]
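The expected byte sequences show how the same instruction is stored under the two instruction sets: the ARM encoding of `vmax.s8 d1, d2, d3` is the little-endian word `0xf2021603` (bytes `0x03,0x16,0x02,0xf2`), while the Thumb2 encoding is stored as two little-endian halfwords `0xef02`, `0x1603` (bytes `0x02,0xef,0x03,0x16`); NEON data-processing opcodes occupy the `0xf2`/`0xf3` prefix space in ARM state and `0xef`/`0xff` in Thumb state.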
/external/XNNPACK/src/f32-spmm/gen/
D | 8x4-minmax-scalar.c |
    32  const float vmax = params->scalar.max;        in xnn_f32_spmm_minmax_ukernel_8x4__scalar() local
   123  float vout0x0 = math_min_f32(vacc0x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
   124  float vout1x0 = math_min_f32(vacc1x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
   125  float vout2x0 = math_min_f32(vacc2x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
   126  float vout3x0 = math_min_f32(vacc3x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
   127  float vout4x0 = math_min_f32(vacc4x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
   128  float vout5x0 = math_min_f32(vacc5x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
   129  float vout6x0 = math_min_f32(vacc6x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
   130  float vout7x0 = math_min_f32(vacc7x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
   131  float vout0x1 = math_min_f32(vacc0x1, vmax);  in xnn_f32_spmm_minmax_ukernel_8x4__scalar()
  [all …]
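In the spmm kernels `vmax` is not a running maximum at all: it is the upper bound of the output clamp, applied with `math_min_f32`, and the truncated listings hide the symmetric `math_max_f32(..., vmin)` lower-bound step that follows. A minimal sketch of the whole clamp, assuming the `vmin` field that sits next to `max` in the params struct:

```c
// Clamp an accumulator into [vmin, vmax]; mirrors math_min_f32/math_max_f32.
static inline float clamp_f32(float vacc, float vmin, float vmax) {
  const float vout = vacc < vmax ? vacc : vmax;  // min(vacc, vmax)
  return vout > vmin ? vout : vmin;              // max(vout, vmin)
}
```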
D | 8x2-minmax-scalar.c |
    32  const float vmax = params->scalar.max;        in xnn_f32_spmm_minmax_ukernel_8x2__scalar() local
    89  float vout0x0 = math_min_f32(vacc0x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    90  float vout1x0 = math_min_f32(vacc1x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    91  float vout2x0 = math_min_f32(vacc2x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    92  float vout3x0 = math_min_f32(vacc3x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    93  float vout4x0 = math_min_f32(vacc4x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    94  float vout5x0 = math_min_f32(vacc5x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    95  float vout6x0 = math_min_f32(vacc6x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    96  float vout7x0 = math_min_f32(vacc7x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
    97  float vout0x1 = math_min_f32(vacc0x1, vmax);  in xnn_f32_spmm_minmax_ukernel_8x2__scalar()
  [all …]
D | 32x1-minmax-wasmsimd-x86.c |
    33  const v128_t vmax = wasm_v32x4_load_splat(&params->scalar.max);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() local
    73  v128_t vout0123 = wasm_v128_bitselect(vacc0123, vmax, wasm_f32x4_le(vacc0123, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
    74  v128_t vout4567 = wasm_v128_bitselect(vacc4567, vmax, wasm_f32x4_le(vacc4567, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
    75  v128_t vout89AB = wasm_v128_bitselect(vacc89AB, vmax, wasm_f32x4_le(vacc89AB, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
    76  v128_t voutCDEF = wasm_v128_bitselect(vaccCDEF, vmax, wasm_f32x4_le(vaccCDEF, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
    77  v128_t voutGHIJ = wasm_v128_bitselect(vaccGHIJ, vmax, wasm_f32x4_le(vaccGHIJ, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
    78  v128_t voutKLMN = wasm_v128_bitselect(vaccKLMN, vmax, wasm_f32x4_le(vaccKLMN, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
    79  v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
    80  v128_t voutSTUV = wasm_v128_bitselect(vaccSTUV, vmax, wasm_f32x4_le(vaccSTUV, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
   131  v128_t vout0123 = wasm_v128_bitselect(vacc0123, vmax, wasm_f32x4_le(vacc0123, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
  [all …]
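The `-x86` wasmsimd variants avoid `wasm_f32x4_min`/`wasm_f32x4_max` and build the clamp from a comparison plus `wasm_v128_bitselect`, likely because the WebAssembly min/max operators carry NaN-propagation semantics that engines lower to several instructions on x86. The one-line idiom used at lines 73-80, isolated as a sketch:

```c
#include <wasm_simd128.h>

// Upper clamp without wasm_f32x4_min: keep lanes where vacc <= vmax,
// otherwise substitute vmax.
static inline v128_t clamp_upper(v128_t vacc, v128_t vmax) {
  return wasm_v128_bitselect(vacc, vmax, wasm_f32x4_le(vacc, vmax));
}
```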
D | 32x4-minmax-neonfma.c |
    33  const float32x4_t vmax = vld1q_dup_f32(&params->scalar.max);  in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local
   124  float32x4_t vout0123n0 = vminq_f32(vacc0123n0, vmax);  in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
   125  float32x4_t vout4567n0 = vminq_f32(vacc4567n0, vmax);  in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
   126  float32x4_t vout89ABn0 = vminq_f32(vacc89ABn0, vmax);  in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
   127  float32x4_t voutCDEFn0 = vminq_f32(vaccCDEFn0, vmax);  in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
   128  float32x4_t voutGHIJn0 = vminq_f32(vaccGHIJn0, vmax);  in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
   129  float32x4_t voutKLMNn0 = vminq_f32(vaccKLMNn0, vmax);  in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
   130  float32x4_t voutOPQRn0 = vminq_f32(vaccOPQRn0, vmax);  in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
   131  float32x4_t voutSTUVn0 = vminq_f32(vaccSTUVn0, vmax);  in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
   132  float32x4_t vout0123n1 = vminq_f32(vacc0123n1, vmax);  in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
  [all …]
D | 32x1-minmax-wasmsimd-x86-pipelined.c |
    33  const v128_t vmax = wasm_v32x4_load_splat(&params->scalar.max);  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() local
    87  v128_t vout0123 = wasm_v128_bitselect(vacc0123, vmax, wasm_f32x4_le(vacc0123, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
    88  v128_t vout4567 = wasm_v128_bitselect(vacc4567, vmax, wasm_f32x4_le(vacc4567, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
    89  v128_t vout89AB = wasm_v128_bitselect(vacc89AB, vmax, wasm_f32x4_le(vacc89AB, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
    90  v128_t voutCDEF = wasm_v128_bitselect(vaccCDEF, vmax, wasm_f32x4_le(vaccCDEF, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
    91  v128_t voutGHIJ = wasm_v128_bitselect(vaccGHIJ, vmax, wasm_f32x4_le(vaccGHIJ, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
    92  v128_t voutKLMN = wasm_v128_bitselect(vaccKLMN, vmax, wasm_f32x4_le(vaccKLMN, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
    93  v128_t voutOPQR = wasm_v128_bitselect(vaccOPQR, vmax, wasm_f32x4_le(vaccOPQR, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
    94  v128_t voutSTUV = wasm_v128_bitselect(vaccSTUV, vmax, wasm_f32x4_le(vaccSTUV, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
   145  v128_t vout0123 = wasm_v128_bitselect(vacc0123, vmax, wasm_f32x4_le(vacc0123, vmax));  in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
  [all …]
D | 8x1-minmax-scalar.c |
    32  const float vmax = params->scalar.max;        in xnn_f32_spmm_minmax_ukernel_8x1__scalar() local
    72  float vout0x0 = math_min_f32(vacc0x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x1__scalar()
    73  float vout1x0 = math_min_f32(vacc1x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x1__scalar()
    74  float vout2x0 = math_min_f32(vacc2x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x1__scalar()
    75  float vout3x0 = math_min_f32(vacc3x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x1__scalar()
    76  float vout4x0 = math_min_f32(vacc4x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x1__scalar()
    77  float vout5x0 = math_min_f32(vacc5x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x1__scalar()
    78  float vout6x0 = math_min_f32(vacc6x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x1__scalar()
    79  float vout7x0 = math_min_f32(vacc7x0, vmax);  in xnn_f32_spmm_minmax_ukernel_8x1__scalar()
   141  float vout0 = math_min_f32(vacc0, vmax);      in xnn_f32_spmm_minmax_ukernel_8x1__scalar()
  [all …]