/external/XNNPACK/src/qs8-f32-vcvt/gen/ |
D | vcvt-wasmsimd-x8.c | 38 v128_t vy_hi = wasm_i32x4_extend_high_i16x8(vx); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x8() local 41 vy_hi = wasm_f32x4_convert_i32x4(vy_hi); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x8() 44 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x8() 47 wasm_v128_store(y + 4, vy_hi); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x8()
|
D | vcvt-sse2-x8.c | 42 __m128 vy_hi = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp)); in xnn_qs8_f32_vcvt_ukernel__sse2_x8() local 45 vy_hi = _mm_sub_ps(vy_hi, vmagic_bias); in xnn_qs8_f32_vcvt_ukernel__sse2_x8() 48 vy_hi = _mm_mul_ps(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x8() 51 _mm_storeu_ps(y + 4, vy_hi); in xnn_qs8_f32_vcvt_ukernel__sse2_x8()
|
D | vcvt-wasmsimd-x16.c | 67 v128_t vy_hi = wasm_i32x4_extend_high_i16x8(vx); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() local 70 vy_hi = wasm_f32x4_convert_i32x4(vy_hi); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() 73 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() 76 wasm_v128_store(y + 4, vy_hi); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16()
|
D | vcvt-wasmsimd-x24.c | 77 v128_t vy_hi = wasm_i32x4_extend_high_i16x8(vx); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() local 80 vy_hi = wasm_f32x4_convert_i32x4(vy_hi); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() 83 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() 86 wasm_v128_store(y + 4, vy_hi); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24()
|
D | vcvt-sse2-x16.c | 74 __m128 vy_hi = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp)); in xnn_qs8_f32_vcvt_ukernel__sse2_x16() local 77 vy_hi = _mm_sub_ps(vy_hi, vmagic_bias); in xnn_qs8_f32_vcvt_ukernel__sse2_x16() 80 vy_hi = _mm_mul_ps(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x16() 83 _mm_storeu_ps(y + 4, vy_hi); in xnn_qs8_f32_vcvt_ukernel__sse2_x16()
|
D | vcvt-wasmsimd-x32.c | 87 v128_t vy_hi = wasm_i32x4_extend_high_i16x8(vx); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() local 90 vy_hi = wasm_f32x4_convert_i32x4(vy_hi); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() 93 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() 96 wasm_v128_store(y + 4, vy_hi); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32()
|
D | vcvt-sse2-x24.c | 85 __m128 vy_hi = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp)); in xnn_qs8_f32_vcvt_ukernel__sse2_x24() local 88 vy_hi = _mm_sub_ps(vy_hi, vmagic_bias); in xnn_qs8_f32_vcvt_ukernel__sse2_x24() 91 vy_hi = _mm_mul_ps(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x24() 94 _mm_storeu_ps(y + 4, vy_hi); in xnn_qs8_f32_vcvt_ukernel__sse2_x24()
|
D | vcvt-sse2-x32.c | 96 __m128 vy_hi = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp)); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() local 99 vy_hi = _mm_sub_ps(vy_hi, vmagic_bias); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() 102 vy_hi = _mm_mul_ps(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() 105 _mm_storeu_ps(y + 4, vy_hi); in xnn_qs8_f32_vcvt_ukernel__sse2_x32()
|
D | vcvt-neon-x8.c | 41 float32x4_t vy_hi = vcvtq_f32_s32(vwx_hi); in xnn_qs8_f32_vcvt_ukernel__neon_x8() local 44 vy_hi = vmulq_f32(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x8() 47 vst1q_f32(y, vy_hi); y += 4; in xnn_qs8_f32_vcvt_ukernel__neon_x8()
|
/external/XNNPACK/src/qu8-f32-vcvt/gen/ |
D | vcvt-wasmsimd-x8.c | 38 v128_t vy_hi = wasm_i32x4_extend_high_i16x8(vx); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x8() local 41 vy_hi = wasm_f32x4_convert_i32x4(vy_hi); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x8() 44 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x8() 47 wasm_v128_store(y + 4, vy_hi); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x8()
|
D | vcvt-sse2-x8.c | 40 __m128 vy_hi = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp)); in xnn_qu8_f32_vcvt_ukernel__sse2_x8() local 43 vy_hi = _mm_sub_ps(vy_hi, vmagic_bias); in xnn_qu8_f32_vcvt_ukernel__sse2_x8() 46 vy_hi = _mm_mul_ps(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x8() 49 _mm_storeu_ps(y + 4, vy_hi); in xnn_qu8_f32_vcvt_ukernel__sse2_x8()
|
D | vcvt-wasmsimd-x16.c | 67 v128_t vy_hi = wasm_i32x4_extend_high_i16x8(vx); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() local 70 vy_hi = wasm_f32x4_convert_i32x4(vy_hi); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() 73 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() 76 wasm_v128_store(y + 4, vy_hi); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16()
|
D | vcvt-wasmsimd-x24.c | 77 v128_t vy_hi = wasm_i32x4_extend_high_i16x8(vx); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() local 80 vy_hi = wasm_f32x4_convert_i32x4(vy_hi); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() 83 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() 86 wasm_v128_store(y + 4, vy_hi); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24()
|
D | vcvt-sse2-x16.c | 70 __m128 vy_hi = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp)); in xnn_qu8_f32_vcvt_ukernel__sse2_x16() local 73 vy_hi = _mm_sub_ps(vy_hi, vmagic_bias); in xnn_qu8_f32_vcvt_ukernel__sse2_x16() 76 vy_hi = _mm_mul_ps(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x16() 79 _mm_storeu_ps(y + 4, vy_hi); in xnn_qu8_f32_vcvt_ukernel__sse2_x16()
|
D | vcvt-wasmsimd-x32.c | 87 v128_t vy_hi = wasm_i32x4_extend_high_i16x8(vx); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() local 90 vy_hi = wasm_f32x4_convert_i32x4(vy_hi); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() 93 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() 96 wasm_v128_store(y + 4, vy_hi); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32()
|
D | vcvt-sse2-x24.c | 80 __m128 vy_hi = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp)); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() local 83 vy_hi = _mm_sub_ps(vy_hi, vmagic_bias); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() 86 vy_hi = _mm_mul_ps(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() 89 _mm_storeu_ps(y + 4, vy_hi); in xnn_qu8_f32_vcvt_ukernel__sse2_x24()
|
D | vcvt-sse2-x32.c | 90 __m128 vy_hi = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp)); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() local 93 vy_hi = _mm_sub_ps(vy_hi, vmagic_bias); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() 96 vy_hi = _mm_mul_ps(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() 99 _mm_storeu_ps(y + 4, vy_hi); in xnn_qu8_f32_vcvt_ukernel__sse2_x32()
|
D | vcvt-neon-x8.c | 41 float32x4_t vy_hi = vcvtq_f32_s32(vwx_hi); in xnn_qu8_f32_vcvt_ukernel__neon_x8() local 44 vy_hi = vmulq_f32(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x8() 47 vst1q_f32(y, vy_hi); y += 4; in xnn_qu8_f32_vcvt_ukernel__neon_x8()
|
/external/XNNPACK/src/math/ |
D | cvt-f32-qs8-neon.c | 33 int32x4_t vy_hi = vreinterpretq_s32_f32(vx_hi); in xnn_math_f32_qs8_cvt__neon() local 36 vy_hi = vqsubq_s32(vy_hi, vimagic); in xnn_math_f32_qs8_cvt__neon() 38 const int16x8_t vy = vcombine_s16(vqmovn_s32(vy_lo), vqmovn_s32(vy_hi)); in xnn_math_f32_qs8_cvt__neon()
|
D | cvt-f32-qu8-neon.c | 33 int32x4_t vy_hi = vreinterpretq_s32_f32(vx_hi); in xnn_math_f32_qu8_cvt__neon() local 36 vy_hi = vqsubq_s32(vy_hi, vimagic); in xnn_math_f32_qu8_cvt__neon() 38 const int16x8_t vy = vcombine_s16(vqmovn_s32(vy_lo), vqmovn_s32(vy_hi)); in xnn_math_f32_qu8_cvt__neon()
|
/external/XNNPACK/src/qs8-f32-vcvt/ |
D | wasmsimd.c.in | 64 v128_t vy_hi = wasm_i32x4_extend_high_i16x8(vx); variable 67 vy_hi = wasm_f32x4_convert_i32x4(vy_hi); 70 vy_hi = wasm_f32x4_mul(vy_hi, vscale); 73 wasm_v128_store(y + 4, vy_hi);
|
D | sse2.c.in | 74 __m128 vy_hi = _mm_castsi128_ps(_mm_unpackhi_epi16(vx, vmagic_exp)); variable 77 vy_hi = _mm_sub_ps(vy_hi, vmagic_bias); 80 vy_hi = _mm_mul_ps(vy_hi, vscale); 83 _mm_storeu_ps(y + 4, vy_hi);
|
/external/XNNPACK/src/f32-qs8-vcvt/gen/ |
D | vcvt-sse41-x8.c | 47 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() local 49 __m128i vy = _mm_packs_epi32(vy_lo, vy_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() 69 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x8() local 71 __m128i vy = _mm_packs_epi32(vy_lo, vy_hi); in xnn_f32_qs8_vcvt_ukernel__sse41_x8()
|
D | vcvt-sse2-x8.c | 47 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() local 49 __m128i vy = _mm_packs_epi32(vy_lo, vy_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() 69 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x8() local 71 __m128i vy = _mm_packs_epi32(vy_lo, vy_hi); in xnn_f32_qs8_vcvt_ukernel__sse2_x8()
|
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-sse2-x8.c | 47 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() local 49 __m128i vy = _mm_packs_epi32(vy_lo, vy_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() 69 const __m128i vy_hi = _mm_cvtps_epi32(vx_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x8() local 71 __m128i vy = _mm_packs_epi32(vy_lo, vy_hi); in xnn_f32_qu8_vcvt_ukernel__sse2_x8()
|