/external/ComputeLibrary/src/core/NEON/ |
D | SVEAsymm.h | 71 const auto vscale = svdup_n_f32(scale); in svdequantize_z() local 73 …_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlb_u16(qv))), voffset)), vscale), in svdequantize_z() 74 …_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlb_u16(qv))), voffset)), vscale), in svdequantize_z() 75 …_s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlb_u32(svmovlt_u16(qv))), voffset)), vscale), in svdequantize_z() 76 …s32_z(pg, svsub_s32_z(pg, svreinterpret_s32_u32(svmovlt_u32(svmovlt_u16(qv))), voffset)), vscale)); in svdequantize_z() 105 const auto vscale = svdup_n_f32(scale); in svdequantize_z() local 107 …mul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlb_s16(qv)), voffset)), vscale), in svdequantize_z() 108 …mul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlb_s16(qv)), voffset)), vscale), in svdequantize_z() 109 …mul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlb_s32(svmovlt_s16(qv)), voffset)), vscale), in svdequantize_z() 110 …ul_f32_z(pg, svcvt_f32_s32_z(pg, svsub_s32_z(pg, svmovlt_s32(svmovlt_s16(qv)), voffset)), vscale)); in svdequantize_z() [all …]
|
D | NEAsymm.h | 420 const float32x4_t vscale = vdupq_n_f32(scale); in vdequantize() local 424 …f32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(qv)))), voffset)), vscale), in vdequantize() 425 …32_s32(vsubq_s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(qv)))), voffset)), vscale), in vdequantize() 443 const float32x4_t vscale = vdupq_n_f32(scale); in vdequantize() local 447 … vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_low_s16(vmovl_s8(qv))), voffset)), vscale), in vdequantize() 448 … vmulq_f32(vcvtq_f32_s32(vsubq_s32(vmovl_s16(vget_high_s16(vmovl_s8(qv))), voffset)), vscale), in vdequantize() 466 const float32x4_t vscale = vdupq_n_f32(scale); in vdequantize() local 470 …_s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale), in vdequantize() 471 …s32(vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(vmovl_u8(vget_low_u8(qv))))), voffset)), vscale), in vdequantize() 472 …s32(vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(vget_high_u8(qv))))), voffset)), vscale), in vdequantize() [all …]
|
/external/XNNPACK/src/qu8-f32-vcvt/gen/ |
D | vcvt-wasmsimd-x32.c | 31 const v128_t vscale = wasm_v128_load64_splat(params->wasmsimd.scale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() local 62 vy0123 = wasm_f32x4_mul(vy0123, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() 63 vy4567 = wasm_f32x4_mul(vy4567, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() 64 vy89AB = wasm_f32x4_mul(vy89AB, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() 65 vyCDEF = wasm_f32x4_mul(vyCDEF, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() 66 vyGHIJ = wasm_f32x4_mul(vyGHIJ, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() 67 vyKLMN = wasm_f32x4_mul(vyKLMN, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() 68 vyOPQR = wasm_f32x4_mul(vyOPQR, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() 69 vySTUV = wasm_f32x4_mul(vySTUV, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() 92 vy_lo = wasm_f32x4_mul(vy_lo, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32() [all …]
|
D | vcvt-neon-x32.c | 31 const float32x4_t vscale = vld1q_dup_f32(&params->neon.scale); in xnn_qu8_f32_vcvt_ukernel__neon_x32() local 61 vy0123 = vmulq_f32(vy0123, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x32() 62 vy4567 = vmulq_f32(vy4567, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x32() 63 vy89AB = vmulq_f32(vy89AB, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x32() 64 vyCDEF = vmulq_f32(vyCDEF, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x32() 65 vyGHIJ = vmulq_f32(vyGHIJ, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x32() 66 vyKLMN = vmulq_f32(vyKLMN, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x32() 67 vyOPQR = vmulq_f32(vyOPQR, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x32() 68 vySTUV = vmulq_f32(vySTUV, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x32() 90 vy_lo = vmulq_f32(vy_lo, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x32() [all …]
|
D | vcvt-sse2-x32.c | 32 const __m128 vscale = _mm_load_ps(params->sse2.scale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() local 65 vy0123 = _mm_mul_ps(vy0123, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() 66 vy4567 = _mm_mul_ps(vy4567, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() 67 vy89AB = _mm_mul_ps(vy89AB, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() 68 vyCDEF = _mm_mul_ps(vyCDEF, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() 69 vyGHIJ = _mm_mul_ps(vyGHIJ, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() 70 vyKLMN = _mm_mul_ps(vyKLMN, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() 71 vyOPQR = _mm_mul_ps(vyOPQR, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() 72 vySTUV = _mm_mul_ps(vySTUV, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() 95 vy_lo = _mm_mul_ps(vy_lo, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x32() [all …]
|
D | vcvt-wasmsimd-x24.c | 31 const v128_t vscale = wasm_v128_load64_splat(params->wasmsimd.scale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() local 56 vy0123 = wasm_f32x4_mul(vy0123, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() 57 vy4567 = wasm_f32x4_mul(vy4567, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() 58 vy89AB = wasm_f32x4_mul(vy89AB, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() 59 vyCDEF = wasm_f32x4_mul(vyCDEF, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() 60 vyGHIJ = wasm_f32x4_mul(vyGHIJ, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() 61 vyKLMN = wasm_f32x4_mul(vyKLMN, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() 82 vy_lo = wasm_f32x4_mul(vy_lo, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() 83 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() 99 vy = wasm_f32x4_mul(vy, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x24() [all …]
|
D | vcvt-sse41-x32.c | 31 const __m128 vscale = _mm_load_ps(params->sse4.scale); in xnn_qu8_f32_vcvt_ukernel__sse41_x32() local 61 vy0123 = _mm_mul_ps(vy0123, vscale); in xnn_qu8_f32_vcvt_ukernel__sse41_x32() 62 vy4567 = _mm_mul_ps(vy4567, vscale); in xnn_qu8_f32_vcvt_ukernel__sse41_x32() 63 vy89AB = _mm_mul_ps(vy89AB, vscale); in xnn_qu8_f32_vcvt_ukernel__sse41_x32() 64 vyCDEF = _mm_mul_ps(vyCDEF, vscale); in xnn_qu8_f32_vcvt_ukernel__sse41_x32() 65 vyGHIJ = _mm_mul_ps(vyGHIJ, vscale); in xnn_qu8_f32_vcvt_ukernel__sse41_x32() 66 vyKLMN = _mm_mul_ps(vyKLMN, vscale); in xnn_qu8_f32_vcvt_ukernel__sse41_x32() 67 vyOPQR = _mm_mul_ps(vyOPQR, vscale); in xnn_qu8_f32_vcvt_ukernel__sse41_x32() 68 vySTUV = _mm_mul_ps(vySTUV, vscale); in xnn_qu8_f32_vcvt_ukernel__sse41_x32() 86 vy = _mm_mul_ps(vy, vscale); in xnn_qu8_f32_vcvt_ukernel__sse41_x32() [all …]
|
D | vcvt-sse2-x24.c | 32 const __m128 vscale = _mm_load_ps(params->sse2.scale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() local 59 vy0123 = _mm_mul_ps(vy0123, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() 60 vy4567 = _mm_mul_ps(vy4567, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() 61 vy89AB = _mm_mul_ps(vy89AB, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() 62 vyCDEF = _mm_mul_ps(vyCDEF, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() 63 vyGHIJ = _mm_mul_ps(vyGHIJ, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() 64 vyKLMN = _mm_mul_ps(vyKLMN, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() 85 vy_lo = _mm_mul_ps(vy_lo, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() 86 vy_hi = _mm_mul_ps(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() 101 vy = _mm_mul_ps(vy, vscale); in xnn_qu8_f32_vcvt_ukernel__sse2_x24() [all …]
|
D | vcvt-neon-x24.c | 31 const float32x4_t vscale = vld1q_dup_f32(&params->neon.scale); in xnn_qu8_f32_vcvt_ukernel__neon_x24() local 55 vy0123 = vmulq_f32(vy0123, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x24() 56 vy4567 = vmulq_f32(vy4567, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x24() 57 vy89AB = vmulq_f32(vy89AB, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x24() 58 vyCDEF = vmulq_f32(vyCDEF, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x24() 59 vyGHIJ = vmulq_f32(vyGHIJ, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x24() 60 vyKLMN = vmulq_f32(vyKLMN, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x24() 80 vy_lo = vmulq_f32(vy_lo, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x24() 81 vy_hi = vmulq_f32(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x24() 98 vy = vmulq_f32(vy, vscale); in xnn_qu8_f32_vcvt_ukernel__neon_x24() [all …]
|
D | vcvt-wasmsimd-x16.c | 31 const v128_t vscale = wasm_v128_load64_splat(params->wasmsimd.scale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() local 50 vy0123 = wasm_f32x4_mul(vy0123, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() 51 vy4567 = wasm_f32x4_mul(vy4567, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() 52 vy89AB = wasm_f32x4_mul(vy89AB, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() 53 vyCDEF = wasm_f32x4_mul(vyCDEF, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() 72 vy_lo = wasm_f32x4_mul(vy_lo, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() 73 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() 89 vy = wasm_f32x4_mul(vy, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16() 95 vy = wasm_f32x4_mul(vy, vscale); in xnn_qu8_f32_vcvt_ukernel__wasmsimd_x16()
|
/external/XNNPACK/src/qs8-f32-vcvt/gen/ |
D | vcvt-wasmsimd-x32.c | 31 const v128_t vscale = wasm_v128_load64_splat(params->wasmsimd.scale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() local 62 vy0123 = wasm_f32x4_mul(vy0123, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() 63 vy4567 = wasm_f32x4_mul(vy4567, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() 64 vy89AB = wasm_f32x4_mul(vy89AB, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() 65 vyCDEF = wasm_f32x4_mul(vyCDEF, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() 66 vyGHIJ = wasm_f32x4_mul(vyGHIJ, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() 67 vyKLMN = wasm_f32x4_mul(vyKLMN, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() 68 vyOPQR = wasm_f32x4_mul(vyOPQR, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() 69 vySTUV = wasm_f32x4_mul(vySTUV, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() 92 vy_lo = wasm_f32x4_mul(vy_lo, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32() [all …]
|
D | vcvt-neon-x32.c | 31 const float32x4_t vscale = vld1q_dup_f32(&params->neon.scale); in xnn_qs8_f32_vcvt_ukernel__neon_x32() local 61 vy0123 = vmulq_f32(vy0123, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x32() 62 vy4567 = vmulq_f32(vy4567, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x32() 63 vy89AB = vmulq_f32(vy89AB, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x32() 64 vyCDEF = vmulq_f32(vyCDEF, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x32() 65 vyGHIJ = vmulq_f32(vyGHIJ, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x32() 66 vyKLMN = vmulq_f32(vyKLMN, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x32() 67 vyOPQR = vmulq_f32(vyOPQR, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x32() 68 vySTUV = vmulq_f32(vySTUV, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x32() 90 vy_lo = vmulq_f32(vy_lo, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x32() [all …]
|
D | vcvt-wasmsimd-x24.c | 31 const v128_t vscale = wasm_v128_load64_splat(params->wasmsimd.scale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() local 56 vy0123 = wasm_f32x4_mul(vy0123, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() 57 vy4567 = wasm_f32x4_mul(vy4567, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() 58 vy89AB = wasm_f32x4_mul(vy89AB, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() 59 vyCDEF = wasm_f32x4_mul(vyCDEF, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() 60 vyGHIJ = wasm_f32x4_mul(vyGHIJ, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() 61 vyKLMN = wasm_f32x4_mul(vyKLMN, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() 82 vy_lo = wasm_f32x4_mul(vy_lo, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() 83 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() 99 vy = wasm_f32x4_mul(vy, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x24() [all …]
|
D | vcvt-sse2-x32.c | 33 const __m128 vscale = _mm_load_ps(params->sse2.scale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() local 70 vy0123 = _mm_mul_ps(vy0123, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() 71 vy4567 = _mm_mul_ps(vy4567, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() 72 vy89AB = _mm_mul_ps(vy89AB, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() 73 vyCDEF = _mm_mul_ps(vyCDEF, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() 74 vyGHIJ = _mm_mul_ps(vyGHIJ, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() 75 vyKLMN = _mm_mul_ps(vyKLMN, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() 76 vyOPQR = _mm_mul_ps(vyOPQR, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() 77 vySTUV = _mm_mul_ps(vySTUV, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() 101 vy_lo = _mm_mul_ps(vy_lo, vscale); in xnn_qs8_f32_vcvt_ukernel__sse2_x32() [all …]
|
D | vcvt-sse41-x32.c | 31 const __m128 vscale = _mm_load_ps(params->sse4.scale); in xnn_qs8_f32_vcvt_ukernel__sse41_x32() local 61 vy0123 = _mm_mul_ps(vy0123, vscale); in xnn_qs8_f32_vcvt_ukernel__sse41_x32() 62 vy4567 = _mm_mul_ps(vy4567, vscale); in xnn_qs8_f32_vcvt_ukernel__sse41_x32() 63 vy89AB = _mm_mul_ps(vy89AB, vscale); in xnn_qs8_f32_vcvt_ukernel__sse41_x32() 64 vyCDEF = _mm_mul_ps(vyCDEF, vscale); in xnn_qs8_f32_vcvt_ukernel__sse41_x32() 65 vyGHIJ = _mm_mul_ps(vyGHIJ, vscale); in xnn_qs8_f32_vcvt_ukernel__sse41_x32() 66 vyKLMN = _mm_mul_ps(vyKLMN, vscale); in xnn_qs8_f32_vcvt_ukernel__sse41_x32() 67 vyOPQR = _mm_mul_ps(vyOPQR, vscale); in xnn_qs8_f32_vcvt_ukernel__sse41_x32() 68 vySTUV = _mm_mul_ps(vySTUV, vscale); in xnn_qs8_f32_vcvt_ukernel__sse41_x32() 86 vy = _mm_mul_ps(vy, vscale); in xnn_qs8_f32_vcvt_ukernel__sse41_x32() [all …]
|
D | vcvt-neon-x24.c | 31 const float32x4_t vscale = vld1q_dup_f32(&params->neon.scale); in xnn_qs8_f32_vcvt_ukernel__neon_x24() local 55 vy0123 = vmulq_f32(vy0123, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x24() 56 vy4567 = vmulq_f32(vy4567, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x24() 57 vy89AB = vmulq_f32(vy89AB, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x24() 58 vyCDEF = vmulq_f32(vyCDEF, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x24() 59 vyGHIJ = vmulq_f32(vyGHIJ, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x24() 60 vyKLMN = vmulq_f32(vyKLMN, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x24() 80 vy_lo = vmulq_f32(vy_lo, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x24() 81 vy_hi = vmulq_f32(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x24() 98 vy = vmulq_f32(vy, vscale); in xnn_qs8_f32_vcvt_ukernel__neon_x24() [all …]
|
D | vcvt-wasmsimd-x16.c | 31 const v128_t vscale = wasm_v128_load64_splat(params->wasmsimd.scale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() local 50 vy0123 = wasm_f32x4_mul(vy0123, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() 51 vy4567 = wasm_f32x4_mul(vy4567, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() 52 vy89AB = wasm_f32x4_mul(vy89AB, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() 53 vyCDEF = wasm_f32x4_mul(vyCDEF, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() 72 vy_lo = wasm_f32x4_mul(vy_lo, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() 73 vy_hi = wasm_f32x4_mul(vy_hi, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() 89 vy = wasm_f32x4_mul(vy, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16() 95 vy = wasm_f32x4_mul(vy, vscale); in xnn_qs8_f32_vcvt_ukernel__wasmsimd_x16()
|
/external/XNNPACK/src/f32-qs8-vcvt/gen/ |
D | vcvt-neonv8-x32.c | 30 const float32x4_t vscale = vld1q_dup_f32(&params->neonv8.scale); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() local 44 vx0123 = vmulq_f32(vx0123, vscale); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 45 vx4567 = vmulq_f32(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 46 vx89AB = vmulq_f32(vx89AB, vscale); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 47 vxCDEF = vmulq_f32(vxCDEF, vscale); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 48 vxGHIJ = vmulq_f32(vxGHIJ, vscale); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 49 vxKLMN = vmulq_f32(vxKLMN, vscale); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 50 vxOPQR = vmulq_f32(vxOPQR, vscale); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 51 vxSTUV = vmulq_f32(vxSTUV, vscale); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 88 vx_lo = vmulq_f32(vx_lo, vscale); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() [all …]
|
D | vcvt-sse41-x32.c | 30 const __m128 vscale = _mm_load_ps(params->sse4.scale); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() local 46 vx0123 = _mm_mul_ps(vx0123, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 47 vx4567 = _mm_mul_ps(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 48 vx89AB = _mm_mul_ps(vx89AB, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 49 vxCDEF = _mm_mul_ps(vxCDEF, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 50 vxGHIJ = _mm_mul_ps(vxGHIJ, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 51 vxKLMN = _mm_mul_ps(vxKLMN, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 52 vxOPQR = _mm_mul_ps(vxOPQR, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 53 vxSTUV = _mm_mul_ps(vxSTUV, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 99 vx_lo = _mm_mul_ps(vx_lo, vscale); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() [all …]
|
D | vcvt-sse2-x32.c | 30 const __m128 vscale = _mm_load_ps(params->sse2.scale); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() local 46 vx0123 = _mm_mul_ps(vx0123, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 47 vx4567 = _mm_mul_ps(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 48 vx89AB = _mm_mul_ps(vx89AB, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 49 vxCDEF = _mm_mul_ps(vxCDEF, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 50 vxGHIJ = _mm_mul_ps(vxGHIJ, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 51 vxKLMN = _mm_mul_ps(vxKLMN, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 52 vxOPQR = _mm_mul_ps(vxOPQR, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 53 vxSTUV = _mm_mul_ps(vxSTUV, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 101 vx_lo = _mm_mul_ps(vx_lo, vscale); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() [all …]
|
D | vcvt-wasmsimd-cvt-x32.c | 30 const v128_t vscale = wasm_v128_load64_splat(params->wasmsimd_cvt.scale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() local 45 vx0123 = wasm_f32x4_mul(vx0123, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 46 vx4567 = wasm_f32x4_mul(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 47 vx89AB = wasm_f32x4_mul(vx89AB, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 48 vxCDEF = wasm_f32x4_mul(vxCDEF, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 49 vxGHIJ = wasm_f32x4_mul(vxGHIJ, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 50 vxKLMN = wasm_f32x4_mul(vxKLMN, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 51 vxOPQR = wasm_f32x4_mul(vxOPQR, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 52 vxSTUV = wasm_f32x4_mul(vxSTUV, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 100 vx_lo = wasm_f32x4_mul(vx_lo, vscale); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() [all …]
|
D | vcvt-neon-x32.c | 30 const float32x4_t vscale = vld1q_dup_f32(&params->neon.scale); in xnn_f32_qs8_vcvt_ukernel__neon_x32() local 45 vx0123 = vmulq_f32(vx0123, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x32() 46 vx4567 = vmulq_f32(vx4567, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x32() 47 vx89AB = vmulq_f32(vx89AB, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x32() 48 vxCDEF = vmulq_f32(vxCDEF, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x32() 49 vxGHIJ = vmulq_f32(vxGHIJ, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x32() 50 vxKLMN = vmulq_f32(vxKLMN, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x32() 51 vxOPQR = vmulq_f32(vxOPQR, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x32() 52 vxSTUV = vmulq_f32(vxSTUV, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x32() 93 vx_lo = vmulq_f32(vx_lo, vscale); in xnn_f32_qs8_vcvt_ukernel__neon_x32() [all …]
|
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-neonv8-x32.c | 30 const float32x4_t vscale = vld1q_dup_f32(&params->neonv8.scale); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() local 44 vx0123 = vmulq_f32(vx0123, vscale); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 45 vx4567 = vmulq_f32(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 46 vx89AB = vmulq_f32(vx89AB, vscale); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 47 vxCDEF = vmulq_f32(vxCDEF, vscale); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 48 vxGHIJ = vmulq_f32(vxGHIJ, vscale); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 49 vxKLMN = vmulq_f32(vxKLMN, vscale); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 50 vxOPQR = vmulq_f32(vxOPQR, vscale); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 51 vxSTUV = vmulq_f32(vxSTUV, vscale); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 88 vx_lo = vmulq_f32(vx_lo, vscale); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() [all …]
|
D | vcvt-sse2-x32.c | 30 const __m128 vscale = _mm_load_ps(params->sse2.scale); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() local 46 vx0123 = _mm_mul_ps(vx0123, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 47 vx4567 = _mm_mul_ps(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 48 vx89AB = _mm_mul_ps(vx89AB, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 49 vxCDEF = _mm_mul_ps(vxCDEF, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 50 vxGHIJ = _mm_mul_ps(vxGHIJ, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 51 vxKLMN = _mm_mul_ps(vxKLMN, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 52 vxOPQR = _mm_mul_ps(vxOPQR, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 53 vxSTUV = _mm_mul_ps(vxSTUV, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 99 vx_lo = _mm_mul_ps(vx_lo, vscale); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() [all …]
|
D | vcvt-wasmsimd-cvt-x32.c | 30 const v128_t vscale = wasm_v128_load64_splat(params->wasmsimd_cvt.scale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() local 45 vx0123 = wasm_f32x4_mul(vx0123, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 46 vx4567 = wasm_f32x4_mul(vx4567, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 47 vx89AB = wasm_f32x4_mul(vx89AB, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 48 vxCDEF = wasm_f32x4_mul(vxCDEF, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 49 vxGHIJ = wasm_f32x4_mul(vxGHIJ, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 50 vxKLMN = wasm_f32x4_mul(vxKLMN, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 51 vxOPQR = wasm_f32x4_mul(vxOPQR, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 52 vxSTUV = wasm_f32x4_mul(vxSTUV, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 100 vx_lo = wasm_f32x4_mul(vx_lo, vscale); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() [all …]
|