/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x2c4-wasmrelaxedsimd-fma.c | in xnn_f32_igemm_ukernel_4x2c4__wasmrelaxedsimd_fma():
    128  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    131  vacc0x1c4 = __builtin_wasm_fma_f32x4(vacc0x1c4, wasm_v128_andnot(va0, vmask1), vb1);
    133  vacc1x1c4 = __builtin_wasm_fma_f32x4(vacc1x1c4, wasm_v128_andnot(va1, vmask1), vb1);
    135  vacc2x1c4 = __builtin_wasm_fma_f32x4(vacc2x1c4, wasm_v128_andnot(va2, vmask1), vb1);
    137  vacc3x1c4 = __builtin_wasm_fma_f32x4(vacc3x1c4, wasm_v128_andnot(va3, vmask1), vb1);
|
D | 4x2c4-relu-wasmsimd.c | in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd():
    128  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    131  vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1));
    133  vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1));
    135  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    137  vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1));
|
D | 4x2c4-relu-wasmrelaxedsimd-fma.c | in xnn_f32_igemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma():
    128  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    131  vacc0x1c4 = __builtin_wasm_fma_f32x4(vacc0x1c4, wasm_v128_andnot(va0, vmask1), vb1);
    133  vacc1x1c4 = __builtin_wasm_fma_f32x4(vacc1x1c4, wasm_v128_andnot(va1, vmask1), vb1);
    135  vacc2x1c4 = __builtin_wasm_fma_f32x4(vacc2x1c4, wasm_v128_andnot(va2, vmask1), vb1);
    137  vacc3x1c4 = __builtin_wasm_fma_f32x4(vacc3x1c4, wasm_v128_andnot(va3, vmask1), vb1);
|
D | 4x2c4-wasmsimd.c | in xnn_f32_igemm_ukernel_4x2c4__wasmsimd():
    128  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    131  vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1));
    133  vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1));
    135  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    137  vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1));
|
D | 4x2c4-minmax-wasmrelaxedsimd-fma.c | in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma():
    130  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    133  vacc0x1c4 = __builtin_wasm_fma_f32x4(vacc0x1c4, wasm_v128_andnot(va0, vmask1), vb1);
    135  vacc1x1c4 = __builtin_wasm_fma_f32x4(vacc1x1c4, wasm_v128_andnot(va1, vmask1), vb1);
    137  vacc2x1c4 = __builtin_wasm_fma_f32x4(vacc2x1c4, wasm_v128_andnot(va2, vmask1), vb1);
    139  vacc3x1c4 = __builtin_wasm_fma_f32x4(vacc3x1c4, wasm_v128_andnot(va3, vmask1), vb1);
|
D | 4x2c4-minmax-wasmrelaxedsimd.c | in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmrelaxedsimd():
    130  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    133  vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1));
    135  vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1));
    137  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    139  vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1));
|
D | 4x2c4-minmax-wasmsimd-arm.c | in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm():
    130  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    133  vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1));
    135  vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1));
    137  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    139  vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1));
|
D | 4x2c4-minmax-sse.c | in xnn_f32_igemm_minmax_ukernel_4x2c4__sse():
    127  const __m128 vmask1 = _mm_cmpeq_ps(_mm_setzero_ps(), vb1);  (local)
    130  vacc0x1c4 = _mm_add_ps(vacc0x1c4, _mm_mul_ps(_mm_andnot_ps(vmask1, va0), vb1));
    132  vacc1x1c4 = _mm_add_ps(vacc1x1c4, _mm_mul_ps(_mm_andnot_ps(vmask1, va1), vb1));
    134  vacc2x1c4 = _mm_add_ps(vacc2x1c4, _mm_mul_ps(_mm_andnot_ps(vmask1, va2), vb1));
    136  vacc3x1c4 = _mm_add_ps(vacc3x1c4, _mm_mul_ps(_mm_andnot_ps(vmask1, va3), vb1));
|
D | 4x2c4-minmax-wasmsimd-x86.c | in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86():
    130  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    133  vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1));
    135  vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1));
    137  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    139  vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1));
|
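Every f32-igemm hit above is the same construct: vmask1 flags the lanes where the packed weight vector vb1 is zero (presumably the zero-padded remainder lanes of the c4 layout), and wasm_v128_andnot clears those lanes of the activation vector before the multiply-accumulate, so a stray Inf or NaN in a padded activation lane cannot poison the accumulator through 0 * Inf. Below is a minimal sketch of that single step for the wasmsimd files, assuming <wasm_simd128.h>; the helper name mac_masked is illustrative and not an XNNPACK symbol (the *-wasmrelaxedsimd-fma.c variants fold the multiply and add into __builtin_wasm_fma_f32x4 instead).

    #include <wasm_simd128.h>

    /* vacc += (va with the lanes where vb1 == 0 cleared) * vb1. */
    static inline v128_t mac_masked(v128_t vacc, v128_t va, v128_t vb1) {
      const v128_t vzero = wasm_f32x4_splat(0.0f);
      const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);        /* all-ones lanes where vb1 == 0 */
      const v128_t vmasked_a = wasm_v128_andnot(va, vmask1);   /* va & ~vmask1: clear those lanes */
      return wasm_f32x4_add(vacc, wasm_f32x4_mul(vmasked_a, vb1));
    }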
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x2c4-relu-wasmrelaxedsimd-fma.c | in xnn_f32_gemm_relu_ukernel_4x2c4__wasmrelaxedsimd_fma():
    110  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    113  vacc0x1c4 = __builtin_wasm_fma_f32x4(vacc0x1c4, wasm_v128_andnot(va0, vmask1), vb1);
    115  vacc1x1c4 = __builtin_wasm_fma_f32x4(vacc1x1c4, wasm_v128_andnot(va1, vmask1), vb1);
    117  vacc2x1c4 = __builtin_wasm_fma_f32x4(vacc2x1c4, wasm_v128_andnot(va2, vmask1), vb1);
    119  vacc3x1c4 = __builtin_wasm_fma_f32x4(vacc3x1c4, wasm_v128_andnot(va3, vmask1), vb1);
|
D | 4x2c4-wasmsimd.c | in xnn_f32_gemm_ukernel_4x2c4__wasmsimd():
    110  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    113  vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1));
    115  vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1));
    117  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    119  vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1));
|
D | 4x2c4-wasmrelaxedsimd-fma.c | in xnn_f32_gemm_ukernel_4x2c4__wasmrelaxedsimd_fma():
    110  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    113  vacc0x1c4 = __builtin_wasm_fma_f32x4(vacc0x1c4, wasm_v128_andnot(va0, vmask1), vb1);
    115  vacc1x1c4 = __builtin_wasm_fma_f32x4(vacc1x1c4, wasm_v128_andnot(va1, vmask1), vb1);
    117  vacc2x1c4 = __builtin_wasm_fma_f32x4(vacc2x1c4, wasm_v128_andnot(va2, vmask1), vb1);
    119  vacc3x1c4 = __builtin_wasm_fma_f32x4(vacc3x1c4, wasm_v128_andnot(va3, vmask1), vb1);
|
D | 4x2c4-minmax-sse.c | in xnn_f32_gemm_minmax_ukernel_4x2c4__sse():
    109  const __m128 vmask1 = _mm_cmpeq_ps(_mm_setzero_ps(), vb1);  (local)
    112  vacc0x1c4 = _mm_add_ps(vacc0x1c4, _mm_mul_ps(_mm_andnot_ps(vmask1, va0), vb1));
    114  vacc1x1c4 = _mm_add_ps(vacc1x1c4, _mm_mul_ps(_mm_andnot_ps(vmask1, va1), vb1));
    116  vacc2x1c4 = _mm_add_ps(vacc2x1c4, _mm_mul_ps(_mm_andnot_ps(vmask1, va2), vb1));
    118  vacc3x1c4 = _mm_add_ps(vacc3x1c4, _mm_mul_ps(_mm_andnot_ps(vmask1, va3), vb1));
|
D | 4x2c4-relu-wasmsimd.c | in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd():
    110  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    113  vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1));
    115  vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1));
    117  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    119  vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1));
|
D | 4x2c4-minmax-wasmsimd-x86.c | in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86():
    112  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    115  vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1));
    117  vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1));
    119  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    121  vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1));
|
D | 4x2c4-minmax-wasmrelaxedsimd.c | in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmrelaxedsimd():
    112  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    115  vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1));
    117  vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1));
    119  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    121  vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1));
|
D | 4x2c4-minmax-wasmrelaxedsimd-fma.c | in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmrelaxedsimd_fma():
    112  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    115  vacc0x1c4 = __builtin_wasm_fma_f32x4(vacc0x1c4, wasm_v128_andnot(va0, vmask1), vb1);
    117  vacc1x1c4 = __builtin_wasm_fma_f32x4(vacc1x1c4, wasm_v128_andnot(va1, vmask1), vb1);
    119  vacc2x1c4 = __builtin_wasm_fma_f32x4(vacc2x1c4, wasm_v128_andnot(va2, vmask1), vb1);
    121  vacc3x1c4 = __builtin_wasm_fma_f32x4(vacc3x1c4, wasm_v128_andnot(va3, vmask1), vb1);
|
D | 4x2c4-minmax-wasmsimd-arm.c | in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm():
    112  const v128_t vmask1 = wasm_f32x4_eq(vb1, vzero);  (local)
    115  vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1));
    117  vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1));
    119  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    121  vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1));
|
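The f32-gemm hits repeat the masking step shown above; the only wrinkle worth calling out is the SSE spelling in 4x2c4-minmax-sse.c. _mm_andnot_ps complements its first operand, so _mm_andnot_ps(vmask1, va0) computes (~vmask1) & va0, which clears the same lanes as wasm_v128_andnot(va0, vmask1) despite the swapped argument order. A hedged sketch of one SSE accumulation follows, assuming <xmmintrin.h>; the helper name is illustrative, not an XNNPACK symbol.

    #include <xmmintrin.h>

    /* vacc += (va with the lanes where vb1 == 0 cleared) * vb1, SSE spelling. */
    static inline __m128 mac_masked_sse(__m128 vacc, __m128 va, __m128 vb1) {
      const __m128 vmask1 = _mm_cmpeq_ps(_mm_setzero_ps(), vb1);  /* lanes where vb1 == 0 */
      const __m128 vmasked_a = _mm_andnot_ps(vmask1, va);          /* (~vmask1) & va */
      return _mm_add_ps(vacc, _mm_mul_ps(vmasked_a, vb1));
    }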
/external/XNNPACK/src/f16-f32-vcvt/gen/ |
D | vcvt-sse2-int16-x16.c | in xnn_f16_f32_vcvt_ukernel__sse2_int16_x16():
    64  const __m128i vmask1 = _mm_cmpgt_epi16(vnonsign1, vdenorm_cutoff);  (local)
    72  const __m128i vxmask2 = _mm_unpacklo_epi16(vmask1, vmask1);
    75  const __m128i vxmask3 = _mm_unpackhi_epi16(vmask1, vmask1);
|
D | vcvt-sse2-int16-x24.c | in xnn_f16_f32_vcvt_ukernel__sse2_int16_x24():
    73  const __m128i vmask1 = _mm_cmpgt_epi16(vnonsign1, vdenorm_cutoff);  (local)
    82  const __m128i vxmask2 = _mm_unpacklo_epi16(vmask1, vmask1);
    85  const __m128i vxmask3 = _mm_unpackhi_epi16(vmask1, vmask1);
|
D | vcvt-sse41-int16-x16.c | in xnn_f16_f32_vcvt_ukernel__sse41_int16_x16():
    64  const __m128i vmask1 = _mm_cmpgt_epi16(vnonsign1, vdenorm_cutoff);  (local)
    71  _mm_blendv_epi8(vdenorm2, vnorm2, _mm_cvtepi16_epi32(vmask1)));
    73  _mm_blendv_epi8(vdenorm3, vnorm3, _mm_unpackhi_epi16(vmask1, vmask1)));
|
D | vcvt-avx-int16-x16.c | in xnn_f16_f32_vcvt_ukernel__avx_int16_x16():
    64  const __m128i vmask1 = _mm_cmpgt_epi16(vnonsign1, vdenorm_cutoff);  (local)
    71  _mm_blendv_epi8(vdenorm2, vnorm2, _mm_cvtepi16_epi32(vmask1)));
    73  _mm_blendv_epi8(vdenorm3, vnorm3, _mm_unpackhi_epi16(vmask1, vmask1)));
|
D | vcvt-sse2-int16-x32.c | in xnn_f16_f32_vcvt_ukernel__sse2_int16_x32():
    82  const __m128i vmask1 = _mm_cmpgt_epi16(vnonsign1, vdenorm_cutoff);  (local)
    92  const __m128i vxmask2 = _mm_unpacklo_epi16(vmask1, vmask1);
    95  const __m128i vxmask3 = _mm_unpackhi_epi16(vmask1, vmask1);
|
D | vcvt-sse41-int16-x24.c | in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24():
    73  const __m128i vmask1 = _mm_cmpgt_epi16(vnonsign1, vdenorm_cutoff);  (local)
    81  _mm_blendv_epi8(vdenorm2, vnorm2, _mm_cvtepi16_epi32(vmask1)));
    83  _mm_blendv_epi8(vdenorm3, vnorm3, _mm_unpackhi_epi16(vmask1, vmask1)));
|
D | vcvt-avx-int16-x24.c | in xnn_f16_f32_vcvt_ukernel__avx_int16_x24():
    73  const __m128i vmask1 = _mm_cmpgt_epi16(vnonsign1, vdenorm_cutoff);  (local)
    81  _mm_blendv_epi8(vdenorm2, vnorm2, _mm_cvtepi16_epi32(vmask1)));
    83  _mm_blendv_epi8(vdenorm3, vnorm3, _mm_unpackhi_epi16(vmask1, vmask1)));
|
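In the f16-f32-vcvt hits, vmask1 is a 16-bit compare (vnonsign1 > vdenorm_cutoff) that appears to mark the half-precision inputs whose magnitude is above the denormal cutoff. Each kernel then widens that mask to 32-bit lanes, by unpacking it with itself on the SSE2 path or by sign extension on the SSE4.1/AVX path, and uses the widened mask to choose between the normalized and denormalized conversion results. Below is a sketch of the SSE4.1-style selection for the low four lanes, assuming <smmintrin.h>; the helper and parameter names are illustrative, not XNNPACK symbols.

    #include <smmintrin.h>

    /* Pick vnorm where the 16-bit mask lane is set, vdenorm where it is clear. */
    static inline __m128i select_lo4(__m128i vdenorm, __m128i vnorm, __m128i vmask1) {
      /* Sign-extend 0xFFFF/0x0000 mask halves to 0xFFFFFFFF/0x00000000. */
      const __m128i vxmask = _mm_cvtepi16_epi32(vmask1);
      /* blendv takes bytes from vnorm where the mask's top bit is set. */
      return _mm_blendv_epi8(vdenorm, vnorm, vxmask);
    }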