/external/XNNPACK/src/f32-f16-vcvt/gen/ |
D | vcvt-scalar-fabsf-x4.c | 67 uint32_t vbias3 = vnonsignw3 + vexp_bias; in xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x4() local 80 vbias3 &= vexpw_max; in xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x4() 85 vbias3 = math_max_u32(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x4() 90 vf3 += uint32_as_float(vbias3); in xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x4()
|
D | vcvt-scalar-bitcast-x4.c | 63 uint32_t vbias3 = vnonsignw3 + vexp_bias; in xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x4() local 72 vbias3 &= vexpw_max; in xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x4() 81 vbias3 = math_max_u32(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x4() 86 vf3 += uint32_as_float(vbias3); in xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x4()
|
D | vcvt-neon-x16.c | 54 uint32x4_t vbias3 = vaddq_u32(vreinterpretq_u32_f32(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__neon_x16() local 68 vbias3 = vandq_u32(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__neon_x16() 79 vbias3 = vmaxq_u32(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__neon_x16() 84 vf3 = vaddq_f32(vf3, vreinterpretq_f32_u32(vbias3)); in xnn_f32_f16_vcvt_ukernel__neon_x16()
|
D | vcvt-wasmsimd-x16.c | 59 v128_t vbias3 = wasm_i32x4_add(vabsx3, vexp_bias); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x16() local 74 vbias3 = wasm_v128_and(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x16() 90 vbias3 = wasm_i16x8_max(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x16() 95 vf3 = wasm_f32x4_add(vf3, vbias3); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x16()
|
D | vcvt-neon-x24.c | 58 uint32x4_t vbias3 = vaddq_u32(vreinterpretq_u32_f32(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__neon_x24() local 78 vbias3 = vandq_u32(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__neon_x24() 94 vbias3 = vmaxq_u32(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__neon_x24() 101 vf3 = vaddq_f32(vf3, vreinterpretq_f32_u32(vbias3)); in xnn_f32_f16_vcvt_ukernel__neon_x24()
|
D | vcvt-sse41-x16.c | 61 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x16() local 76 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x16() 92 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__sse41_x16() 98 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
|
D | vcvt-avx-x16.c | 61 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x16() local 76 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x16() 92 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__avx_x16() 98 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__avx_x16()
|
D | vcvt-sse2-x16.c | 61 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x16() local 76 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x16() 92 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__sse2_x16() 100 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
|
D | vcvt-wasmsimd-x24.c | 65 v128_t vbias3 = wasm_i32x4_add(vabsx3, vexp_bias); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x24() local 86 vbias3 = wasm_v128_and(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x24() 108 vbias3 = wasm_i16x8_max(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x24() 115 vf3 = wasm_f32x4_add(vf3, vbias3); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x24()
|
D | vcvt-neon-x32.c | 62 uint32x4_t vbias3 = vaddq_u32(vreinterpretq_u32_f32(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__neon_x32() local 88 vbias3 = vandq_u32(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__neon_x32() 109 vbias3 = vmaxq_u32(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__neon_x32() 118 vf3 = vaddq_f32(vf3, vreinterpretq_f32_u32(vbias3)); in xnn_f32_f16_vcvt_ukernel__neon_x32()
|
D | vcvt-sse2-x24.c | 67 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24() local 88 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x24() 110 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__sse2_x24() 121 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
|
D | vcvt-sse41-x24.c | 67 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24() local 88 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x24() 110 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__sse41_x24() 118 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
|
D | vcvt-avx-x24.c | 67 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24() local 88 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x24() 110 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__avx_x24() 118 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__avx_x24()
|
D | vcvt-wasmsimd-x32.c | 71 v128_t vbias3 = wasm_i32x4_add(vabsx3, vexp_bias); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x32() local 98 vbias3 = wasm_v128_and(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x32() 126 vbias3 = wasm_i16x8_max(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x32() 135 vf3 = wasm_f32x4_add(vf3, vbias3); in xnn_f32_f16_vcvt_ukernel__wasmsimd_x32()
|
D | vcvt-avx-x32.c | 73 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32() local 100 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x32() 128 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__avx_x32() 138 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__avx_x32()
|
D | vcvt-sse2-x32.c | 73 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32() local 100 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x32() 128 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__sse2_x32() 142 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
|
D | vcvt-sse41-x32.c | 73 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32() local 100 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x32() 128 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__sse41_x32() 138 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
|
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c4-minmax-scalar-2x.c | 68 const float vbias3 = w[7]; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() local 73 vacc0x3 = vacc0x3 * vscale3 + vbias3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() 77 vacc1x3 = vacc1x3 * vscale3 + vbias3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
|
D | c4-minmax-wasm-2x.c | 68 const float vbias3 = w[7]; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() local 73 vacc0x3 = vacc0x3 * vscale3 + vbias3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() 77 vacc1x3 = vacc1x3 * vscale3 + vbias3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
|
/external/XNNPACK/src/amalgam/ |
D | avx.c | 1282 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24() local 1303 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x24() 1325 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__avx_x24() 1333 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__avx_x24()
|
D | sse2.c | 1037 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x16() local 1052 vbias3 = _mm_and_si128(vbias3, vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x16() 1068 vbias3 = _mm_max_epi16(vbias3, vbias_min); in xnn_f32_f16_vcvt_ukernel__sse2_x16() 1076 vf3 = _mm_add_ps(vf3, _mm_castsi128_ps(vbias3)); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
|