/external/XNNPACK/src/f32-ppmm/gen/ |
D | 2x4-minmax-scalar.c |
     46  float vacc1x2 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() local
     67  vacc1x2 += va1 * vb2;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
     80  vacc1x2 = math_min_f32(vacc1x2, vmax);  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
     90  vacc1x2 = math_max_f32(vacc1x2, vmin);  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
     97  c1[2] = vacc1x2;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
    117  vacc1x0 = vacc1x2;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
|
D | 3x3-minmax-scalar.c |
     49  float vacc1x2 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar() local
     74  vacc1x2 += va1 * vb2;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
     88  vacc1x2 = math_min_f32(vacc1x2, vmax);  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
     99  vacc1x2 = math_max_f32(vacc1x2, vmin);  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
    108  c1[2] = vacc1x2;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
    130  vacc1x0 = vacc1x2;  in xnn_f32_ppmm_minmax_ukernel_3x3__scalar()
|
D | 4x4-minmax-scalar.c |
     54  float vacc1x2 = vacc0x2;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() local
     89  vacc1x2 += va1 * vb2;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    110  vacc1x2 = math_min_f32(vacc1x2, vmax);  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    128  vacc1x2 = math_max_f32(vacc1x2, vmin);  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    147  c1[2] = vacc1x2;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    175  vacc1x0 = vacc1x2;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
|
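The references above all show the same scalar PPMM accumulator pattern: each vacc{row}x{col} starts as a copy of the row-0 bias accumulator, collects va{row} * vb{col} products over the K dimension, is clamped to [vmin, vmax], and is stored to c{row}[col]. The following is a minimal self-contained sketch of that pattern in plain C; all names are illustrative, and it is not the generated kernel itself.

#include <stddef.h>
#include <stdio.h>

// Stand-ins for XNNPACK's math_min_f32/math_max_f32 helpers.
static inline float min_f32(float a, float b) { return a < b ? a : b; }
static inline float max_f32(float a, float b) { return a > b ? a : b; }

// Sketch of a 2x4 tile: init accumulators from a shared bias row,
// multiply-accumulate over kc, clamp to [vmin, vmax], then store.
static void ppmm_2x4_tile_sketch(size_t kc, const float* a, const float* w,
                                 float* c0, float* c1, float vmin, float vmax) {
  float vacc0[4] = {w[0], w[1], w[2], w[3]};      // row 0 starts from the bias
  float vacc1[4];
  for (size_t n = 0; n < 4; n++) vacc1[n] = vacc0[n];  // e.g. vacc1x2 = vacc0x2
  w += 4;
  for (size_t k = 0; k < kc; k++) {
    const float va0 = a[0], va1 = a[1];           // pre-packed A panel, 2 rows per k
    a += 2;
    for (size_t n = 0; n < 4; n++) {
      vacc0[n] += va0 * w[n];                     // vacc0xN += va0 * vbN
      vacc1[n] += va1 * w[n];                     // vacc1xN += va1 * vbN
    }
    w += 4;
  }
  for (size_t n = 0; n < 4; n++) {
    vacc0[n] = max_f32(min_f32(vacc0[n], vmax), vmin);
    vacc1[n] = max_f32(min_f32(vacc1[n], vmax), vmin);
    c0[n] = vacc0[n];
    c1[n] = vacc1[n];                             // c1[2] = vacc1x2, etc.
  }
}

int main(void) {
  const float a[4] = {1, 2, 3, 4};                // kc = 2: (va0, va1) per k-step
  const float w[12] = {0, 0, 0, 0,                // bias row
                       1, 1, 1, 1, 2, 2, 2, 2};   // two k-steps of weights
  float c0[4], c1[4];
  ppmm_2x4_tile_sketch(2, a, w, c0, c1, -10.0f, 10.0f);
  printf("c1[2] = %f\n", c1[2]);                  // 2*1 + 4*2 = 10, within [-10, 10]
  return 0;
}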
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c4-minmax-wasm-2x.c |
     60  float vacc1x2 = i1[2];  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() local
     75  vacc1x2 = vacc1x2 * vscale2 + vbias2;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
     84  vacc1x2 = __builtin_wasm_max_f32(vacc1x2, vmin);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
     93  vacc1x2 = __builtin_wasm_min_f32(vacc1x2, vmax);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
    103  o1[2] = vacc1x2;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
|
D | c4-minmax-scalar-2x.c |
     60  float vacc1x2 = i1[2];  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() local
     75  vacc1x2 = vacc1x2 * vscale2 + vbias2;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
     84  vacc1x2 = math_max_f32(vacc1x2, vmin);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
     93  vacc1x2 = math_min_f32(vacc1x2, vmax);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
    103  o1[2] = vacc1x2;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
|
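Both vmulcaddc references show the per-channel multiply-add-clamp step: acc = x * scale + bias, clamped to [vmin, vmax] and written to the output row. A hedged scalar sketch of that step, with illustrative names only:

#include <stddef.h>

// Per-channel multiply-add-clamp, as in
// "vacc1x2 = vacc1x2 * vscale2 + vbias2" followed by the max/min clamps.
static void vmulcaddc_row_sketch(size_t channels, const float* x,
                                 const float* scale, const float* bias,
                                 float* y, float vmin, float vmax) {
  for (size_t c = 0; c < channels; c++) {
    float vacc = x[c];
    vacc = vacc * scale[c] + bias[c];
    vacc = vacc > vmin ? vacc : vmin;    // math_max_f32(vacc, vmin)
    vacc = vacc < vmax ? vacc : vmax;    // math_min_f32(vacc, vmax)
    y[c] = vacc;                         // o1[2] = vacc1x2
  }
}

In the generated kernels this loop is unrolled over 2 rows and 4 channels, which is where the vacc{row}x{channel} naming comes from; the wasm variant only differs in using __builtin_wasm_max_f32/__builtin_wasm_min_f32 for the clamps.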
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x4c8-xw-minmax-wasmsimd.c |
     57  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
     90  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
     91  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
    107  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, …  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
|
D | 2x4c8-minmax-wasmsimd-ld128.c |
     57  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
     93  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    100  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    111  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, …  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
     57  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
     90  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
     91  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    107  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, …  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 3x4c8-xw-minmax-wasmsimd.c |
     63  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
    108  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    109  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    131  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, …  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
     63  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    111  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    120  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    135  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, …  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c |
     63  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    108  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    109  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    131  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, …  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
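In the wasmsimd QS8 GEMM references above, each per-column accumulator collects an i16x8 product vector by widening its low and high halves to i32x4 and adding both; the truncated "…02 = …" lines are the start of the shuffle/add reduction across column accumulators. The sketch below uses the same wasm_simd128.h intrinsic names that appear in the snippets (older clang headers spell the widening ops wasm_i32x4_widen_*_i16x8; newer headers rename them wasm_i32x4_extend_*_i16x8). The second shuffle's lane indices are an assumption filled in to make the reduction arithmetically sensible, not a quote of the truncated line. Compile for wasm32 with -msimd128.

#include <wasm_simd128.h>

// Widen-and-add accumulation: both i16 halves of the product go into the
// i32x4 accumulator, as in lines 90/91, 93/100, 108/109, etc. above.
static inline v128_t accumulate_i16x8_product(v128_t vacc, v128_t vprod) {
  vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_low_i16x8(vprod));
  vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_high_i16x8(vprod));
  return vacc;
}

// Start of the cross-column reduction hinted at by the truncated "vacc1x02"
// lines: interleave lanes of the column-0 and column-2 accumulators and add;
// a further shuffle/add round (not shown here) finishes the horizontal sum.
static inline v128_t reduce_columns_02(v128_t vacc_c0, v128_t vacc_c2) {
  return wasm_i32x4_add(wasm_v32x4_shuffle(vacc_c0, vacc_c2, 0, 4, 1, 5),
                        wasm_v32x4_shuffle(vacc_c0, vacc_c2, 2, 6, 3, 7));
}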
D | 2x4c8-xw-minmax-sse2.c |
     56  __m128i vacc1x2 = vacc0x2;  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2() local
     80  vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2()
     92  … vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x0, vacc1x…  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2()
|
D | 2x4c8-minmax-sse2-ld128.c |
     56  __m128i vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128() local
     85  vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128()
     94  … vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x0, vacc1x…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128()
|
D | 2x4c8-minmax-sse2-ld64.c |
     56  __m128i vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64() local
     83  vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64()
     96  … vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x0, vacc1x…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64()
|
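The SSE2 variants accumulate with _mm_madd_epi16: the sign-extended int16 lanes of an A row (vxa1) and a B column (vxb2) are multiplied and adjacent pairs summed into four int32 lanes, which are then added to the column accumulator; the truncated "vacc1x02" lines begin the unpack/add reduction across columns. A hedged sketch of those two steps, with hypothetical helper names; compile with -msse2.

#include <emmintrin.h>  // SSE2

// Per-column accumulation, as in "vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2))".
static inline __m128i accumulate_madd(__m128i vacc, __m128i vxa, __m128i vxb) {
  return _mm_add_epi32(vacc, _mm_madd_epi16(vxa, vxb));
}

// First round of the cross-column reduction: interleave lanes of the column-0
// and column-2 accumulators with unpacklo/unpackhi and add; later rounds
// (not shown) combine the result with the column-1/column-3 pair.
static inline __m128i reduce_columns_02(__m128i vacc_c0, __m128i vacc_c2) {
  return _mm_add_epi32(_mm_unpacklo_epi32(vacc_c0, vacc_c2),
                       _mm_unpackhi_epi32(vacc_c0, vacc_c2));
}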
D | 2x4c8-xw-minmax-xop.c |
     61  __m128i vacc1x2 = vacc0x2;  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() local
     85  vacc1x2 = _mm_maddd_epi16(vxa1, vxb2, vacc1x2);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
     98  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
|
D | 2x8c8-minmax-neon-mull-padal.c |
     60  int32x4_t vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
     88  vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
    124  const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
    148  const int32x2_t vpsum1x2 = vadd_s32(vget_low_s32(vacc1x2), vget_high_s32(vacc1x2));  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
|
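The NEON mull-padal variant accumulates int8 products via vmull_s8 followed by vpadalq_s16 (pairwise add-and-accumulate into int32 lanes); the vpaddq_s32 and vadd_s32 references are the per-column horizontal reduction, with vpaddq_s32 available only on AArch64 and the vget_low/vget_high path serving as the fallback. A hedged sketch of the two helpers those lines correspond to:

#include <arm_neon.h>

// Multiply two int8x8_t vectors into int16x8_t, then pairwise-add adjacent
// int16 lanes into the int32x4 accumulator, as in
// "vacc1x2 = vpadalq_s16(vacc1x2, vprod1x2)".
static inline int32x4_t accumulate_mull_padal(int32x4_t vacc, int8x8_t va, int8x8_t vb) {
  const int16x8_t vprod = vmull_s8(va, vb);
  return vpadalq_s16(vacc, vprod);
}

// Fallback reduction step: add the low and high halves of a column
// accumulator, as in the "vpsum1x2" reference above.
static inline int32x2_t reduce_halves(int32x4_t vacc) {
  return vadd_s32(vget_low_s32(vacc), vget_high_s32(vacc));
}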
D | 2x4c8-minmax-sse41-ld64.c |
     56  __m128i vacc1x2 = vacc0x2;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() local
     83  vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
     97  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
|
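The XOP and SSE4.1 entries share the madd-style accumulation (XOP fuses the add into _mm_maddd_epi16), but reduce across columns with _mm_hadd_epi32, pairing the column-2 and column-3 accumulators into one vector. A hedged sketch of those two steps; _mm_hadd_epi32 requires SSSE3 or later, _mm_maddd_epi16 requires XOP and is guarded accordingly.

#include <tmmintrin.h>  // SSSE3: _mm_hadd_epi32
#ifdef __XOP__
#include <x86intrin.h>  // XOP: _mm_maddd_epi16
#endif

#ifdef __XOP__
// Fused multiply-add-accumulate, as in "vacc1x2 = _mm_maddd_epi16(vxa1, vxb2, vacc1x2)":
// multiply int16 lanes, add adjacent pairs, and add the running accumulator.
static inline __m128i accumulate_xop(__m128i vacc, __m128i vxa, __m128i vxb) {
  return _mm_maddd_epi16(vxa, vxb, vacc);
}
#endif

// Column-pairing reduction: horizontally add adjacent int32 lanes of the
// column-2 and column-3 accumulators, as in
// "vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3)".
static inline __m128i reduce_columns_23(__m128i vacc_c2, __m128i vacc_c3) {
  return _mm_hadd_epi32(vacc_c2, vacc_c3);
}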
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x4c8-minmax-wasmsimd-ld128.c |
     58  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
    106  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    113  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    126  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, …  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
     58  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
    103  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    104  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    122  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, …  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
     62  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    126  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    135  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    152  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, …  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c |
     62  v128_t vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    123  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_low_i16x8(vprod1x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    124  vacc1x2 = wasm_i32x4_add(vacc1x2, wasm_i32x4_widen_high_i16x8(vprod1x2));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    148  …02 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x0, vacc1x2, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x0, …  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
D | 2x4c8-minmax-sse2-ld128.c |
     57  __m128i vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128() local
     98  vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128()
    109  … vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x0, vacc1x…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128()
|
D | 2x4c8-minmax-sse2-ld64.c |
     57  __m128i vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64() local
     96  vacc1x2 = _mm_add_epi32(vacc1x2, _mm_madd_epi16(vxa1, vxb2));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64()
    111  … vacc1x02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x0, vacc1x2), _mm_unpackhi_epi32(vacc1x0, vacc1x…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64()
|
D | 2x4c8-minmax-xop-ld64.c |
     62  __m128i vacc1x2 = vacc0x2;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() local
    101  vacc1x2 = _mm_maddd_epi16(vxa1, vxb2, vacc1x2);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
    117  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
|
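The qs8-igemm references repeat the same accumulate and reduce steps as the qs8-gemm ones above; what differs, outside these excerpts, is that the A rows are reached through an indirection buffer so convolution patches can be gathered without an explicit im2col copy. A heavily hedged, purely illustrative sketch of that outer structure (all names hypothetical, scalar stand-in for the SIMD accumulation shown above):

#include <stddef.h>
#include <stdint.h>

// Indirect-GEMM outer loop sketch: instead of a contiguous A matrix, walk an
// array of row pointers; the inner loop stands in for the per-column SIMD
// accumulation listed in the references above.
static void igemm_row_sketch(size_t ks, size_t kc,
                             const int8_t** a,   // ks patch-row pointers
                             int32_t* acc, const int8_t* w) {
  do {
    const int8_t* a0 = *a++;                     // next patch row for output row 0
    for (size_t k = 0; k < kc; k++) {
      acc[0] += (int32_t) a0[k] * (int32_t) w[k];
    }
    w += kc;
  } while (--ks != 0);
}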
/external/XNNPACK/src/f32-prelu/gen/ |
D | wasm-2x4.c |
     74  float vacc1x2 = __builtin_wasm_max_f32(vi1x2, vzero);  in xnn_f32_prelu_ukernel__wasm_2x4() local
     85  vacc1x2 += vi1x2 * vw2;  in xnn_f32_prelu_ukernel__wasm_2x4()
     95  o1[2] = vacc1x2;  in xnn_f32_prelu_ukernel__wasm_2x4()
|
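These f32-prelu references show the two halves of the PReLU computation: the positive part max(x, 0) initializes the accumulator, and a slope multiply-add contributes the negative part (the intermediate clamp of vi1x2 to min(x, 0) falls between the quoted lines). A hedged scalar sketch of the per-channel step, with illustrative names:

#include <stddef.h>

// PReLU per channel: y = max(x, 0) + slope * min(x, 0).
static void prelu_row_sketch(size_t channels, const float* x,
                             const float* slope, float* y) {
  for (size_t c = 0; c < channels; c++) {
    const float vx = x[c];
    float vacc = vx > 0.0f ? vx : 0.0f;        // vacc1x2 = max(vi1x2, vzero)
    const float vneg = vx < 0.0f ? vx : 0.0f;  // negative part (between the quoted lines)
    vacc += vneg * slope[c];                   // vacc1x2 += vi1x2 * vw2
    y[c] = vacc;                               // o1[2] = vacc1x2
  }
}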