/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c4-minmax-wasm-2x.c |
     61  float vacc1x3 = i1[3];  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() local
     76  vacc1x3 = vacc1x3 * vscale3 + vbias3;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
     85  vacc1x3 = __builtin_wasm_max_f32(vacc1x3, vmin);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
     94  vacc1x3 = __builtin_wasm_min_f32(vacc1x3, vmax);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
    104  o1[3] = vacc1x3;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
|
D | c4-minmax-scalar-2x.c |
     61  float vacc1x3 = i1[3];  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() local
     76  vacc1x3 = vacc1x3 * vscale3 + vbias3;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
     85  vacc1x3 = math_max_f32(vacc1x3, vmin);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
     94  vacc1x3 = math_min_f32(vacc1x3, vmax);  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
    104  o1[3] = vacc1x3;  in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
|
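The hits above trace a single dataflow: load one element, apply a per-channel multiply-add (scale, then bias), clamp to [vmin, vmax], and store. A minimal scalar sketch of that pattern; the helper names are hypothetical stand-ins for XNNPACK's math_max_f32/math_min_f32 (the wasm variant uses __builtin_wasm_max_f32/__builtin_wasm_min_f32 instead):

    #include <stddef.h>

    /* Hypothetical stand-ins for math_max_f32/math_min_f32. */
    static inline float my_max_f32(float a, float b) { return a > b ? a : b; }
    static inline float my_min_f32(float a, float b) { return a < b ? a : b; }

    /* y[c] = clamp(x[c] * scale[c] + bias[c], vmin, vmax) for one row.
       The generated c4/2x kernels unroll this over 4 channels and 2 rows,
       which is where scalars like vacc1x3 (row 1, channel 3) come from. */
    static void vmulcaddc_row_sketch(size_t channels, const float* x,
                                     const float* scale, const float* bias,
                                     float vmin, float vmax, float* y) {
      for (size_t c = 0; c < channels; c++) {
        float vacc = x[c] * scale[c] + bias[c];  /* cf. the line-76 hits */
        vacc = my_max_f32(vacc, vmin);           /* lower clamp, line-85 hits */
        vacc = my_min_f32(vacc, vmax);           /* upper clamp, line-94 hits */
        y[c] = vacc;
      }
    }
|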
/external/XNNPACK/src/f32-ppmm/gen/ |
D | 2x4-minmax-scalar.c |
     47  float vacc1x3 = vacc0x3;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() local
     69  vacc1x3 += va1 * vb3;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
     82  vacc1x3 = math_min_f32(vacc1x3, vmax);  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
     92  vacc1x3 = math_max_f32(vacc1x3, vmin);  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
     98  c1[3] = vacc1x3;  in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
|
D | 4x4-minmax-scalar.c |
     55  float vacc1x3 = vacc0x3;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() local
     93  vacc1x3 += va1 * vb3;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    114  vacc1x3 = math_min_f32(vacc1x3, vmax);  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    132  vacc1x3 = math_max_f32(vacc1x3, vmin);  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
    148  c1[3] = vacc1x3;  in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
|
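Both ppmm hits show the same 2-row register tile: row-1 accumulators start as copies of the bias-initialized row-0 accumulators (vacc1x3 = vacc0x3), get one multiply-add per k step (vacc1x3 += va1 * vb3), and are clamped min-first before the store. A sketch of a 2x4 tile with hypothetical argument names and an assumed packing layout; the real kernels fully unroll the accumulator grid into scalars:

    #include <stddef.h>

    /* 2x4 packed-panel GEMM (ppmm) tile. Assumed layout, for illustration:
       a is packed 2 x k, w holds 4 bias values then packed k x 4 weights. */
    static void ppmm_2x4_sketch(size_t k, const float* a, const float* w,
                                float* c0, float* c1, float vmin, float vmax) {
      float vacc0[4], vacc1[4];
      for (int n = 0; n < 4; n++) vacc0[n] = vacc1[n] = w[n];  /* cf. vacc1x3 = vacc0x3 */
      w += 4;
      for (size_t i = 0; i < k; i++) {
        const float va0 = *a++;
        const float va1 = *a++;
        for (int n = 0; n < 4; n++) {
          vacc0[n] += va0 * w[n];
          vacc1[n] += va1 * w[n];  /* cf. vacc1x3 += va1 * vb3 */
        }
        w += 4;
      }
      for (int n = 0; n < 4; n++) {
        float v0 = vacc0[n] < vmax ? vacc0[n] : vmax;  /* math_min_f32 hits */
        float v1 = vacc1[n] < vmax ? vacc1[n] : vmax;
        v0 = v0 > vmin ? v0 : vmin;                    /* math_max_f32 hits */
        v1 = v1 > vmin ? v1 : vmin;
        c0[n] = v0;  c1[n] = v1;                       /* cf. c1[3] = vacc1x3 */
      }
    }
|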
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x4c8-xw-minmax-wasmsimd.c |
     58  v128_t vacc1x3 = vacc0x3;  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
     98  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
     99  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
    108  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, …  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
|
D | 2x4c8-minmax-wasmsimd-ld128.c |
     58  v128_t vacc1x3 = vacc0x3;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
     99  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    103  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    112  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, …  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
     58  v128_t vacc1x3 = vacc0x3;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
     98  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
     99  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    108  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, …  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 3x4c8-xw-minmax-wasmsimd.c |
     64  v128_t vacc1x3 = vacc0x3;  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
    119  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    120  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
    132  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, …  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
     64  v128_t vacc1x3 = vacc0x3;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    119  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    126  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    136  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, …  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c |
     64  v128_t vacc1x3 = vacc0x3;  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    119  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    120  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    132  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, …  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
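All six wasmsimd entries share two idioms: the 8-bit column products arrive as a 16-bit vector (vprod1x3) that is widened to 32 bits in low/high halves and added into the accumulator, and pairs of per-column accumulators are later merged by interleaving shuffles. A sketch of both steps; the second shuffle's lane indices are an assumption, since the listing truncates those lines, and newer wasm_simd128.h headers spell the widen intrinsics wasm_i32x4_extend_*_i16x8 and the shuffle wasm_i32x4_shuffle:

    #include <wasm_simd128.h>

    /* Accumulate a vector of int16 products into an int32 accumulator,
       as in the paired widen_low/widen_high hits above. */
    static inline v128_t acc_i16_products(v128_t vacc, v128_t vprod) {
      vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_low_i16x8(vprod));
      vacc = wasm_i32x4_add(vacc, wasm_i32x4_widen_high_i16x8(vprod));
      return vacc;
    }

    /* Merge two per-column accumulators by interleave-and-add, one half of
       a 4x4 transpose; the 2, 6, 3, 7 indices are assumed (hit truncated). */
    static inline v128_t merge_columns(v128_t vacc_c1, v128_t vacc_c3) {
      return wasm_i32x4_add(wasm_v32x4_shuffle(vacc_c1, vacc_c3, 0, 4, 1, 5),
                            wasm_v32x4_shuffle(vacc_c1, vacc_c3, 2, 6, 3, 7));
    }
|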
D | 2x4c8-xw-minmax-sse2.c |
     57  __m128i vacc1x3 = vacc0x3;  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2() local
     84  vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2()
     93  … vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(vacc1x1, vacc1x…  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2()
|
D | 2x4c8-minmax-sse2-ld128.c |
     57  __m128i vacc1x3 = vacc0x3;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128() local
     86  vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128()
     95  … vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(vacc1x1, vacc1x…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128()
|
D | 2x4c8-minmax-sse2-ld64.c |
     57  __m128i vacc1x3 = vacc0x3;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64() local
     88  vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64()
     97  … vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(vacc1x1, vacc1x…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64()
|
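The SSE2 hits rely on _mm_madd_epi16: with the 8-bit inputs sign-extended into 16-bit lanes (vxa1, vxb3), it forms 16x16-bit products and sums adjacent pairs into 32-bit lanes, which are then accumulated; the truncated vacc1x13 hits are an unpacklo/unpackhi interleave-and-add that merges two column accumulators. A sketch of both idioms, with the truncated second operand treated as an assumption:

    #include <emmintrin.h>  /* SSE2 */

    /* vxa/vxb hold 8-bit inputs sign-extended to 16-bit lanes; madd forms
       16x16-bit products, sums adjacent pairs, and the result is added
       into the running 32-bit column accumulator. */
    static inline __m128i qs8_acc_sse2(__m128i vacc, __m128i vxa, __m128i vxb) {
      return _mm_add_epi32(vacc, _mm_madd_epi16(vxa, vxb));
    }

    /* Interleave-and-add merge of two column accumulators, one half of a
       4x4 32-bit transpose (second unpackhi operand assumed). */
    static inline __m128i merge_columns_sse2(__m128i vacc_a, __m128i vacc_b) {
      return _mm_add_epi32(_mm_unpacklo_epi32(vacc_a, vacc_b),
                           _mm_unpackhi_epi32(vacc_a, vacc_b));
    }
|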
D | 2x4c8-xw-minmax-xop.c |
     62  __m128i vacc1x3 = vacc0x3;  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() local
     89  vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
     98  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
|
D | 2x8c8-minmax-neon-mull-padal.c |
     61  int32x4_t vacc1x3 = vacc0x3;  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
     93  vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
    124  const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
    149  const int32x2_t vpsum1x3 = vadd_s32(vget_low_s32(vacc1x3), vget_high_s32(vacc1x3));  in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
|
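The mull-padal kernel name describes its two hit patterns: products come from a widening 8x8-bit multiply ("mull"), and vpadalq_s16 ("padal") sums adjacent 16-bit pairs and accumulates them into int32x4 in one instruction. The two reduction hits correspond to the two reduction paths in the same file: vpaddq_s32 on AArch64 and the vget_low/vget_high + vadd_s32 form elsewhere. A sketch:

    #include <arm_neon.h>

    /* vprod holds eight int16 products from the widening multiply;
       vpadalq_s16 pairwise-adds and accumulates in a single step. */
    static inline int32x4_t qs8_acc_neon(int32x4_t vacc, int16x8_t vprod) {
      return vpadalq_s16(vacc, vprod);
    }

    /* AArch32-friendly half-reduction, as in the line-149 hit; the
       line-124 hit uses vpaddq_s32, which is AArch64-only. */
    static inline int32x2_t reduce_halves_neon(int32x4_t vacc) {
      return vadd_s32(vget_low_s32(vacc), vget_high_s32(vacc));
    }
|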
D | 2x4c8-minmax-sse41-ld64.c |
     57  __m128i vacc1x3 = vacc0x3;  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() local
     88  vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3));  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
     97  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
|
D | 2x4c8-xw-minmax-sse41.c |
     57  __m128i vacc1x3 = vacc0x3;  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41() local
     84  vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3));  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
     93  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
|
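The XOP and SSE4.1 hits above differ only in the accumulate step: the SSE4.1 kernels use the two-instruction _mm_add_epi32(_mm_madd_epi16(...)) sequence, while XOP's _mm_maddd_epi16 fuses the multiply, pairwise add, and accumulate into one instruction; both then merge neighbouring column accumulators with a horizontal add. A sketch of the dispatch:

    #include <tmmintrin.h>  /* SSSE3: _mm_hadd_epi32; SSE2 madd comes along */
    #if defined(__XOP__)
    #include <x86intrin.h>  /* XOP: _mm_maddd_epi16 */
    #endif

    /* Accumulate step: fused on XOP, two-op madd+add elsewhere. */
    static inline __m128i qs8_acc_fused(__m128i vxa, __m128i vxb, __m128i vacc) {
    #if defined(__XOP__)
      return _mm_maddd_epi16(vxa, vxb, vacc);
    #else
      return _mm_add_epi32(vacc, _mm_madd_epi16(vxa, vxb));
    #endif
    }

    /* Shared reduction, cf. "vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3)". */
    static inline __m128i merge_columns_hadd(__m128i vacc_c2, __m128i vacc_c3) {
      return _mm_hadd_epi32(vacc_c2, vacc_c3);
    }
|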
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x4c8-minmax-wasmsimd-ld128.c |
     59  v128_t vacc1x3 = vacc0x3;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
    112  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    116  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
    127  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, …  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
|
D | 2x4c8-minmax-wasmsimd-ld64.c |
     59  v128_t vacc1x3 = vacc0x3;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
    111  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    112  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
    123  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, …  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
|
D | 3x4c8-minmax-wasmsimd-ld128.c |
     63  v128_t vacc1x3 = vacc0x3;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    134  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    141  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
    153  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, …  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c |
     63  v128_t vacc1x3 = vacc0x3;  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    134  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    135  vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3));  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
    149  …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, …  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
D | 2x4c8-minmax-sse2-ld128.c |
     58  __m128i vacc1x3 = vacc0x3;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128() local
     99  vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128()
    110  … vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(vacc1x1, vacc1x…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128()
|
D | 2x4c8-minmax-sse2-ld64.c |
     58  __m128i vacc1x3 = vacc0x3;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64() local
    101  vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3));  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64()
    112  … vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(vacc1x1, vacc1x…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64()
|
D | 2x4c8-minmax-xop-ld64.c |
     63  __m128i vacc1x3 = vacc0x3;  in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() local
    106  vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
    117  const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3);  in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
|
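The qs8-igemm hits use the same accumulate and reduction idioms as the qs8-gemm kernels above; what differs (outside these hits) is that igemm reads A through an indirection buffer of row pointers, so convolution patches can feed the same inner loop. A structural sketch only, with hypothetical names, not the real kernel signature:

    #include <stddef.h>
    #include <stdint.h>

    /* igemm vs. gemm, structurally: A is a list of ks pointers per output
       row rather than one contiguous block; the scalar add is a stand-in
       for the SIMD accumulate shown in the hits above. */
    static void igemm_row_sketch(size_t ks, size_t kc,
                                 const int8_t* const* indirect_a,
                                 int32_t* vacc) {
      do {
        const int8_t* a0 = *indirect_a++;  /* next input patch for row 0 */
        for (size_t k = 0; k < kc; k++) {
          vacc[0] += (int32_t) a0[k];      /* placeholder for the SIMD math */
        }
      } while (--ks != 0);
    }
|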
/external/XNNPACK/src/f32-prelu/gen/ |
D | wasm-2x4.c |
     76  float vacc1x3 = __builtin_wasm_max_f32(vi1x3, vzero);  in xnn_f32_prelu_ukernel__wasm_2x4() local
     86  vacc1x3 += vi1x3 * vw3;  in xnn_f32_prelu_ukernel__wasm_2x4()
     96  o1[3] = vacc1x3;  in xnn_f32_prelu_ukernel__wasm_2x4()
|
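The prelu hits split PReLU into positive and negative parts: the accumulator starts as max(x, 0), and the term added at line 86 contributes w*x only for negative x, because in the surrounding source vi1x3 is reassigned to min(vi1x3, 0) between the two hits shown (that line has no vacc1x3 reference, so it does not appear in this listing). A scalar sketch of the same dataflow, with hypothetical names:

    #include <stddef.h>

    /* PReLU: y = x for x >= 0, y = w*x for x < 0, computed branch-free
       as max(x, 0) + w * min(x, 0). */
    static void prelu_row_sketch(size_t channels, const float* x,
                                 const float* w, float* y) {
      for (size_t c = 0; c < channels; c++) {
        const float vi = x[c];
        float vacc = vi > 0.0f ? vi : 0.0f;        /* cf. __builtin_wasm_max_f32(vi1x3, vzero) */
        const float vneg = vi < 0.0f ? vi : 0.0f;  /* the min-with-zero step between the hits */
        vacc += vneg * w[c];                       /* cf. vacc1x3 += vi1x3 * vw3 */
        y[c] = vacc;
      }
    }
|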