Home
last modified time | relevance | path

Searched refs:vacc1x3 (Results 1 – 25 of 93) sorted by relevance

Pages: 1 2 3 4

/external/XNNPACK/src/f32-vmulcaddc/gen/
Dc4-minmax-wasm-2x.c61 float vacc1x3 = i1[3]; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x() local
76 vacc1x3 = vacc1x3 * vscale3 + vbias3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
85 vacc1x3 = __builtin_wasm_max_f32(vacc1x3, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
94 vacc1x3 = __builtin_wasm_min_f32(vacc1x3, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
104 o1[3] = vacc1x3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x()
Dc4-minmax-scalar-2x.c61 float vacc1x3 = i1[3]; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x() local
76 vacc1x3 = vacc1x3 * vscale3 + vbias3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
85 vacc1x3 = math_max_f32(vacc1x3, vmin); in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
94 vacc1x3 = math_min_f32(vacc1x3, vmax); in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
104 o1[3] = vacc1x3; in xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x()
/external/XNNPACK/src/f32-ppmm/gen/
D2x4-minmax-scalar.c47 float vacc1x3 = vacc0x3; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar() local
69 vacc1x3 += va1 * vb3; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
82 vacc1x3 = math_min_f32(vacc1x3, vmax); in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
92 vacc1x3 = math_max_f32(vacc1x3, vmin); in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
98 c1[3] = vacc1x3; in xnn_f32_ppmm_minmax_ukernel_2x4__scalar()
D4x4-minmax-scalar.c55 float vacc1x3 = vacc0x3; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar() local
93 vacc1x3 += va1 * vb3; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
114 vacc1x3 = math_min_f32(vacc1x3, vmax); in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
132 vacc1x3 = math_max_f32(vacc1x3, vmin); in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
148 c1[3] = vacc1x3; in xnn_f32_ppmm_minmax_ukernel_4x4__scalar()
/external/XNNPACK/src/qs8-gemm/gen/
D2x4c8-xw-minmax-wasmsimd.c58 v128_t vacc1x3 = vacc0x3; in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
98 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
99 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
108 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()
D2x4c8-minmax-wasmsimd-ld128.c58 v128_t vacc1x3 = vacc0x3; in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
99 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
103 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
112 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
D2x4c8-minmax-wasmsimd-ld64.c58 v128_t vacc1x3 = vacc0x3; in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
98 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
99 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
108 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
D3x4c8-xw-minmax-wasmsimd.c64 v128_t vacc1x3 = vacc0x3; in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
119 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
120 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
132 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
D3x4c8-minmax-wasmsimd-ld128.c64 v128_t vacc1x3 = vacc0x3; in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
119 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
126 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
136 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
D3x4c8-minmax-wasmsimd-ld64.c64 v128_t vacc1x3 = vacc0x3; in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
119 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
120 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
132 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
D2x4c8-xw-minmax-sse2.c57 __m128i vacc1x3 = vacc0x3; in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2() local
84 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2()
93 … vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(vacc1x1, vacc1x… in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2()
D2x4c8-minmax-sse2-ld128.c57 __m128i vacc1x3 = vacc0x3; in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128() local
86 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128()
95 … vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(vacc1x1, vacc1x… in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128()
D2x4c8-minmax-sse2-ld64.c57 __m128i vacc1x3 = vacc0x3; in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64() local
88 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64()
97 … vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(vacc1x1, vacc1x… in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64()
D2x4c8-xw-minmax-xop.c62 __m128i vacc1x3 = vacc0x3; in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() local
89 vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
98 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
D2x8c8-minmax-neon-mull-padal.c61 int32x4_t vacc1x3 = vacc0x3; in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
93 vacc1x3 = vpadalq_s16(vacc1x3, vprod1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
124 const int32x4_t vsum1x23 = vpaddq_s32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
149 const int32x2_t vpsum1x3 = vadd_s32(vget_low_s32(vacc1x3), vget_high_s32(vacc1x3)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x4c8-minmax-sse41-ld64.c57 __m128i vacc1x3 = vacc0x3; in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() local
88 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
97 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
D2x4c8-xw-minmax-sse41.c57 __m128i vacc1x3 = vacc0x3; in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41() local
84 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
93 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
/external/XNNPACK/src/qs8-igemm/gen/
D2x4c8-minmax-wasmsimd-ld128.c59 v128_t vacc1x3 = vacc0x3; in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
112 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
116 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
127 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()
D2x4c8-minmax-wasmsimd-ld64.c59 v128_t vacc1x3 = vacc0x3; in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
111 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
112 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
123 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()
D3x4c8-minmax-wasmsimd-ld128.c63 v128_t vacc1x3 = vacc0x3; in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
134 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
141 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
153 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
D3x4c8-minmax-wasmsimd-ld64.c63 v128_t vacc1x3 = vacc0x3; in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
134 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_low_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
135 vacc1x3 = wasm_i32x4_add(vacc1x3, wasm_i32x4_widen_high_i16x8(vprod1x3)); in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
149 …13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x1, in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
D2x4c8-minmax-sse2-ld128.c58 __m128i vacc1x3 = vacc0x3; in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128() local
99 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128()
110 … vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(vacc1x1, vacc1x… in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128()
D2x4c8-minmax-sse2-ld64.c58 __m128i vacc1x3 = vacc0x3; in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64() local
101 vacc1x3 = _mm_add_epi32(vacc1x3, _mm_madd_epi16(vxa1, vxb3)); in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64()
112 … vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(vacc1x1, vacc1x… in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64()
D2x4c8-minmax-xop-ld64.c63 __m128i vacc1x3 = vacc0x3; in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() local
106 vacc1x3 = _mm_maddd_epi16(vxa1, vxb3, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
117 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
/external/XNNPACK/src/f32-prelu/gen/
Dwasm-2x4.c76 float vacc1x3 = __builtin_wasm_max_f32(vi1x3, vzero); in xnn_f32_prelu_ukernel__wasm_2x4() local
86 vacc1x3 += vi1x3 * vw3; in xnn_f32_prelu_ukernel__wasm_2x4()
96 o1[3] = vacc1x3; in xnn_f32_prelu_ukernel__wasm_2x4()

Pages: 1 2 3 4