Searched refs:vacc4567 (Results 1 – 25 of 209) sorted by relevance

/external/XNNPACK/src/qs8-dwconv/gen/
up8x9-minmax-neon-mul16.c
90 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() local
97 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
103 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
109 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
115 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
127 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
133 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
139 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
145 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16()
[all …]
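
These neon_mul16 hits, together with the up16x9/up24x9/up32x9 variants below, are expansions of the same template: vacc4567 is the int32 accumulator for output channels 4-7 of a QS8 depthwise convolution, and every tap widens the int8 inputs and weights to int16 and multiply-accumulates the high halves with vmlal_s16. A minimal standalone sketch of that accumulation, with illustrative buffer names rather than XNNPACK's pointer-walking weight layout:

#include <arm_neon.h>
#include <stdint.h>

/* Sketch of the up8x9 neon_mul16 accumulation: one 9-tap, 8-channel
   depthwise-convolution output group.  Names are illustrative. */
static void dwconv_8ch_9tap_sketch(
    const int8_t* input[9],      /* 9 input rows, 8 channels each */
    const int8_t weights[9][8],  /* 9 taps, 8 channels each */
    const int32_t bias[8],
    int32_t acc[8])
{
  int32x4_t vacc0123 = vld1q_s32(bias);
  int32x4_t vacc4567 = vld1q_s32(bias + 4);
  for (int k = 0; k < 9; k++) {
    /* Widen int8 -> int16, then multiply-accumulate into int32. */
    const int16x8_t vi = vmovl_s8(vld1_s8(input[k]));
    const int16x8_t vk = vmovl_s8(vld1_s8(weights[k]));
    vacc0123 = vmlal_s16(vacc0123, vget_low_s16(vi),  vget_low_s16(vk));
    vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi), vget_high_s16(vk));
  }
  vst1q_s32(acc, vacc0123);
  vst1q_s32(acc + 4, vacc4567);
}

The up16x9/up24x9/up32x9 files only differ in how many 8-channel groups they process per loop iteration; the per-group accumulation is the same.
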
up16x9-minmax-neon-mul16.c
90 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16() local
101 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
111 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
121 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
131 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
141 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
151 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
161 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
171 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
181 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__neon_mul16()
[all …]
up8x9-minmax-wasmsimd-mul16.c
84 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16() local
94 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
103 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
112 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
121 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
130 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
139 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod5x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
148 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod6x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
157 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod7x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
166 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod8x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__wasmsimd_mul16()
[all …]
up16x9-minmax-wasmsimd-mul16.c
84 v128_t vacc4567 = wasm_v128_load((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16() local
99 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
113 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
127 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
141 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
155 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
169 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod5x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
183 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod6x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
197 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod7x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
211 vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod8x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16()
[all …]
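
The two wasmsimd_mul16 files above perform the same accumulation with a single 16-bit multiply whose products are widened into the two int32 accumulators. A one-tap sketch, using the intrinsic names as they appear in these generated kernels (newer wasm_simd128.h headers rename wasm_i32x4_widen_*_i16x8 to wasm_i32x4_extend_*_i16x8; the older names are assumed here); the argument layout is illustrative:

#include <wasm_simd128.h>
#include <stdint.h>

/* One tap of the wasmsimd mul16 accumulation.  The operands originate from
   int8 data, so each 16-bit product fits in int16 without overflow. */
static void dwconv_tap_wasmsimd_sketch(
    const int16_t vi[8],  /* widened inputs for 8 channels (assumed layout)  */
    const int16_t vk[8],  /* widened weights for 8 channels (assumed layout) */
    int32_t acc[8])
{
  v128_t vacc0123 = wasm_v128_load(acc);
  v128_t vacc4567 = wasm_v128_load(acc + 4);
  const v128_t vprod = wasm_i16x8_mul(wasm_v128_load(vi), wasm_v128_load(vk));
  vacc0123 = wasm_i32x4_add(vacc0123, wasm_i32x4_widen_low_i16x8(vprod));
  vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_widen_high_i16x8(vprod));
  wasm_v128_store(acc, vacc0123);
  wasm_v128_store(acc + 4, vacc4567);
}
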
up24x9-minmax-neon-mul16.c
90 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16() local
105 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
119 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
133 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
147 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
161 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
175 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
189 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
203 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
217 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up24x9__neon_mul16()
[all …]
up8x9-minmax-sse41-mul16.c
84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local
98 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
150 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
163 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
189 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
202 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
[all …]
up32x9-minmax-neon-mul16.c
90 int32x4_t vacc4567 = vld1q_s32(w); w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16() local
109 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi0x01234567), vget_high_s16(vk0x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
127 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi1x01234567), vget_high_s16(vk1x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
145 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi2x01234567), vget_high_s16(vk2x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
163 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi3x01234567), vget_high_s16(vk3x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
181 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi4x01234567), vget_high_s16(vk4x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
199 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi5x01234567), vget_high_s16(vk5x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
217 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi6x01234567), vget_high_s16(vk6x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
235 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi7x01234567), vget_high_s16(vk7x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
253 vacc4567 = vmlal_s16(vacc4567, vget_high_s16(vi8x01234567), vget_high_s16(vk8x01234567)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__neon_mul16()
[all …]
up8x9-minmax-sse2-mul16.c
84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local
98 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
150 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
163 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
189 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
202 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
[all …]
up8x9-minmax-ssse3-mul16.c
84 __m128i vacc4567 = _mm_loadu_si128((const __m128i*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local
98 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp0x01234567lo, vp0x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
111 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp1x01234567lo, vp1x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
124 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp2x01234567lo, vp2x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
137 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp3x01234567lo, vp3x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
150 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp4x01234567lo, vp4x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
163 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp5x01234567lo, vp5x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
176 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp6x01234567lo, vp6x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
189 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp7x01234567lo, vp7x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
202 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
[all …]
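
The sse41, sse2, and ssse3 mul16 variants above build the same 32-bit products from 16-bit multiplies: _mm_mullo_epi16 and _mm_mulhi_epi16 give the low and high halves of each signed product, and _mm_unpacklo/unpackhi_epi16 interleave them into sign-correct int32 values, which is what the vpNx01234567lo/hi pairs in the hits are. A one-tap sketch with illustrative buffer names:

#include <emmintrin.h>  /* SSE2 is enough for the accumulation itself */
#include <stdint.h>

/* Sketch of the SSE mul16 trick: a 16x16 -> 32-bit signed multiply assembled
   from mullo/mulhi, then accumulated into two int32 vectors. */
static void dwconv_tap_sse_mul16_sketch(
    const int16_t vi[8], const int16_t vk[8], int32_t acc[8])
{
  __m128i vacc0123 = _mm_loadu_si128((const __m128i*) acc);
  __m128i vacc4567 = _mm_loadu_si128((const __m128i*) (acc + 4));
  const __m128i vx = _mm_loadu_si128((const __m128i*) vi);
  const __m128i vw = _mm_loadu_si128((const __m128i*) vk);
  const __m128i vplo = _mm_mullo_epi16(vx, vw);  /* low 16 bits of products  */
  const __m128i vphi = _mm_mulhi_epi16(vx, vw);  /* high 16 bits, signed     */
  /* Interleave low/high halves into full 32-bit products and accumulate. */
  vacc0123 = _mm_add_epi32(vacc0123, _mm_unpacklo_epi16(vplo, vphi));
  vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vplo, vphi));
  _mm_storeu_si128((__m128i*) acc, vacc0123);
  _mm_storeu_si128((__m128i*) (acc + 4), vacc4567);
}

The three instruction-set variants differ mainly in how they sign-extend the int8 loads and in the requantization tail, not in this accumulation step.
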
/external/XNNPACK/src/qs8-vadd/gen/
minmax-neon-ld64-x8.c
42 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8() local
45 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
48 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
51 vacc4567 = vrshlq_s32(vacc4567, vright_shift); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
53 …vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_p… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
72 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8() local
75 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
78 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
81 vacc4567 = vrshlq_s32(vacc4567, vright_shift); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
83 …vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_p… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x8()
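
In these qs8-vadd neon_ld64 hits, vacc4567 holds lanes 4-7 of the 32-bit accumulator: each widened input is scaled by its own multiplier, the sum gets a sign fixup plus a rounding right shift, and the result is narrowed back with saturation. A minimal sketch of that flow, assuming the inputs are already widened to int16 (the vex/vey vectors in the snippets; how the input zero points are folded in is omitted) and leaving out the parameter loading and the final int8 clamp:

#include <arm_neon.h>
#include <stdint.h>

/* Sketch of the neon_ld64 quantized-add flow for 8 elements. */
static void qs8_vadd_8_sketch(
    int16x8_t vex01234567,       /* x, already widened to int16 */
    int16x8_t vey01234567,       /* y, already widened to int16 */
    int32x4_t vx_multiplier, int32x4_t vy_multiplier,
    int32x4_t vzero_shift_mask, int32x4_t vright_shift,
    int16x8_t voutput_zero_point,
    int8_t out[8])
{
  int32x4_t vacc0123 = vmulq_s32(vmovl_s16(vget_low_s16(vex01234567)), vx_multiplier);
  int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier);
  vacc0123 = vmlaq_s32(vacc0123, vmovl_s16(vget_low_s16(vey01234567)), vy_multiplier);
  vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier);

  /* Sign fixup followed by a rounding right shift, as in the hits above. */
  vacc0123 = vsraq_n_s32(vacc0123, vbicq_s32(vacc0123, vzero_shift_mask), 31);
  vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31);
  vacc0123 = vrshlq_s32(vacc0123, vright_shift);
  vacc4567 = vrshlq_s32(vacc4567, vright_shift);

  /* Narrow with saturation and add the output zero point. */
  const int16x8_t vacc01234567 = vqaddq_s16(
      vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_point);
  vst1_s8(out, vqmovn_s16(vacc01234567));
}
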
minmax-xop-mul32-ld32-x8.c
49 __m128i vacc4567 = _mm_macc_epi32(vx4567, vx_multiplier, vzero_point_product); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() local
52 vacc4567 = _mm_macc_epi32(vy4567, vy_multiplier, vacc4567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
55 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
58vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
60 __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
79 __m128i vacc4567 = _mm_macc_epi32(vx4567, vx_multiplier, vzero_point_product); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() local
82 vacc4567 = _mm_macc_epi32(vy4567, vy_multiplier, vacc4567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
85 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
88vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
90 … __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
minmax-wasmsimd-x8.c
41 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() local
44vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
47 …nst v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(vacc… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
50vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
52 …v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
69 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8() local
72vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
75 …nst v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(vacc… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
78vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
80 …v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x8()
minmax-neon-ld64-x16.c
46 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16() local
51 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
56 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
61 vacc4567 = vrshlq_s32(vacc4567, vright_shift); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
65 …vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_p… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
85 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16() local
88 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
91 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
94 vacc4567 = vrshlq_s32(vacc4567, vright_shift); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
96 …vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_p… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x16()
minmax-sse41-mul32-ld32-x8.c
44 __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() local
47 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
50 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
53vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
55 __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
74 __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() local
77 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
80 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
83vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
85 … __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
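
The mul32 variants (the sse41 files above, and the xop files that fuse the multiply and add into _mm_macc_epi32) scale each input with a full 32-bit multiply and then divide by 2^shift with round-to-nearest, using the remainder-mask comparison visible in the truncated lines. A sketch of that requantization for one vector of four lanes; the parameter names mirror the snippets, but the function itself is illustrative and vzero_point_product is assumed to already fold both input zero points:

#include <smmintrin.h>  /* SSE4.1 for _mm_mullo_epi32 */

/* Requantization step of the sse41 mul32 quantized add, one __m128i of
   int32 lanes (e.g. lanes 4..7). */
static __m128i qs8_vadd_requant_sketch(
    __m128i vx4567, __m128i vy4567,
    __m128i vx_multiplier, __m128i vy_multiplier,
    __m128i vzero_point_product,
    __m128i vremainder_mask, __m128i vremainder_threshold,
    __m128i vshift)
{
  __m128i vacc4567 = _mm_add_epi32(vzero_point_product,
                                   _mm_mullo_epi32(vx4567, vx_multiplier));
  vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier));

  /* remainder = (acc & mask) - (acc < 0); the shift result is bumped by one
     when the remainder exceeds the threshold, giving round-to-nearest. */
  const __m128i vrem4567 = _mm_add_epi32(
      _mm_and_si128(vacc4567, vremainder_mask),
      _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567));
  vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift),
                           _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
  return vacc4567;
}

The tail in these SSE variants is the same: _mm_packs_epi32 pairs the two accumulators back into int16, _mm_adds_epi16 adds the output zero point, and the result is clamped and packed to int8.
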
minmax-wasmsimd-x16.c
43 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() local
48vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
53 …nst v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(vacc… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
58vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
62 …v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
82 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16() local
85vacc4567 = wasm_i32x4_add(vacc4567, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(vy01234567), vy_mul… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
88 …nst v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(vacc… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
91vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
93 …v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput… in xnn_qs8_vadd_minmax_ukernel__wasmsimd_x16()
minmax-sse41-mul16-ld64-x8.c
55 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() local
58 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
61 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
64vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
66 __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
95 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8() local
98 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
101 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
104vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
106 … __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
minmax-neon-ld64-x24.c
50 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24() local
57 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
64 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
71 vacc4567 = vrshlq_s32(vacc4567, vright_shift); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
77 …vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_p… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
102 int32x4_t vacc4567 = vmulq_s32(vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24() local
105 vacc4567 = vmlaq_s32(vacc4567, vmovl_s16(vget_high_s16(vey01234567)), vy_multiplier); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
108 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
111 vacc4567 = vrshlq_s32(vacc4567, vright_shift); in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
113 …vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_p… in xnn_qs8_vadd_minmax_ukernel__neon_ld64_x24()
minmax-xop-mul32-ld32-x16.c
53 __m128i vacc4567 = _mm_macc_epi32(vx4567, vx_multiplier, vzero_point_product); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() local
58 vacc4567 = _mm_macc_epi32(vy4567, vy_multiplier, vacc4567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
63 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
68vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
72 __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
96 __m128i vacc4567 = _mm_macc_epi32(vx4567, vx_multiplier, vzero_point_product); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() local
99 vacc4567 = _mm_macc_epi32(vy4567, vy_multiplier, vacc4567); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
102 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
105vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
107 … __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
minmax-sse2-mul16-ld64-x8.c
57 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() local
60 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
63 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
66vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
68 __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
99 …__m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_unpackhi_epi16(vxprod01234567lo, vxprod0… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8() local
102 vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vyprod01234567lo, vyprod01234567hi)); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
105 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
108vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
110 … __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x8()
minmax-sse41-mul32-ld32-x16.c
48 __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() local
53 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
58 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
63vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
67 __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
91 __m128i vacc4567 = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vx4567, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() local
94 vacc4567 = _mm_add_epi32(vacc4567, _mm_mullo_epi32(vy4567, vy_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
97 … vrem4567 = _mm_add_epi32(_mm_and_si128(vacc4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
100vacc4567 = _mm_sub_epi32(_mm_sra_epi32(vacc4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_thr… in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
102 … __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
/external/XNNPACK/src/qs8-vaddc/gen/
minmax-neon-ld64-x8.c
45 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8() local
48 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
51 vacc4567 = vrshlq_s32(vacc4567, vright_shift); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
53 …vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_p… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
70 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8() local
73 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
76 vacc4567 = vrshlq_s32(vacc4567, vright_shift); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
78 …vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_p… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x8()
minmax-neon-ld64-x16.c
47 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16() local
52 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
57 vacc4567 = vrshlq_s32(vacc4567, vright_shift); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
61 …vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_p… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
79 int32x4_t vacc4567 = vmlaq_s32(vy_bias, vmovl_s16(vget_high_s16(vex01234567)), vx_multiplier); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16() local
82 vacc4567 = vsraq_n_s32(vacc4567, vbicq_s32(vacc4567, vzero_shift_mask), 31); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
85 vacc4567 = vrshlq_s32(vacc4567, vright_shift); in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
87 …vacc01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc0123), vqmovn_s32(vacc4567)), voutput_zero_p… in xnn_qs8_vaddc_minmax_ukernel__neon_ld64_x16()
minmax-wasmsimd-x8.c
40 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() local
43 …nst v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(vacc… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
46vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
48 …v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
64 …v128_t vacc4567 = wasm_i32x4_add(vzero_point_product, wasm_i32x4_mul(wasm_i32x4_widen_high_i16x8(v… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8() local
67 …nst v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vacc4567, vremainder_mask), wasm_i32x4_shr(vacc… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
70vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vacc4567, vshift), wasm_i32x4_gt(vrem4567, vremainder_thr… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
72 …v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x8()
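
The qs8-vaddc kernels add a constant to a tensor, so the constant operand and the zero-point terms can be pre-folded into a single bias vector (vy_bias in the NEON hits, vzero_point_product in the wasmsimd one), leaving one multiply-accumulate per vector before the same rounding shift as qs8-vadd. A sketch of just that accumulation step, assuming the widened input half and the precomputed parameters are given:

#include <arm_neon.h>

/* vacc = vy_bias + widen(vex_high) * vx_multiplier, as in the hits above. */
static int32x4_t qs8_vaddc_acc_sketch(int16x4_t vex_high,
                                      int32x4_t vy_bias,
                                      int32x4_t vx_multiplier)
{
  return vmlaq_s32(vy_bias, vmovl_s16(vex_high), vx_multiplier);
}
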
/external/XNNPACK/src/f32-hswish/gen/
hswish-sse-x8.c
38 __m128 vacc4567 = _mm_mul_ps(vx4567, vsixth); in xnn_f32_hswish_ukernel__sse_x8() local
41 vacc4567 = _mm_add_ps(vacc4567, vhalf); in xnn_f32_hswish_ukernel__sse_x8()
44 vacc4567 = _mm_max_ps(vacc4567, vzero); in xnn_f32_hswish_ukernel__sse_x8()
47 vacc4567 = _mm_min_ps(vacc4567, vone); in xnn_f32_hswish_ukernel__sse_x8()
50 vacc4567 = _mm_mul_ps(vacc4567, vx4567); in xnn_f32_hswish_ukernel__sse_x8()
53 _mm_storeu_ps(y + 4, vacc4567); in xnn_f32_hswish_ukernel__sse_x8()
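
hswish-sse-x8.c evaluates hardswish, y = x * clamp(x/6 + 1/2, 0, 1), on two vectors of four floats per iteration, with vacc4567 covering elements 4-7. A self-contained sketch of the same arithmetic for eight floats; the tensor loop and the remainder handling of the real kernel are left out:

#include <xmmintrin.h>  /* SSE */

/* Hardswish for 8 floats: y = x * clamp(x*(1/6) + 1/2, 0, 1). */
static void f32_hswish_x8_sketch(const float* x, float* y)
{
  const __m128 vsixth = _mm_set1_ps(1.0f / 6.0f);
  const __m128 vhalf  = _mm_set1_ps(0.5f);
  const __m128 vone   = _mm_set1_ps(1.0f);
  const __m128 vzero  = _mm_setzero_ps();

  const __m128 vx0123 = _mm_loadu_ps(x);
  const __m128 vx4567 = _mm_loadu_ps(x + 4);

  __m128 vacc0123 = _mm_add_ps(_mm_mul_ps(vx0123, vsixth), vhalf);
  __m128 vacc4567 = _mm_add_ps(_mm_mul_ps(vx4567, vsixth), vhalf);
  vacc0123 = _mm_min_ps(_mm_max_ps(vacc0123, vzero), vone);
  vacc4567 = _mm_min_ps(_mm_max_ps(vacc4567, vzero), vone);
  vacc0123 = _mm_mul_ps(vacc0123, vx0123);
  vacc4567 = _mm_mul_ps(vacc4567, vx4567);

  _mm_storeu_ps(y, vacc0123);
  _mm_storeu_ps(y + 4, vacc4567);
}
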
/external/XNNPACK/src/qs8-gavgpool/gen/
7p7x-minmax-neon-c8-acc2.c
63 const int32x4_t vacc4567 = vaddw_s16(vbias, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local
66 vst1q_s32(b, vacc4567); b += 4; in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
100 int32x4_t vacc4567 = vld1q_s32(b + 4); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local
103 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
106 vst1q_s32(b, vacc4567); b += 4; in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
165 int32x4_t vacc4567 = vld1q_s32(buffer); buffer += 4; in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local
168 vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
171 const int32x4_t vsgnacc4567 = vreinterpretq_s32_u32(vcltq_s32(vacc4567, vmovq_n_s32(0))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
176 const int64x2_t vprod45 = vmull_s32(vget_low_s32(vacc4567), vget_low_s32(vmultiplier)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
177 const int64x2_t vprod67 = vmull_high_s32(vacc4567, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
[all …]
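
The 7p7x global average pooling kernel sums seven input rows at a time in int16 and widens the partial sums into an int32 buffer with vaddw_s16: the first pass seeds the buffer from the bias, the middle passes reload and keep adding, and the final pass applies the 64-bit multiply requantization seen in the vmull_s32/vmull_high_s32 lines. A sketch of the middle-pass accumulation for one 8-channel group, with illustrative names; the row summation and the requantization are omitted:

#include <arm_neon.h>
#include <stdint.h>

/* Add one 7-row int16 partial sum into the int32 accumulation buffer. */
static void gavgpool_accumulate_sketch(
    const int16x8_t vacc0x01234567,  /* sum of 7 input rows, 8 channels */
    int32_t* b)                      /* int32 accumulation buffer       */
{
  int32x4_t vacc0123 = vld1q_s32(b);
  int32x4_t vacc4567 = vld1q_s32(b + 4);
  vacc0123 = vaddw_s16(vacc0123, vget_low_s16(vacc0x01234567));
  vacc4567 = vaddw_s16(vacc4567, vget_high_s16(vacc0x01234567));
  vst1q_s32(b, vacc0123);
  vst1q_s32(b + 4, vacc4567);
}
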
