/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-wasmsimd-c8-acc2.c | 50 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 60 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 95 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 105 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 160 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 170 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 230 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 240 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
|
D | 7p7x-minmax-sse41-c8-acc2.c | 50 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 61 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 96 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 107 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 161 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 172 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 229 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 240 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
|
D | 7p7x-minmax-sse2-c8-acc2.c | 62 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 68 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 116 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 122 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 189 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 195 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 273 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 279 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
|
D | 7p7x-minmax-ssse3-c8-acc2.c | 62 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 68 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 116 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 122 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 189 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 195 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 270 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 276 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
|
D | 7p7x-minmax-wasmsimd-c24-acc2.c | 56 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 76 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 118 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 128 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 172 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 192 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 234 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 244 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 308 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 328 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() [all …]
|
D | 7p7x-minmax-sse41-c24-acc2.c | 56 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 77 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 119 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 130 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 174 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 195 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 237 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 248 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 311 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 332 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() [all …]
|
D | 7p7x-minmax-wasmsimd-c16-acc2.c | 53 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 68 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 115 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 130 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 197 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 212 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 300 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 310 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
|
D | 7p7x-minmax-sse41-c16-acc2.c | 53 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 69 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 116 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 132 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 198 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 214 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 296 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 307 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
|
D | 7x-minmax-wasmsimd-c8-acc2.c | 68 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local 78 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() 137 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local 147 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
|
D | 7x-minmax-sse41-c8-acc2.c | 67 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local 78 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() 134 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local 145 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
|
D | 7p7x-minmax-sse2-c24-acc2.c | 82 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 98 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 155 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 161 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 232 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 248 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 305 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 311 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 401 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 417 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() [all …]
|
D | 7p7x-minmax-ssse3-c24-acc2.c | 82 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 98 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 155 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 161 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 232 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 248 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 305 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 311 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 401 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 417 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() [all …]
|
D | 7x-minmax-ssse3-c8-acc2.c | 79 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 85 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() 159 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 165 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
|
D | 7p7x-minmax-ssse3-c16-acc2.c | 72 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 83 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 151 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 162 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 249 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 260 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 365 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 371 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
|
D | 7x-minmax-sse2-c8-acc2.c | 79 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 85 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() 162 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 168 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
|
D | 7x-minmax-sse41-c16-acc2.c | 70 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local 86 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() 167 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local 178 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
|
D | 7x-minmax-wasmsimd-c16-acc2.c | 71 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local 86 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() 173 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local 183 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
|
D | 7p7x-minmax-sse2-c16-acc2.c | 72 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 83 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 151 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 162 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 249 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 260 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 370 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 376 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
|
D | 7x-minmax-sse41-c24-acc2.c | 73 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 94 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 202 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 213 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
|
D | 7x-minmax-ssse3-c16-acc2.c | 89 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local 100 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() 204 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local 210 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
|
D | 7x-minmax-wasmsimd-c24-acc2.c | 74 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local 94 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 211 const v128_t vxi3x01234567 = wasm_i16x8_load_8x8(i3); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local 221 v128_t vacc1x01234567 = wasm_i16x8_add(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
|
D | 7x-minmax-sse2-c16-acc2.c | 89 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local 100 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() 209 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local 215 __m128i vacc1x01234567 = _mm_add_epi16(vxi2x01234567, vxi3x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse41-mul16.c | 127 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(vi3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 133 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 134 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 292 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(vi3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() local 297 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16() 298 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
|
D | up8x9-minmax-sse2-mul16.c | 130 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 133 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 134 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 315 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 318 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 319 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 130 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 133 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 134 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 315 …const __m128i vxi3x01234567 = _mm_unpacklo_epi8(vi3x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), … in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 318 const __m128i vp3x01234567lo = _mm_mullo_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 319 const __m128i vp3x01234567hi = _mm_mulhi_epi16(vxi3x01234567, vxk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|