/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x4c8-xw-minmax-wasmsimd.c
    108  …const v128_t vacc1x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x…  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd() local
    111  …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x02, vacc1x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x02, …  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd()

D | 2x4c8-minmax-wasmsimd-ld128.c
    112  …const v128_t vacc1x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
    115  …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x02, vacc1x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x02, …  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128()

D | 2x4c8-minmax-wasmsimd-ld64.c
    108  …const v128_t vacc1x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
    111  …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x02, vacc1x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x02, …  in xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64()

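In these 4c8 wasmsimd kernels, vacc1x13 is an intermediate that pairs the column-1 and column-3 accumulators of row 1. The truncated lines above are all instances of the same shuffle/add ladder, which folds four per-column int32x4 accumulators into one vector whose lane k is the horizontal sum of accumulator k. A minimal, self-contained sketch of that idiom follows; it is not the generated kernel, and the helper name hsum4x4_i32 is invented for illustration.

// Sketch of the wasmsimd horizontal-reduction ladder (illustrative only).
#include <wasm_simd128.h>

static inline v128_t hsum4x4_i32(v128_t vacc0, v128_t vacc1, v128_t vacc2, v128_t vacc3) {
  // Interleave lane pairs of columns (0,2) and (1,3), then add, so each
  // partial result holds two half-sums per column.
  const v128_t vacc02 = wasm_i32x4_add(
      wasm_v32x4_shuffle(vacc0, vacc2, 0, 4, 1, 5),
      wasm_v32x4_shuffle(vacc0, vacc2, 2, 6, 3, 7));
  const v128_t vacc13 = wasm_i32x4_add(
      wasm_v32x4_shuffle(vacc1, vacc3, 0, 4, 1, 5),
      wasm_v32x4_shuffle(vacc1, vacc3, 2, 6, 3, 7));
  // One more interleave + add: lane k of the result is the full horizontal
  // sum of vacck, i.e. the dot-product total for output column k.
  return wasm_i32x4_add(
      wasm_v32x4_shuffle(vacc02, vacc13, 0, 4, 1, 5),
      wasm_v32x4_shuffle(vacc02, vacc13, 2, 6, 3, 7));
}
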
D | 2x4c8-xw-minmax-sse2.c
    93   …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va…  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2() local
    96   …1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x…  in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2()

D | 2x4c8-minmax-sse2-ld128.c
    95   …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128() local
    98   …1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128()

D | 2x4c8-minmax-sse2-ld64.c
    97   …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64() local
    100  …1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x…  in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64()

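The SSE2 entries perform the same reduction, with _mm_unpacklo_epi32/_mm_unpackhi_epi32 playing the role of the wasm shuffles. A hedged sketch under the same assumptions (invented helper name, not the generated kernel):

// Sketch of the SSE2 horizontal-reduction ladder (illustrative only).
#include <emmintrin.h>

static inline __m128i hsum4x4_epi32(__m128i vacc0, __m128i vacc1, __m128i vacc2, __m128i vacc3) {
  // Interleave columns (0,2) and (1,3) and add lane pairs.
  const __m128i vacc02 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0, vacc2),
                                       _mm_unpackhi_epi32(vacc0, vacc2));
  const __m128i vacc13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1, vacc3),
                                       _mm_unpackhi_epi32(vacc1, vacc3));
  // One more interleave + add: lane k of the result is the horizontal sum of vacck.
  return _mm_add_epi32(_mm_unpacklo_epi32(vacc02, vacc13),
                       _mm_unpackhi_epi32(vacc02, vacc13));
}
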
D | 2x16c8-minmax-neon-mull-padal.c
    79   int32x4_t vacc1x13 = vacc0x13;  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
    159  vacc1x13 = vpadalq_s16(vacc1x13, vprod1x13);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
    189  const int32x4_t vsum1xCD = vpaddq_s32(vacc1x12, vacc1x13);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
    250  const int32x2_t vpsum1xD = vadd_s32(vget_low_s32(vacc1x13), vget_high_s32(vacc1x13));  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()

D | 2x16c8-minmax-neon-mlal-padal.c
    79   int32x4_t vacc1x13 = vacc0x13;  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    205  vacc1x13 = vpadalq_s16(vacc1x13, vprod1x13);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    298  vacc1x13 = vpadalq_s16(vacc1x13, vprod1x13);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    328  const int32x4_t vsum1xCD = vpaddq_s32(vacc1x12, vacc1x13);  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    389  const int32x2_t vpsum1xD = vadd_s32(vget_low_s32(vacc1x13), vget_high_s32(vacc1x13));  in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal()

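Note that in these 16-column NEON kernels vacc1x13 names the accumulator for row 1, column 13, not a column-1/column-3 pair: int16 products are pairwise-accumulated into it with vpadalq_s16, adjacent column accumulators are folded with vpaddq_s32 on AArch64 (the vsum1xCD lines), and the fallback path sums halves with vget_low_s32/vget_high_s32 + vadd_s32. A minimal sketch of one column's reduction, under the assumption of invented names (dot8_qs8 is not an XNNPACK function):

// Sketch of the NEON int8 dot-product accumulation and reduction (illustrative only).
#include <arm_neon.h>

static inline int32_t dot8_qs8(int8x8_t va, int8x8_t vb, int32x4_t vacc) {
  const int16x8_t vprod = vmull_s8(va, vb);   // 8 x int16 products
  vacc = vpadalq_s16(vacc, vprod);            // pairwise add-accumulate into 4 x int32
#if defined(__aarch64__)
  return vaddvq_s32(vacc);                    // horizontal sum of the 4 lanes
#else
  // AArch32 path: sum low/high halves, then reduce the remaining pair.
  const int32x2_t vsum = vadd_s32(vget_low_s32(vacc), vget_high_s32(vacc));
  return vget_lane_s32(vpadd_s32(vsum, vsum), 0);
#endif
}
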
D | 3x4c8-xw-minmax-wasmsimd.c
    132  …const v128_t vacc1x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x…  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local
    137  …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x02, vacc1x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x02, …  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()

D | 3x4c8-minmax-wasmsimd-ld128.c
    136  …const v128_t vacc1x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x…  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    141  …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x02, vacc1x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x02, …  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()

D | 3x4c8-minmax-wasmsimd-ld64.c
    132  …const v128_t vacc1x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x…  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    137  …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x02, vacc1x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x02, …  in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()

D | 2x16c16-minmax-neon-mlal-padal.c
    79   int32x4_t vacc1x13 = vacc0x13;  in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
    189  vacc1x13 = vpadalq_s16(vacc1x13, vprod1x13);  in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
    221  const int32x4_t vsum1xCD = vpaddq_s32(vacc1x12, vacc1x13);  in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
    282  const int32x2_t vpsum1xD = vadd_s32(vget_low_s32(vacc1x13), vget_high_s32(vacc1x13));  in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()

D | 3x4c8-minmax-sse2-ld64.c
    114  …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va…  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() local
    119  …1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x…  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()

D | 3x4c8-minmax-sse2-ld128.c
    112  …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va…  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() local
    117  …1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x…  in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()

D | 3x4c8-xw-minmax-sse2.c
    110  …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va…  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() local
    115  …1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x…  in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()

/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x4c8-minmax-wasmsimd-ld128.c
    127  …const v128_t vacc1x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128() local
    130  …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x02, vacc1x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x02, …  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld128()

D | 2x4c8-minmax-wasmsimd-ld64.c
    123  …const v128_t vacc1x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64() local
    126  …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x02, vacc1x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x02, …  in xnn_qs8_igemm_minmax_ukernel_2x4c8__wasmsimd_ld64()

D | 2x4c8-minmax-sse2-ld128.c
    110  …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128() local
    113  …1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld128()

D | 2x4c8-minmax-sse2-ld64.c
    112  …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64() local
    115  …1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x…  in xnn_qs8_igemm_minmax_ukernel_2x4c8__sse2_ld64()

D | 2x16c8-minmax-neon-mull-padal.c
    80   int32x4_t vacc1x13 = vacc0x13;  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
    172  vacc1x13 = vpadalq_s16(vacc1x13, vprod1x13);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
    205  const int32x4_t vsum1xCD = vpaddq_s32(vacc1x12, vacc1x13);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
    266  const int32x2_t vpsum1xD = vadd_s32(vget_low_s32(vacc1x13), vget_high_s32(vacc1x13));  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()

D | 2x16c8-minmax-neon-mlal-padal.c
    80   int32x4_t vacc1x13 = vacc0x13;  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal() local
    218  vacc1x13 = vpadalq_s16(vacc1x13, vprod1x13);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    311  vacc1x13 = vpadalq_s16(vacc1x13, vprod1x13);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    344  const int32x4_t vsum1xCD = vpaddq_s32(vacc1x12, vacc1x13);  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()
    405  const int32x2_t vpsum1xD = vadd_s32(vget_low_s32(vacc1x13), vget_high_s32(vacc1x13));  in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mlal_padal()

D | 3x4c8-minmax-wasmsimd-ld128.c
    153  …const v128_t vacc1x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x…  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local
    158  …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x02, vacc1x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x02, …  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()

D | 3x4c8-minmax-wasmsimd-ld64.c
    149  …const v128_t vacc1x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x1, vacc1x3, 0, 4, 1, 5), wasm_v32x…  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local
    154  …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc1x02, vacc1x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc1x02, …  in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()

D | 3x4c8-minmax-sse2-ld128.c
    129  …const __m128i vacc1x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x1, vacc1x3), _mm_unpackhi_epi32(va…  in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() local
    134  …1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x…  in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()

D | 2x16c16-minmax-neon-mlal-padal.c
    80   int32x4_t vacc1x13 = vacc0x13;  in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal() local
    202  vacc1x13 = vpadalq_s16(vacc1x13, vprod1x13);  in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
    237  const int32x4_t vsum1xCD = vpaddq_s32(vacc1x12, vacc1x13);  in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
    298  const int32x2_t vpsum1xD = vadd_s32(vget_low_s32(vacc1x13), vget_high_s32(vacc1x13));  in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()