/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x4c8-xw-minmax-wasmsimd.c | 134 …const v128_t vacc2x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd() local 138 …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x02, … in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd()
|
D | 3x4c8-minmax-wasmsimd-ld128.c | 138 …const v128_t vacc2x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local 142 …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x02, … in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c | 134 …const v128_t vacc2x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local 138 …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x02, … in xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
D | 3x4c8-minmax-sse2-ld64.c | 116 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64() local 120 …2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64()
|
D | 3x4c8-minmax-sse2-ld128.c | 114 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128() local 118 …2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x… in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128()
|
D | 3x4c8-xw-minmax-sse2.c | 112 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2() local 116 …2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x… in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2()
|
D | 3x16c8-minmax-neon-mull-padal.c | 101 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() local 210 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 252 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal() 345 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 101 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 271 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 399 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 441 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal() 534 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 101 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 254 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 300 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal() 393 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 107 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() local 260 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 307 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal() 412 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 107 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 336 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 499 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 546 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal() 651 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 107 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 318 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 371 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal() 476 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x4c8-minmax-wasmsimd-ld128.c | 155 …const v128_t vacc2x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128() local 159 …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x02, … in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld128()
|
D | 3x4c8-minmax-wasmsimd-ld64.c | 151 …const v128_t vacc2x13 = wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x1, vacc2x3, 0, 4, 1, 5), wasm_v32x… in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64() local 155 …= wasm_i32x4_add(wasm_v32x4_shuffle(vacc2x02, vacc2x13, 0, 4, 1, 5), wasm_v32x4_shuffle(vacc2x02, … in xnn_qs8_igemm_minmax_ukernel_3x4c8__wasmsimd_ld64()
|
D | 3x4c8-minmax-sse2-ld128.c | 131 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128() local 135 …2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld128()
|
D | 3x4c8-minmax-sse2-ld64.c | 133 …const __m128i vacc2x13 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x1, vacc2x3), _mm_unpackhi_epi32(va… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64() local 137 …2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x… in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse2_ld64()
|
D | 3x16c8-minmax-neon-mull-padal.c | 100 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() local 225 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 270 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal() 363 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mull_padal()
|
D | 3x16c8-minmax-neon-mlal-padal.c | 100 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() local 286 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 414 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 459 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal() 552 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__neon_mlal_padal()
|
D | 3x16c16-minmax-neon-mlal-padal.c | 100 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() local 269 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 318 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal() 411 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
|
D | 4x16c8-minmax-neon-mull-padal.c | 104 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() local 277 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 327 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal() 432 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mull_padal()
|
D | 4x16c8-minmax-neon-mlal-padal.c | 104 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() local 353 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 516 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 566 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal() 671 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__neon_mlal_padal()
|
D | 4x16c16-minmax-neon-mlal-padal.c | 104 int32x4_t vacc2x13 = vacc0x13; in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() local 335 vacc2x13 = vpadalq_s16(vacc2x13, vprod2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 391 const int32x4_t vsum2xCD = vpaddq_s32(vacc2x12, vacc2x13); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal() 496 const int32x2_t vpsum2xD = vadd_s32(vget_low_s32(vacc2x13), vget_high_s32(vacc2x13)); in xnn_qs8_igemm_minmax_ukernel_4x16c16__neon_mlal_padal()
|