/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c2-minmax-fp32-neon-mlal-ld2r.c | 63 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() local 97 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() 108 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 64 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() local 98 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() 109 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r()
|
D | 1x8c2-minmax-rndnu-neon-mlal-ld2r.c | 63 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r() local 97 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r() 108 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 74 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 128 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 147 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
|
D | 2x8c2-minmax-rndnu-neon-mlal-ld2r.c | 73 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local 127 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() 146 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld2r.c | 73 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local 127 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() 146 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
|
D | 1x16c2-minmax-rndnu-neon-mlal-ld2r.c | 65 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() local 123 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() 142 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c2-minmax-rndnu-neon-mlal-ld2r.c | 52 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r() local 85 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r() 96 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neon-mlal-ld2r.c | 52 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() local 85 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() 96 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 53 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() local 86 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() 97 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 61 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 114 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 133 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
|
D | 2x8c2-minmax-rndnu-neon-mlal-ld2r.c | 60 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() local 113 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r() 132 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld2r.c | 60 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local 113 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() 132 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
|
D | 1x8c4-minmax-fp32-neonv8-mlal-ld1r.c | 56 const int32x2_t va01x1 = vld1_dup_s32((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r() local 87 const int8x8_t va0c1x1 = vreinterpret_s8_s32(va01x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld1r()
|
D | 1x16c2-minmax-rndnu-neon-mlal-ld2r.c | 54 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() local 111 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r() 130 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c | 55 const int16x4_t va01x1 = vld1_dup_s16((const void*)(a0 + 2)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() local 79 const int8x8_t va0c1x1 = vreinterpret_s8_s16(va01x1); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 1x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 64 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() local 98 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() 109 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neon-mlal-ld2r.c | 63 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() local 97 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() 108 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld2r.c | 73 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local 127 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() 146 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 74 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 128 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 147 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qc8_igemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld1r.c | 66 const int16x4_t va01x1 = vld1_dup_s16((const void*)(a0 + 2)); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r() local 91 const int8x8_t va0c1x1 = vreinterpret_s8_s16(va01x1); in xnn_qc8_igemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld1r()
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 1x8c2-minmax-fp32-neon-mlal-ld2r.c | 52 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() local 85 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r() 96 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neon_mlal_ld2r()
|
D | 1x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 53 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() local 86 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r() 97 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qc8_gemm_minmax_fp32_ukernel_1x8c2__neonv8_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neon-mlal-ld2r.c | 60 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() local 113 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r() 132 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neon_mlal_ld2r()
|
D | 2x8c2-minmax-fp32-neonv8-mlal-ld2r.c | 61 const int16x4x2_t va01x1 = vld2_dup_s16((const void*)(a0 + 4)); a0 += 8; in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() local 114 const int8x8_t va0c2x1 = vreinterpret_s8_s16(va01x1.val[0]); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r() 133 const int8x8_t va0c3x1 = vreinterpret_s8_s16(va01x1.val[1]); in xnn_qc8_gemm_minmax_fp32_ukernel_2x8c2__neonv8_mlal_ld2r()
|