/external/XNNPACK/src/qs8-gemm/gen/ |
D | 2x8c2-minmax-rndnu-neon-mull-ld2r.c | 73 const int8x8_t va1c0 = vreinterpret_s8_s16(va10.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r() local 132 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r() local
|
D | 2x8c2-minmax-rndnu-neon-mull-ld4r.c | 71 const int8x8_t va1c0 = vreinterpret_s8_s16(va1.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local 130 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local
|
D | 2x8c2-minmax-rndnu-neon-mull-dup.c | 71 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup() local 130 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup() local
|
D | 2x8c2-minmax-rndnu-neon-mull-ld1r.c | 77 const int8x8_t va1c0 = vreinterpret_s8_s16(va10); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r() local 136 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r() local
|
D | 2x8c4-minmax-rndnu-neon-mull-ld2r.c | 76 const int8x8_t va1c0 = vreinterpret_s8_s32(va1.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r() local 135 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r() local
|
D | 2x8c4-minmax-rndnu-neon-mull-ld1r.c | 78 const int8x8_t va1c0 = vreinterpret_s8_s32(va10); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r() local 137 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r() local
|
D | 2x8c4-minmax-rndnu-neon-mull-dup.c | 76 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup() local 135 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup() local
|
D | 3x8c2-minmax-rndnu-neon-mull-dup.c | 80 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local 160 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local
|
D | 3x8c2-minmax-rndnu-neon-mull-ld2r.c | 83 const int8x8_t va1c0 = vreinterpret_s8_s16(va10.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local 163 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local
|
D | 3x8c2-minmax-rndnu-neon-mull-ld4r.c | 80 const int8x8_t va1c0 = vreinterpret_s8_s16(va1.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local 160 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local
|
D | 3x8c4-minmax-rndnu-neon-mull-ld2r.c | 87 const int8x8_t va1c0 = vreinterpret_s8_s32(va1.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r() local 165 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r() local
|
D | 3x8c2-minmax-rndnu-neon-mull-ld1r.c | 89 const int8x8_t va1c0 = vreinterpret_s8_s16(va10); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local 169 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local
|
D | 3x8c4-minmax-rndnu-neon-mull-dup.c | 87 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup() local 165 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup() local
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 2x8c2-minmax-rndnu-neon-mull-dup.c | 84 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup() local 143 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup() local
|
D | 2x8c2-minmax-rndnu-neon-mull-ld4r.c | 84 const int8x8_t va1c0 = vreinterpret_s8_s16(va1.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local 143 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local
|
D | 2x8c4-minmax-rndnu-neon-mull-dup.c | 89 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup() local 148 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup() local
|
D | 2x8c2-minmax-rndnu-neon-mull-ld1r.c | 90 const int8x8_t va1c0 = vreinterpret_s8_s16(va10); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r() local 149 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r() local
|
D | 2x8c2-minmax-rndnu-neon-mull-ld2r.c | 86 const int8x8_t va1c0 = vreinterpret_s8_s16(va10.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r() local 145 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r() local
|
D | 2x8c4-minmax-rndnu-neon-mull-ld2r.c | 89 const int8x8_t va1c0 = vreinterpret_s8_s32(va1.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r() local 148 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r() local
|
D | 2x8c4-minmax-rndnu-neon-mull-ld1r.c | 91 const int8x8_t va1c0 = vreinterpret_s8_s32(va10); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r() local 150 const int8x8_t va1c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r() local
|
D | 3x8c2-minmax-rndnu-neon-mull-dup.c | 95 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local 175 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup() local
|
D | 3x8c2-minmax-rndnu-neon-mull-ld4r.c | 95 const int8x8_t va1c0 = vreinterpret_s8_s16(va1.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local 175 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r() local
|
D | 3x8c2-minmax-rndnu-neon-mull-ld2r.c | 98 const int8x8_t va1c0 = vreinterpret_s8_s16(va10.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local 178 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r() local
|
D | 3x8c2-minmax-rndnu-neon-mull-ld1r.c | 104 const int8x8_t va1c0 = vreinterpret_s8_s16(va10); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local 184 const int8x8_t va1c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va1), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r() local
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x2-minmax-neonfma-lane-ld64.c | 82 const float32x2_t va1c0 = vdup_lane_f32(va1, 0); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() local
|