/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x8c2-minmax-rndnu-neon-mull-dup.c | 61 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup() local 95 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup() local
|
D | 1x8c2-minmax-rndnu-neon-mull-ld4r.c | 61 const int8x8_t va0c0 = vreinterpret_s8_s16(va0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r() local 95 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r() local
|
D | 1x8c2-minmax-rndnu-neon-mull-ld1r.c | 64 const int8x8_t va0c0 = vreinterpret_s8_s16(va00); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r() local 98 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r() local
|
D | 1x8c2-minmax-rndnu-neon-mull-ld2r.c | 62 const int8x8_t va0c0 = vreinterpret_s8_s16(va00.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r() local 96 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r() local
|
D | 1x8c4-minmax-rndnu-neon-mull-ld1r.c | 65 const int8x8_t va0c0 = vreinterpret_s8_s32(va00); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r() local 97 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r() local
|
D | 1x8c4-minmax-rndnu-neon-mull-ld2r.c | 64 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r() local 96 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r() local
|
D | 1x8c4-minmax-rndnu-neon-mull-dup.c | 64 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup() local 96 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup() local
|
D | 1x8c4-minmax-fp32-neonv8-mlal-ld2r.c | 119 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r() local 151 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r() local
|
D | 2x8c2-minmax-rndnu-neon-mull-dup.c | 70 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup() local 125 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup() local
|
D | 1x8c4-minmax-rndnu-neon-mlal-ld2r.c | 118 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r() local 150 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r() local
|
D | 2x8c2-minmax-rndnu-neon-mull-ld4r.c | 70 const int8x8_t va0c0 = vreinterpret_s8_s16(va0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local 125 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local
|
D | 1x8c4-minmax-fp32-neonv8-mlal-dup.c | 119 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup() local 151 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_dup() local
|
D | 2x8c2-minmax-rndnu-neon-mull-ld2r.c | 72 const int8x8_t va0c0 = vreinterpret_s8_s16(va00.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r() local 127 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r() local
|
D | 1x8c4-minmax-rndnu-neon-mlal-dup.c | 118 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup() local 150 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup() local
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x8c2-minmax-rndnu-neon-mull-ld2r.c | 73 const int8x8_t va0c0 = vreinterpret_s8_s16(va00.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r() local 107 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r() local
|
D | 1x8c2-minmax-rndnu-neon-mull-dup.c | 72 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup() local 106 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup() local
|
D | 1x8c2-minmax-rndnu-neon-mull-ld4r.c | 72 const int8x8_t va0c0 = vreinterpret_s8_s16(va0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r() local 106 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r() local
|
D | 1x8c4-minmax-rndnu-neon-mull-ld2r.c | 75 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r() local 107 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r() local
|
D | 1x8c4-minmax-rndnu-neon-mull-ld1r.c | 76 const int8x8_t va0c0 = vreinterpret_s8_s32(va00); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r() local 108 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r() local
|
D | 1x8c4-minmax-rndnu-neon-mull-dup.c | 75 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup() local 107 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup() local
|
D | 1x8c2-minmax-rndnu-neon-mull-ld1r.c | 75 const int8x8_t va0c0 = vreinterpret_s8_s16(va00); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r() local 109 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r() local
|
D | 2x8c2-minmax-rndnu-neon-mull-ld4r.c | 83 const int8x8_t va0c0 = vreinterpret_s8_s16(va0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local 138 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r() local
|
D | 2x8c2-minmax-rndnu-neon-mull-dup.c | 83 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup() local 138 const int8x8_t va0c0 = vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_s16_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup() local
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 1x8-minmax-neon-dup-ld64.c | 53 const float32x4_t va0c0 = vdupq_lane_f32(va0, 0); in xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64() local
|
/external/XNNPACK/src/f16-gemm/gen/ |
D | 1x8-minmax-neonfp16arith-ld64.c | 56 const float16x8_t va0c0 = vdupq_lane_f16(va0, 0); in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
|