/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup():
      147  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      149  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      152  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
      155  const int16x8_t vprod2x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va2, 5));
      158  const int16x8_t vprod3x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va3, 5));
      278  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      280  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      283  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
      286  const int16x8_t vprod2x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va2, 5));
      289  const int16x8_t vprod3x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va3, 5));
|
D | 3x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup():
      123  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      125  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      128  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
      131  const int16x8_t vprod2x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va2, 5));
      229  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      231  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      234  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
      237  const int16x8_t vprod2x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va2, 5));
|
D | 2x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup():
       99  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      101  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      104  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
      180  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      182  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      185  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
|
D | 1x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup():
       75  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
       77  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      131  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      133  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
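Note: all of the mull-addw-dup matches above are instances of one inner step, repeated per row (va0..va3) and per k-lane (c5 here). A minimal sketch of that step follows; the function shape and the vacc0_lo/vacc0_hi accumulator names are illustrative assumptions, while the widening adds (vaddw_s16) are the lines that immediately follow each match in the generated kernels.

    #include <arm_neon.h>

    /* One "mull-addw-dup" step: 8 int8 weights times one broadcast int8
     * activation lane, widened and accumulated into int32 lanes. */
    static inline void qs8_mull_addw_dup_step(
        const int8_t* w,       /* packed weights: 8 x int8 for this k-step */
        int8x8_t va0,          /* 8 activations of row 0                   */
        int32x4_t* vacc0_lo,   /* row-0 accumulators, output columns 0..3  */
        int32x4_t* vacc0_hi)   /* row-0 accumulators, output columns 4..7  */
    {
      const int8x8_t vb01234567c5 = vld1_s8(w);
      /* Broadcast activation lane 5; the int8*int8 products are exact in 16 bits. */
      const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      /* Widening adds fold the 16-bit products into the 32-bit accumulators. */
      *vacc0_lo = vaddw_s16(*vacc0_lo, vget_low_s16(vprod0x01234567c5));
      *vacc0_hi = vaddw_s16(*vacc0_hi, vget_high_s16(vprod0x01234567c5));
    }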
|
D | 1x8-minmax-rndnu-neon-mlal-lane-prfm.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm():
       79  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       80  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      136  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      137  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
D | 1x8-minmax-rndnu-neon-mlal-lane.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane():
       78  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       79  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      135  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      136  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
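The mlal-lane kernels above (and the qc8/qu8 ones further down) use a different widening scheme: the weights are sign-extended to 16 bits once (vmovl_s8, the second line of each match) and then multiplied-and-accumulated straight into the 32-bit accumulators with vmlal_lane_s16. A sketch under the same caveats, with vacc0_lo/vacc0_hi and the function shape again assumed for illustration:

    #include <arm_neon.h>

    /* One "mlal-lane" step for k-lane 5: widen int8 weights to int16, then
     * fused multiply-accumulate against one lane of the widened activations. */
    static inline void qs8_mlal_lane_step(
        const int8_t* w,
        int16x8_t vxa0,        /* row-0 activations, already widened to 16 bits */
        int32x4_t* vacc0_lo,   /* accumulators, output columns 0..3 */
        int32x4_t* vacc0_hi)   /* accumulators, output columns 4..7 */
    {
      const int8x8_t vb01234567c5 = vld1_s8(w);
      const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      /* k-lane 5 sits in lane 1 of the high half of vxa0. */
      *vacc0_lo = vmlal_lane_s16(*vacc0_lo, vget_low_s16(vxb01234567c5), vget_high_s16(vxa0), 1);
      *vacc0_hi = vmlal_lane_s16(*vacc0_hi, vget_high_s16(vxb01234567c5), vget_high_s16(vxa0), 1);
    }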
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup():
      164  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      166  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      169  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
      172  const int16x8_t vprod2x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va2, 5));
      175  const int16x8_t vprod3x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va3, 5));
      295  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      297  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      300  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
      303  const int16x8_t vprod2x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va2, 5));
      306  const int16x8_t vprod3x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va3, 5));
|
D | 3x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup():
      138  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      140  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      143  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
      146  const int16x8_t vprod2x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va2, 5));
      244  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      246  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      249  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
      252  const int16x8_t vprod2x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va2, 5));
|
D | 2x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup():
      112  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      114  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      117  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
      193  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      195  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      198  const int16x8_t vprod1x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va1, 5));
|
D | 1x8-minmax-rndnu-neon-mlal-lane-prfm.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm():
       90  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       91  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      147  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      148  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
D | 1x8-minmax-rndnu-neon-mlal-lane.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane():
       89  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       90  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      146  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      147  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
D | 1x8-minmax-rndnu-neon-mull-addw-dup.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mull_addw_dup():
       86  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
       88  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
      142  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t));
      144  const int16x8_t vprod0x01234567c5 = vmull_s8(vb01234567c5, vdup_lane_s8(va0, 5));
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 1x8-minmax-fp32-neon-mlal-lane-prfm.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm():
       79  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       80  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      136  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      137  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
D | 1x8-minmax-fp32-neonv8-mlal-lane.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane():
       79  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       80  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      136  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      137  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
D | 1x8-minmax-fp32-neon-mlal-lane.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane():
       78  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       79  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      135  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      136  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
D | 1x8-minmax-fp32-neonv8-mlal-lane-prfm.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm():
       80  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       81  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      137  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      138  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 1x8-minmax-fp32-neon-mlal-lane.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane():
       89  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       90  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      146  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      147  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
D | 1x8-minmax-fp32-neonv8-mlal-lane.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane():
       90  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       91  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      147  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      148  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
D | 1x8-minmax-fp32-neon-mlal-lane-prfm.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane_prfm():
       90  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       91  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      147  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      148  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
D | 1x8-minmax-fp32-neonv8-mlal-lane-prfm.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x8__neonv8_mlal_lane_prfm():
       91  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
       92  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      148  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      149  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
D | 2x8-minmax-fp32-neonv8-mlal-lane.c | in xnn_qc8_igemm_minmax_fp32_ukernel_2x8__neonv8_mlal_lane():
      112  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      113  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
      187  const int8x8_t vb01234567c5 = vld1_s8(w); w = (const void*) ((const int8_t*) w + 8);
      188  const int16x8_t vxb01234567c5 = vmovl_s8(vb01234567c5);
|
/external/XNNPACK/src/qu8-gemm/gen/ |
D | 1x8-minmax-rndnu-neon-mlal-lane.c | in xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane():
       79  const uint8x8_t vb01234567c5 = vld1_u8(w); w = (const void*) ((const uint8_t*) w + 8);
       80  const int16x8_t vxb01234567c5 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c5, vb_zero_point));
      136  const uint8x8_t vb01234567c5 = vld1_u8(w); w = (const void*) ((const uint8_t*) w + 8);
      137  const int16x8_t vxb01234567c5 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c5, vb_zero_point));
|
D | 1x8-minmax-fp32-neon-mlal-lane.c | in xnn_qu8_gemm_minmax_fp32_ukernel_1x8__neon_mlal_lane():
       79  const uint8x8_t vb01234567c5 = vld1_u8(w); w = (const void*) ((const uint8_t*) w + 8);
       80  const int16x8_t vxb01234567c5 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c5, vb_zero_point));
      136  const uint8x8_t vb01234567c5 = vld1_u8(w); w = (const void*) ((const uint8_t*) w + 8);
      137  const int16x8_t vxb01234567c5 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c5, vb_zero_point));
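The qu8 matches differ from their qs8 counterparts only in the widening step: the weights are unsigned, so the kernel subtracts the weights' zero point while widening (vsubl_u8) and reinterprets the 16-bit difference as signed, after which the mlal-lane arithmetic is identical. A sketch of just that conversion; the helper shape and the pre-broadcast vb_zero_point parameter are assumptions, the intrinsic sequence matches the lines above:

    #include <arm_neon.h>

    /* qu8 weight widening: (uint8 weight - zero point) -> int16. */
    static inline int16x8_t qu8_widen_weights(
        const uint8_t* w,
        uint8x8_t vb_zero_point)  /* zero point broadcast to all 8 lanes */
    {
      const uint8x8_t vb01234567c5 = vld1_u8(w);
      /* vsubl_u8 widens to 16 bits while subtracting; reinterpreted as
       * signed, the result is the true difference in [-255, 255]. */
      return vreinterpretq_s16_u16(vsubl_u8(vb01234567c5, vb_zero_point));
    }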
|
/external/XNNPACK/src/qu8-igemm/gen/ |
D | 1x8-minmax-rndnu-neon-mlal-lane.c | in xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane():
       90  const uint8x8_t vb01234567c5 = vld1_u8(w); w = (const void*) ((const uint8_t*) w + 8);
       91  const int16x8_t vxb01234567c5 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c5, vb_zero_point));
      147  const uint8x8_t vb01234567c5 = vld1_u8(w); w = (const void*) ((const uint8_t*) w + 8);
      148  const int16x8_t vxb01234567c5 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c5, vb_zero_point));
|
D | 1x8-minmax-fp32-neon-mlal-lane.c | in xnn_qu8_igemm_minmax_fp32_ukernel_1x8__neon_mlal_lane():
       90  const uint8x8_t vb01234567c5 = vld1_u8(w); w = (const void*) ((const uint8_t*) w + 8);
       91  const int16x8_t vxb01234567c5 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c5, vb_zero_point));
      147  const uint8x8_t vb01234567c5 = vld1_u8(w); w = (const void*) ((const uint8_t*) w + 8);
      148  const int16x8_t vxb01234567c5 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c5, vb_zero_point));
|