/external/XNNPACK/src/qs8-igemm/gen/ |
D | 8x16c4-minmax-neondot.c | 161 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 171 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 175 vacc1x89AB = vdotq_lane_s32(vacc1x89AB, vb0123x89AB, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 179 vacc2x89AB = vdotq_lane_s32(vacc2x89AB, vb0123x89AB, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 183 vacc3x89AB = vdotq_lane_s32(vacc3x89AB, vb0123x89AB, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 187 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb0123x89AB, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 191 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 195 vacc6x89AB = vdotq_lane_s32(vacc6x89AB, vb0123x89AB, va6x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 199 vacc7x89AB = vdotq_lane_s32(vacc7x89AB, vb0123x89AB, va7x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 251 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local [all …]
|
D | 6x16c4-minmax-neondot.c | 135 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local 145 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 149 vacc1x89AB = vdotq_lane_s32(vacc1x89AB, vb0123x89AB, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 153 vacc2x89AB = vdotq_lane_s32(vacc2x89AB, vb0123x89AB, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 157 vacc3x89AB = vdotq_lane_s32(vacc3x89AB, vb0123x89AB, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 161 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb0123x89AB, va4x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 165 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 207 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local 213 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 217 vacc1x89AB = vdotq_lane_s32(vacc1x89AB, vb0123x89AB, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() [all …]
|
D | 4x16c4-minmax-neondot.c | 109 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local 119 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 123 vacc1x89AB = vdotq_lane_s32(vacc1x89AB, vb0123x89AB, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 127 vacc2x89AB = vdotq_lane_s32(vacc2x89AB, vb0123x89AB, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 131 vacc3x89AB = vdotq_lane_s32(vacc3x89AB, vb0123x89AB, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 163 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local 169 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 173 vacc1x89AB = vdotq_lane_s32(vacc1x89AB, vb0123x89AB, va1x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 177 vacc2x89AB = vdotq_lane_s32(vacc2x89AB, vb0123x89AB, va2x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 181 vacc3x89AB = vdotq_lane_s32(vacc3x89AB, vb0123x89AB, va3x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
|
D | 1x16c4-minmax-neondot.c | 70 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() local 80 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 97 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() local 103 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 8x16c4-minmax-neondot.c | 138 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 148 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 152 vacc1x89AB = vdotq_lane_s32(vacc1x89AB, vb0123x89AB, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 156 vacc2x89AB = vdotq_lane_s32(vacc2x89AB, vb0123x89AB, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 160 vacc3x89AB = vdotq_lane_s32(vacc3x89AB, vb0123x89AB, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 164 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb0123x89AB, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 168 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 172 vacc6x89AB = vdotq_lane_s32(vacc6x89AB, vb0123x89AB, va6x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 176 vacc7x89AB = vdotq_lane_s32(vacc7x89AB, vb0123x89AB, va7x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 228 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local [all …]
|
D | 6x16c4-minmax-neondot.c | 116 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local 126 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 130 vacc1x89AB = vdotq_lane_s32(vacc1x89AB, vb0123x89AB, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 134 vacc2x89AB = vdotq_lane_s32(vacc2x89AB, vb0123x89AB, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 138 vacc3x89AB = vdotq_lane_s32(vacc3x89AB, vb0123x89AB, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 142 vacc4x89AB = vdotq_lane_s32(vacc4x89AB, vb0123x89AB, va4x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 146 vacc5x89AB = vdotq_lane_s32(vacc5x89AB, vb0123x89AB, va5x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 188 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local 194 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 198 vacc1x89AB = vdotq_lane_s32(vacc1x89AB, vb0123x89AB, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() [all …]
|
D | 4x16c4-minmax-neondot.c | 94 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local 104 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 108 vacc1x89AB = vdotq_lane_s32(vacc1x89AB, vb0123x89AB, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 112 vacc2x89AB = vdotq_lane_s32(vacc2x89AB, vb0123x89AB, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 116 vacc3x89AB = vdotq_lane_s32(vacc3x89AB, vb0123x89AB, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 148 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local 154 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 158 vacc1x89AB = vdotq_lane_s32(vacc1x89AB, vb0123x89AB, va1x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 162 vacc2x89AB = vdotq_lane_s32(vacc2x89AB, vb0123x89AB, va2x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 166 vacc3x89AB = vdotq_lane_s32(vacc3x89AB, vb0123x89AB, va3x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
|
D | 1x16c4-minmax-neondot.c | 61 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() local 71 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 88 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() local 94 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
|