/external/XNNPACK/src/qs8-gemm/gen/ |
D | 8x8c4-minmax-neondot.c | 122 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local 142 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 144 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 146 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 148 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 150 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 152 vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 154 vacc6x0123 = vdotq_lane_s32(vacc6x0123, vb4567x0123, va6x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 156 vacc7x0123 = vdotq_lane_s32(vacc7x0123, vb4567x0123, va7x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
|
D | 6x8c4-minmax-neondot.c | 104 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() local 120 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 122 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 124 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 126 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 128 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 130 vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
|
D | 4x8c4-minmax-neondot.c | 86 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() local 98 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 100 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 102 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 104 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
|
D | 1x8c4-minmax-neondot.c | 59 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() local 65 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 140 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 178 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 182 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 186 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 190 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 194 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 198 vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 202 vacc6x0123 = vdotq_lane_s32(vacc6x0123, vb4567x0123, va6x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 206 vacc7x0123 = vdotq_lane_s32(vacc7x0123, vb4567x0123, va7x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
|
D | 6x16c4-minmax-neondot.c | 118 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local 148 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 152 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 156 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 160 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 164 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 168 vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
|
D | 4x16c4-minmax-neondot.c | 96 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local 118 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 122 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 126 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 130 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
|
D | 1x16c4-minmax-neondot.c | 63 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() local 73 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 8x8c4-minmax-neondot.c | 145 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local 165 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 167 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 169 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 171 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 173 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 175 vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 177 vacc6x0123 = vdotq_lane_s32(vacc6x0123, vb4567x0123, va6x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 179 vacc7x0123 = vdotq_lane_s32(vacc7x0123, vb4567x0123, va7x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
|
D | 6x8c4-minmax-neondot.c | 123 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() local 139 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 141 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 143 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 145 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 147 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 149 vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
|
D | 4x8c4-minmax-neondot.c | 101 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() local 113 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 115 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 117 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 119 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 163 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 201 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 205 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 209 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 213 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 217 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 221 vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 225 vacc6x0123 = vdotq_lane_s32(vacc6x0123, vb4567x0123, va6x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 229 vacc7x0123 = vdotq_lane_s32(vacc7x0123, vb4567x0123, va7x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
|
D | 1x8c4-minmax-neondot.c | 68 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() local 74 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
|
D | 6x16c4-minmax-neondot.c | 137 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local 167 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 171 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 175 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 179 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 183 vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 187 vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
|
D | 4x16c4-minmax-neondot.c | 111 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local 133 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 137 vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 141 vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 145 vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
|
D | 1x16c4-minmax-neondot.c | 72 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() local 82 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
|