/external/XNNPACK/src/qs8-igemm/gen/ |
D | 8x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot():
    162  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    172  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
    176  vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0);
    180  vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0);
    184  vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0);
    188  vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0);
    192  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0);
    196  vacc6xCDEF = vdotq_lane_s32(vacc6xCDEF, vb0123xCDEF, va6x01234567, 0);
    200  vacc7xCDEF = vdotq_lane_s32(vacc7xCDEF, vb0123xCDEF, va7x01234567, 0);
    252  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    [all …]
|
D | 6x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot():
    136  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    146  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
    150  vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0);
    154  vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0);
    158  vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0);
    162  vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0);
    166  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0);
    208  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    214  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
    218  vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0);
    [all …]
|
D | 4x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot():
    110  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    120  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
    124  vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0);
    128  vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0);
    132  vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0);
    164  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    170  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
    174  vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0);
    178  vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0);
    182  vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0);
|
D | 1x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot():
     71  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
     81  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
     98  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    104  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
|
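Every IGEMM hit above is the same two-step pattern: a 16-byte load of packed int8 weights into vb0123xCDEF, then one vdotq_lane_s32 per output row. As a rough scalar model of what a single lane-0 call contributes, the sketch below may help; it is illustrative only, not XNNPACK code, and the mapping of the four accumulator lanes to output columns C..F simply follows the variable naming above.

    #include <stdint.h>

    /* Sketch only: models vacc = vdotq_lane_s32(vacc, vb, va, 0) as used in the
     * listings above. Each of the four 32-bit accumulator lanes gains the dot
     * product of four consecutive int8 weights with the four int8 activations
     * selected by lane 0 of va (bytes va[0..3]). */
    static void vdotq_lane0_model(int32_t vacc[4], const int8_t vb[16],
                                  const int8_t va[8]) {
      for (int col = 0; col < 4; col++) {      /* accumulator lanes: columns C..F */
        for (int k = 0; k < 4; k++) {          /* input channels 0..3 */
          vacc[col] += (int32_t) vb[4 * col + k] * (int32_t) va[k];
        }
      }
    }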
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 8x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot():
    139  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    149  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
    153  vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0);
    157  vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0);
    161  vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0);
    165  vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0);
    169  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0);
    173  vacc6xCDEF = vdotq_lane_s32(vacc6xCDEF, vb0123xCDEF, va6x01234567, 0);
    177  vacc7xCDEF = vdotq_lane_s32(vacc7xCDEF, vb0123xCDEF, va7x01234567, 0);
    229  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    [all …]
|
D | 6x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot():
    117  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    127  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
    131  vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0);
    135  vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0);
    139  vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0);
    143  vacc4xCDEF = vdotq_lane_s32(vacc4xCDEF, vb0123xCDEF, va4x01234567, 0);
    147  vacc5xCDEF = vdotq_lane_s32(vacc5xCDEF, vb0123xCDEF, va5x01234567, 0);
    189  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    195  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
    199  vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0);
    [all …]
|
D | 4x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot():
     95  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    105  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
    109  vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0);
    113  vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0);
    117  vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0);
    149  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    155  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
    159  vacc1xCDEF = vdotq_lane_s32(vacc1xCDEF, vb0123xCDEF, va1x01234567, 0);
    163  vacc2xCDEF = vdotq_lane_s32(vacc2xCDEF, vb0123xCDEF, va2x01234567, 0);
    167  vacc3xCDEF = vdotq_lane_s32(vacc3xCDEF, vb0123xCDEF, va3x01234567, 0);
|
D | 1x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot():
     62  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
     72  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
     89  const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
     95  vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
|
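The GEMM variants are structurally the same as the IGEMM ones; only the setup of the activation pointers differs between the two directories. For orientation, a minimal single-row inner loop in the style of these kernels could look like the sketch below. It is hypothetical and simplified: the function name sketch_row_cdef and the vb4567xCDEF load are illustrative, the accumulator starts at zero rather than at packed biases, requantization is omitted, and the packed weights are assumed to hold only the C..F column block, whereas the real kernels interleave all 16 output columns per step.

    #include <arm_neon.h>   /* requires __ARM_FEATURE_DOTPROD (e.g. -march=armv8.2-a+dotprod) */
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical 1-row sketch of a c4 dot-product loop (not the XNNPACK kernel).
     * Assumes kc is a multiple of 8 and w points at packed int8 weights. */
    static void sketch_row_cdef(size_t kc, const int8_t* a, const void* w, int32_t out[4]) {
      int32x4_t vacc0xCDEF = vdupq_n_s32(0);               /* real kernels start from packed biases */
      for (size_t k = 0; k < kc; k += 8) {
        const int8x8_t va0x01234567 = vld1_s8(a); a += 8;  /* 8 activations for this row */
        /* 16 weights: input channels 0..3 for columns C, D, E, F, paired with lane 0. */
        const int8x16_t vb0123xCDEF = vld1q_s8(w);
        w = (const void*) ((const int8_t*) w + 16);
        vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0);
        /* 16 weights: input channels 4..7 for the same columns, paired with lane 1. */
        const int8x16_t vb4567xCDEF = vld1q_s8(w);
        w = (const void*) ((const int8_t*) w + 16);
        vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb4567xCDEF, va0x01234567, 1);
      }
      vst1q_s32(out, vacc0xCDEF);                          /* raw int32 sums; scaling/clamping omitted */
    }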