/external/XNNPACK/src/qs8-gemm/gen/

D | 8x8c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
    121  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    127  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    129  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    131  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    133  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    135  vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0);
    137  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    139  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    141  vacc7x4567 = vdotq_lane_s32(vacc7x4567, vb0123x4567, va7x01234567, 0);
    175  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    [all …]

D | 6x8c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
    103  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    109  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    111  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    113  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    115  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    117  vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0);
    119  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    147  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    151  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    153  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    [all …]

D | 4x8c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
    85   const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    91   vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    93   vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    95   vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    97   vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    119  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    123  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    125  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    127  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    129  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);

D | 1x8c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
    58   const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    64   vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    77   const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    81   vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);

D | 8x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
    137  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    147  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    151  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    155  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    159  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    163  vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0);
    167  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    171  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    175  vacc7x4567 = vdotq_lane_s32(vacc7x4567, vb0123x4567, va7x01234567, 0);
    227  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    [all …]

D | 6x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
    115  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    125  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    129  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    133  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    137  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    141  vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0);
    145  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    187  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    193  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    197  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    [all …]

D | 4x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
    93   const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    103  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    107  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    111  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    115  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    147  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    153  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    157  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    161  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    165  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);

D | 1x16c4-minmax-neondot.c | in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
    60   const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    70   vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    87   const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    93   vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);

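All of the qs8-gemm matches above are the same inner-loop step: a 16-byte block of packed int8 weights is loaded with vld1q_s8, the packed-weights pointer w advances by 16 bytes, and vdotq_lane_s32 adds four-element int8 dot products into the int32 accumulators for output columns 4..7, one accumulator per tile row. The following is a minimal stand-alone sketch of that load-and-accumulate step, not the XNNPACK code itself; the helper name and parameters are illustrative, and it assumes a toolchain exposing the Arm dot-product extension (__ARM_FEATURE_DOTPROD).

    #include <arm_neon.h>
    #include <stdint.h>

    #if defined(__ARM_FEATURE_DOTPROD)
    // Hypothetical helper: one k-block (4 int8 values) of one A row against
    // output columns 4..7 of the packed weights.
    static inline int32x4_t qs8_dot_cols4567(
        int32x4_t vacc0x4567,     // running int32 sums for columns 4..7
        const int8_t** w,         // packed weights; advanced by 16 bytes
        int8x8_t va0x01234567)    // 8 int8 activations; lane 0 = bytes 0..3
    {
      // Load 16 packed int8 weights (4 per output column) and step the pointer,
      // mirroring "vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);".
      const int8x16_t vb0123x4567 = vld1q_s8(*w);
      *w += 16;
      // Lane 0 selects activations a[0..3]; each int32 lane of the result
      // gains the dot product of those 4 activations with 4 weights.
      return vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    }
    #endif  // __ARM_FEATURE_DOTPROD
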
/external/XNNPACK/src/qs8-igemm/gen/

D | 8x8c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
    144  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    150  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    152  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    154  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    156  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    158  vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0);
    160  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    162  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    164  vacc7x4567 = vdotq_lane_s32(vacc7x4567, vb0123x4567, va7x01234567, 0);
    198  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    [all …]

D | 6x8c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
    122  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    128  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    130  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    132  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    134  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    136  vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0);
    138  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    166  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    170  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    172  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    [all …]

D | 4x8c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
    100  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    106  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    108  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    110  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    112  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    134  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    138  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    140  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    142  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    144  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);

D | 8x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
    160  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    170  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    174  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    178  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    182  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    186  vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0);
    190  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    194  vacc6x4567 = vdotq_lane_s32(vacc6x4567, vb0123x4567, va6x01234567, 0);
    198  vacc7x4567 = vdotq_lane_s32(vacc7x4567, vb0123x4567, va7x01234567, 0);
    250  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    [all …]

D | 1x8c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
    67   const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    73   vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    86   const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    90   vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);

D | 6x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
    134  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    144  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    148  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    152  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    156  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    160  vacc4x4567 = vdotq_lane_s32(vacc4x4567, vb0123x4567, va4x01234567, 0);
    164  vacc5x4567 = vdotq_lane_s32(vacc5x4567, vb0123x4567, va5x01234567, 0);
    206  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    212  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    216  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    [all …]

D | 4x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
    108  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    118  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    122  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    126  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    130  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);
    162  const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    168  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    172  vacc1x4567 = vdotq_lane_s32(vacc1x4567, vb0123x4567, va1x01234567, 0);
    176  vacc2x4567 = vdotq_lane_s32(vacc2x4567, vb0123x4567, va2x01234567, 0);
    180  vacc3x4567 = vdotq_lane_s32(vacc3x4567, vb0123x4567, va3x01234567, 0);

D | 1x16c4-minmax-neondot.c | in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
    69   const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    79   vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);
    96   const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    102  vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0);

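The qs8-igemm matches show the identical accumulate step; what varies across the 1x/4x/6x/8x kernels is only how many rows (MR) reuse each loaded weight vector. Below is a hedged MR=2 sketch of that row-broadcast structure, with illustrative names, not the generated code itself.

    #include <arm_neon.h>

    #if defined(__ARM_FEATURE_DOTPROD)
    // Hypothetical MR=2 tile step: the same 16 packed weights feed both rows'
    // column-4..7 accumulators, matching the vacc0x4567 .. vaccNx4567 runs above.
    static inline void qs8_dot_rows2_cols4567(
        int8x16_t vb0123x4567,      // shared packed weights for columns 4..7
        int8x8_t va0x01234567,      // row 0 activations
        int8x8_t va1x01234567,      // row 1 activations
        int32x4_t vacc_x4567[2])    // per-row accumulators for columns 4..7
    {
      vacc_x4567[0] = vdotq_lane_s32(vacc_x4567[0], vb0123x4567, va0x01234567, 0);
      vacc_x4567[1] = vdotq_lane_s32(vacc_x4567[1], vb0123x4567, va1x01234567, 0);
    }
    #endif  // __ARM_FEATURE_DOTPROD

Keeping vb0123x4567 live in a register while every row's accumulator consumes it is what the runs of consecutive matched lines above (for example 150, 152, 154, ... in the 8x8c4 kernel) correspond to.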