/external/XNNPACK/src/qc8-gemm/gen/ |
D | 8x8c4-minmax-fp32-neondot.c | in xnn_qc8_gemm_minmax_fp32_ukernel_8x8c4__neondot():
    124  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    144  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    146  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    148  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    150  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
    152  vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1);
    154  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    156  vacc6x0123 = vdotq_lane_s32(vacc6x0123, vb4567x0123, va6x01234567, 1);
    158  vacc7x0123 = vdotq_lane_s32(vacc7x0123, vb4567x0123, va7x01234567, 1);
|
D | 6x8c4-minmax-fp32-neondot.c | in xnn_qc8_gemm_minmax_fp32_ukernel_6x8c4__neondot():
    106  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    122  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    124  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    126  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    128  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
    130  vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1);
    132  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
|
D | 4x8c4-minmax-fp32-neondot.c | in xnn_qc8_gemm_minmax_fp32_ukernel_4x8c4__neondot():
    88   const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    100  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    102  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    104  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    106  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
|
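All of the qc8 and qs8 entries in this listing share the same inner-loop shape: vb4567x0123 is a 16-byte load of packed int8 weights covering k = 4..7 for output columns 0..3, and each vdotq_lane_s32 call folds four weight/activation products per column into a 32-bit accumulator, with lane 1 selecting activation bytes 4..7 of the 8-byte va*x01234567 vector (a companion vb0123x0123 load, not part of this cross-reference, pairs with lane 0). The sketch below is a minimal stand-alone illustration of that K = 8 step, not the XNNPACK source; the function and variable names are invented, and it assumes an AArch64 toolchain with the dot-product extension enabled (e.g. -march=armv8.2-a+dotprod).

#include <arm_neon.h>
#include <stdint.h>

#if defined(__ARM_FEATURE_DOTPROD)
// One K=8 step for one activation row and one group of 4 output columns.
//   w  points at 32 packed int8 weights: 16 bytes for k = 0..3 (4 bytes per
//      column) followed by 16 bytes for k = 4..7 (the vb4567x0123 load above).
//   va holds 8 consecutive int8 activations of this row (k = 0..7).
static inline int32x4_t gemm_c4_step_s8(int32x4_t vacc, const int8_t* w, int8x8_t va)
{
  const int8x16_t vb0123 = vld1q_s8(w);        // weights for k = 0..3, columns 0..3
  const int8x16_t vb4567 = vld1q_s8(w + 16);   // weights for k = 4..7, columns 0..3
  vacc = vdotq_lane_s32(vacc, vb0123, va, 0);  // consumes activation bytes 0..3
  vacc = vdotq_lane_s32(vacc, vb4567, va, 1);  // consumes activation bytes 4..7
  return vacc;
}
#endif  // __ARM_FEATURE_DOTPROD

An 8x8c4 kernel repeats this pair of updates for eight activation rows (va0x01234567 .. va7x01234567) and for a second 4-column group, which is what the runs of lane-1 updates listed above correspond to.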
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 8x8c4-minmax-fp32-neondot.c | in xnn_qc8_igemm_minmax_fp32_ukernel_8x8c4__neondot():
    146  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    166  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    168  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    170  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    172  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
    174  vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1);
    176  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    178  vacc6x0123 = vdotq_lane_s32(vacc6x0123, vb4567x0123, va6x01234567, 1);
    180  vacc7x0123 = vdotq_lane_s32(vacc7x0123, vb4567x0123, va7x01234567, 1);
|
D | 6x8c4-minmax-fp32-neondot.c | in xnn_qc8_igemm_minmax_fp32_ukernel_6x8c4__neondot():
    124  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    140  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    142  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    144  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    146  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
    148  vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1);
    150  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
|
D | 4x8c4-minmax-fp32-neondot.c | in xnn_qc8_igemm_minmax_fp32_ukernel_4x8c4__neondot():
    102  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    114  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    116  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    118  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    120  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 8x8c4-minmax-rndnu-neondot.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot():
    145  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    165  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    167  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    169  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    171  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
    173  vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1);
    175  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    177  vacc6x0123 = vdotq_lane_s32(vacc6x0123, vb4567x0123, va6x01234567, 1);
    179  vacc7x0123 = vdotq_lane_s32(vacc7x0123, vb4567x0123, va7x01234567, 1);
|
D | 6x8c4-minmax-rndnu-neondot.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot():
    123  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    139  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    141  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    143  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    145  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
    147  vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1);
    149  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
|
D | 4x8c4-minmax-rndnu-neondot.c | in xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neondot():
    101  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    113  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    115  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    117  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    119  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 8x8c4-minmax-rndnu-neondot.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_8x8c4__neondot():
    123  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    143  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    145  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    147  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    149  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
    151  vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1);
    153  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    155  vacc6x0123 = vdotq_lane_s32(vacc6x0123, vb4567x0123, va6x01234567, 1);
    157  vacc7x0123 = vdotq_lane_s32(vacc7x0123, vb4567x0123, va7x01234567, 1);
|
D | 6x8c4-minmax-rndnu-neondot.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot():
    105  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    121  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    123  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    125  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    127  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
    129  vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1);
    131  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
|
D | 4x8c4-minmax-rndnu-neondot.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neondot():
    87   const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    99   vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    101  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    103  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    105  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
|
D | 8x16c4-minmax-rndnu-neondot.c | in xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot():
    141  const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16);  (local)
    179  vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1);
    183  vacc1x0123 = vdotq_lane_s32(vacc1x0123, vb4567x0123, va1x01234567, 1);
    187  vacc2x0123 = vdotq_lane_s32(vacc2x0123, vb4567x0123, va2x01234567, 1);
    191  vacc3x0123 = vdotq_lane_s32(vacc3x0123, vb4567x0123, va3x01234567, 1);
    195  vacc4x0123 = vdotq_lane_s32(vacc4x0123, vb4567x0123, va4x01234567, 1);
    199  vacc5x0123 = vdotq_lane_s32(vacc5x0123, vb4567x0123, va5x01234567, 1);
    203  vacc6x0123 = vdotq_lane_s32(vacc6x0123, vb4567x0123, va6x01234567, 1);
    207  vacc7x0123 = vdotq_lane_s32(vacc7x0123, vb4567x0123, va7x01234567, 1);
|
/external/XNNPACK/src/qu8-igemm/gen/ |
D | 8x8c4-minmax-rndnu-neondot.c | in xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot():
    157  const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    164  vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    169  vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
    174  vpacc2x0123 = vdotq_lane_u32(vpacc2x0123, vb4567x0123, va2x01234567, 1);
    179  vpacc3x0123 = vdotq_lane_u32(vpacc3x0123, vb4567x0123, va3x01234567, 1);
    184  vpacc4x0123 = vdotq_lane_u32(vpacc4x0123, vb4567x0123, va4x01234567, 1);
    189  vpacc5x0123 = vdotq_lane_u32(vpacc5x0123, vb4567x0123, va5x01234567, 1);
    194  vpacc6x0123 = vdotq_lane_u32(vpacc6x0123, vb4567x0123, va6x01234567, 1);
    199  vpacc7x0123 = vdotq_lane_u32(vpacc7x0123, vb4567x0123, va7x01234567, 1);
|
D | 6x8c4-minmax-rndnu-neondot.c | in xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot():
    133  const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    140  vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    145  vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
    150  vpacc2x0123 = vdotq_lane_u32(vpacc2x0123, vb4567x0123, va2x01234567, 1);
    155  vpacc3x0123 = vdotq_lane_u32(vpacc3x0123, vb4567x0123, va3x01234567, 1);
    160  vpacc4x0123 = vdotq_lane_u32(vpacc4x0123, vb4567x0123, va4x01234567, 1);
    165  vpacc5x0123 = vdotq_lane_u32(vpacc5x0123, vb4567x0123, va5x01234567, 1);
|
D | 5x8c4-minmax-rndnu-neondot.c | in xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot():
    121  const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    128  vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    133  vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
    138  vpacc2x0123 = vdotq_lane_u32(vpacc2x0123, vb4567x0123, va2x01234567, 1);
    143  vpacc3x0123 = vdotq_lane_u32(vpacc3x0123, vb4567x0123, va3x01234567, 1);
    148  vpacc4x0123 = vdotq_lane_u32(vpacc4x0123, vb4567x0123, va4x01234567, 1);
|
D | 4x8c4-minmax-rndnu-neondot.c | in xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot():
    109  const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    116  vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    121  vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
    126  vpacc2x0123 = vdotq_lane_u32(vpacc2x0123, vb4567x0123, va2x01234567, 1);
    131  vpacc3x0123 = vdotq_lane_u32(vpacc3x0123, vb4567x0123, va3x01234567, 1);
|
D | 3x8c4-minmax-rndnu-neondot.c | in xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot():
    97   const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    104  vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    109  vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
    114  vpacc2x0123 = vdotq_lane_u32(vpacc2x0123, vb4567x0123, va2x01234567, 1);
|
D | 2x8c4-minmax-rndnu-neondot.c | in xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot():
    85   const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    92   vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    97   vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
|
/external/XNNPACK/src/qu8-gemm/gen/ |
D | 8x8c4-minmax-rndnu-neondot.c | in xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot():
    133  const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    140  vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    145  vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
    150  vpacc2x0123 = vdotq_lane_u32(vpacc2x0123, vb4567x0123, va2x01234567, 1);
    155  vpacc3x0123 = vdotq_lane_u32(vpacc3x0123, vb4567x0123, va3x01234567, 1);
    160  vpacc4x0123 = vdotq_lane_u32(vpacc4x0123, vb4567x0123, va4x01234567, 1);
    165  vpacc5x0123 = vdotq_lane_u32(vpacc5x0123, vb4567x0123, va5x01234567, 1);
    170  vpacc6x0123 = vdotq_lane_u32(vpacc6x0123, vb4567x0123, va6x01234567, 1);
    175  vpacc7x0123 = vdotq_lane_u32(vpacc7x0123, vb4567x0123, va7x01234567, 1);
|
D | 6x8c4-minmax-rndnu-neondot.c | in xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot():
    113  const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    120  vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    125  vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
    130  vpacc2x0123 = vdotq_lane_u32(vpacc2x0123, vb4567x0123, va2x01234567, 1);
    135  vpacc3x0123 = vdotq_lane_u32(vpacc3x0123, vb4567x0123, va3x01234567, 1);
    140  vpacc4x0123 = vdotq_lane_u32(vpacc4x0123, vb4567x0123, va4x01234567, 1);
    145  vpacc5x0123 = vdotq_lane_u32(vpacc5x0123, vb4567x0123, va5x01234567, 1);
|
D | 5x8c4-minmax-rndnu-neondot.c | in xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot():
    103  const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    110  vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    115  vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
    120  vpacc2x0123 = vdotq_lane_u32(vpacc2x0123, vb4567x0123, va2x01234567, 1);
    125  vpacc3x0123 = vdotq_lane_u32(vpacc3x0123, vb4567x0123, va3x01234567, 1);
    130  vpacc4x0123 = vdotq_lane_u32(vpacc4x0123, vb4567x0123, va4x01234567, 1);
|
D | 4x8c4-minmax-rndnu-neondot.c | in xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot():
    93   const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    100  vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    105  vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
    110  vpacc2x0123 = vdotq_lane_u32(vpacc2x0123, vb4567x0123, va2x01234567, 1);
    115  vpacc3x0123 = vdotq_lane_u32(vpacc3x0123, vb4567x0123, va3x01234567, 1);
|
D | 3x8c4-minmax-rndnu-neondot.c | in xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot():
    83   const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    90   vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    95   vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
    100  vpacc2x0123 = vdotq_lane_u32(vpacc2x0123, vb4567x0123, va2x01234567, 1);
|
D | 2x8c4-minmax-rndnu-neondot.c | in xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot():
    73   const uint8x16_t vb4567x0123 = vld1q_u8(w); w = (const void*) ((const uint8_t*) w + 16);  (local)
    80   vpacc0x0123 = vdotq_lane_u32(vpacc0x0123, vb4567x0123, va0x01234567, 1);
    85   vpacc1x0123 = vdotq_lane_u32(vpacc1x0123, vb4567x0123, va1x01234567, 1);
|
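The qu8 entries follow the same structure on the unsigned path: vb4567x0123 becomes a uint8x16_t loaded with vld1q_u8, and vdotq_lane_u32 updates the vpacc*x0123 partial accumulators, which the kernels combine with a zero-point correction elsewhere before requantization. A matching unsigned sketch, again with invented names and assuming an AArch64 compiler with dot-product support:

#include <arm_neon.h>
#include <stdint.h>

#if defined(__ARM_FEATURE_DOTPROD)
// Unsigned counterpart of the K=8 step: 32 packed uint8 weights for one
// group of 4 output columns, 8 uint8 activations for one row.
static inline uint32x4_t gemm_c4_step_u8(uint32x4_t vpacc, const uint8_t* w, uint8x8_t va)
{
  const uint8x16_t vb0123 = vld1q_u8(w);        // weights for k = 0..3, columns 0..3
  const uint8x16_t vb4567 = vld1q_u8(w + 16);   // weights for k = 4..7, columns 0..3
  vpacc = vdotq_lane_u32(vpacc, vb0123, va, 0); // consumes activation bytes 0..3
  vpacc = vdotq_lane_u32(vpacc, vb4567, va, 1); // consumes activation bytes 4..7
  return vpacc;
}
#endif  // __ARM_FEATURE_DOTPROD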