/external/XNNPACK/src/qs8-igemm/gen/ |
D | 1x16c4-minmax-neondot.c | 65 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() local 78 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 79 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 80 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 81 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 82 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 83 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 84 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb4567x89AB, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 85 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb4567xCDEF, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() 92 const int8x8_t va0x01234567 = vld1_s8(a0); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() local [all …]
|
D | 1x8c4-minmax-neondot.c | 63 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() local 72 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() 73 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() 74 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() 75 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() 82 const int8x8_t va0x01234567 = vld1_s8(a0); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() local 89 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot() 90 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
|
D | 4x16c4-minmax-neondot.c | 101 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local 117 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 118 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 119 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 120 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 133 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 134 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 135 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb4567x89AB, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 136 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb4567xCDEF, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() 155 const int8x8_t va0x01234567 = vld1_s8(a0); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local [all …]
|
D | 4x8c4-minmax-neondot.c | 93 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() local 105 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 106 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 113 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 114 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 127 const int8x8_t va0x01234567 = vld1_s8(a0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() local 137 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot() 138 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
|
D | 6x16c4-minmax-neondot.c | 125 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local 143 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 144 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 145 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 146 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 167 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 168 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 169 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb4567x89AB, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 170 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb4567xCDEF, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() 197 const int8x8_t va0x01234567 = vld1_s8(a0); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot() local [all …]
|
D | 6x8c4-minmax-neondot.c | 113 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() local 127 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 128 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 139 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 140 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 157 const int8x8_t va0x01234567 = vld1_s8(a0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() local 169 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot() 170 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_6x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 149 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 169 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 170 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 171 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 172 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 201 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 202 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 203 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb4567x89AB, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 204 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb4567xCDEF, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 239 const int8x8_t va0x01234567 = vld1_s8(a0); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local [all …]
|
D | 8x8c4-minmax-neondot.c | 133 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local 149 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 150 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 165 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 166 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 187 const int8x8_t va0x01234567 = vld1_s8(a0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local 201 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 202 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 1x16c4-minmax-neondot.c | 56 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() local 69 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 70 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 71 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 72 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 73 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 74 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 75 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb4567x89AB, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 76 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb4567xCDEF, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() 83 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 4; in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() local [all …]
|
D | 1x8c4-minmax-neondot.c | 54 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() local 63 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() 64 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() 65 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() 66 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() 73 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 4; in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() local 80 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot() 81 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
|
D | 4x16c4-minmax-neondot.c | 86 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local 102 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 103 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 104 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 105 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 118 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 119 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 120 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb4567x89AB, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 121 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb4567xCDEF, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() 140 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 4; in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local [all …]
|
D | 4x8c4-minmax-neondot.c | 78 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() local 90 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 91 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 98 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 99 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 112 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 4; in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() local 122 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot() 123 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
|
D | 6x16c4-minmax-neondot.c | 106 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local 124 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 125 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 126 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 127 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 148 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 149 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 150 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb4567x89AB, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 151 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb4567xCDEF, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() 178 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 4; in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot() local [all …]
|
D | 6x8c4-minmax-neondot.c | 94 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() local 108 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 109 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 120 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 121 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 138 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 4; in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() local 150 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot() 151 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 126 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 146 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 147 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 148 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb0123x89AB, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 149 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb0123xCDEF, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 178 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 179 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 180 vacc0x89AB = vdotq_lane_s32(vacc0x89AB, vb4567x89AB, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 181 vacc0xCDEF = vdotq_lane_s32(vacc0xCDEF, vb4567xCDEF, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 216 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 4; in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local [all …]
|
D | 8x8c4-minmax-neondot.c | 110 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 8; in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local 126 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 127 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 142 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb4567x0123, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 143 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb4567x4567, va0x01234567, 1); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 164 const int8x8_t va0x01234567 = vld1_s8(a0); a0 += 4; in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local 178 vacc0x0123 = vdotq_lane_s32(vacc0x0123, vb0123x0123, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 179 vacc0x4567 = vdotq_lane_s32(vacc0x4567, vb0123x4567, va0x01234567, 0); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
|