Home
last modified time | relevance | path

Searched refs:vacc0x5 (Results 1 – 25 of 48) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D1x8c8-minmax-neon-mlal-padal.c49 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
91 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
125 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
139 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
152 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D1x8c8-minmax-neon-mull-padal.c49 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal() local
76 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal()
90 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal()
103 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal()
D1x8c16-minmax-neon-mlal-padal.c49 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
84 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
98 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
111 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mull-padal.c55 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal() local
63 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
102 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
121 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
140 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c8-minmax-neon-mlal-padal.c55 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
63 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
124 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
177 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
196 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
215 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c55 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
63 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
116 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
137 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
156 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c61 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal() local
69 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
77 int32x4_t vacc2x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
128 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
152 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
177 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal()
D3x8c8-minmax-neon-mlal-padal.c61 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
69 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
77 int32x4_t vacc2x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
157 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
229 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
253 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
278 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal()
D1x16c8-minmax-neon-mull-padal.c49 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
84 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
122 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
141 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
D4x8c8-minmax-neon-mull-padal.c67 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal() local
75 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
83 int32x4_t vacc2x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
91 int32x4_t vacc3x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
154 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
183 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
214 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal()
D3x8c16-minmax-neon-mlal-padal.c61 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
69 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
77 int32x4_t vacc2x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
148 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
176 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
201 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D1x16c8-minmax-neon-mlal-padal.c49 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
107 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
173 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
211 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
230 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D4x8c8-minmax-neon-mlal-padal.c67 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal() local
75 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
83 int32x4_t vacc2x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
91 int32x4_t vacc3x5 = vacc0x5; in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
190 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
281 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
310 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
341 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal()
/external/XNNPACK/src/qs8-igemm/gen/
D1x8c8-minmax-neon-mull-padal.c52 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal() local
87 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal()
104 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal()
117 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mull_padal()
D1x8c8-minmax-neon-mlal-padal.c52 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal() local
102 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
136 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
153 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
166 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_1x8c8__neon_mlal_padal()
D1x8c16-minmax-neon-mlal-padal.c52 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal() local
95 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
112 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
125 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
D2x8c8-minmax-neon-mull-padal.c56 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal() local
64 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
115 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
137 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
156 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mull_padal()
D2x8c8-minmax-neon-mlal-padal.c56 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal() local
64 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
137 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
190 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
212 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
231 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_2x8c8__neon_mlal_padal()
D2x8c16-minmax-neon-mlal-padal.c56 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal() local
64 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
129 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
153 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
172 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x8c8-minmax-neon-mull-padal.c60 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal() local
68 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
76 int32x4_t vacc2x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
143 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
170 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
195 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mull_padal()
D4x8c8-minmax-neon-mull-padal.c64 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal() local
72 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
80 int32x4_t vacc2x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
88 int32x4_t vacc3x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
171 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
203 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
234 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_4x8c8__neon_mull_padal()
D1x16c8-minmax-neon-mull-padal.c52 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
95 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
136 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
155 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
D1x16c8-minmax-neon-mlal-padal.c52 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
118 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
184 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
225 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
244 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D3x8c16-minmax-neon-mlal-padal.c60 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal() local
68 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
76 int32x4_t vacc2x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
163 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
194 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
219 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
D3x8c8-minmax-neon-mlal-padal.c60 …int32x4_t vacc0x5 = vld1q_lane_s32(w, vmovq_n_s32(0), 0); w = (const void*) ((uintptr_t) w + sizeo… in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal() local
68 int32x4_t vacc1x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
76 int32x4_t vacc2x5 = vacc0x5; in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
172 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
244 vacc0x5 = vpadalq_s16(vacc0x5, vprod0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
271 const int32x4_t vsum0x45 = vpaddq_s32(vacc0x4, vacc0x5); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()
296 const int32x2_t vpsum0x5 = vadd_s32(vget_low_s32(vacc0x5), vget_high_s32(vacc0x5)); in xnn_qs8_igemm_minmax_ukernel_3x8c8__neon_mlal_padal()

12