Home
last modified time | relevance | path

Searched refs:va0c0 (Results 1 – 25 of 499) sorted by relevance

12345678910>>...20

/external/XNNPACK/src/qs8-igemm/gen/
D1x16c4-minmax-rndnu-neon-mull-ld2r.c87 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r() local
89 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
91 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
93 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
95 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
97 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
99 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
101 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
103 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
139 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r() local
[all …]
D1x16c4-minmax-rndnu-neon-mull-ld1r.c88 const int8x8_t va0c0 = vreinterpret_s8_s32(va00); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r() local
90 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
92 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
94 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
96 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
98 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
100 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
102 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
104 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
140 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r() local
[all …]
D1x16c4-minmax-rndnu-neon-mull-dup.c87 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup() local
89 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
91 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
93 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
95 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
97 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
99 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
101 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
103 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
139 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup() local
[all …]
D1x8c4-minmax-rndnu-neon-mull-ld2r.c75 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r() local
77 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
79 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
81 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
83 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
107 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r() local
108 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
110 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
112 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
114 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
D1x8c4-minmax-rndnu-neon-mull-ld1r.c76 const int8x8_t va0c0 = vreinterpret_s8_s32(va00); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r() local
78 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
80 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
82 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
84 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
108 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r() local
109 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
111 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
113 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
115 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
D1x8c4-minmax-rndnu-neon-mull-dup.c75 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup() local
77 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
79 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
81 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
83 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
107 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup() local
108 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
110 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
112 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
114 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
D1x16c4-minmax-rndnu-neon-mlal-dup.c181 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup() local
183 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
185 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
187 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
189 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
191 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
193 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
195 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
197 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
233 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup() local
[all …]
D1x16c4-minmax-rndnu-neon-mlal-ld2r.c181 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r() local
183 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
185 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
187 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
189 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
191 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
193 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
195 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
197 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
233 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r() local
[all …]
D2x16c4-minmax-rndnu-neon-mull-ld2r.c104 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local
107 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
111 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
115 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
119 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
123 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
127 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
131 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
135 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
191 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local
[all …]
D1x16c4-minmax-rndnu-neon-mlal-ld1r.c184 const int8x8_t va0c0 = vreinterpret_s8_s32(va00); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r() local
186 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
188 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
190 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
192 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
194 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
196 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
198 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
200 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
236 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r() local
[all …]
D2x16c4-minmax-rndnu-neon-mull-dup.c104 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local
107 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
111 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
115 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
119 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
123 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
127 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
131 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
135 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
191 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local
[all …]
D2x16c4-minmax-rndnu-neon-mull-ld1r.c106 const int8x8_t va0c0 = vreinterpret_s8_s32(va00); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local
109 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
113 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
117 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
121 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
125 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
129 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
133 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
137 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
193 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D1x16c4-minmax-rndnu-neon-mull-ld1r.c77 const int8x8_t va0c0 = vreinterpret_s8_s32(va00); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r() local
79 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
81 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
83 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
85 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
87 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
89 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
91 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
93 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r()
129 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r() local
[all …]
D1x16c4-minmax-rndnu-neon-mull-ld2r.c76 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r() local
78 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
80 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
82 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
84 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
86 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
88 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
90 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
92 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r()
128 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r() local
[all …]
D1x16c4-minmax-rndnu-neon-mull-dup.c76 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup() local
78 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
80 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
82 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
84 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
86 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
88 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
90 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
92 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup()
128 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup() local
[all …]
D1x8c4-minmax-rndnu-neon-mull-ld1r.c65 const int8x8_t va0c0 = vreinterpret_s8_s32(va00); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r() local
67 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
69 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
71 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
73 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
97 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r() local
98 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
100 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
102 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
104 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r()
D1x8c4-minmax-rndnu-neon-mull-ld2r.c64 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r() local
66 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
68 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
70 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
72 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
96 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r() local
97 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
99 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
101 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
103 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r()
D1x8c4-minmax-rndnu-neon-mull-dup.c64 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup() local
66 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
68 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
70 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
72 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
96 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup() local
97 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
99 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
101 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
103 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup()
D2x16c4-minmax-rndnu-neon-mull-dup.c91 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local
94 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
98 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
102 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
106 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
110 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
114 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
118 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
122 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup()
178 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup() local
[all …]
D1x16c4-minmax-rndnu-neon-mlal-ld2r.c170 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r() local
172 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
174 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
176 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
178 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
180 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
182 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
184 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
186 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r()
222 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r() local
[all …]
D2x16c4-minmax-rndnu-neon-mull-ld1r.c93 const int8x8_t va0c0 = vreinterpret_s8_s32(va00); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local
96 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
100 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
104 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
108 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
112 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
116 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
120 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
124 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r()
180 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r() local
[all …]
D2x16c4-minmax-rndnu-neon-mull-ld2r.c91 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local
94 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
98 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
102 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
106 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
110 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
114 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
118 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
122 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r()
178 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r() local
[all …]
D1x16c4-minmax-rndnu-neon-mlal-ld1r.c173 const int8x8_t va0c0 = vreinterpret_s8_s32(va00); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r() local
175 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
177 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
179 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
181 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
183 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
185 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
187 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
189 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r()
225 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r() local
[all …]
D1x16c4-minmax-rndnu-neon-mlal-dup.c170 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup() local
172 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
174 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
176 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
178 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
180 const int16x8_t vprod0x89c0 = vmull_s8(vb89c0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
182 const int16x8_t vprod0xABc0 = vmull_s8(vbABc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
184 const int16x8_t vprod0xCDc0 = vmull_s8(vbCDc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
186 const int16x8_t vprod0xEFc0 = vmull_s8(vbEFc0, va0c0); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup()
222 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup() local
[all …]
D1x8c4-minmax-fp32-neonv8-mlal-ld2r.c119 const int8x8_t va0c0 = vreinterpret_s8_s32(va0.val[0]); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r() local
121 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r()
123 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r()
125 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r()
127 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r()
151 const int8x8_t va0c0 = vreinterpret_s8_s32(vdup_lane_s32(vreinterpret_s32_s8(va0), 0)); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r() local
152 const int16x8_t vprod0x01c0 = vmull_s8(vb01c0, va0c0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r()
154 const int16x8_t vprod0x23c0 = vmull_s8(vb23c0, va0c0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r()
156 const int16x8_t vprod0x45c0 = vmull_s8(vb45c0, va0c0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r()
158 const int16x8_t vprod0x67c0 = vmull_s8(vb67c0, va0c0); in xnn_qs8_gemm_minmax_fp32_ukernel_1x8c4__neonv8_mlal_ld2r()

12345678910>>...20