Lines Matching refs:b
24 movi v19.16b, #0xe1
26 ext v3.16b, v17.16b, v17.16b, #8
29 ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
32 and v18.16b, v18.16b, v16.16b
34 ext v18.16b, v18.16b, v18.16b, #8
35 and v16.16b, v16.16b, v17.16b
36 orr v3.16b, v3.16b, v18.16b // H<<<=1
37 eor v5.16b, v3.16b, v16.16b // twisted H
48 ld1 {v3.16b}, [x0] // load Xi
54 rev64 v3.16b, v3.16b // byteswap Xi
55 ext v3.16b, v3.16b, v3.16b, #8
56 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
59 b Lgmult_neon
68 ld1 {v0.16b}, [x0] // load Xi
74 rev64 v0.16b, v0.16b // byteswap Xi
75 ext v0.16b, v0.16b, v0.16b, #8
76 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
79 ld1 {v3.16b}, [x2], #16 // load inp
80 rev64 v3.16b, v3.16b // byteswap inp
81 ext v3.16b, v3.16b, v3.16b, #8
82 eor v3.16b, v3.16b, v0.16b // inp ^= Xi
88 ext v16.8b, v5.8b, v5.8b, #1 // A1
89 pmull v16.8h, v16.8b, v3.8b // F = A1*B
90 ext v0.8b, v3.8b, v3.8b, #1 // B1
91 pmull v0.8h, v5.8b, v0.8b // E = A*B1
92 ext v17.8b, v5.8b, v5.8b, #2 // A2
93 pmull v17.8h, v17.8b, v3.8b // H = A2*B
94 ext v19.8b, v3.8b, v3.8b, #2 // B2
95 pmull v19.8h, v5.8b, v19.8b // G = A*B2
96 ext v18.8b, v5.8b, v5.8b, #3 // A3
97 eor v16.16b, v16.16b, v0.16b // L = E + F
98 pmull v18.8h, v18.8b, v3.8b // J = A3*B
99 ext v0.8b, v3.8b, v3.8b, #3 // B3
100 eor v17.16b, v17.16b, v19.16b // M = G + H
101 pmull v0.8h, v5.8b, v0.8b // I = A*B3
126 ext v19.8b, v3.8b, v3.8b, #4 // B4
127 eor v18.16b, v18.16b, v0.16b // N = I + J
128 pmull v19.8h, v5.8b, v19.8b // K = A*B4
136 eor v20.16b, v20.16b, v21.16b
137 eor v22.16b, v22.16b, v23.16b
138 and v21.16b, v21.16b, v24.16b
139 and v23.16b, v23.16b, v25.16b
140 eor v20.16b, v20.16b, v21.16b
141 eor v22.16b, v22.16b, v23.16b
147 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
148 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
149 pmull v0.8h, v5.8b, v3.8b // D = A*B
150 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
151 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
152 eor v16.16b, v16.16b, v17.16b
153 eor v18.16b, v18.16b, v19.16b
154 eor v0.16b, v0.16b, v16.16b
155 eor v0.16b, v0.16b, v18.16b
156 eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
157 ext v16.8b, v7.8b, v7.8b, #1 // A1
158 pmull v16.8h, v16.8b, v3.8b // F = A1*B
159 ext v1.8b, v3.8b, v3.8b, #1 // B1
160 pmull v1.8h, v7.8b, v1.8b // E = A*B1
161 ext v17.8b, v7.8b, v7.8b, #2 // A2
162 pmull v17.8h, v17.8b, v3.8b // H = A2*B
163 ext v19.8b, v3.8b, v3.8b, #2 // B2
164 pmull v19.8h, v7.8b, v19.8b // G = A*B2
165 ext v18.8b, v7.8b, v7.8b, #3 // A3
166 eor v16.16b, v16.16b, v1.16b // L = E + F
167 pmull v18.8h, v18.8b, v3.8b // J = A3*B
168 ext v1.8b, v3.8b, v3.8b, #3 // B3
169 eor v17.16b, v17.16b, v19.16b // M = G + H
170 pmull v1.8h, v7.8b, v1.8b // I = A*B3
195 ext v19.8b, v3.8b, v3.8b, #4 // B4
196 eor v18.16b, v18.16b, v1.16b // N = I + J
197 pmull v19.8h, v7.8b, v19.8b // K = A*B4
205 eor v20.16b, v20.16b, v21.16b
206 eor v22.16b, v22.16b, v23.16b
207 and v21.16b, v21.16b, v24.16b
208 and v23.16b, v23.16b, v25.16b
209 eor v20.16b, v20.16b, v21.16b
210 eor v22.16b, v22.16b, v23.16b
216 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
217 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
218 pmull v1.8h, v7.8b, v3.8b // D = A*B
219 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
220 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
221 eor v16.16b, v16.16b, v17.16b
222 eor v18.16b, v18.16b, v19.16b
223 eor v1.16b, v1.16b, v16.16b
224 eor v1.16b, v1.16b, v18.16b
225 ext v16.8b, v6.8b, v6.8b, #1 // A1
226 pmull v16.8h, v16.8b, v4.8b // F = A1*B
227 ext v2.8b, v4.8b, v4.8b, #1 // B1
228 pmull v2.8h, v6.8b, v2.8b // E = A*B1
229 ext v17.8b, v6.8b, v6.8b, #2 // A2
230 pmull v17.8h, v17.8b, v4.8b // H = A2*B
231 ext v19.8b, v4.8b, v4.8b, #2 // B2
232 pmull v19.8h, v6.8b, v19.8b // G = A*B2
233 ext v18.8b, v6.8b, v6.8b, #3 // A3
234 eor v16.16b, v16.16b, v2.16b // L = E + F
235 pmull v18.8h, v18.8b, v4.8b // J = A3*B
236 ext v2.8b, v4.8b, v4.8b, #3 // B3
237 eor v17.16b, v17.16b, v19.16b // M = G + H
238 pmull v2.8h, v6.8b, v2.8b // I = A*B3
263 ext v19.8b, v4.8b, v4.8b, #4 // B4
264 eor v18.16b, v18.16b, v2.16b // N = I + J
265 pmull v19.8h, v6.8b, v19.8b // K = A*B4
273 eor v20.16b, v20.16b, v21.16b
274 eor v22.16b, v22.16b, v23.16b
275 and v21.16b, v21.16b, v24.16b
276 and v23.16b, v23.16b, v25.16b
277 eor v20.16b, v20.16b, v21.16b
278 eor v22.16b, v22.16b, v23.16b
284 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
285 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
286 pmull v2.8h, v6.8b, v4.8b // D = A*B
287 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
288 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
289 eor v16.16b, v16.16b, v17.16b
290 eor v18.16b, v18.16b, v19.16b
291 eor v2.16b, v2.16b, v16.16b
292 eor v2.16b, v2.16b, v18.16b
293 ext v16.16b, v0.16b, v2.16b, #8
294 eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
295 eor v1.16b, v1.16b, v2.16b
296 eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
304 eor v18.16b, v18.16b, v17.16b //
306 eor v18.16b, v18.16b, v17.16b //
308 eor v18.16b, v18.16b, v1.16b
313 eor v2.16b, v2.16b,v0.16b
314 eor v0.16b, v0.16b,v18.16b //
317 eor v0.16b, v0.16b, v2.16b //
318 eor v0.16b, v0.16b, v18.16b //
323 rev64 v0.16b, v0.16b // byteswap Xi and write
324 ext v0.16b, v0.16b, v0.16b, #8
325 st1 {v0.16b}, [x0]