Lines Matching refs:b
24 movi v19.16b, #0xe1
26 ext v3.16b, v17.16b, v17.16b, #8
29 ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
32 and v18.16b, v18.16b, v16.16b
34 ext v18.16b, v18.16b, v18.16b, #8
35 and v16.16b, v16.16b, v17.16b
36 orr v3.16b, v3.16b, v18.16b // H<<<=1
37 eor v5.16b, v3.16b, v16.16b // twisted H
47 ld1 {v3.16b}, [x0] // load Xi
53 rev64 v3.16b, v3.16b // byteswap Xi
54 ext v3.16b, v3.16b, v3.16b, #8
55 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
58 b .Lgmult_neon
66 ld1 {v0.16b}, [x0] // load Xi
72 rev64 v0.16b, v0.16b // byteswap Xi
73 ext v0.16b, v0.16b, v0.16b, #8
74 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
77 ld1 {v3.16b}, [x2], #16 // load inp
78 rev64 v3.16b, v3.16b // byteswap inp
79 ext v3.16b, v3.16b, v3.16b, #8
80 eor v3.16b, v3.16b, v0.16b // inp ^= Xi
86 ext v16.8b, v5.8b, v5.8b, #1 // A1
87 pmull v16.8h, v16.8b, v3.8b // F = A1*B
88 ext v0.8b, v3.8b, v3.8b, #1 // B1
89 pmull v0.8h, v5.8b, v0.8b // E = A*B1
90 ext v17.8b, v5.8b, v5.8b, #2 // A2
91 pmull v17.8h, v17.8b, v3.8b // H = A2*B
92 ext v19.8b, v3.8b, v3.8b, #2 // B2
93 pmull v19.8h, v5.8b, v19.8b // G = A*B2
94 ext v18.8b, v5.8b, v5.8b, #3 // A3
95 eor v16.16b, v16.16b, v0.16b // L = E + F
96 pmull v18.8h, v18.8b, v3.8b // J = A3*B
97 ext v0.8b, v3.8b, v3.8b, #3 // B3
98 eor v17.16b, v17.16b, v19.16b // M = G + H
99 pmull v0.8h, v5.8b, v0.8b // I = A*B3
124 ext v19.8b, v3.8b, v3.8b, #4 // B4
125 eor v18.16b, v18.16b, v0.16b // N = I + J
126 pmull v19.8h, v5.8b, v19.8b // K = A*B4
134 eor v20.16b, v20.16b, v21.16b
135 eor v22.16b, v22.16b, v23.16b
136 and v21.16b, v21.16b, v24.16b
137 and v23.16b, v23.16b, v25.16b
138 eor v20.16b, v20.16b, v21.16b
139 eor v22.16b, v22.16b, v23.16b
145 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
146 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
147 pmull v0.8h, v5.8b, v3.8b // D = A*B
148 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
149 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
150 eor v16.16b, v16.16b, v17.16b
151 eor v18.16b, v18.16b, v19.16b
152 eor v0.16b, v0.16b, v16.16b
153 eor v0.16b, v0.16b, v18.16b
154 eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
155 ext v16.8b, v7.8b, v7.8b, #1 // A1
156 pmull v16.8h, v16.8b, v3.8b // F = A1*B
157 ext v1.8b, v3.8b, v3.8b, #1 // B1
158 pmull v1.8h, v7.8b, v1.8b // E = A*B1
159 ext v17.8b, v7.8b, v7.8b, #2 // A2
160 pmull v17.8h, v17.8b, v3.8b // H = A2*B
161 ext v19.8b, v3.8b, v3.8b, #2 // B2
162 pmull v19.8h, v7.8b, v19.8b // G = A*B2
163 ext v18.8b, v7.8b, v7.8b, #3 // A3
164 eor v16.16b, v16.16b, v1.16b // L = E + F
165 pmull v18.8h, v18.8b, v3.8b // J = A3*B
166 ext v1.8b, v3.8b, v3.8b, #3 // B3
167 eor v17.16b, v17.16b, v19.16b // M = G + H
168 pmull v1.8h, v7.8b, v1.8b // I = A*B3
193 ext v19.8b, v3.8b, v3.8b, #4 // B4
194 eor v18.16b, v18.16b, v1.16b // N = I + J
195 pmull v19.8h, v7.8b, v19.8b // K = A*B4
203 eor v20.16b, v20.16b, v21.16b
204 eor v22.16b, v22.16b, v23.16b
205 and v21.16b, v21.16b, v24.16b
206 and v23.16b, v23.16b, v25.16b
207 eor v20.16b, v20.16b, v21.16b
208 eor v22.16b, v22.16b, v23.16b
214 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
215 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
216 pmull v1.8h, v7.8b, v3.8b // D = A*B
217 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
218 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
219 eor v16.16b, v16.16b, v17.16b
220 eor v18.16b, v18.16b, v19.16b
221 eor v1.16b, v1.16b, v16.16b
222 eor v1.16b, v1.16b, v18.16b
223 ext v16.8b, v6.8b, v6.8b, #1 // A1
224 pmull v16.8h, v16.8b, v4.8b // F = A1*B
225 ext v2.8b, v4.8b, v4.8b, #1 // B1
226 pmull v2.8h, v6.8b, v2.8b // E = A*B1
227 ext v17.8b, v6.8b, v6.8b, #2 // A2
228 pmull v17.8h, v17.8b, v4.8b // H = A2*B
229 ext v19.8b, v4.8b, v4.8b, #2 // B2
230 pmull v19.8h, v6.8b, v19.8b // G = A*B2
231 ext v18.8b, v6.8b, v6.8b, #3 // A3
232 eor v16.16b, v16.16b, v2.16b // L = E + F
233 pmull v18.8h, v18.8b, v4.8b // J = A3*B
234 ext v2.8b, v4.8b, v4.8b, #3 // B3
235 eor v17.16b, v17.16b, v19.16b // M = G + H
236 pmull v2.8h, v6.8b, v2.8b // I = A*B3
261 ext v19.8b, v4.8b, v4.8b, #4 // B4
262 eor v18.16b, v18.16b, v2.16b // N = I + J
263 pmull v19.8h, v6.8b, v19.8b // K = A*B4
271 eor v20.16b, v20.16b, v21.16b
272 eor v22.16b, v22.16b, v23.16b
273 and v21.16b, v21.16b, v24.16b
274 and v23.16b, v23.16b, v25.16b
275 eor v20.16b, v20.16b, v21.16b
276 eor v22.16b, v22.16b, v23.16b
282 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
283 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
284 pmull v2.8h, v6.8b, v4.8b // D = A*B
285 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
286 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
287 eor v16.16b, v16.16b, v17.16b
288 eor v18.16b, v18.16b, v19.16b
289 eor v2.16b, v2.16b, v16.16b
290 eor v2.16b, v2.16b, v18.16b
291 ext v16.16b, v0.16b, v2.16b, #8
292 eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
293 eor v1.16b, v1.16b, v2.16b
294 eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
302 eor v18.16b, v18.16b, v17.16b //
304 eor v18.16b, v18.16b, v17.16b //
306 eor v18.16b, v18.16b, v1.16b
311 eor v2.16b, v2.16b,v0.16b
312 eor v0.16b, v0.16b,v18.16b //
315 eor v0.16b, v0.16b, v2.16b //
316 eor v0.16b, v0.16b, v18.16b //
321 rev64 v0.16b, v0.16b // byteswap Xi and write
322 ext v0.16b, v0.16b, v0.16b, #8
323 st1 {v0.16b}, [x0]