• Home
  • Raw
  • Download

Lines Matching refs:b

23 	movi	v19.16b, #0xe1
25 ext v3.16b, v17.16b, v17.16b, #8
28 ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
31 and v18.16b, v18.16b, v16.16b
33 ext v18.16b, v18.16b, v18.16b, #8
34 and v16.16b, v16.16b, v17.16b
35 orr v3.16b, v3.16b, v18.16b // H<<<=1
36 eor v5.16b, v3.16b, v16.16b // twisted H
46 ld1 {v3.16b}, [x0] // load Xi
52 rev64 v3.16b, v3.16b // byteswap Xi
53 ext v3.16b, v3.16b, v3.16b, #8
54 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
57 b Lgmult_neon
65 ld1 {v0.16b}, [x0] // load Xi
71 rev64 v0.16b, v0.16b // byteswap Xi
72 ext v0.16b, v0.16b, v0.16b, #8
73 eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
76 ld1 {v3.16b}, [x2], #16 // load inp
77 rev64 v3.16b, v3.16b // byteswap inp
78 ext v3.16b, v3.16b, v3.16b, #8
79 eor v3.16b, v3.16b, v0.16b // inp ^= Xi
85 ext v16.8b, v5.8b, v5.8b, #1 // A1
86 pmull v16.8h, v16.8b, v3.8b // F = A1*B
87 ext v0.8b, v3.8b, v3.8b, #1 // B1
88 pmull v0.8h, v5.8b, v0.8b // E = A*B1
89 ext v17.8b, v5.8b, v5.8b, #2 // A2
90 pmull v17.8h, v17.8b, v3.8b // H = A2*B
91 ext v19.8b, v3.8b, v3.8b, #2 // B2
92 pmull v19.8h, v5.8b, v19.8b // G = A*B2
93 ext v18.8b, v5.8b, v5.8b, #3 // A3
94 eor v16.16b, v16.16b, v0.16b // L = E + F
95 pmull v18.8h, v18.8b, v3.8b // J = A3*B
96 ext v0.8b, v3.8b, v3.8b, #3 // B3
97 eor v17.16b, v17.16b, v19.16b // M = G + H
98 pmull v0.8h, v5.8b, v0.8b // I = A*B3
123 ext v19.8b, v3.8b, v3.8b, #4 // B4
124 eor v18.16b, v18.16b, v0.16b // N = I + J
125 pmull v19.8h, v5.8b, v19.8b // K = A*B4
133 eor v20.16b, v20.16b, v21.16b
134 eor v22.16b, v22.16b, v23.16b
135 and v21.16b, v21.16b, v24.16b
136 and v23.16b, v23.16b, v25.16b
137 eor v20.16b, v20.16b, v21.16b
138 eor v22.16b, v22.16b, v23.16b
144 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
145 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
146 pmull v0.8h, v5.8b, v3.8b // D = A*B
147 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
148 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
149 eor v16.16b, v16.16b, v17.16b
150 eor v18.16b, v18.16b, v19.16b
151 eor v0.16b, v0.16b, v16.16b
152 eor v0.16b, v0.16b, v18.16b
153 eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
154 ext v16.8b, v7.8b, v7.8b, #1 // A1
155 pmull v16.8h, v16.8b, v3.8b // F = A1*B
156 ext v1.8b, v3.8b, v3.8b, #1 // B1
157 pmull v1.8h, v7.8b, v1.8b // E = A*B1
158 ext v17.8b, v7.8b, v7.8b, #2 // A2
159 pmull v17.8h, v17.8b, v3.8b // H = A2*B
160 ext v19.8b, v3.8b, v3.8b, #2 // B2
161 pmull v19.8h, v7.8b, v19.8b // G = A*B2
162 ext v18.8b, v7.8b, v7.8b, #3 // A3
163 eor v16.16b, v16.16b, v1.16b // L = E + F
164 pmull v18.8h, v18.8b, v3.8b // J = A3*B
165 ext v1.8b, v3.8b, v3.8b, #3 // B3
166 eor v17.16b, v17.16b, v19.16b // M = G + H
167 pmull v1.8h, v7.8b, v1.8b // I = A*B3
192 ext v19.8b, v3.8b, v3.8b, #4 // B4
193 eor v18.16b, v18.16b, v1.16b // N = I + J
194 pmull v19.8h, v7.8b, v19.8b // K = A*B4
202 eor v20.16b, v20.16b, v21.16b
203 eor v22.16b, v22.16b, v23.16b
204 and v21.16b, v21.16b, v24.16b
205 and v23.16b, v23.16b, v25.16b
206 eor v20.16b, v20.16b, v21.16b
207 eor v22.16b, v22.16b, v23.16b
213 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
214 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
215 pmull v1.8h, v7.8b, v3.8b // D = A*B
216 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
217 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
218 eor v16.16b, v16.16b, v17.16b
219 eor v18.16b, v18.16b, v19.16b
220 eor v1.16b, v1.16b, v16.16b
221 eor v1.16b, v1.16b, v18.16b
222 ext v16.8b, v6.8b, v6.8b, #1 // A1
223 pmull v16.8h, v16.8b, v4.8b // F = A1*B
224 ext v2.8b, v4.8b, v4.8b, #1 // B1
225 pmull v2.8h, v6.8b, v2.8b // E = A*B1
226 ext v17.8b, v6.8b, v6.8b, #2 // A2
227 pmull v17.8h, v17.8b, v4.8b // H = A2*B
228 ext v19.8b, v4.8b, v4.8b, #2 // B2
229 pmull v19.8h, v6.8b, v19.8b // G = A*B2
230 ext v18.8b, v6.8b, v6.8b, #3 // A3
231 eor v16.16b, v16.16b, v2.16b // L = E + F
232 pmull v18.8h, v18.8b, v4.8b // J = A3*B
233 ext v2.8b, v4.8b, v4.8b, #3 // B3
234 eor v17.16b, v17.16b, v19.16b // M = G + H
235 pmull v2.8h, v6.8b, v2.8b // I = A*B3
260 ext v19.8b, v4.8b, v4.8b, #4 // B4
261 eor v18.16b, v18.16b, v2.16b // N = I + J
262 pmull v19.8h, v6.8b, v19.8b // K = A*B4
270 eor v20.16b, v20.16b, v21.16b
271 eor v22.16b, v22.16b, v23.16b
272 and v21.16b, v21.16b, v24.16b
273 and v23.16b, v23.16b, v25.16b
274 eor v20.16b, v20.16b, v21.16b
275 eor v22.16b, v22.16b, v23.16b
281 ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
282 ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
283 pmull v2.8h, v6.8b, v4.8b // D = A*B
284 ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
285 ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
286 eor v16.16b, v16.16b, v17.16b
287 eor v18.16b, v18.16b, v19.16b
288 eor v2.16b, v2.16b, v16.16b
289 eor v2.16b, v2.16b, v18.16b
290 ext v16.16b, v0.16b, v2.16b, #8
291 eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
292 eor v1.16b, v1.16b, v2.16b
293 eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
301 eor v18.16b, v18.16b, v17.16b //
303 eor v18.16b, v18.16b, v17.16b //
305 eor v18.16b, v18.16b, v1.16b
310 eor v2.16b, v2.16b,v0.16b
311 eor v0.16b, v0.16b,v18.16b //
314 eor v0.16b, v0.16b, v2.16b //
315 eor v0.16b, v0.16b, v18.16b //
320 rev64 v0.16b, v0.16b // byteswap Xi and write
321 ext v0.16b, v0.16b, v0.16b, #8
322 st1 {v0.16b}, [x0]