Lines Matching +full:k +full:- +full:block
1 // This file is generated from a similarly-named Perl script in the BoringSSL
10 .arch armv8-a+crypto
18 stp x29, x30, [sp, #-128]!
32 ldr q31, [x19, #-16] // load round N-1 keys
37 … // special case vector load initial counter so we can start first AES block as quickly as poss…
38 sub x5, x5, #1 // byte_len - 1
44 fmov d2, x10 // CTR block 2
47 fmov d1, x10 // CTR block 1
49 aesmc v0.16b, v0.16b // AES block 0 - round 0
51 rev w9, w12 // CTR block 1
52 fmov d3, x10 // CTR block 3
53 orr x9, x11, x9, lsl #32 // CTR block 1
54 add w12, w12, #1 // CTR block 1
56 fmov v1.d[1], x9 // CTR block 1
57 rev w9, w12 // CTR block 2
58 add w12, w12, #1 // CTR block 2
59 orr x9, x11, x9, lsl #32 // CTR block 2
61 fmov v2.d[1], x9 // CTR block 2
62 rev w9, w12 // CTR block 3
64 aesmc v0.16b, v0.16b // AES block 0 - round 1
65 orr x9, x11, x9, lsl #32 // CTR block 3
66 fmov v3.d[1], x9 // CTR block 3
68 aesmc v1.16b, v1.16b // AES block 1 - round 0
71 aesmc v0.16b, v0.16b // AES block 0 - round 2
74 aesmc v2.16b, v2.16b // AES block 2 - round 0
77 aesmc v1.16b, v1.16b // AES block 1 - round 1
81 aesmc v3.16b, v3.16b // AES block 3 - round 0
83 aesmc v2.16b, v2.16b // AES block 2 - round 1
86 aesmc v1.16b, v1.16b // AES block 1 - round 2
90 aesmc v3.16b, v3.16b // AES block 3 - round 1
93 aesmc v2.16b, v2.16b // AES block 2 - round 2
97 aesmc v1.16b, v1.16b // AES block 1 - round 3
100 aesmc v3.16b, v3.16b // AES block 3 - round 2
103 aesmc v2.16b, v2.16b // AES block 2 - round 3
104 add w12, w12, #1 // CTR block 3
106 aesmc v0.16b, v0.16b // AES block 0 - round 3
108 aesmc v3.16b, v3.16b // AES block 3 - round 3
113 aesmc v2.16b, v2.16b // AES block 2 - round 4
115 aesmc v0.16b, v0.16b // AES block 0 - round 4
117 aesmc v1.16b, v1.16b // AES block 1 - round 4
119 aesmc v3.16b, v3.16b // AES block 3 - round 4
120 cmp x17, #12 // setup flags for AES-128/192/256 check
122 aesmc v0.16b, v0.16b // AES block 0 - round 5
124 aesmc v1.16b, v1.16b // AES block 1 - round 5
126 aesmc v3.16b, v3.16b // AES block 3 - round 5
128 aesmc v2.16b, v2.16b // AES block 2 - round 5
130 aesmc v1.16b, v1.16b // AES block 1 - round 6
133 aesmc v3.16b, v3.16b // AES block 3 - round 6
136 aesmc v0.16b, v0.16b // AES block 0 - round 6
140 aesmc v2.16b, v2.16b // AES block 2 - round 6
143 aesmc v1.16b, v1.16b // AES block 1 - round 7
146 aesmc v0.16b, v0.16b // AES block 0 - round 7
148 aesmc v2.16b, v2.16b // AES block 2 - round 7
150 aesmc v3.16b, v3.16b // AES block 3 - round 7
153 aesmc v1.16b, v1.16b // AES block 1 - round 8
155 aesmc v2.16b, v2.16b // AES block 2 - round 8
157 aesmc v3.16b, v3.16b // AES block 3 - round 8
159 aesmc v0.16b, v0.16b // AES block 0 - round 8
160 b.lt .Lenc_finish_first_blocks // branch if AES-128
163 aesmc v1.16b, v1.16b // AES block 1 - round 9
165 aesmc v2.16b, v2.16b // AES block 2 - round 9
167 aesmc v3.16b, v3.16b // AES block 3 - round 9
169 aesmc v0.16b, v0.16b // AES block 0 - round 9
171 aesmc v1.16b, v1.16b // AES block 1 - round 10
173 aesmc v2.16b, v2.16b // AES block 2 - round 10
175 aesmc v3.16b, v3.16b // AES block 3 - round 10
177 aesmc v0.16b, v0.16b // AES block 0 - round 10
178 b.eq .Lenc_finish_first_blocks // branch if AES-192
181 aesmc v1.16b, v1.16b // AES block 1 - round 11
183 aesmc v2.16b, v2.16b // AES block 2 - round 11
185 aesmc v0.16b, v0.16b // AES block 0 - round 11
187 aesmc v3.16b, v3.16b // AES block 3 - round 11
189 aesmc v1.16b, v1.16b // AES block 1 - round 12
191 aesmc v2.16b, v2.16b // AES block 2 - round 12
193 aesmc v0.16b, v0.16b // AES block 0 - round 12
195 aesmc v3.16b, v3.16b // AES block 3 - round 12
200 aese v2.16b, v31.16b // AES block 2 - round N-1
202 aese v1.16b, v31.16b // AES block 1 - round N-1
203 aese v0.16b, v31.16b // AES block 0 - round N-1
204 aese v3.16b, v31.16b // AES block 3 - round N-1
208 ldp x19, x20, [x0, #16] // AES block 1 - load plaintext
209 rev w9, w12 // CTR block 4
210 ldp x6, x7, [x0, #0] // AES block 0 - load plaintext
211 ldp x23, x24, [x0, #48] // AES block 3 - load plaintext
212 ldp x21, x22, [x0, #32] // AES block 2 - load plaintext
214 eor x19, x19, x13 // AES block 1 - round N low
215 eor x20, x20, x14 // AES block 1 - round N high
216 fmov d5, x19 // AES block 1 - mov low
217 eor x6, x6, x13 // AES block 0 - round N low
218 eor x7, x7, x14 // AES block 0 - round N high
219 eor x24, x24, x14 // AES block 3 - round N high
220 fmov d4, x6 // AES block 0 - mov low
222 fmov v4.d[1], x7 // AES block 0 - mov high
223 eor x23, x23, x13 // AES block 3 - round N low
224 eor x21, x21, x13 // AES block 2 - round N low
225 fmov v5.d[1], x20 // AES block 1 - mov high
226 fmov d6, x21 // AES block 2 - mov low
227 add w12, w12, #1 // CTR block 4
228 orr x9, x11, x9, lsl #32 // CTR block 4
229 fmov d7, x23 // AES block 3 - mov low
230 eor x22, x22, x14 // AES block 2 - round N high
231 fmov v6.d[1], x22 // AES block 2 - mov high
232 eor v4.16b, v4.16b, v0.16b // AES block 0 - result
233 fmov d0, x10 // CTR block 4
234 fmov v0.d[1], x9 // CTR block 4
235 rev w9, w12 // CTR block 5
236 add w12, w12, #1 // CTR block 5
237 eor v5.16b, v5.16b, v1.16b // AES block 1 - result
238 fmov d1, x10 // CTR block 5
239 orr x9, x11, x9, lsl #32 // CTR block 5
240 fmov v1.d[1], x9 // CTR block 5
241 rev w9, w12 // CTR block 6
242 st1 { v4.16b}, [x2], #16 // AES block 0 - store result
243 fmov v7.d[1], x24 // AES block 3 - mov high
244 orr x9, x11, x9, lsl #32 // CTR block 6
245 eor v6.16b, v6.16b, v2.16b // AES block 2 - result
246 st1 { v5.16b}, [x2], #16 // AES block 1 - store result
247 add w12, w12, #1 // CTR block 6
248 fmov d2, x10 // CTR block 6
249 fmov v2.d[1], x9 // CTR block 6
250 st1 { v6.16b}, [x2], #16 // AES block 2 - store result
251 rev w9, w12 // CTR block 7
252 orr x9, x11, x9, lsl #32 // CTR block 7
253 eor v7.16b, v7.16b, v3.16b // AES block 3 - result
254 st1 { v7.16b}, [x2], #16 // AES block 3 - store result
259 aesmc v0.16b, v0.16b // AES block 4k+4 - round 0
260 rev64 v4.16b, v4.16b // GHASH block 4k (only t0 is free)
262 aesmc v1.16b, v1.16b // AES block 4k+5 - round 0
263 fmov d3, x10 // CTR block 4k+3
265 aesmc v2.16b, v2.16b // AES block 4k+6 - round 0
268 aesmc v0.16b, v0.16b // AES block 4k+4 - round 1
269 fmov v3.d[1], x9 // CTR block 4k+3
271 aesmc v1.16b, v1.16b // AES block 4k+5 - round 1
272 ldp x23, x24, [x0, #48] // AES block 4k+7 - load plaintext
274 aesmc v2.16b, v2.16b // AES block 4k+6 - round 1
275 ldp x21, x22, [x0, #32] // AES block 4k+6 - load plaintext
277 aesmc v0.16b, v0.16b // AES block 4k+4 - round 2
280 aesmc v1.16b, v1.16b // AES block 4k+5 - round 2
282 aesmc v3.16b, v3.16b // AES block 4k+7 - round 0
283 eor x23, x23, x13 // AES block 4k+7 - round N low
285 aesmc v0.16b, v0.16b // AES block 4k+4 - round 3
286 mov d10, v17.d[1] // GHASH block 4k - mid
287 pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high
288 eor x22, x22, x14 // AES block 4k+6 - round N high
289 mov d8, v4.d[1] // GHASH block 4k - mid
291 aesmc v3.16b, v3.16b // AES block 4k+7 - round 1
292 rev64 v5.16b, v5.16b // GHASH block 4k+1 (t0 and t1 free)
294 aesmc v0.16b, v0.16b // AES block 4k+4 - round 4
295 pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low
296 eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid
298 aesmc v2.16b, v2.16b // AES block 4k+6 - round 2
300 aesmc v0.16b, v0.16b // AES block 4k+4 - round 5
301 …rev64 v7.16b, v7.16b // GHASH block 4k+3 (t0, t1, t2 and t3 fre…
302 pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high
303 pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid
304 rev64 v6.16b, v6.16b // GHASH block 4k+2 (t0, t1, and t2 free)
305 pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low
306 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high
307 mov d4, v5.d[1] // GHASH block 4k+1 - mid
309 aesmc v1.16b, v1.16b // AES block 4k+5 - round 3
311 aesmc v3.16b, v3.16b // AES block 4k+7 - round 2
312 eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low
314 aesmc v2.16b, v2.16b // AES block 4k+6 - round 3
316 aesmc v1.16b, v1.16b // AES block 4k+5 - round 4
317 mov d8, v6.d[1] // GHASH block 4k+2 - mid
319 aesmc v3.16b, v3.16b // AES block 4k+7 - round 3
320 eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid
322 aesmc v2.16b, v2.16b // AES block 4k+6 - round 4
324 aesmc v0.16b, v0.16b // AES block 4k+4 - round 6
325 eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid
327 aesmc v3.16b, v3.16b // AES block 4k+7 - round 4
328 pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid
330 aesmc v0.16b, v0.16b // AES block 4k+4 - round 7
332 aesmc v3.16b, v3.16b // AES block 4k+7 - round 5
333 ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid
335 aesmc v1.16b, v1.16b // AES block 4k+5 - round 5
337 aesmc v0.16b, v0.16b // AES block 4k+4 - round 8
339 aesmc v2.16b, v2.16b // AES block 4k+6 - round 5
341 aesmc v1.16b, v1.16b // AES block 4k+5 - round 6
342 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid
343 pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high
344 pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low
346 aesmc v1.16b, v1.16b // AES block 4k+5 - round 7
347 pmull v6.1q, v7.1d, v12.1d // GHASH block 4k+3 - low
348 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high
350 aesmc v3.16b, v3.16b // AES block 4k+7 - round 6
351 ldp x19, x20, [x0, #16] // AES block 4k+5 - load plaintext
353 aesmc v1.16b, v1.16b // AES block 4k+5 - round 8
354 mov d4, v7.d[1] // GHASH block 4k+3 - mid
356 aesmc v2.16b, v2.16b // AES block 4k+6 - round 6
357 eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low
358 pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid
359 pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high
360 eor v4.8b, v4.8b, v7.8b // GHASH block 4k+3 - mid
362 aesmc v2.16b, v2.16b // AES block 4k+6 - round 7
363 eor x19, x19, x13 // AES block 4k+5 - round N low
365 aesmc v2.16b, v2.16b // AES block 4k+6 - round 8
366 eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid
368 aesmc v3.16b, v3.16b // AES block 4k+7 - round 7
369 eor x21, x21, x13 // AES block 4k+6 - round N low
371 aesmc v3.16b, v3.16b // AES block 4k+7 - round 8
373 pmull v4.1q, v4.1d, v16.1d // GHASH block 4k+3 - mid
374 eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high
375 cmp x17, #12 // setup flags for AES-128/192/256 check
376 fmov d5, x19 // AES block 4k+5 - mov low
377 ldp x6, x7, [x0, #0] // AES block 4k+4 - load plaintext
378 b.lt .Lenc_main_loop_continue // branch if AES-128
381 aesmc v1.16b, v1.16b // AES block 4k+5 - round 9
383 aesmc v0.16b, v0.16b // AES block 4k+4 - round 9
385 aesmc v2.16b, v2.16b // AES block 4k+6 - round 9
387 aesmc v3.16b, v3.16b // AES block 4k+7 - round 9
389 aesmc v0.16b, v0.16b // AES block 4k+4 - round 10
391 aesmc v1.16b, v1.16b // AES block 4k+5 - round 10
393 aesmc v2.16b, v2.16b // AES block 4k+6 - round 10
395 aesmc v3.16b, v3.16b // AES block 4k+7 - round 10
396 b.eq .Lenc_main_loop_continue // branch if AES-192
399 aesmc v0.16b, v0.16b // AES block 4k+4 - round 11
401 aesmc v1.16b, v1.16b // AES block 4k+5 - round 11
403 aesmc v2.16b, v2.16b // AES block 4k+6 - round 11
405 aesmc v3.16b, v3.16b // AES block 4k+7 - round 11
407 aesmc v1.16b, v1.16b // AES block 4k+5 - round 12
409 aesmc v0.16b, v0.16b // AES block 4k+4 - round 12
411 aesmc v2.16b, v2.16b // AES block 4k+6 - round 12
413 aesmc v3.16b, v3.16b // AES block 4k+7 - round 12
417 eor v11.16b, v11.16b, v6.16b // GHASH block 4k+3 - low
418 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+3 - mid
419 add w12, w12, #1 // CTR block 4k+3
420 eor v4.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up
422 pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid
423 rev w9, w12 // CTR block 4k+8
424 ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment
425 eor x6, x6, x13 // AES block 4k+4 - round N low
426 eor v10.16b, v10.16b, v4.16b // MODULO - karatsuba tidy up
427 eor x7, x7, x14 // AES block 4k+4 - round N high
428 fmov d4, x6 // AES block 4k+4 - mov low
429 orr x9, x11, x9, lsl #32 // CTR block 4k+8
430 eor v7.16b, v9.16b, v7.16b // MODULO - fold into mid
431 eor x20, x20, x14 // AES block 4k+5 - round N high
432 eor x24, x24, x14 // AES block 4k+7 - round N high
433 add w12, w12, #1 // CTR block 4k+8
434 aese v0.16b, v31.16b // AES block 4k+4 - round N-1
435 fmov v4.d[1], x7 // AES block 4k+4 - mov high
436 eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid
437 fmov d7, x23 // AES block 4k+7 - mov low
438 aese v1.16b, v31.16b // AES block 4k+5 - round N-1
439 fmov v5.d[1], x20 // AES block 4k+5 - mov high
440 fmov d6, x21 // AES block 4k+6 - mov low
442 fmov v6.d[1], x22 // AES block 4k+6 - mov high
443 pmull v9.1q, v10.1d, v8.1d // MODULO - mid 64b align with low
444 eor v4.16b, v4.16b, v0.16b // AES block 4k+4 - result
445 fmov d0, x10 // CTR block 4k+8
446 fmov v0.d[1], x9 // CTR block 4k+8
447 rev w9, w12 // CTR block 4k+9
448 add w12, w12, #1 // CTR block 4k+9
449 eor v5.16b, v5.16b, v1.16b // AES block 4k+5 - result
450 fmov d1, x10 // CTR block 4k+9
451 orr x9, x11, x9, lsl #32 // CTR block 4k+9
452 fmov v1.d[1], x9 // CTR block 4k+9
453 aese v2.16b, v31.16b // AES block 4k+6 - round N-1
454 rev w9, w12 // CTR block 4k+10
455 st1 { v4.16b}, [x2], #16 // AES block 4k+4 - store result
456 orr x9, x11, x9, lsl #32 // CTR block 4k+10
457 eor v11.16b, v11.16b, v9.16b // MODULO - fold into low
458 fmov v7.d[1], x24 // AES block 4k+7 - mov high
459 ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment
460 st1 { v5.16b}, [x2], #16 // AES block 4k+5 - store result
461 add w12, w12, #1 // CTR block 4k+10
462 aese v3.16b, v31.16b // AES block 4k+7 - round N-1
463 eor v6.16b, v6.16b, v2.16b // AES block 4k+6 - result
464 fmov d2, x10 // CTR block 4k+10
465 st1 { v6.16b}, [x2], #16 // AES block 4k+6 - store result
466 fmov v2.d[1], x9 // CTR block 4k+10
467 rev w9, w12 // CTR block 4k+11
468 eor v11.16b, v11.16b, v10.16b // MODULO - fold into low
469 orr x9, x11, x9, lsl #32 // CTR block 4k+11
470 eor v7.16b, v7.16b, v3.16b // AES block 4k+7 - result
471 st1 { v7.16b}, [x2], #16 // AES block 4k+7 - store result
476 aesmc v1.16b, v1.16b // AES block 4k+5 - round 0
477 rev64 v6.16b, v6.16b // GHASH block 4k+2 (t0, t1, and t2 free)
479 aesmc v2.16b, v2.16b // AES block 4k+6 - round 0
480 fmov d3, x10 // CTR block 4k+3
482 aesmc v0.16b, v0.16b // AES block 4k+4 - round 0
483 rev64 v4.16b, v4.16b // GHASH block 4k (only t0 is free)
484 fmov v3.d[1], x9 // CTR block 4k+3
487 aesmc v2.16b, v2.16b // AES block 4k+6 - round 1
489 aesmc v0.16b, v0.16b // AES block 4k+4 - round 1
491 rev64 v5.16b, v5.16b // GHASH block 4k+1 (t0 and t1 free)
493 aesmc v2.16b, v2.16b // AES block 4k+6 - round 2
495 aesmc v3.16b, v3.16b // AES block 4k+7 - round 0
496 mov d10, v17.d[1] // GHASH block 4k - mid
498 aesmc v1.16b, v1.16b // AES block 4k+5 - round 1
499 pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low
500 mov d8, v4.d[1] // GHASH block 4k - mid
501 pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high
503 aesmc v2.16b, v2.16b // AES block 4k+6 - round 3
505 aesmc v1.16b, v1.16b // AES block 4k+5 - round 2
506 eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid
508 aesmc v0.16b, v0.16b // AES block 4k+4 - round 2
510 aesmc v3.16b, v3.16b // AES block 4k+7 - round 1
512 aesmc v1.16b, v1.16b // AES block 4k+5 - round 3
513 pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid
514 pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high
515 pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low
517 aesmc v3.16b, v3.16b // AES block 4k+7 - round 2
518 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high
519 mov d4, v5.d[1] // GHASH block 4k+1 - mid
521 aesmc v0.16b, v0.16b // AES block 4k+4 - round 3
522 eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low
524 aesmc v3.16b, v3.16b // AES block 4k+7 - round 3
525 eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid
526 mov d8, v6.d[1] // GHASH block 4k+2 - mid
528 aesmc v0.16b, v0.16b // AES block 4k+4 - round 4
529 …rev64 v7.16b, v7.16b // GHASH block 4k+3 (t0, t1, t2 and t3 fre…
531 aesmc v3.16b, v3.16b // AES block 4k+7 - round 4
532 pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid
533 eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid
534 add w12, w12, #1 // CTR block 4k+3
535 pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low
537 aesmc v3.16b, v3.16b // AES block 4k+7 - round 5
539 aesmc v2.16b, v2.16b // AES block 4k+6 - round 4
540 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid
541 pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high
542 eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low
543 ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid
545 aesmc v2.16b, v2.16b // AES block 4k+6 - round 5
546 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high
547 mov d4, v7.d[1] // GHASH block 4k+3 - mid
549 aesmc v1.16b, v1.16b // AES block 4k+5 - round 4
550 pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid
551 eor v4.8b, v4.8b, v7.8b // GHASH block 4k+3 - mid
552 pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high
554 aesmc v1.16b, v1.16b // AES block 4k+5 - round 5
555 pmull v4.1q, v4.1d, v16.1d // GHASH block 4k+3 - mid
556 eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid
558 aesmc v0.16b, v0.16b // AES block 4k+4 - round 5
560 aesmc v1.16b, v1.16b // AES block 4k+5 - round 6
562 aesmc v2.16b, v2.16b // AES block 4k+6 - round 6
564 aesmc v0.16b, v0.16b // AES block 4k+4 - round 6
567 aesmc v3.16b, v3.16b // AES block 4k+7 - round 6
569 aesmc v1.16b, v1.16b // AES block 4k+5 - round 7
570 eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high
572 aesmc v0.16b, v0.16b // AES block 4k+4 - round 7
574 aesmc v3.16b, v3.16b // AES block 4k+7 - round 7
577 aesmc v1.16b, v1.16b // AES block 4k+5 - round 8
578 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+3 - mid
579 pmull v6.1q, v7.1d, v12.1d // GHASH block 4k+3 - low
581 aesmc v3.16b, v3.16b // AES block 4k+7 - round 8
582 cmp x17, #12 // setup flags for AES-128/192/256 check
584 aesmc v0.16b, v0.16b // AES block 4k+4 - round 8
585 eor v11.16b, v11.16b, v6.16b // GHASH block 4k+3 - low
587 aesmc v2.16b, v2.16b // AES block 4k+6 - round 7
590 aesmc v2.16b, v2.16b // AES block 4k+6 - round 8
594 b.lt .Lenc_finish_prepretail // branch if AES-128
597 aesmc v1.16b, v1.16b // AES block 4k+5 - round 9
599 aesmc v3.16b, v3.16b // AES block 4k+7 - round 9
601 aesmc v0.16b, v0.16b // AES block 4k+4 - round 9
603 aesmc v2.16b, v2.16b // AES block 4k+6 - round 9
605 aesmc v3.16b, v3.16b // AES block 4k+7 - round 10
607 aesmc v1.16b, v1.16b // AES block 4k+5 - round 10
609 aesmc v0.16b, v0.16b // AES block 4k+4 - round 10
611 aesmc v2.16b, v2.16b // AES block 4k+6 - round 10
612 b.eq .Lenc_finish_prepretail // branch if AES-192
615 aesmc v1.16b, v1.16b // AES block 4k+5 - round 11
617 aesmc v0.16b, v0.16b // AES block 4k+4 - round 11
619 aesmc v3.16b, v3.16b // AES block 4k+7 - round 11
621 aesmc v2.16b, v2.16b // AES block 4k+6 - round 11
623 aesmc v1.16b, v1.16b // AES block 4k+5 - round 12
625 aesmc v0.16b, v0.16b // AES block 4k+4 - round 12
627 aesmc v3.16b, v3.16b // AES block 4k+7 - round 12
629 aesmc v2.16b, v2.16b // AES block 4k+6 - round 12
636 aese v1.16b, v31.16b // AES block 4k+5 - round N-1
638 aese v3.16b, v31.16b // AES block 4k+7 - round N-1
639 aese v0.16b, v31.16b // AES block 4k+4 - round N-1
640 aese v2.16b, v31.16b // AES block 4k+6 - round N-1
646 ldp x6, x7, [x0], #16 // AES block 4k+4 - load plaintext
647 eor x6, x6, x13 // AES block 4k+4 - round N low
648 eor x7, x7, x14 // AES block 4k+4 - round N high
650 fmov d4, x6 // AES block 4k+4 - mov low
651 fmov v4.d[1], x7 // AES block 4k+4 - mov high
652 eor v5.16b, v4.16b, v0.16b // AES block 4k+4 - result
669 st1 { v5.16b}, [x2], #16 // AES final-3 block - store result
670 ldp x6, x7, [x0], #16 // AES final-2 block - load input low & high
671 rev64 v4.16b, v5.16b // GHASH final-3 block
672 eor x6, x6, x13 // AES final-2 block - round N low
674 eor x7, x7, x14 // AES final-2 block - round N high
675 mov d22, v4.d[1] // GHASH final-3 block - mid
676 fmov d5, x6 // AES final-2 block - mov low
677 fmov v5.d[1], x7 // AES final-2 block - mov high
678 eor v22.8b, v22.8b, v4.8b // GHASH final-3 block - mid
680 mov d10, v17.d[1] // GHASH final-3 block - mid
681 pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low
682 pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high
683 pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid
684 eor v5.16b, v5.16b, v1.16b // AES final-2 block - result
686 st1 { v5.16b}, [x2], #16 // AES final-2 block - store result
687 ldp x6, x7, [x0], #16 // AES final-1 block - load input low & high
688 rev64 v4.16b, v5.16b // GHASH final-2 block
689 eor x6, x6, x13 // AES final-1 block - round N low
691 fmov d5, x6 // AES final-1 block - mov low
692 eor x7, x7, x14 // AES final-1 block - round N high
693 fmov v5.d[1], x7 // AES final-1 block - mov high
695 pmull2 v20.1q, v4.2d, v14.2d // GHASH final-2 block - high
696 mov d22, v4.d[1] // GHASH final-2 block - mid
697 pmull v21.1q, v4.1d, v14.1d // GHASH final-2 block - low
698 eor v22.8b, v22.8b, v4.8b // GHASH final-2 block - mid
699 eor v5.16b, v5.16b, v2.16b // AES final-1 block - result
700 eor v9.16b, v9.16b, v20.16b // GHASH final-2 block - high
701 pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid
702 eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low
703 eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid
705 st1 { v5.16b}, [x2], #16 // AES final-1 block - store result
706 rev64 v4.16b, v5.16b // GHASH final-1 block
707 ldp x6, x7, [x0], #16 // AES final block - load input low & high
710 eor x6, x6, x13 // AES final block - round N low
711 mov d22, v4.d[1] // GHASH final-1 block - mid
712 pmull2 v20.1q, v4.2d, v13.2d // GHASH final-1 block - high
713 eor x7, x7, x14 // AES final block - round N high
714 eor v22.8b, v22.8b, v4.8b // GHASH final-1 block - mid
715 eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high
716 ins v22.d[1], v22.d[0] // GHASH final-1 block - mid
717 fmov d5, x6 // AES final block - mov low
718 fmov v5.d[1], x7 // AES final block - mov high
719 pmull2 v22.1q, v22.2d, v16.2d // GHASH final-1 block - mid
720 pmull v21.1q, v4.1d, v13.1d // GHASH final-1 block - low
721 eor v5.16b, v5.16b, v3.16b // AES final block - result
722 eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
723 eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low
727 sub x1, x1, #128 // bit_length -= 128
728 neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128])
729 … // load existing bytes where the possibly partial last block is to be stored
732 lsr x14, x14, x1 // rkN_h is mask for top 64b of last block
736 fmov d0, x6 // ctr0b is mask for last block
738 …and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in …
739 rev64 v4.16b, v5.16b // GHASH final block
742 pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high
743 mov d8, v4.d[1] // GHASH final block - mid
745 pmull v21.1q, v4.1d, v12.1d // GHASH final block - low
746 eor v9.16b, v9.16b, v20.16b // GHASH final block - high
747 eor v8.8b, v8.8b, v4.8b // GHASH final block - mid
748 pmull v8.1q, v8.1d, v16.1d // GHASH final block - mid
749 eor v11.16b, v11.16b, v21.16b // GHASH final block - low
750 eor v10.16b, v10.16b, v8.16b // GHASH final block - mid
752 eor v4.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up
754 eor v10.16b, v10.16b, v4.16b // MODULO - karatsuba tidy up
755 pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid
756 ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment
757 eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid
758 eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid
759 pmull v9.1q, v10.1d, v8.1d // MODULO - mid 64b align with low
760 ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment
763 eor v11.16b, v11.16b, v9.16b // MODULO - fold into low
764 eor v11.16b, v11.16b, v10.16b // MODULO - fold into low
779 .size aes_gcm_enc_kernel,.-aes_gcm_enc_kernel
786 stp x29, x30, [sp, #-128]!
800 ldr q31, [x19, #-16] // load round N-1 keys
805 sub x5, x5, #1 // byte_len - 1
817 fmov d3, x10 // CTR block 3
818 rev w9, w12 // CTR block 1
819 add w12, w12, #1 // CTR block 1
820 fmov d1, x10 // CTR block 1
821 orr x9, x11, x9, lsl #32 // CTR block 1
822 … // special case vector load initial counter so we can start first AES block as quickly as poss…
823 fmov v1.d[1], x9 // CTR block 1
824 rev w9, w12 // CTR block 2
825 add w12, w12, #1 // CTR block 2
826 fmov d2, x10 // CTR block 2
827 orr x9, x11, x9, lsl #32 // CTR block 2
828 fmov v2.d[1], x9 // CTR block 2
829 rev w9, w12 // CTR block 3
830 orr x9, x11, x9, lsl #32 // CTR block 3
832 fmov v3.d[1], x9 // CTR block 3
833 add w12, w12, #1 // CTR block 3
837 aesmc v0.16b, v0.16b // AES block 0 - round 0
841 aesmc v3.16b, v3.16b // AES block 3 - round 0
845 aesmc v1.16b, v1.16b // AES block 1 - round 0
849 aesmc v2.16b, v2.16b // AES block 2 - round 0
852 aesmc v0.16b, v0.16b // AES block 0 - round 1
854 aesmc v1.16b, v1.16b // AES block 1 - round 1
859 aesmc v2.16b, v2.16b // AES block 2 - round 1
862 aesmc v3.16b, v3.16b // AES block 3 - round 1
865 aesmc v0.16b, v0.16b // AES block 0 - round 2
869 aesmc v2.16b, v2.16b // AES block 2 - round 2
872 aesmc v3.16b, v3.16b // AES block 3 - round 2
874 aesmc v0.16b, v0.16b // AES block 0 - round 3
876 aesmc v1.16b, v1.16b // AES block 1 - round 2
878 aesmc v3.16b, v3.16b // AES block 3 - round 3
880 aesmc v0.16b, v0.16b // AES block 0 - round 4
882 aesmc v2.16b, v2.16b // AES block 2 - round 3
884 aesmc v1.16b, v1.16b // AES block 1 - round 3
886 aesmc v3.16b, v3.16b // AES block 3 - round 4
888 aesmc v2.16b, v2.16b // AES block 2 - round 4
890 aesmc v1.16b, v1.16b // AES block 1 - round 4
892 aesmc v3.16b, v3.16b // AES block 3 - round 5
894 aesmc v0.16b, v0.16b // AES block 0 - round 5
896 aesmc v1.16b, v1.16b // AES block 1 - round 5
898 aesmc v2.16b, v2.16b // AES block 2 - round 5
900 aesmc v0.16b, v0.16b // AES block 0 - round 6
902 aesmc v3.16b, v3.16b // AES block 3 - round 6
903 cmp x17, #12 // setup flags for AES-128/192/256 check
905 aesmc v1.16b, v1.16b // AES block 1 - round 6
907 aesmc v2.16b, v2.16b // AES block 2 - round 6
909 aesmc v0.16b, v0.16b // AES block 0 - round 7
911 aesmc v1.16b, v1.16b // AES block 1 - round 7
913 aesmc v3.16b, v3.16b // AES block 3 - round 7
915 aesmc v0.16b, v0.16b // AES block 0 - round 8
917 aesmc v2.16b, v2.16b // AES block 2 - round 7
919 aesmc v3.16b, v3.16b // AES block 3 - round 8
921 aesmc v1.16b, v1.16b // AES block 1 - round 8
924 aesmc v2.16b, v2.16b // AES block 2 - round 8
925 b.lt .Ldec_finish_first_blocks // branch if AES-128
928 aesmc v0.16b, v0.16b // AES block 0 - round 9
930 aesmc v1.16b, v1.16b // AES block 1 - round 9
932 aesmc v3.16b, v3.16b // AES block 3 - round 9
934 aesmc v2.16b, v2.16b // AES block 2 - round 9
936 aesmc v0.16b, v0.16b // AES block 0 - round 10
938 aesmc v1.16b, v1.16b // AES block 1 - round 10
940 aesmc v3.16b, v3.16b // AES block 3 - round 10
942 aesmc v2.16b, v2.16b // AES block 2 - round 10
943 b.eq .Ldec_finish_first_blocks // branch if AES-192
946 aesmc v0.16b, v0.16b // AES block 0 - round 11
948 aesmc v3.16b, v3.16b // AES block 3 - round 11
950 aesmc v1.16b, v1.16b // AES block 1 - round 11
952 aesmc v2.16b, v2.16b // AES block 2 - round 11
954 aesmc v1.16b, v1.16b // AES block 1 - round 12
956 aesmc v0.16b, v0.16b // AES block 0 - round 12
958 aesmc v2.16b, v2.16b // AES block 2 - round 12
960 aesmc v3.16b, v3.16b // AES block 3 - round 12
969 aese v1.16b, v31.16b // AES block 1 - round N-1
970 aese v2.16b, v31.16b // AES block 2 - round N-1
972 aese v3.16b, v31.16b // AES block 3 - round N-1
973 aese v0.16b, v31.16b // AES block 0 - round N-1
976 ldr q4, [x0, #0] // AES block 0 - load ciphertext
977 ldr q5, [x0, #16] // AES block 1 - load ciphertext
978 rev w9, w12 // CTR block 4
979 eor v0.16b, v4.16b, v0.16b // AES block 0 - result
980 eor v1.16b, v5.16b, v1.16b // AES block 1 - result
981 rev64 v5.16b, v5.16b // GHASH block 1
982 ldr q7, [x0, #48] // AES block 3 - load ciphertext
983 mov x7, v0.d[1] // AES block 0 - mov high
984 mov x6, v0.d[0] // AES block 0 - mov low
985 rev64 v4.16b, v4.16b // GHASH block 0
986 add w12, w12, #1 // CTR block 4
987 fmov d0, x10 // CTR block 4
988 orr x9, x11, x9, lsl #32 // CTR block 4
989 fmov v0.d[1], x9 // CTR block 4
990 rev w9, w12 // CTR block 5
991 add w12, w12, #1 // CTR block 5
992 mov x19, v1.d[0] // AES block 1 - mov low
993 orr x9, x11, x9, lsl #32 // CTR block 5
994 mov x20, v1.d[1] // AES block 1 - mov high
995 eor x7, x7, x14 // AES block 0 - round N high
996 eor x6, x6, x13 // AES block 0 - round N low
997 stp x6, x7, [x2], #16 // AES block 0 - store result
998 fmov d1, x10 // CTR block 5
999 ldr q6, [x0, #32] // AES block 2 - load ciphertext
1001 fmov v1.d[1], x9 // CTR block 5
1002 rev w9, w12 // CTR block 6
1003 add w12, w12, #1 // CTR block 6
1004 eor x19, x19, x13 // AES block 1 - round N low
1005 orr x9, x11, x9, lsl #32 // CTR block 6
1006 eor x20, x20, x14 // AES block 1 - round N high
1007 stp x19, x20, [x2], #16 // AES block 1 - store result
1008 eor v2.16b, v6.16b, v2.16b // AES block 2 - result
1013 mov x21, v2.d[0] // AES block 4k+2 - mov low
1015 eor v3.16b, v7.16b, v3.16b // AES block 4k+3 - result
1017 aesmc v0.16b, v0.16b // AES block 4k+4 - round 0
1018 mov x22, v2.d[1] // AES block 4k+2 - mov high
1020 aesmc v1.16b, v1.16b // AES block 4k+5 - round 0
1021 fmov d2, x10 // CTR block 4k+6
1022 fmov v2.d[1], x9 // CTR block 4k+6
1024 rev w9, w12 // CTR block 4k+7
1026 aesmc v0.16b, v0.16b // AES block 4k+4 - round 1
1027 mov x24, v3.d[1] // AES block 4k+3 - mov high
1029 aesmc v1.16b, v1.16b // AES block 4k+5 - round 1
1030 mov x23, v3.d[0] // AES block 4k+3 - mov low
1031 pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high
1032 mov d8, v4.d[1] // GHASH block 4k - mid
1033 fmov d3, x10 // CTR block 4k+7
1035 aesmc v0.16b, v0.16b // AES block 4k+4 - round 2
1036 orr x9, x11, x9, lsl #32 // CTR block 4k+7
1038 aesmc v2.16b, v2.16b // AES block 4k+6 - round 0
1039 fmov v3.d[1], x9 // CTR block 4k+7
1041 aesmc v1.16b, v1.16b // AES block 4k+5 - round 2
1042 eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid
1044 aesmc v0.16b, v0.16b // AES block 4k+4 - round 3
1045 eor x22, x22, x14 // AES block 4k+2 - round N high
1047 aesmc v2.16b, v2.16b // AES block 4k+6 - round 1
1048 mov d10, v17.d[1] // GHASH block 4k - mid
1050 aesmc v1.16b, v1.16b // AES block 4k+5 - round 3
1051 rev64 v6.16b, v6.16b // GHASH block 4k+2
1053 aesmc v3.16b, v3.16b // AES block 4k+7 - round 0
1054 eor x21, x21, x13 // AES block 4k+2 - round N low
1056 aesmc v2.16b, v2.16b // AES block 4k+6 - round 2
1057 stp x21, x22, [x2], #16 // AES block 4k+2 - store result
1058 pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low
1059 pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high
1061 aesmc v2.16b, v2.16b // AES block 4k+6 - round 3
1062 rev64 v7.16b, v7.16b // GHASH block 4k+3
1063 pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid
1064 eor x23, x23, x13 // AES block 4k+3 - round N low
1065 pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low
1066 eor x24, x24, x14 // AES block 4k+3 - round N high
1067 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high
1069 aesmc v2.16b, v2.16b // AES block 4k+6 - round 4
1071 aesmc v3.16b, v3.16b // AES block 4k+7 - round 1
1072 mov d4, v5.d[1] // GHASH block 4k+1 - mid
1074 aesmc v0.16b, v0.16b // AES block 4k+4 - round 4
1075 eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low
1077 aesmc v2.16b, v2.16b // AES block 4k+6 - round 5
1078 add w12, w12, #1 // CTR block 4k+7
1080 aesmc v3.16b, v3.16b // AES block 4k+7 - round 2
1081 mov d8, v6.d[1] // GHASH block 4k+2 - mid
1083 aesmc v1.16b, v1.16b // AES block 4k+5 - round 4
1084 eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid
1085 pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low
1087 aesmc v3.16b, v3.16b // AES block 4k+7 - round 3
1088 eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid
1090 aesmc v1.16b, v1.16b // AES block 4k+5 - round 5
1092 aesmc v0.16b, v0.16b // AES block 4k+4 - round 5
1093 eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low
1094 pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid
1095 rev w9, w12 // CTR block 4k+8
1097 aesmc v1.16b, v1.16b // AES block 4k+5 - round 6
1098 ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid
1100 aesmc v0.16b, v0.16b // AES block 4k+4 - round 6
1101 add w12, w12, #1 // CTR block 4k+8
1103 aesmc v3.16b, v3.16b // AES block 4k+7 - round 4
1105 aesmc v1.16b, v1.16b // AES block 4k+5 - round 7
1106 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid
1108 aesmc v0.16b, v0.16b // AES block 4k+4 - round 7
1109 pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high
1110 mov d6, v7.d[1] // GHASH block 4k+3 - mid
1112 aesmc v3.16b, v3.16b // AES block 4k+7 - round 5
1113 pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid
1115 aesmc v0.16b, v0.16b // AES block 4k+4 - round 8
1116 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high
1118 aesmc v3.16b, v3.16b // AES block 4k+7 - round 6
1119 pmull v4.1q, v7.1d, v12.1d // GHASH block 4k+3 - low
1120 orr x9, x11, x9, lsl #32 // CTR block 4k+8
1121 eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid
1122 pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high
1123 cmp x17, #12 // setup flags for AES-128/192/256 check
1124 eor v6.8b, v6.8b, v7.8b // GHASH block 4k+3 - mid
1126 aesmc v1.16b, v1.16b // AES block 4k+5 - round 8
1128 aesmc v2.16b, v2.16b // AES block 4k+6 - round 6
1129 eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high
1130 pmull v6.1q, v6.1d, v16.1d // GHASH block 4k+3 - mid
1133 aesmc v2.16b, v2.16b // AES block 4k+6 - round 7
1134 eor v11.16b, v11.16b, v4.16b // GHASH block 4k+3 - low
1136 aesmc v3.16b, v3.16b // AES block 4k+7 - round 7
1139 aesmc v2.16b, v2.16b // AES block 4k+6 - round 8
1140 eor v10.16b, v10.16b, v6.16b // GHASH block 4k+3 - mid
1142 aesmc v3.16b, v3.16b // AES block 4k+7 - round 8
1143 b.lt .Ldec_main_loop_continue // branch if AES-128
1146 aesmc v0.16b, v0.16b // AES block 4k+4 - round 9
1148 aesmc v2.16b, v2.16b // AES block 4k+6 - round 9
1150 aesmc v1.16b, v1.16b // AES block 4k+5 - round 9
1152 aesmc v3.16b, v3.16b // AES block 4k+7 - round 9
1154 aesmc v0.16b, v0.16b // AES block 4k+4 - round 10
1156 aesmc v1.16b, v1.16b // AES block 4k+5 - round 10
1158 aesmc v2.16b, v2.16b // AES block 4k+6 - round 10
1160 aesmc v3.16b, v3.16b // AES block 4k+7 - round 10
1161 b.eq .Ldec_main_loop_continue // branch if AES-192
1164 aesmc v0.16b, v0.16b // AES block 4k+4 - round 11
1166 aesmc v1.16b, v1.16b // AES block 4k+5 - round 11
1168 aesmc v2.16b, v2.16b // AES block 4k+6 - round 11
1170 aesmc v3.16b, v3.16b // AES block 4k+7 - round 11
1172 aesmc v0.16b, v0.16b // AES block 4k+4 - round 12
1174 aesmc v1.16b, v1.16b // AES block 4k+5 - round 12
1176 aesmc v2.16b, v2.16b // AES block 4k+6 - round 12
1178 aesmc v3.16b, v3.16b // AES block 4k+7 - round 12
1181 pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid
1182 eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up
1183 ldr q4, [x0, #0] // AES block 4k+4 - load ciphertext
1184 aese v0.16b, v31.16b // AES block 4k+4 - round N-1
1185 ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment
1186 eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up
1187 ldr q5, [x0, #16] // AES block 4k+5 - load ciphertext
1188 eor v0.16b, v4.16b, v0.16b // AES block 4k+4 - result
1189 stp x23, x24, [x2], #16 // AES block 4k+3 - store result
1190 eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid
1191 ldr q7, [x0, #48] // AES block 4k+7 - load ciphertext
1192 ldr q6, [x0, #32] // AES block 4k+6 - load ciphertext
1193 mov x7, v0.d[1] // AES block 4k+4 - mov high
1194 eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid
1195 aese v1.16b, v31.16b // AES block 4k+5 - round N-1
1197 mov x6, v0.d[0] // AES block 4k+4 - mov low
1198 fmov d0, x10 // CTR block 4k+8
1199 fmov v0.d[1], x9 // CTR block 4k+8
1200 pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low
1201 eor v1.16b, v5.16b, v1.16b // AES block 4k+5 - result
1202 rev w9, w12 // CTR block 4k+9
1203 aese v2.16b, v31.16b // AES block 4k+6 - round N-1
1204 orr x9, x11, x9, lsl #32 // CTR block 4k+9
1206 add w12, w12, #1 // CTR block 4k+9
1207 eor x6, x6, x13 // AES block 4k+4 - round N low
1208 eor x7, x7, x14 // AES block 4k+4 - round N high
1209 mov x20, v1.d[1] // AES block 4k+5 - mov high
1210 eor v2.16b, v6.16b, v2.16b // AES block 4k+6 - result
1211 eor v11.16b, v11.16b, v8.16b // MODULO - fold into low
1212 mov x19, v1.d[0] // AES block 4k+5 - mov low
1213 fmov d1, x10 // CTR block 4k+9
1214 ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment
1215 fmov v1.d[1], x9 // CTR block 4k+9
1216 rev w9, w12 // CTR block 4k+10
1217 add w12, w12, #1 // CTR block 4k+10
1218 aese v3.16b, v31.16b // AES block 4k+7 - round N-1
1219 orr x9, x11, x9, lsl #32 // CTR block 4k+10
1220 rev64 v5.16b, v5.16b // GHASH block 4k+5
1221 eor x20, x20, x14 // AES block 4k+5 - round N high
1222 stp x6, x7, [x2], #16 // AES block 4k+4 - store result
1223 eor x19, x19, x13 // AES block 4k+5 - round N low
1224 stp x19, x20, [x2], #16 // AES block 4k+5 - store result
1225 rev64 v4.16b, v4.16b // GHASH block 4k+4
1226 eor v11.16b, v11.16b, v10.16b // MODULO - fold into low
1231 mov x21, v2.d[0] // AES block 4k+2 - mov low
1232 eor v3.16b, v7.16b, v3.16b // AES block 4k+3 - result
1234 aesmc v0.16b, v0.16b // AES block 4k+4 - round 0
1235 mov x22, v2.d[1] // AES block 4k+2 - mov high
1237 aesmc v1.16b, v1.16b // AES block 4k+5 - round 0
1238 fmov d2, x10 // CTR block 4k+6
1239 fmov v2.d[1], x9 // CTR block 4k+6
1240 rev w9, w12 // CTR block 4k+7
1242 rev64 v6.16b, v6.16b // GHASH block 4k+2
1243 orr x9, x11, x9, lsl #32 // CTR block 4k+7
1244 mov x23, v3.d[0] // AES block 4k+3 - mov low
1246 aesmc v1.16b, v1.16b // AES block 4k+5 - round 1
1247 mov x24, v3.d[1] // AES block 4k+3 - mov high
1248 pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low
1249 mov d8, v4.d[1] // GHASH block 4k - mid
1250 fmov d3, x10 // CTR block 4k+7
1251 pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high
1252 fmov v3.d[1], x9 // CTR block 4k+7
1254 aesmc v2.16b, v2.16b // AES block 4k+6 - round 0
1255 mov d10, v17.d[1] // GHASH block 4k - mid
1257 aesmc v0.16b, v0.16b // AES block 4k+4 - round 1
1258 eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid
1259 pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high
1261 aesmc v2.16b, v2.16b // AES block 4k+6 - round 1
1262 rev64 v7.16b, v7.16b // GHASH block 4k+3
1264 aesmc v3.16b, v3.16b // AES block 4k+7 - round 0
1265 pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid
1266 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high
1267 pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low
1269 aesmc v3.16b, v3.16b // AES block 4k+7 - round 1
1270 mov d4, v5.d[1] // GHASH block 4k+1 - mid
1272 aesmc v0.16b, v0.16b // AES block 4k+4 - round 2
1274 aesmc v1.16b, v1.16b // AES block 4k+5 - round 2
1275 eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low
1277 aesmc v2.16b, v2.16b // AES block 4k+6 - round 2
1279 aesmc v0.16b, v0.16b // AES block 4k+4 - round 3
1280 mov d8, v6.d[1] // GHASH block 4k+2 - mid
1282 aesmc v3.16b, v3.16b // AES block 4k+7 - round 2
1283 eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid
1284 pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low
1286 aesmc v0.16b, v0.16b // AES block 4k+4 - round 4
1288 aesmc v3.16b, v3.16b // AES block 4k+7 - round 3
1289 eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid
1290 pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid
1292 aesmc v0.16b, v0.16b // AES block 4k+4 - round 5
1293 eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low
1295 aesmc v3.16b, v3.16b // AES block 4k+7 - round 4
1296 pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high
1297 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid
1298 pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high
1300 aesmc v3.16b, v3.16b // AES block 4k+7 - round 5
1301 ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid
1303 aesmc v2.16b, v2.16b // AES block 4k+6 - round 3
1305 aesmc v1.16b, v1.16b // AES block 4k+5 - round 3
1306 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high
1307 pmull v4.1q, v7.1d, v12.1d // GHASH block 4k+3 - low
1309 aesmc v2.16b, v2.16b // AES block 4k+6 - round 4
1310 mov d6, v7.d[1] // GHASH block 4k+3 - mid
1312 aesmc v1.16b, v1.16b // AES block 4k+5 - round 4
1313 pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid
1315 aesmc v2.16b, v2.16b // AES block 4k+6 - round 5
1316 eor v6.8b, v6.8b, v7.8b // GHASH block 4k+3 - mid
1318 aesmc v1.16b, v1.16b // AES block 4k+5 - round 5
1320 aesmc v3.16b, v3.16b // AES block 4k+7 - round 6
1321 eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid
1323 aesmc v2.16b, v2.16b // AES block 4k+6 - round 6
1325 aesmc v0.16b, v0.16b // AES block 4k+4 - round 6
1328 aesmc v1.16b, v1.16b // AES block 4k+5 - round 6
1329 eor v11.16b, v11.16b, v4.16b // GHASH block 4k+3 - low
1330 pmull v6.1q, v6.1d, v16.1d // GHASH block 4k+3 - mid
1332 aesmc v3.16b, v3.16b // AES block 4k+7 - round 7
1333 cmp x17, #12 // setup flags for AES-128/192/256 check
1334 eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high
1336 aesmc v1.16b, v1.16b // AES block 4k+5 - round 7
1338 aesmc v0.16b, v0.16b // AES block 4k+4 - round 7
1339 eor v10.16b, v10.16b, v6.16b // GHASH block 4k+3 - mid
1341 aesmc v3.16b, v3.16b // AES block 4k+7 - round 8
1343 aesmc v2.16b, v2.16b // AES block 4k+6 - round 7
1344 eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up
1346 aesmc v1.16b, v1.16b // AES block 4k+5 - round 8
1348 aesmc v0.16b, v0.16b // AES block 4k+4 - round 8
1351 aesmc v2.16b, v2.16b // AES block 4k+6 - round 8
1352 b.lt .Ldec_finish_prepretail // branch if AES-128
1355 aesmc v1.16b, v1.16b // AES block 4k+5 - round 9
1357 aesmc v2.16b, v2.16b // AES block 4k+6 - round 9
1359 aesmc v3.16b, v3.16b // AES block 4k+7 - round 9
1361 aesmc v0.16b, v0.16b // AES block 4k+4 - round 9
1363 aesmc v2.16b, v2.16b // AES block 4k+6 - round 10
1365 aesmc v3.16b, v3.16b // AES block 4k+7 - round 10
1367 aesmc v0.16b, v0.16b // AES block 4k+4 - round 10
1369 aesmc v1.16b, v1.16b // AES block 4k+5 - round 10
1370 b.eq .Ldec_finish_prepretail // branch if AES-192
1373 aesmc v2.16b, v2.16b // AES block 4k+6 - round 11
1375 aesmc v0.16b, v0.16b // AES block 4k+4 - round 11
1377 aesmc v1.16b, v1.16b // AES block 4k+5 - round 11
1379 aesmc v2.16b, v2.16b // AES block 4k+6 - round 12
1381 aesmc v3.16b, v3.16b // AES block 4k+7 - round 11
1383 aesmc v1.16b, v1.16b // AES block 4k+5 - round 12
1385 aesmc v0.16b, v0.16b // AES block 4k+4 - round 12
1387 aesmc v3.16b, v3.16b // AES block 4k+7 - round 12
1390 eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up
1391 pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid
1392 ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment
1393 eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid
1394 eor x22, x22, x14 // AES block 4k+2 - round N high
1395 eor x23, x23, x13 // AES block 4k+3 - round N low
1396 eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid
1397 add w12, w12, #1 // CTR block 4k+7
1398 eor x21, x21, x13 // AES block 4k+2 - round N low
1399 pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low
1400 eor x24, x24, x14 // AES block 4k+3 - round N high
1401 stp x21, x22, [x2], #16 // AES block 4k+2 - store result
1402 ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment
1403 stp x23, x24, [x2], #16 // AES block 4k+3 - store result
1405 eor v11.16b, v11.16b, v8.16b // MODULO - fold into low
1406 aese v1.16b, v31.16b // AES block 4k+5 - round N-1
1407 aese v0.16b, v31.16b // AES block 4k+4 - round N-1
1408 aese v3.16b, v31.16b // AES block 4k+7 - round N-1
1409 aese v2.16b, v31.16b // AES block 4k+6 - round N-1
1410 eor v11.16b, v11.16b, v10.16b // MODULO - fold into low
1414 ld1 { v5.16b}, [x0], #16 // AES block 4k+4 - load ciphertext
1415 eor v0.16b, v5.16b, v0.16b // AES block 4k+4 - result
1416 mov x6, v0.d[0] // AES block 4k+4 - mov low
1417 mov x7, v0.d[1] // AES block 4k+4 - mov high
1420 eor x6, x6, x13 // AES block 4k+4 - round N low
1421 eor x7, x7, x14 // AES block 4k+4 - round N high
1438 rev64 v4.16b, v5.16b // GHASH final-3 block
1439 ld1 { v5.16b}, [x0], #16 // AES final-2 block - load ciphertext
1440 stp x6, x7, [x2], #16 // AES final-3 block - store result
1441 mov d10, v17.d[1] // GHASH final-3 block - mid
1443 eor v0.16b, v5.16b, v1.16b // AES final-2 block - result
1444 mov d22, v4.d[1] // GHASH final-3 block - mid
1445 mov x6, v0.d[0] // AES final-2 block - mov low
1446 mov x7, v0.d[1] // AES final-2 block - mov high
1447 eor v22.8b, v22.8b, v4.8b // GHASH final-3 block - mid
1449 pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high
1450 pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid
1451 eor x6, x6, x13 // AES final-2 block - round N low
1452 pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low
1453 eor x7, x7, x14 // AES final-2 block - round N high
1455 rev64 v4.16b, v5.16b // GHASH final-2 block
1456 ld1 { v5.16b}, [x0], #16 // AES final-1 block - load ciphertext
1458 stp x6, x7, [x2], #16 // AES final-2 block - store result
1459 eor v0.16b, v5.16b, v2.16b // AES final-1 block - result
1460 mov d22, v4.d[1] // GHASH final-2 block - mid
1461 pmull v21.1q, v4.1d, v14.1d // GHASH final-2 block - low
1462 pmull2 v20.1q, v4.2d, v14.2d // GHASH final-2 block - high
1463 eor v22.8b, v22.8b, v4.8b // GHASH final-2 block - mid
1464 mov x6, v0.d[0] // AES final-1 block - mov low
1465 mov x7, v0.d[1] // AES final-1 block - mov high
1466 eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low
1468 pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid
1469 eor v9.16b, v9.16b, v20.16b // GHASH final-2 block - high
1470 eor x6, x6, x13 // AES final-1 block - round N low
1471 eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid
1472 eor x7, x7, x14 // AES final-1 block - round N high
1474 stp x6, x7, [x2], #16 // AES final-1 block - store result
1475 rev64 v4.16b, v5.16b // GHASH final-1 block
1476 ld1 { v5.16b}, [x0], #16 // AES final block - load ciphertext
1479 mov d22, v4.d[1] // GHASH final-1 block - mid
1480 eor v0.16b, v5.16b, v3.16b // AES final block - result
1481 pmull2 v20.1q, v4.2d, v13.2d // GHASH final-1 block - high
1482 eor v22.8b, v22.8b, v4.8b // GHASH final-1 block - mid
1483 pmull v21.1q, v4.1d, v13.1d // GHASH final-1 block - low
1484 mov x6, v0.d[0] // AES final block - mov low
1485 ins v22.d[1], v22.d[0] // GHASH final-1 block - mid
1486 mov x7, v0.d[1] // AES final block - mov high
1487 pmull2 v22.1q, v22.2d, v16.2d // GHASH final-1 block - mid
1488 eor x6, x6, x13 // AES final block - round N low
1489 eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low
1490 eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high
1491 eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
1492 eor x7, x7, x14 // AES final block - round N high
1496 sub x1, x1, #128 // bit_length -= 128
1499 neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128])
1501 lsr x14, x14, x1 // rkN_h is mask for top 64b of last block
1505 fmov d0, x9 // ctr0b is mask for last block
1514 …and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in…
1515 rev64 v4.16b, v5.16b // GHASH final block
1517 pmull v21.1q, v4.1d, v12.1d // GHASH final block - low
1518 mov d8, v4.d[1] // GHASH final block - mid
1519 eor v8.8b, v8.8b, v4.8b // GHASH final block - mid
1520 pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high
1521 pmull v8.1q, v8.1d, v16.1d // GHASH final block - mid
1522 eor v9.16b, v9.16b, v20.16b // GHASH final block - high
1523 eor v11.16b, v11.16b, v21.16b // GHASH final block - low
1524 eor v10.16b, v10.16b, v8.16b // GHASH final block - mid
1526 eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up
1528 eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up
1529 pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid
1530 ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment
1531 eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid
1532 eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid
1533 pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low
1534 ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment
1535 eor v11.16b, v11.16b, v8.16b // MODULO - fold into low
1538 eor v11.16b, v11.16b, v10.16b // MODULO - fold into low
1553 .size aes_gcm_dec_kernel,.-aes_gcm_dec_kernel