Lines Matching +full:- +full:b
1 // This file is generated from a similarly-named Perl script in the BoringSSL
10 .arch armv8-a+crypto
18 stp x29, x30, [sp, #-128]!
32 ldr q31, [x19, #-16] // load round N-1 keys
37 ld1 { v0.16b}, [x16] // special case vector load initial counter so we can start first AES block as quickly as possible
38 sub x5, x5, #1 // byte_len - 1
48 aese v0.16b, v18.16b
49 aesmc v0.16b, v0.16b // AES block 0 - round 0
63 aese v0.16b, v19.16b
64 aesmc v0.16b, v0.16b // AES block 0 - round 1
67 aese v1.16b, v18.16b
68 aesmc v1.16b, v1.16b // AES block 1 - round 0
70 aese v0.16b, v20.16b
71 aesmc v0.16b, v0.16b // AES block 0 - round 2
73 aese v2.16b, v18.16b
74 aesmc v2.16b, v2.16b // AES block 2 - round 0
76 aese v1.16b, v19.16b
77 aesmc v1.16b, v1.16b // AES block 1 - round 1
79 ext v14.16b, v14.16b, v14.16b, #8
80 aese v3.16b, v18.16b
81 aesmc v3.16b, v3.16b // AES block 3 - round 0
82 aese v2.16b, v19.16b
83 aesmc v2.16b, v2.16b // AES block 2 - round 1
85 aese v1.16b, v20.16b
86 aesmc v1.16b, v1.16b // AES block 1 - round 2
88 ext v13.16b, v13.16b, v13.16b, #8
89 aese v3.16b, v19.16b
90 aesmc v3.16b, v3.16b // AES block 3 - round 1
92 aese v2.16b, v20.16b
93 aesmc v2.16b, v2.16b // AES block 2 - round 2
95 ext v15.16b, v15.16b, v15.16b, #8
96 aese v1.16b, v21.16b
97 aesmc v1.16b, v1.16b // AES block 1 - round 3
99 aese v3.16b, v20.16b
100 aesmc v3.16b, v3.16b // AES block 3 - round 2
102 aese v2.16b, v21.16b
103 aesmc v2.16b, v2.16b // AES block 2 - round 3
105 aese v0.16b, v21.16b
106 aesmc v0.16b, v0.16b // AES block 0 - round 3
107 aese v3.16b, v21.16b
108 aesmc v3.16b, v3.16b // AES block 3 - round 3
109 ld1 { v11.16b}, [x3]
110 ext v11.16b, v11.16b, v11.16b, #8
111 rev64 v11.16b, v11.16b
112 aese v2.16b, v22.16b
113 aesmc v2.16b, v2.16b // AES block 2 - round 4
114 aese v0.16b, v22.16b
115 aesmc v0.16b, v0.16b // AES block 0 - round 4
116 aese v1.16b, v22.16b
117 aesmc v1.16b, v1.16b // AES block 1 - round 4
118 aese v3.16b, v22.16b
119 aesmc v3.16b, v3.16b // AES block 3 - round 4
120 cmp x17, #12 // setup flags for AES-128/192/256 check
121 aese v0.16b, v23.16b
122 aesmc v0.16b, v0.16b // AES block 0 - round 5
123 aese v1.16b, v23.16b
124 aesmc v1.16b, v1.16b // AES block 1 - round 5
125 aese v3.16b, v23.16b
126 aesmc v3.16b, v3.16b // AES block 3 - round 5
127 aese v2.16b, v23.16b
128 aesmc v2.16b, v2.16b // AES block 2 - round 5
129 aese v1.16b, v24.16b
130 aesmc v1.16b, v1.16b // AES block 1 - round 6
132 aese v3.16b, v24.16b
133 aesmc v3.16b, v3.16b // AES block 3 - round 6
135 aese v0.16b, v24.16b
136 aesmc v0.16b, v0.16b // AES block 0 - round 6
138 ext v12.16b, v12.16b, v12.16b, #8
139 aese v2.16b, v24.16b
140 aesmc v2.16b, v2.16b // AES block 2 - round 6
142 aese v1.16b, v25.16b
143 aesmc v1.16b, v1.16b // AES block 1 - round 7
145 aese v0.16b, v25.16b
146 aesmc v0.16b, v0.16b // AES block 0 - round 7
147 aese v2.16b, v25.16b
148 aesmc v2.16b, v2.16b // AES block 2 - round 7
149 aese v3.16b, v25.16b
150 aesmc v3.16b, v3.16b // AES block 3 - round 7
152 aese v1.16b, v26.16b
153 aesmc v1.16b, v1.16b // AES block 1 - round 8
154 aese v2.16b, v26.16b
155 aesmc v2.16b, v2.16b // AES block 2 - round 8
156 aese v3.16b, v26.16b
157 aesmc v3.16b, v3.16b // AES block 3 - round 8
158 aese v0.16b, v26.16b
159 aesmc v0.16b, v0.16b // AES block 0 - round 8
160 b.lt .Lenc_finish_first_blocks // branch if AES-128
162 aese v1.16b, v27.16b
163 aesmc v1.16b, v1.16b // AES block 1 - round 9
164 aese v2.16b, v27.16b
165 aesmc v2.16b, v2.16b // AES block 2 - round 9
166 aese v3.16b, v27.16b
167 aesmc v3.16b, v3.16b // AES block 3 - round 9
168 aese v0.16b, v27.16b
169 aesmc v0.16b, v0.16b // AES block 0 - round 9
170 aese v1.16b, v28.16b
171 aesmc v1.16b, v1.16b // AES block 1 - round 10
172 aese v2.16b, v28.16b
173 aesmc v2.16b, v2.16b // AES block 2 - round 10
174 aese v3.16b, v28.16b
175 aesmc v3.16b, v3.16b // AES block 3 - round 10
176 aese v0.16b, v28.16b
177 aesmc v0.16b, v0.16b // AES block 0 - round 10
178 b.eq .Lenc_finish_first_blocks // branch if AES-192
180 aese v1.16b, v29.16b
181 aesmc v1.16b, v1.16b // AES block 1 - round 11
182 aese v2.16b, v29.16b
183 aesmc v2.16b, v2.16b // AES block 2 - round 11
184 aese v0.16b, v29.16b
185 aesmc v0.16b, v0.16b // AES block 0 - round 11
186 aese v3.16b, v29.16b
187 aesmc v3.16b, v3.16b // AES block 3 - round 11
188 aese v1.16b, v30.16b
189 aesmc v1.16b, v1.16b // AES block 1 - round 12
190 aese v2.16b, v30.16b
191 aesmc v2.16b, v2.16b // AES block 2 - round 12
192 aese v0.16b, v30.16b
193 aesmc v0.16b, v0.16b // AES block 0 - round 12
194 aese v3.16b, v30.16b
195 aesmc v3.16b, v3.16b // AES block 3 - round 12
199 eor v17.16b, v17.16b, v9.16b // h4k | h3k
200 aese v2.16b, v31.16b // AES block 2 - round N-1
202 aese v1.16b, v31.16b // AES block 1 - round N-1
203 aese v0.16b, v31.16b // AES block 0 - round N-1
204 aese v3.16b, v31.16b // AES block 3 - round N-1
205 eor v16.16b, v16.16b, v8.16b // h2k | h1k
206 b.ge .Lenc_tail // handle tail
208 ldp x19, x20, [x0, #16] // AES block 1 - load plaintext
210 ldp x6, x7, [x0, #0] // AES block 0 - load plaintext
211 ldp x23, x24, [x0, #48] // AES block 3 - load plaintext
212 ldp x21, x22, [x0, #32] // AES block 2 - load plaintext
214 eor x19, x19, x13 // AES block 1 - round N low
215 eor x20, x20, x14 // AES block 1 - round N high
216 fmov d5, x19 // AES block 1 - mov low
217 eor x6, x6, x13 // AES block 0 - round N low
218 eor x7, x7, x14 // AES block 0 - round N high
219 eor x24, x24, x14 // AES block 3 - round N high
220 fmov d4, x6 // AES block 0 - mov low
222 fmov v4.d[1], x7 // AES block 0 - mov high
223 eor x23, x23, x13 // AES block 3 - round N low
224 eor x21, x21, x13 // AES block 2 - round N low
225 fmov v5.d[1], x20 // AES block 1 - mov high
226 fmov d6, x21 // AES block 2 - mov low
229 fmov d7, x23 // AES block 3 - mov low
230 eor x22, x22, x14 // AES block 2 - round N high
231 fmov v6.d[1], x22 // AES block 2 - mov high
232 eor v4.16b, v4.16b, v0.16b // AES block 0 - result
237 eor v5.16b, v5.16b, v1.16b // AES block 1 - result
242 st1 { v4.16b}, [x2], #16 // AES block 0 - store result
243 fmov v7.d[1], x24 // AES block 3 - mov high
245 eor v6.16b, v6.16b, v2.16b // AES block 2 - result
246 st1 { v5.16b}, [x2], #16 // AES block 1 - store result
250 st1 { v6.16b}, [x2], #16 // AES block 2 - store result
253 eor v7.16b, v7.16b, v3.16b // AES block 3 - result
254 st1 { v7.16b}, [x2], #16 // AES block 3 - store result
255 b.ge .Lenc_prepretail // do prepretail
258 aese v0.16b, v18.16b
259 aesmc v0.16b, v0.16b // AES block 4k+4 - round 0
260 rev64 v4.16b, v4.16b // GHASH block 4k (only t0 is free)
261 aese v1.16b, v18.16b
262 aesmc v1.16b, v1.16b // AES block 4k+5 - round 0
264 aese v2.16b, v18.16b
265 aesmc v2.16b, v2.16b // AES block 4k+6 - round 0
266 ext v11.16b, v11.16b, v11.16b, #8 // PRE 0
267 aese v0.16b, v19.16b
268 aesmc v0.16b, v0.16b // AES block 4k+4 - round 1
270 aese v1.16b, v19.16b
271 aesmc v1.16b, v1.16b // AES block 4k+5 - round 1
272 ldp x23, x24, [x0, #48] // AES block 4k+7 - load plaintext
273 aese v2.16b, v19.16b
274 aesmc v2.16b, v2.16b // AES block 4k+6 - round 1
275 ldp x21, x22, [x0, #32] // AES block 4k+6 - load plaintext
276 aese v0.16b, v20.16b
277 aesmc v0.16b, v0.16b // AES block 4k+4 - round 2
278 eor v4.16b, v4.16b, v11.16b // PRE 1
279 aese v1.16b, v20.16b
280 aesmc v1.16b, v1.16b // AES block 4k+5 - round 2
281 aese v3.16b, v18.16b
282 aesmc v3.16b, v3.16b // AES block 4k+7 - round 0
283 eor x23, x23, x13 // AES block 4k+7 - round N low
284 aese v0.16b, v21.16b
285 aesmc v0.16b, v0.16b // AES block 4k+4 - round 3
286 mov d10, v17.d[1] // GHASH block 4k - mid
287 pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high
288 eor x22, x22, x14 // AES block 4k+6 - round N high
289 mov d8, v4.d[1] // GHASH block 4k - mid
290 aese v3.16b, v19.16b
291 aesmc v3.16b, v3.16b // AES block 4k+7 - round 1
292 rev64 v5.16b, v5.16b // GHASH block 4k+1 (t0 and t1 free)
293 aese v0.16b, v22.16b
294 aesmc v0.16b, v0.16b // AES block 4k+4 - round 4
295 pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low
296 eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid
297 aese v2.16b, v20.16b
298 aesmc v2.16b, v2.16b // AES block 4k+6 - round 2
299 aese v0.16b, v23.16b
300 aesmc v0.16b, v0.16b // AES block 4k+4 - round 5
301 rev64 v7.16b, v7.16b // GHASH block 4k+3 (t0, t1, t2 and t3 free)
302 pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high
303 pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid
304 rev64 v6.16b, v6.16b // GHASH block 4k+2 (t0, t1, and t2 free)
305 pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low
306 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high
307 mov d4, v5.d[1] // GHASH block 4k+1 - mid
308 aese v1.16b, v21.16b
309 aesmc v1.16b, v1.16b // AES block 4k+5 - round 3
310 aese v3.16b, v20.16b
311 aesmc v3.16b, v3.16b // AES block 4k+7 - round 2
312 eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low
313 aese v2.16b, v21.16b
314 aesmc v2.16b, v2.16b // AES block 4k+6 - round 3
315 aese v1.16b, v22.16b
316 aesmc v1.16b, v1.16b // AES block 4k+5 - round 4
317 mov d8, v6.d[1] // GHASH block 4k+2 - mid
318 aese v3.16b, v21.16b
319 aesmc v3.16b, v3.16b // AES block 4k+7 - round 3
320 eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid
321 aese v2.16b, v22.16b
322 aesmc v2.16b, v2.16b // AES block 4k+6 - round 4
323 aese v0.16b, v24.16b
324 aesmc v0.16b, v0.16b // AES block 4k+4 - round 6
325 eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid
326 aese v3.16b, v22.16b
327 aesmc v3.16b, v3.16b // AES block 4k+7 - round 4
328 pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid
329 aese v0.16b, v25.16b
330 aesmc v0.16b, v0.16b // AES block 4k+4 - round 7
331 aese v3.16b, v23.16b
332 aesmc v3.16b, v3.16b // AES block 4k+7 - round 5
333 ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid
334 aese v1.16b, v23.16b
335 aesmc v1.16b, v1.16b // AES block 4k+5 - round 5
336 aese v0.16b, v26.16b
337 aesmc v0.16b, v0.16b // AES block 4k+4 - round 8
338 aese v2.16b, v23.16b
339 aesmc v2.16b, v2.16b // AES block 4k+6 - round 5
340 aese v1.16b, v24.16b
341 aesmc v1.16b, v1.16b // AES block 4k+5 - round 6
342 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid
343 pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high
344 pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low
345 aese v1.16b, v25.16b
346 aesmc v1.16b, v1.16b // AES block 4k+5 - round 7
347 pmull v6.1q, v7.1d, v12.1d // GHASH block 4k+3 - low
348 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high
349 aese v3.16b, v24.16b
350 aesmc v3.16b, v3.16b // AES block 4k+7 - round 6
351 ldp x19, x20, [x0, #16] // AES block 4k+5 - load plaintext
352 aese v1.16b, v26.16b
353 aesmc v1.16b, v1.16b // AES block 4k+5 - round 8
354 mov d4, v7.d[1] // GHASH block 4k+3 - mid
355 aese v2.16b, v24.16b
356 aesmc v2.16b, v2.16b // AES block 4k+6 - round 6
357 eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low
358 pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid
359 pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high
360 eor v4.8b, v4.8b, v7.8b // GHASH block 4k+3 - mid
361 aese v2.16b, v25.16b
362 aesmc v2.16b, v2.16b // AES block 4k+6 - round 7
363 eor x19, x19, x13 // AES block 4k+5 - round N low
364 aese v2.16b, v26.16b
365 aesmc v2.16b, v2.16b // AES block 4k+6 - round 8
366 eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid
367 aese v3.16b, v25.16b
368 aesmc v3.16b, v3.16b // AES block 4k+7 - round 7
369 eor x21, x21, x13 // AES block 4k+6 - round N low
370 aese v3.16b, v26.16b
371 aesmc v3.16b, v3.16b // AES block 4k+7 - round 8
372 movi v8.8b, #0xc2
373 pmull v4.1q, v4.1d, v16.1d // GHASH block 4k+3 - mid
374 eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high
375 cmp x17, #12 // setup flags for AES-128/192/256 check
376 fmov d5, x19 // AES block 4k+5 - mov low
377 ldp x6, x7, [x0, #0] // AES block 4k+4 - load plaintext
378 b.lt .Lenc_main_loop_continue // branch if AES-128
380 aese v1.16b, v27.16b
381 aesmc v1.16b, v1.16b // AES block 4k+5 - round 9
382 aese v0.16b, v27.16b
383 aesmc v0.16b, v0.16b // AES block 4k+4 - round 9
384 aese v2.16b, v27.16b
385 aesmc v2.16b, v2.16b // AES block 4k+6 - round 9
386 aese v3.16b, v27.16b
387 aesmc v3.16b, v3.16b // AES block 4k+7 - round 9
388 aese v0.16b, v28.16b
389 aesmc v0.16b, v0.16b // AES block 4k+4 - round 10
390 aese v1.16b, v28.16b
391 aesmc v1.16b, v1.16b // AES block 4k+5 - round 10
392 aese v2.16b, v28.16b
393 aesmc v2.16b, v2.16b // AES block 4k+6 - round 10
394 aese v3.16b, v28.16b
395 aesmc v3.16b, v3.16b // AES block 4k+7 - round 10
396 b.eq .Lenc_main_loop_continue // branch if AES-192
398 aese v0.16b, v29.16b
399 aesmc v0.16b, v0.16b // AES block 4k+4 - round 11
400 aese v1.16b, v29.16b
401 aesmc v1.16b, v1.16b // AES block 4k+5 - round 11
402 aese v2.16b, v29.16b
403 aesmc v2.16b, v2.16b // AES block 4k+6 - round 11
404 aese v3.16b, v29.16b
405 aesmc v3.16b, v3.16b // AES block 4k+7 - round 11
406 aese v1.16b, v30.16b
407 aesmc v1.16b, v1.16b // AES block 4k+5 - round 12
408 aese v0.16b, v30.16b
409 aesmc v0.16b, v0.16b // AES block 4k+4 - round 12
410 aese v2.16b, v30.16b
411 aesmc v2.16b, v2.16b // AES block 4k+6 - round 12
412 aese v3.16b, v30.16b
413 aesmc v3.16b, v3.16b // AES block 4k+7 - round 12
417 eor v11.16b, v11.16b, v6.16b // GHASH block 4k+3 - low
418 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+3 - mid
420 eor v4.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up
422 pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid
424 ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment
425 eor x6, x6, x13 // AES block 4k+4 - round N low
426 eor v10.16b, v10.16b, v4.16b // MODULO - karatsuba tidy up
427 eor x7, x7, x14 // AES block 4k+4 - round N high
428 fmov d4, x6 // AES block 4k+4 - mov low
430 eor v7.16b, v9.16b, v7.16b // MODULO - fold into mid
431 eor x20, x20, x14 // AES block 4k+5 - round N high
432 eor x24, x24, x14 // AES block 4k+7 - round N high
434 aese v0.16b, v31.16b // AES block 4k+4 - round N-1
435 fmov v4.d[1], x7 // AES block 4k+4 - mov high
436 eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid
437 fmov d7, x23 // AES block 4k+7 - mov low
438 aese v1.16b, v31.16b // AES block 4k+5 - round N-1
439 fmov v5.d[1], x20 // AES block 4k+5 - mov high
440 fmov d6, x21 // AES block 4k+6 - mov low
442 fmov v6.d[1], x22 // AES block 4k+6 - mov high
443 pmull v9.1q, v10.1d, v8.1d // MODULO - mid 64b align with low
444 eor v4.16b, v4.16b, v0.16b // AES block 4k+4 - result
449 eor v5.16b, v5.16b, v1.16b // AES block 4k+5 - result
453 aese v2.16b, v31.16b // AES block 4k+6 - round N-1
455 st1 { v4.16b}, [x2], #16 // AES block 4k+4 - store result
457 eor v11.16b, v11.16b, v9.16b // MODULO - fold into low
458 fmov v7.d[1], x24 // AES block 4k+7 - mov high
459 ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment
460 st1 { v5.16b}, [x2], #16 // AES block 4k+5 - store result
462 aese v3.16b, v31.16b // AES block 4k+7 - round N-1
463 eor v6.16b, v6.16b, v2.16b // AES block 4k+6 - result
465 st1 { v6.16b}, [x2], #16 // AES block 4k+6 - store result
468 eor v11.16b, v11.16b, v10.16b // MODULO - fold into low
470 eor v7.16b, v7.16b, v3.16b // AES block 4k+7 - result
471 st1 { v7.16b}, [x2], #16 // AES block 4k+7 - store result
472 b.lt .Lenc_main_loop
475 aese v1.16b, v18.16b
476 aesmc v1.16b, v1.16b // AES block 4k+5 - round 0
477 rev64 v6.16b, v6.16b // GHASH block 4k+2 (t0, t1, and t2 free)
478 aese v2.16b, v18.16b
479 aesmc v2.16b, v2.16b // AES block 4k+6 - round 0
481 aese v0.16b, v18.16b
482 aesmc v0.16b, v0.16b // AES block 4k+4 - round 0
483 rev64 v4.16b, v4.16b // GHASH block 4k (only t0 is free)
485 ext v11.16b, v11.16b, v11.16b, #8 // PRE 0
486 aese v2.16b, v19.16b
487 aesmc v2.16b, v2.16b // AES block 4k+6 - round 1
488 aese v0.16b, v19.16b
489 aesmc v0.16b, v0.16b // AES block 4k+4 - round 1
490 eor v4.16b, v4.16b, v11.16b // PRE 1
491 rev64 v5.16b, v5.16b // GHASH block 4k+1 (t0 and t1 free)
492 aese v2.16b, v20.16b
493 aesmc v2.16b, v2.16b // AES block 4k+6 - round 2
494 aese v3.16b, v18.16b
495 aesmc v3.16b, v3.16b // AES block 4k+7 - round 0
496 mov d10, v17.d[1] // GHASH block 4k - mid
497 aese v1.16b, v19.16b
498 aesmc v1.16b, v1.16b // AES block 4k+5 - round 1
499 pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low
500 mov d8, v4.d[1] // GHASH block 4k - mid
501 pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high
502 aese v2.16b, v21.16b
503 aesmc v2.16b, v2.16b // AES block 4k+6 - round 3
504 aese v1.16b, v20.16b
505 aesmc v1.16b, v1.16b // AES block 4k+5 - round 2
506 eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid
507 aese v0.16b, v20.16b
508 aesmc v0.16b, v0.16b // AES block 4k+4 - round 2
509 aese v3.16b, v19.16b
510 aesmc v3.16b, v3.16b // AES block 4k+7 - round 1
511 aese v1.16b, v21.16b
512 aesmc v1.16b, v1.16b // AES block 4k+5 - round 3
513 pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid
514 pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high
515 pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low
516 aese v3.16b, v20.16b
517 aesmc v3.16b, v3.16b // AES block 4k+7 - round 2
518 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high
519 mov d4, v5.d[1] // GHASH block 4k+1 - mid
520 aese v0.16b, v21.16b
521 aesmc v0.16b, v0.16b // AES block 4k+4 - round 3
522 eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low
523 aese v3.16b, v21.16b
524 aesmc v3.16b, v3.16b // AES block 4k+7 - round 3
525 eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid
526 mov d8, v6.d[1] // GHASH block 4k+2 - mid
527 aese v0.16b, v22.16b
528 aesmc v0.16b, v0.16b // AES block 4k+4 - round 4
529 rev64 v7.16b, v7.16b // GHASH block 4k+3 (t0, t1, t2 and t3 free)
530 aese v3.16b, v22.16b
531 aesmc v3.16b, v3.16b // AES block 4k+7 - round 4
532 pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid
533 eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid
535 pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low
536 aese v3.16b, v23.16b
537 aesmc v3.16b, v3.16b // AES block 4k+7 - round 5
538 aese v2.16b, v22.16b
539 aesmc v2.16b, v2.16b // AES block 4k+6 - round 4
540 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid
541 pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high
542 eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low
543 ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid
544 aese v2.16b, v23.16b
545 aesmc v2.16b, v2.16b // AES block 4k+6 - round 5
546 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high
547 mov d4, v7.d[1] // GHASH block 4k+3 - mid
548 aese v1.16b, v22.16b
549 aesmc v1.16b, v1.16b // AES block 4k+5 - round 4
550 pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid
551 eor v4.8b, v4.8b, v7.8b // GHASH block 4k+3 - mid
552 pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high
553 aese v1.16b, v23.16b
554 aesmc v1.16b, v1.16b // AES block 4k+5 - round 5
555 pmull v4.1q, v4.1d, v16.1d // GHASH block 4k+3 - mid
556 eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid
557 aese v0.16b, v23.16b
558 aesmc v0.16b, v0.16b // AES block 4k+4 - round 5
559 aese v1.16b, v24.16b
560 aesmc v1.16b, v1.16b // AES block 4k+5 - round 6
561 aese v2.16b, v24.16b
562 aesmc v2.16b, v2.16b // AES block 4k+6 - round 6
563 aese v0.16b, v24.16b
564 aesmc v0.16b, v0.16b // AES block 4k+4 - round 6
565 movi v8.8b, #0xc2
566 aese v3.16b, v24.16b
567 aesmc v3.16b, v3.16b // AES block 4k+7 - round 6
568 aese v1.16b, v25.16b
569 aesmc v1.16b, v1.16b // AES block 4k+5 - round 7
570 eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high
571 aese v0.16b, v25.16b
572 aesmc v0.16b, v0.16b // AES block 4k+4 - round 7
573 aese v3.16b, v25.16b
574 aesmc v3.16b, v3.16b // AES block 4k+7 - round 7
576 aese v1.16b, v26.16b
577 aesmc v1.16b, v1.16b // AES block 4k+5 - round 8
578 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+3 - mid
579 pmull v6.1q, v7.1d, v12.1d // GHASH block 4k+3 - low
580 aese v3.16b, v26.16b
581 aesmc v3.16b, v3.16b // AES block 4k+7 - round 8
582 cmp x17, #12 // setup flags for AES-128/192/256 check
583 aese v0.16b, v26.16b
584 aesmc v0.16b, v0.16b // AES block 4k+4 - round 8
585 eor v11.16b, v11.16b, v6.16b // GHASH block 4k+3 - low
586 aese v2.16b, v25.16b
587 aesmc v2.16b, v2.16b // AES block 4k+6 - round 7
588 eor v10.16b, v10.16b, v9.16b // karatsuba tidy up
589 aese v2.16b, v26.16b
590 aesmc v2.16b, v2.16b // AES block 4k+6 - round 8
592 ext v9.16b, v9.16b, v9.16b, #8
593 eor v10.16b, v10.16b, v11.16b
594 b.lt .Lenc_finish_prepretail // branch if AES-128
596 aese v1.16b, v27.16b
597 aesmc v1.16b, v1.16b // AES block 4k+5 - round 9
598 aese v3.16b, v27.16b
599 aesmc v3.16b, v3.16b // AES block 4k+7 - round 9
600 aese v0.16b, v27.16b
601 aesmc v0.16b, v0.16b // AES block 4k+4 - round 9
602 aese v2.16b, v27.16b
603 aesmc v2.16b, v2.16b // AES block 4k+6 - round 9
604 aese v3.16b, v28.16b
605 aesmc v3.16b, v3.16b // AES block 4k+7 - round 10
606 aese v1.16b, v28.16b
607 aesmc v1.16b, v1.16b // AES block 4k+5 - round 10
608 aese v0.16b, v28.16b
609 aesmc v0.16b, v0.16b // AES block 4k+4 - round 10
610 aese v2.16b, v28.16b
611 aesmc v2.16b, v2.16b // AES block 4k+6 - round 10
612 b.eq .Lenc_finish_prepretail // branch if AES-192
614 aese v1.16b, v29.16b
615 aesmc v1.16b, v1.16b // AES block 4k+5 - round 11
616 aese v0.16b, v29.16b
617 aesmc v0.16b, v0.16b // AES block 4k+4 - round 11
618 aese v3.16b, v29.16b
619 aesmc v3.16b, v3.16b // AES block 4k+7 - round 11
620 aese v2.16b, v29.16b
621 aesmc v2.16b, v2.16b // AES block 4k+6 - round 11
622 aese v1.16b, v30.16b
623 aesmc v1.16b, v1.16b // AES block 4k+5 - round 12
624 aese v0.16b, v30.16b
625 aesmc v0.16b, v0.16b // AES block 4k+4 - round 12
626 aese v3.16b, v30.16b
627 aesmc v3.16b, v3.16b // AES block 4k+7 - round 12
628 aese v2.16b, v30.16b
629 aesmc v2.16b, v2.16b // AES block 4k+6 - round 12
632 eor v10.16b, v10.16b, v4.16b
633 eor v10.16b, v10.16b, v9.16b
635 ext v10.16b, v10.16b, v10.16b, #8
636 aese v1.16b, v31.16b // AES block 4k+5 - round N-1
637 eor v11.16b, v11.16b, v4.16b
638 aese v3.16b, v31.16b // AES block 4k+7 - round N-1
639 aese v0.16b, v31.16b // AES block 4k+4 - round N-1
640 aese v2.16b, v31.16b // AES block 4k+6 - round N-1
641 eor v11.16b, v11.16b, v10.16b
644 ext v8.16b, v11.16b, v11.16b, #8 // prepare final partial tag
646 ldp x6, x7, [x0], #16 // AES block 4k+4 - load plaintext
647 eor x6, x6, x13 // AES block 4k+4 - round N low
648 eor x7, x7, x14 // AES block 4k+4 - round N high
650 fmov d4, x6 // AES block 4k+4 - mov low
651 fmov v4.d[1], x7 // AES block 4k+4 - mov high
652 eor v5.16b, v4.16b, v0.16b // AES block 4k+4 - result
653 b.gt .Lenc_blocks_more_than_3
655 mov v3.16b, v2.16b
656 movi v11.8b, #0
657 movi v9.8b, #0
659 mov v2.16b, v1.16b
660 movi v10.8b, #0
661 b.gt .Lenc_blocks_more_than_2
662 mov v3.16b, v1.16b
665 b.gt .Lenc_blocks_more_than_1
667 b .Lenc_blocks_less_than_1
669 st1 { v5.16b}, [x2], #16 // AES final-3 block - store result
670 ldp x6, x7, [x0], #16 // AES final-2 block - load input low & high
671 rev64 v4.16b, v5.16b // GHASH final-3 block
672 eor x6, x6, x13 // AES final-2 block - round N low
673 eor v4.16b, v4.16b, v8.16b // feed in partial tag
674 eor x7, x7, x14 // AES final-2 block - round N high
675 mov d22, v4.d[1] // GHASH final-3 block - mid
676 fmov d5, x6 // AES final-2 block - mov low
677 fmov v5.d[1], x7 // AES final-2 block - mov high
678 eor v22.8b, v22.8b, v4.8b // GHASH final-3 block - mid
679 movi v8.8b, #0 // suppress further partial tag feed in
680 mov d10, v17.d[1] // GHASH final-3 block - mid
681 pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low
682 pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high
683 pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid
684 eor v5.16b, v5.16b, v1.16b // AES final-2 block - result
686 st1 { v5.16b}, [x2], #16 // AES final-2 block - store result
687 ldp x6, x7, [x0], #16 // AES final-1 block - load input low & high
688 rev64 v4.16b, v5.16b // GHASH final-2 block
689 eor x6, x6, x13 // AES final-1 block - round N low
690 eor v4.16b, v4.16b, v8.16b // feed in partial tag
691 fmov d5, x6 // AES final-1 block - mov low
692 eor x7, x7, x14 // AES final-1 block - round N high
693 fmov v5.d[1], x7 // AES final-1 block - mov high
694 movi v8.8b, #0 // suppress further partial tag feed in
695 pmull2 v20.1q, v4.2d, v14.2d // GHASH final-2 block - high
696 mov d22, v4.d[1] // GHASH final-2 block - mid
697 pmull v21.1q, v4.1d, v14.1d // GHASH final-2 block - low
698 eor v22.8b, v22.8b, v4.8b // GHASH final-2 block - mid
699 eor v5.16b, v5.16b, v2.16b // AES final-1 block - result
700 eor v9.16b, v9.16b, v20.16b // GHASH final-2 block - high
701 pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid
702 eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low
703 eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid
705 st1 { v5.16b}, [x2], #16 // AES final-1 block - store result
706 rev64 v4.16b, v5.16b // GHASH final-1 block
707 ldp x6, x7, [x0], #16 // AES final block - load input low & high
708 eor v4.16b, v4.16b, v8.16b // feed in partial tag
709 movi v8.8b, #0 // suppress further partial tag feed in
710 eor x6, x6, x13 // AES final block - round N low
711 mov d22, v4.d[1] // GHASH final-1 block - mid
712 pmull2 v20.1q, v4.2d, v13.2d // GHASH final-1 block - high
713 eor x7, x7, x14 // AES final block - round N high
714 eor v22.8b, v22.8b, v4.8b // GHASH final-1 block - mid
715 eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high
716 ins v22.d[1], v22.d[0] // GHASH final-1 block - mid
717 fmov d5, x6 // AES final block - mov low
718 fmov v5.d[1], x7 // AES final block - mov high
719 pmull2 v22.1q, v22.2d, v16.2d // GHASH final-1 block - mid
720 pmull v21.1q, v4.1d, v13.1d // GHASH final-1 block - low
721 eor v5.16b, v5.16b, v3.16b // AES final block - result
722 eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
723 eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low
727 sub x1, x1, #128 // bit_length -= 128
728 neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128])
729 ld1 { v18.16b}, [x2] // load existing bytes where the possibly partial last block is to be stored
732 lsr x14, x14, x1 // rkN_h is mask for top 64b of last block
738 and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in invalid bytes
739 rev64 v4.16b, v5.16b // GHASH final block
740 eor v4.16b, v4.16b, v8.16b // feed in partial tag
741 bif v5.16b, v18.16b, v0.16b // insert existing bytes in top end of result before storing
742 pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high
743 mov d8, v4.d[1] // GHASH final block - mid
745 pmull v21.1q, v4.1d, v12.1d // GHASH final block - low
746 eor v9.16b, v9.16b, v20.16b // GHASH final block - high
747 eor v8.8b, v8.8b, v4.8b // GHASH final block - mid
748 pmull v8.1q, v8.1d, v16.1d // GHASH final block - mid
749 eor v11.16b, v11.16b, v21.16b // GHASH final block - low
750 eor v10.16b, v10.16b, v8.16b // GHASH final block - mid
751 movi v8.8b, #0xc2
752 eor v4.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up
754 eor v10.16b, v10.16b, v4.16b // MODULO - karatsuba tidy up
755 pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid
756 ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment
757 eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid
758 eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid
759 pmull v9.1q, v10.1d, v8.1d // MODULO - mid 64b align with low
760 ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment
762 st1 { v5.16b}, [x2] // store all 16B
763 eor v11.16b, v11.16b, v9.16b // MODULO - fold into low
764 eor v11.16b, v11.16b, v10.16b // MODULO - fold into low
765 ext v11.16b, v11.16b, v11.16b, #8
766 rev64 v11.16b, v11.16b
768 st1 { v11.16b }, [x3]
779 .size aes_gcm_enc_kernel,.-aes_gcm_enc_kernel
786 stp x29, x30, [sp, #-128]!
800 ldr q31, [x19, #-16] // load round N-1 keys
805 sub x5, x5, #1 // byte_len - 1
822 ld1 { v0.16b}, [x16] // special case vector load initial counter so we can start first AES block as quickly as possible
836 aese v0.16b, v18.16b
837 aesmc v0.16b, v0.16b // AES block 0 - round 0
839 ext v14.16b, v14.16b, v14.16b, #8
840 aese v3.16b, v18.16b
841 aesmc v3.16b, v3.16b // AES block 3 - round 0
843 ext v15.16b, v15.16b, v15.16b, #8
844 aese v1.16b, v18.16b
845 aesmc v1.16b, v1.16b // AES block 1 - round 0
847 ext v13.16b, v13.16b, v13.16b, #8
848 aese v2.16b, v18.16b
849 aesmc v2.16b, v2.16b // AES block 2 - round 0
851 aese v0.16b, v19.16b
852 aesmc v0.16b, v0.16b // AES block 0 - round 1
853 aese v1.16b, v19.16b
854 aesmc v1.16b, v1.16b // AES block 1 - round 1
855 ld1 { v11.16b}, [x3]
856 ext v11.16b, v11.16b, v11.16b, #8
857 rev64 v11.16b, v11.16b
858 aese v2.16b, v19.16b
859 aesmc v2.16b, v2.16b // AES block 2 - round 1
861 aese v3.16b, v19.16b
862 aesmc v3.16b, v3.16b // AES block 3 - round 1
864 aese v0.16b, v20.16b
865 aesmc v0.16b, v0.16b // AES block 0 - round 2
867 ext v12.16b, v12.16b, v12.16b, #8
868 aese v2.16b, v20.16b
869 aesmc v2.16b, v2.16b // AES block 2 - round 2
871 aese v3.16b, v20.16b
872 aesmc v3.16b, v3.16b // AES block 3 - round 2
873 aese v0.16b, v21.16b
874 aesmc v0.16b, v0.16b // AES block 0 - round 3
875 aese v1.16b, v20.16b
876 aesmc v1.16b, v1.16b // AES block 1 - round 2
877 aese v3.16b, v21.16b
878 aesmc v3.16b, v3.16b // AES block 3 - round 3
879 aese v0.16b, v22.16b
880 aesmc v0.16b, v0.16b // AES block 0 - round 4
881 aese v2.16b, v21.16b
882 aesmc v2.16b, v2.16b // AES block 2 - round 3
883 aese v1.16b, v21.16b
884 aesmc v1.16b, v1.16b // AES block 1 - round 3
885 aese v3.16b, v22.16b
886 aesmc v3.16b, v3.16b // AES block 3 - round 4
887 aese v2.16b, v22.16b
888 aesmc v2.16b, v2.16b // AES block 2 - round 4
889 aese v1.16b, v22.16b
890 aesmc v1.16b, v1.16b // AES block 1 - round 4
891 aese v3.16b, v23.16b
892 aesmc v3.16b, v3.16b // AES block 3 - round 5
893 aese v0.16b, v23.16b
894 aesmc v0.16b, v0.16b // AES block 0 - round 5
895 aese v1.16b, v23.16b
896 aesmc v1.16b, v1.16b // AES block 1 - round 5
897 aese v2.16b, v23.16b
898 aesmc v2.16b, v2.16b // AES block 2 - round 5
899 aese v0.16b, v24.16b
900 aesmc v0.16b, v0.16b // AES block 0 - round 6
901 aese v3.16b, v24.16b
902 aesmc v3.16b, v3.16b // AES block 3 - round 6
903 cmp x17, #12 // setup flags for AES-128/192/256 check
904 aese v1.16b, v24.16b
905 aesmc v1.16b, v1.16b // AES block 1 - round 6
906 aese v2.16b, v24.16b
907 aesmc v2.16b, v2.16b // AES block 2 - round 6
908 aese v0.16b, v25.16b
909 aesmc v0.16b, v0.16b // AES block 0 - round 7
910 aese v1.16b, v25.16b
911 aesmc v1.16b, v1.16b // AES block 1 - round 7
912 aese v3.16b, v25.16b
913 aesmc v3.16b, v3.16b // AES block 3 - round 7
914 aese v0.16b, v26.16b
915 aesmc v0.16b, v0.16b // AES block 0 - round 8
916 aese v2.16b, v25.16b
917 aesmc v2.16b, v2.16b // AES block 2 - round 7
918 aese v3.16b, v26.16b
919 aesmc v3.16b, v3.16b // AES block 3 - round 8
920 aese v1.16b, v26.16b
921 aesmc v1.16b, v1.16b // AES block 1 - round 8
923 aese v2.16b, v26.16b
924 aesmc v2.16b, v2.16b // AES block 2 - round 8
925 b.lt .Ldec_finish_first_blocks // branch if AES-128
927 aese v0.16b, v27.16b
928 aesmc v0.16b, v0.16b // AES block 0 - round 9
929 aese v1.16b, v27.16b
930 aesmc v1.16b, v1.16b // AES block 1 - round 9
931 aese v3.16b, v27.16b
932 aesmc v3.16b, v3.16b // AES block 3 - round 9
933 aese v2.16b, v27.16b
934 aesmc v2.16b, v2.16b // AES block 2 - round 9
935 aese v0.16b, v28.16b
936 aesmc v0.16b, v0.16b // AES block 0 - round 10
937 aese v1.16b, v28.16b
938 aesmc v1.16b, v1.16b // AES block 1 - round 10
939 aese v3.16b, v28.16b
940 aesmc v3.16b, v3.16b // AES block 3 - round 10
941 aese v2.16b, v28.16b
942 aesmc v2.16b, v2.16b // AES block 2 - round 10
943 b.eq .Ldec_finish_first_blocks // branch if AES-192
945 aese v0.16b, v29.16b
946 aesmc v0.16b, v0.16b // AES block 0 - round 11
947 aese v3.16b, v29.16b
948 aesmc v3.16b, v3.16b // AES block 3 - round 11
949 aese v1.16b, v29.16b
950 aesmc v1.16b, v1.16b // AES block 1 - round 11
951 aese v2.16b, v29.16b
952 aesmc v2.16b, v2.16b // AES block 2 - round 11
953 aese v1.16b, v30.16b
954 aesmc v1.16b, v1.16b // AES block 1 - round 12
955 aese v0.16b, v30.16b
956 aesmc v0.16b, v0.16b // AES block 0 - round 12
957 aese v2.16b, v30.16b
958 aesmc v2.16b, v2.16b // AES block 2 - round 12
959 aese v3.16b, v30.16b
960 aesmc v3.16b, v3.16b // AES block 3 - round 12
968 eor v17.16b, v17.16b, v9.16b // h4k | h3k
969 aese v1.16b, v31.16b // AES block 1 - round N-1
970 aese v2.16b, v31.16b // AES block 2 - round N-1
971 eor v16.16b, v16.16b, v8.16b // h2k | h1k
972 aese v3.16b, v31.16b // AES block 3 - round N-1
973 aese v0.16b, v31.16b // AES block 0 - round N-1
974 b.ge .Ldec_tail // handle tail
976 ldr q4, [x0, #0] // AES block 0 - load ciphertext
977 ldr q5, [x0, #16] // AES block 1 - load ciphertext
979 eor v0.16b, v4.16b, v0.16b // AES block 0 - result
980 eor v1.16b, v5.16b, v1.16b // AES block 1 - result
981 rev64 v5.16b, v5.16b // GHASH block 1
982 ldr q7, [x0, #48] // AES block 3 - load ciphertext
983 mov x7, v0.d[1] // AES block 0 - mov high
984 mov x6, v0.d[0] // AES block 0 - mov low
985 rev64 v4.16b, v4.16b // GHASH block 0
992 mov x19, v1.d[0] // AES block 1 - mov low
994 mov x20, v1.d[1] // AES block 1 - mov high
995 eor x7, x7, x14 // AES block 0 - round N high
996 eor x6, x6, x13 // AES block 0 - round N low
997 stp x6, x7, [x2], #16 // AES block 0 - store result
999 ldr q6, [x0, #32] // AES block 2 - load ciphertext
1004 eor x19, x19, x13 // AES block 1 - round N low
1006 eor x20, x20, x14 // AES block 1 - round N high
1007 stp x19, x20, [x2], #16 // AES block 1 - store result
1008 eor v2.16b, v6.16b, v2.16b // AES block 2 - result
1010 b.ge .Ldec_prepretail // do prepretail
1013 mov x21, v2.d[0] // AES block 4k+2 - mov low
1014 ext v11.16b, v11.16b, v11.16b, #8 // PRE 0
1015 eor v3.16b, v7.16b, v3.16b // AES block 4k+3 - result
1016 aese v0.16b, v18.16b
1017 aesmc v0.16b, v0.16b // AES block 4k+4 - round 0
1018 mov x22, v2.d[1] // AES block 4k+2 - mov high
1019 aese v1.16b, v18.16b
1020 aesmc v1.16b, v1.16b // AES block 4k+5 - round 0
1023 eor v4.16b, v4.16b, v11.16b // PRE 1
1025 aese v0.16b, v19.16b
1026 aesmc v0.16b, v0.16b // AES block 4k+4 - round 1
1027 mov x24, v3.d[1] // AES block 4k+3 - mov high
1028 aese v1.16b, v19.16b
1029 aesmc v1.16b, v1.16b // AES block 4k+5 - round 1
1030 mov x23, v3.d[0] // AES block 4k+3 - mov low
1031 pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high
1032 mov d8, v4.d[1] // GHASH block 4k - mid
1034 aese v0.16b, v20.16b
1035 aesmc v0.16b, v0.16b // AES block 4k+4 - round 2
1037 aese v2.16b, v18.16b
1038 aesmc v2.16b, v2.16b // AES block 4k+6 - round 0
1040 aese v1.16b, v20.16b
1041 aesmc v1.16b, v1.16b // AES block 4k+5 - round 2
1042 eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid
1043 aese v0.16b, v21.16b
1044 aesmc v0.16b, v0.16b // AES block 4k+4 - round 3
1045 eor x22, x22, x14 // AES block 4k+2 - round N high
1046 aese v2.16b, v19.16b
1047 aesmc v2.16b, v2.16b // AES block 4k+6 - round 1
1048 mov d10, v17.d[1] // GHASH block 4k - mid
1049 aese v1.16b, v21.16b
1050 aesmc v1.16b, v1.16b // AES block 4k+5 - round 3
1051 rev64 v6.16b, v6.16b // GHASH block 4k+2
1052 aese v3.16b, v18.16b
1053 aesmc v3.16b, v3.16b // AES block 4k+7 - round 0
1054 eor x21, x21, x13 // AES block 4k+2 - round N low
1055 aese v2.16b, v20.16b
1056 aesmc v2.16b, v2.16b // AES block 4k+6 - round 2
1057 stp x21, x22, [x2], #16 // AES block 4k+2 - store result
1058 pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low
1059 pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high
1060 aese v2.16b, v21.16b
1061 aesmc v2.16b, v2.16b // AES block 4k+6 - round 3
1062 rev64 v7.16b, v7.16b // GHASH block 4k+3
1063 pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid
1064 eor x23, x23, x13 // AES block 4k+3 - round N low
1065 pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low
1066 eor x24, x24, x14 // AES block 4k+3 - round N high
1067 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high
1068 aese v2.16b, v22.16b
1069 aesmc v2.16b, v2.16b // AES block 4k+6 - round 4
1070 aese v3.16b, v19.16b
1071 aesmc v3.16b, v3.16b // AES block 4k+7 - round 1
1072 mov d4, v5.d[1] // GHASH block 4k+1 - mid
1073 aese v0.16b, v22.16b
1074 aesmc v0.16b, v0.16b // AES block 4k+4 - round 4
1075 eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low
1076 aese v2.16b, v23.16b
1077 aesmc v2.16b, v2.16b // AES block 4k+6 - round 5
1079 aese v3.16b, v20.16b
1080 aesmc v3.16b, v3.16b // AES block 4k+7 - round 2
1081 mov d8, v6.d[1] // GHASH block 4k+2 - mid
1082 aese v1.16b, v22.16b
1083 aesmc v1.16b, v1.16b // AES block 4k+5 - round 4
1084 eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid
1085 pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low
1086 aese v3.16b, v21.16b
1087 aesmc v3.16b, v3.16b // AES block 4k+7 - round 3
1088 eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid
1089 aese v1.16b, v23.16b
1090 aesmc v1.16b, v1.16b // AES block 4k+5 - round 5
1091 aese v0.16b, v23.16b
1092 aesmc v0.16b, v0.16b // AES block 4k+4 - round 5
1093 eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low
1094 pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid
1096 aese v1.16b, v24.16b
1097 aesmc v1.16b, v1.16b // AES block 4k+5 - round 6
1098 ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid
1099 aese v0.16b, v24.16b
1100 aesmc v0.16b, v0.16b // AES block 4k+4 - round 6
1102 aese v3.16b, v22.16b
1103 aesmc v3.16b, v3.16b // AES block 4k+7 - round 4
1104 aese v1.16b, v25.16b
1105 aesmc v1.16b, v1.16b // AES block 4k+5 - round 7
1106 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid
1107 aese v0.16b, v25.16b
1108 aesmc v0.16b, v0.16b // AES block 4k+4 - round 7
1109 pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high
1110 mov d6, v7.d[1] // GHASH block 4k+3 - mid
1111 aese v3.16b, v23.16b
1112 aesmc v3.16b, v3.16b // AES block 4k+7 - round 5
1113 pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid
1114 aese v0.16b, v26.16b
1115 aesmc v0.16b, v0.16b // AES block 4k+4 - round 8
1116 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high
1117 aese v3.16b, v24.16b
1118 aesmc v3.16b, v3.16b // AES block 4k+7 - round 6
1119 pmull v4.1q, v7.1d, v12.1d // GHASH block 4k+3 - low
1121 eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid
1122 pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high
1123 cmp x17, #12 // setup flags for AES-128/192/256 check
1124 eor v6.8b, v6.8b, v7.8b // GHASH block 4k+3 - mid
1125 aese v1.16b, v26.16b
1126 aesmc v1.16b, v1.16b // AES block 4k+5 - round 8
1127 aese v2.16b, v24.16b
1128 aesmc v2.16b, v2.16b // AES block 4k+6 - round 6
1129 eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high
1130 pmull v6.1q, v6.1d, v16.1d // GHASH block 4k+3 - mid
1131 movi v8.8b, #0xc2
1132 aese v2.16b, v25.16b
1133 aesmc v2.16b, v2.16b // AES block 4k+6 - round 7
1134 eor v11.16b, v11.16b, v4.16b // GHASH block 4k+3 - low
1135 aese v3.16b, v25.16b
1136 aesmc v3.16b, v3.16b // AES block 4k+7 - round 7
1138 aese v2.16b, v26.16b
1139 aesmc v2.16b, v2.16b // AES block 4k+6 - round 8
1140 eor v10.16b, v10.16b, v6.16b // GHASH block 4k+3 - mid
1141 aese v3.16b, v26.16b
1142 aesmc v3.16b, v3.16b // AES block 4k+7 - round 8
1143 b.lt .Ldec_main_loop_continue // branch if AES-128
1145 aese v0.16b, v27.16b
1146 aesmc v0.16b, v0.16b // AES block 4k+4 - round 9
1147 aese v2.16b, v27.16b
1148 aesmc v2.16b, v2.16b // AES block 4k+6 - round 9
1149 aese v1.16b, v27.16b
1150 aesmc v1.16b, v1.16b // AES block 4k+5 - round 9
1151 aese v3.16b, v27.16b
1152 aesmc v3.16b, v3.16b // AES block 4k+7 - round 9
1153 aese v0.16b, v28.16b
1154 aesmc v0.16b, v0.16b // AES block 4k+4 - round 10
1155 aese v1.16b, v28.16b
1156 aesmc v1.16b, v1.16b // AES block 4k+5 - round 10
1157 aese v2.16b, v28.16b
1158 aesmc v2.16b, v2.16b // AES block 4k+6 - round 10
1159 aese v3.16b, v28.16b
1160 aesmc v3.16b, v3.16b // AES block 4k+7 - round 10
1161 b.eq .Ldec_main_loop_continue // branch if AES-192
1163 aese v0.16b, v29.16b
1164 aesmc v0.16b, v0.16b // AES block 4k+4 - round 11
1165 aese v1.16b, v29.16b
1166 aesmc v1.16b, v1.16b // AES block 4k+5 - round 11
1167 aese v2.16b, v29.16b
1168 aesmc v2.16b, v2.16b // AES block 4k+6 - round 11
1169 aese v3.16b, v29.16b
1170 aesmc v3.16b, v3.16b // AES block 4k+7 - round 11
1171 aese v0.16b, v30.16b
1172 aesmc v0.16b, v0.16b // AES block 4k+4 - round 12
1173 aese v1.16b, v30.16b
1174 aesmc v1.16b, v1.16b // AES block 4k+5 - round 12
1175 aese v2.16b, v30.16b
1176 aesmc v2.16b, v2.16b // AES block 4k+6 - round 12
1177 aese v3.16b, v30.16b
1178 aesmc v3.16b, v3.16b // AES block 4k+7 - round 12
1181 pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid
1182 eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up
1183 ldr q4, [x0, #0] // AES block 4k+4 - load ciphertext
1184 aese v0.16b, v31.16b // AES block 4k+4 - round N-1
1185 ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment
1186 eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up
1187 ldr q5, [x0, #16] // AES block 4k+5 - load ciphertext
1188 eor v0.16b, v4.16b, v0.16b // AES block 4k+4 - result
1189 stp x23, x24, [x2], #16 // AES block 4k+3 - store result
1190 eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid
1191 ldr q7, [x0, #48] // AES block 4k+7 - load ciphertext
1192 ldr q6, [x0, #32] // AES block 4k+6 - load ciphertext
1193 mov x7, v0.d[1] // AES block 4k+4 - mov high
1194 eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid
1195 aese v1.16b, v31.16b // AES block 4k+5 - round N-1
1197 mov x6, v0.d[0] // AES block 4k+4 - mov low
1200 pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low
1201 eor v1.16b, v5.16b, v1.16b // AES block 4k+5 - result
1203 aese v2.16b, v31.16b // AES block 4k+6 - round N-1
1207 eor x6, x6, x13 // AES block 4k+4 - round N low
1208 eor x7, x7, x14 // AES block 4k+4 - round N high
1209 mov x20, v1.d[1] // AES block 4k+5 - mov high
1210 eor v2.16b, v6.16b, v2.16b // AES block 4k+6 - result
1211 eor v11.16b, v11.16b, v8.16b // MODULO - fold into low
1212 mov x19, v1.d[0] // AES block 4k+5 - mov low
1214 ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment
1218 aese v3.16b, v31.16b // AES block 4k+7 - round N-1
1220 rev64 v5.16b, v5.16b // GHASH block 4k+5
1221 eor x20, x20, x14 // AES block 4k+5 - round N high
1222 stp x6, x7, [x2], #16 // AES block 4k+4 - store result
1223 eor x19, x19, x13 // AES block 4k+5 - round N low
1224 stp x19, x20, [x2], #16 // AES block 4k+5 - store result
1225 rev64 v4.16b, v4.16b // GHASH block 4k+4
1226 eor v11.16b, v11.16b, v10.16b // MODULO - fold into low
1227 b.lt .Ldec_main_loop
1230 ext v11.16b, v11.16b, v11.16b, #8 // PRE 0
1231 mov x21, v2.d[0] // AES block 4k+2 - mov low
1232 eor v3.16b, v7.16b, v3.16b // AES block 4k+3 - result
1233 aese v0.16b, v18.16b
1234 aesmc v0.16b, v0.16b // AES block 4k+4 - round 0
1235 mov x22, v2.d[1] // AES block 4k+2 - mov high
1236 aese v1.16b, v18.16b
1237 aesmc v1.16b, v1.16b // AES block 4k+5 - round 0
1241 eor v4.16b, v4.16b, v11.16b // PRE 1
1242 rev64 v6.16b, v6.16b // GHASH block 4k+2
1244 mov x23, v3.d[0] // AES block 4k+3 - mov low
1245 aese v1.16b, v19.16b
1246 aesmc v1.16b, v1.16b // AES block 4k+5 - round 1
1247 mov x24, v3.d[1] // AES block 4k+3 - mov high
1248 pmull v11.1q, v4.1d, v15.1d // GHASH block 4k - low
1249 mov d8, v4.d[1] // GHASH block 4k - mid
1251 pmull2 v9.1q, v4.2d, v15.2d // GHASH block 4k - high
1253 aese v2.16b, v18.16b
1254 aesmc v2.16b, v2.16b // AES block 4k+6 - round 0
1255 mov d10, v17.d[1] // GHASH block 4k - mid
1256 aese v0.16b, v19.16b
1257 aesmc v0.16b, v0.16b // AES block 4k+4 - round 1
1258 eor v8.8b, v8.8b, v4.8b // GHASH block 4k - mid
1259 pmull2 v4.1q, v5.2d, v14.2d // GHASH block 4k+1 - high
1260 aese v2.16b, v19.16b
1261 aesmc v2.16b, v2.16b // AES block 4k+6 - round 1
1262 rev64 v7.16b, v7.16b // GHASH block 4k+3
1263 aese v3.16b, v18.16b
1264 aesmc v3.16b, v3.16b // AES block 4k+7 - round 0
1265 pmull v10.1q, v8.1d, v10.1d // GHASH block 4k - mid
1266 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+1 - high
1267 pmull v8.1q, v5.1d, v14.1d // GHASH block 4k+1 - low
1268 aese v3.16b, v19.16b
1269 aesmc v3.16b, v3.16b // AES block 4k+7 - round 1
1270 mov d4, v5.d[1] // GHASH block 4k+1 - mid
1271 aese v0.16b, v20.16b
1272 aesmc v0.16b, v0.16b // AES block 4k+4 - round 2
1273 aese v1.16b, v20.16b
1274 aesmc v1.16b, v1.16b // AES block 4k+5 - round 2
1275 eor v11.16b, v11.16b, v8.16b // GHASH block 4k+1 - low
1276 aese v2.16b, v20.16b
1277 aesmc v2.16b, v2.16b // AES block 4k+6 - round 2
1278 aese v0.16b, v21.16b
1279 aesmc v0.16b, v0.16b // AES block 4k+4 - round 3
1280 mov d8, v6.d[1] // GHASH block 4k+2 - mid
1281 aese v3.16b, v20.16b
1282 aesmc v3.16b, v3.16b // AES block 4k+7 - round 2
1283 eor v4.8b, v4.8b, v5.8b // GHASH block 4k+1 - mid
1284 pmull v5.1q, v6.1d, v13.1d // GHASH block 4k+2 - low
1285 aese v0.16b, v22.16b
1286 aesmc v0.16b, v0.16b // AES block 4k+4 - round 4
1287 aese v3.16b, v21.16b
1288 aesmc v3.16b, v3.16b // AES block 4k+7 - round 3
1289 eor v8.8b, v8.8b, v6.8b // GHASH block 4k+2 - mid
1290 pmull v4.1q, v4.1d, v17.1d // GHASH block 4k+1 - mid
1291 aese v0.16b, v23.16b
1292 aesmc v0.16b, v0.16b // AES block 4k+4 - round 5
1293 eor v11.16b, v11.16b, v5.16b // GHASH block 4k+2 - low
1294 aese v3.16b, v22.16b
1295 aesmc v3.16b, v3.16b // AES block 4k+7 - round 4
1296 pmull2 v5.1q, v7.2d, v12.2d // GHASH block 4k+3 - high
1297 eor v10.16b, v10.16b, v4.16b // GHASH block 4k+1 - mid
1298 pmull2 v4.1q, v6.2d, v13.2d // GHASH block 4k+2 - high
1299 aese v3.16b, v23.16b
1300 aesmc v3.16b, v3.16b // AES block 4k+7 - round 5
1301 ins v8.d[1], v8.d[0] // GHASH block 4k+2 - mid
1302 aese v2.16b, v21.16b
1303 aesmc v2.16b, v2.16b // AES block 4k+6 - round 3
1304 aese v1.16b, v21.16b
1305 aesmc v1.16b, v1.16b // AES block 4k+5 - round 3
1306 eor v9.16b, v9.16b, v4.16b // GHASH block 4k+2 - high
1307 pmull v4.1q, v7.1d, v12.1d // GHASH block 4k+3 - low
1308 aese v2.16b, v22.16b
1309 aesmc v2.16b, v2.16b // AES block 4k+6 - round 4
1310 mov d6, v7.d[1] // GHASH block 4k+3 - mid
1311 aese v1.16b, v22.16b
1312 aesmc v1.16b, v1.16b // AES block 4k+5 - round 4
1313 pmull2 v8.1q, v8.2d, v16.2d // GHASH block 4k+2 - mid
1314 aese v2.16b, v23.16b
1315 aesmc v2.16b, v2.16b // AES block 4k+6 - round 5
1316 eor v6.8b, v6.8b, v7.8b // GHASH block 4k+3 - mid
1317 aese v1.16b, v23.16b
1318 aesmc v1.16b, v1.16b // AES block 4k+5 - round 5
1319 aese v3.16b, v24.16b
1320 aesmc v3.16b, v3.16b // AES block 4k+7 - round 6
1321 eor v10.16b, v10.16b, v8.16b // GHASH block 4k+2 - mid
1322 aese v2.16b, v24.16b
1323 aesmc v2.16b, v2.16b // AES block 4k+6 - round 6
1324 aese v0.16b, v24.16b
1325 aesmc v0.16b, v0.16b // AES block 4k+4 - round 6
1326 movi v8.8b, #0xc2
1327 aese v1.16b, v24.16b
1328 aesmc v1.16b, v1.16b // AES block 4k+5 - round 6
1329 eor v11.16b, v11.16b, v4.16b // GHASH block 4k+3 - low
1330 pmull v6.1q, v6.1d, v16.1d // GHASH block 4k+3 - mid
1331 aese v3.16b, v25.16b
1332 aesmc v3.16b, v3.16b // AES block 4k+7 - round 7
1333 cmp x17, #12 // setup flags for AES-128/192/256 check
1334 eor v9.16b, v9.16b, v5.16b // GHASH block 4k+3 - high
1335 aese v1.16b, v25.16b
1336 aesmc v1.16b, v1.16b // AES block 4k+5 - round 7
1337 aese v0.16b, v25.16b
1338 aesmc v0.16b, v0.16b // AES block 4k+4 - round 7
1339 eor v10.16b, v10.16b, v6.16b // GHASH block 4k+3 - mid
1340 aese v3.16b, v26.16b
1341 aesmc v3.16b, v3.16b // AES block 4k+7 - round 8
1342 aese v2.16b, v25.16b
1343 aesmc v2.16b, v2.16b // AES block 4k+6 - round 7
1344 eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up
1345 aese v1.16b, v26.16b
1346 aesmc v1.16b, v1.16b // AES block 4k+5 - round 8
1347 aese v0.16b, v26.16b
1348 aesmc v0.16b, v0.16b // AES block 4k+4 - round 8
1350 aese v2.16b, v26.16b
1351 aesmc v2.16b, v2.16b // AES block 4k+6 - round 8
1352 b.lt .Ldec_finish_prepretail // branch if AES-128
1354 aese v1.16b, v27.16b
1355 aesmc v1.16b, v1.16b // AES block 4k+5 - round 9
1356 aese v2.16b, v27.16b
1357 aesmc v2.16b, v2.16b // AES block 4k+6 - round 9
1358 aese v3.16b, v27.16b
1359 aesmc v3.16b, v3.16b // AES block 4k+7 - round 9
1360 aese v0.16b, v27.16b
1361 aesmc v0.16b, v0.16b // AES block 4k+4 - round 9
1362 aese v2.16b, v28.16b
1363 aesmc v2.16b, v2.16b // AES block 4k+6 - round 10
1364 aese v3.16b, v28.16b
1365 aesmc v3.16b, v3.16b // AES block 4k+7 - round 10
1366 aese v0.16b, v28.16b
1367 aesmc v0.16b, v0.16b // AES block 4k+4 - round 10
1368 aese v1.16b, v28.16b
1369 aesmc v1.16b, v1.16b // AES block 4k+5 - round 10
1370 b.eq .Ldec_finish_prepretail // branch if AES-192
1372 aese v2.16b, v29.16b
1373 aesmc v2.16b, v2.16b // AES block 4k+6 - round 11
1374 aese v0.16b, v29.16b
1375 aesmc v0.16b, v0.16b // AES block 4k+4 - round 11
1376 aese v1.16b, v29.16b
1377 aesmc v1.16b, v1.16b // AES block 4k+5 - round 11
1378 aese v2.16b, v30.16b
1379 aesmc v2.16b, v2.16b // AES block 4k+6 - round 12
1380 aese v3.16b, v29.16b
1381 aesmc v3.16b, v3.16b // AES block 4k+7 - round 11
1382 aese v1.16b, v30.16b
1383 aesmc v1.16b, v1.16b // AES block 4k+5 - round 12
1384 aese v0.16b, v30.16b
1385 aesmc v0.16b, v0.16b // AES block 4k+4 - round 12
1386 aese v3.16b, v30.16b
1387 aesmc v3.16b, v3.16b // AES block 4k+7 - round 12
1390 eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up
1391 pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid
1392 ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment
1393 eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid
1394 eor x22, x22, x14 // AES block 4k+2 - round N high
1395 eor x23, x23, x13 // AES block 4k+3 - round N low
1396 eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid
1398 eor x21, x21, x13 // AES block 4k+2 - round N low
1399 pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low
1400 eor x24, x24, x14 // AES block 4k+3 - round N high
1401 stp x21, x22, [x2], #16 // AES block 4k+2 - store result
1402 ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment
1403 stp x23, x24, [x2], #16 // AES block 4k+3 - store result
1405 eor v11.16b, v11.16b, v8.16b // MODULO - fold into low
1406 aese v1.16b, v31.16b // AES block 4k+5 - round N-1
1407 aese v0.16b, v31.16b // AES block 4k+4 - round N-1
1408 aese v3.16b, v31.16b // AES block 4k+7 - round N-1
1409 aese v2.16b, v31.16b // AES block 4k+6 - round N-1
1410 eor v11.16b, v11.16b, v10.16b // MODULO - fold into low
1414 ld1 { v5.16b}, [x0], #16 // AES block 4k+4 - load ciphertext
1415 eor v0.16b, v5.16b, v0.16b // AES block 4k+4 - result
1416 mov x6, v0.d[0] // AES block 4k+4 - mov low
1417 mov x7, v0.d[1] // AES block 4k+4 - mov high
1418 ext v8.16b, v11.16b, v11.16b, #8 // prepare final partial tag
1420 eor x6, x6, x13 // AES block 4k+4 - round N low
1421 eor x7, x7, x14 // AES block 4k+4 - round N high
1422 b.gt .Ldec_blocks_more_than_3
1424 mov v3.16b, v2.16b
1425 movi v10.8b, #0
1426 movi v11.8b, #0
1428 movi v9.8b, #0
1429 mov v2.16b, v1.16b
1430 b.gt .Ldec_blocks_more_than_2
1432 mov v3.16b, v1.16b
1434 b.gt .Ldec_blocks_more_than_1
1436 b .Ldec_blocks_less_than_1
1438 rev64 v4.16b, v5.16b // GHASH final-3 block
1439 ld1 { v5.16b}, [x0], #16 // AES final-2 block - load ciphertext
1440 stp x6, x7, [x2], #16 // AES final-3 block - store result
1441 mov d10, v17.d[1] // GHASH final-3 block - mid
1442 eor v4.16b, v4.16b, v8.16b // feed in partial tag
1443 eor v0.16b, v5.16b, v1.16b // AES final-2 block - result
1444 mov d22, v4.d[1] // GHASH final-3 block - mid
1445 mov x6, v0.d[0] // AES final-2 block - mov low
1446 mov x7, v0.d[1] // AES final-2 block - mov high
1447 eor v22.8b, v22.8b, v4.8b // GHASH final-3 block - mid
1448 movi v8.8b, #0 // suppress further partial tag feed in
1449 pmull2 v9.1q, v4.2d, v15.2d // GHASH final-3 block - high
1450 pmull v10.1q, v22.1d, v10.1d // GHASH final-3 block - mid
1451 eor x6, x6, x13 // AES final-2 block - round N low
1452 pmull v11.1q, v4.1d, v15.1d // GHASH final-3 block - low
1453 eor x7, x7, x14 // AES final-2 block - round N high
1455 rev64 v4.16b, v5.16b // GHASH final-2 block
1456 ld1 { v5.16b}, [x0], #16 // AES final-1 block - load ciphertext
1457 eor v4.16b, v4.16b, v8.16b // feed in partial tag
1458 stp x6, x7, [x2], #16 // AES final-2 block - store result
1459 eor v0.16b, v5.16b, v2.16b // AES final-1 block - result
1460 mov d22, v4.d[1] // GHASH final-2 block - mid
1461 pmull v21.1q, v4.1d, v14.1d // GHASH final-2 block - low
1462 pmull2 v20.1q, v4.2d, v14.2d // GHASH final-2 block - high
1463 eor v22.8b, v22.8b, v4.8b // GHASH final-2 block - mid
1464 mov x6, v0.d[0] // AES final-1 block - mov low
1465 mov x7, v0.d[1] // AES final-1 block - mov high
1466 eor v11.16b, v11.16b, v21.16b // GHASH final-2 block - low
1467 movi v8.8b, #0 // suppress further partial tag feed in
1468 pmull v22.1q, v22.1d, v17.1d // GHASH final-2 block - mid
1469 eor v9.16b, v9.16b, v20.16b // GHASH final-2 block - high
1470 eor x6, x6, x13 // AES final-1 block - round N low
1471 eor v10.16b, v10.16b, v22.16b // GHASH final-2 block - mid
1472 eor x7, x7, x14 // AES final-1 block - round N high
1474 stp x6, x7, [x2], #16 // AES final-1 block - store result
1475 rev64 v4.16b, v5.16b // GHASH final-1 block
1476 ld1 { v5.16b}, [x0], #16 // AES final block - load ciphertext
1477 eor v4.16b, v4.16b, v8.16b // feed in partial tag
1478 movi v8.8b, #0 // suppress further partial tag feed in
1479 mov d22, v4.d[1] // GHASH final-1 block - mid
1480 eor v0.16b, v5.16b, v3.16b // AES final block - result
1481 pmull2 v20.1q, v4.2d, v13.2d // GHASH final-1 block - high
1482 eor v22.8b, v22.8b, v4.8b // GHASH final-1 block - mid
1483 pmull v21.1q, v4.1d, v13.1d // GHASH final-1 block - low
1484 mov x6, v0.d[0] // AES final block - mov low
1485 ins v22.d[1], v22.d[0] // GHASH final-1 block - mid
1486 mov x7, v0.d[1] // AES final block - mov high
1487 pmull2 v22.1q, v22.2d, v16.2d // GHASH final-1 block - mid
1488 eor x6, x6, x13 // AES final block - round N low
1489 eor v11.16b, v11.16b, v21.16b // GHASH final-1 block - low
1490 eor v9.16b, v9.16b, v20.16b // GHASH final-1 block - high
1491 eor v10.16b, v10.16b, v22.16b // GHASH final-1 block - mid
1492 eor x7, x7, x14 // AES final block - round N high
1496 sub x1, x1, #128 // bit_length -= 128
1499 neg x1, x1 // bit_length = 128 - #bits in input (in range [1,128])
1501 lsr x14, x14, x1 // rkN_h is mask for top 64b of last block
1514 …and v5.16b, v5.16b, v0.16b // possibly partial last block has zeroes in…
1515 rev64 v4.16b, v5.16b // GHASH final block
1516 eor v4.16b, v4.16b, v8.16b // feed in partial tag
1517 pmull v21.1q, v4.1d, v12.1d // GHASH final block - low
1518 mov d8, v4.d[1] // GHASH final block - mid
1519 eor v8.8b, v8.8b, v4.8b // GHASH final block - mid
1520 pmull2 v20.1q, v4.2d, v12.2d // GHASH final block - high
1521 pmull v8.1q, v8.1d, v16.1d // GHASH final block - mid
1522 eor v9.16b, v9.16b, v20.16b // GHASH final block - high
1523 eor v11.16b, v11.16b, v21.16b // GHASH final block - low
1524 eor v10.16b, v10.16b, v8.16b // GHASH final block - mid
1525 movi v8.8b, #0xc2
1526 eor v6.16b, v11.16b, v9.16b // MODULO - karatsuba tidy up
1528 eor v10.16b, v10.16b, v6.16b // MODULO - karatsuba tidy up
1529 pmull v7.1q, v9.1d, v8.1d // MODULO - top 64b align with mid
1530 ext v9.16b, v9.16b, v9.16b, #8 // MODULO - other top alignment
1531 eor v10.16b, v10.16b, v7.16b // MODULO - fold into mid
1532 eor v10.16b, v10.16b, v9.16b // MODULO - fold into mid
1533 pmull v8.1q, v10.1d, v8.1d // MODULO - mid 64b align with low
1534 ext v10.16b, v10.16b, v10.16b, #8 // MODULO - other mid alignment
1535 eor v11.16b, v11.16b, v8.16b // MODULO - fold into low
1538 eor v11.16b, v11.16b, v10.16b // MODULO - fold into low
1539 ext v11.16b, v11.16b, v11.16b, #8
1540 rev64 v11.16b, v11.16b
1542 st1 { v11.16b }, [x3]
1553 .size aes_gcm_dec_kernel,.-aes_gcm_dec_kernel