/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

16#include "hitls_build.h"
17#ifdef HITLS_CRYPTO_SHA3
18
19#include "crypt_arm.h"
20.arch    armv8-a+crypto
21
/*
 * Register aliases for the 5x5 Keccak state matrix.
 *
 * Aij names the 64-bit lane in row i, column j; the 25 lanes live in x0~x24
 * in row-major order:
 *      A00~A04: x0~x4
 *      A10~A14: x5~x9
 *      A20~A24: x10~x14
 *      A30~A34: x15~x19
 *      A40~A44: x20~x24
 * T0~T4 (x25~x29) are temporary calculation registers.
 *
 * NOTE(review): A33 is x18, which some platforms reserve (it is only freely
 * usable on Linux-style ABIs), and T4 is x29, the frame pointer. Every routine
 * in this file that expands the round macros therefore has to save and restore
 * x19~x29 itself.
 */
A00 .req x0
A01 .req x1
A02 .req x2
A03 .req x3
A04 .req x4

A10 .req x5
A11 .req x6
A12 .req x7
A13 .req x8
A14 .req x9

A20 .req x10
A21 .req x11
A22 .req x12
A23 .req x13
A24 .req x14

A30 .req x15
A31 .req x16
A32 .req x17
A33 .req x18
A34 .req x19

A40 .req x20
A41 .req x21
A42 .req x22
A43 .req x23
A44 .req x24

T0  .req x25
T1  .req x26
T2  .req x27
T3  .req x28
T4  .req x29
62
/**
 *  Macro Description: THETA mapping function
 *  Input register:
 *      A00~A44: x0~x24 State Matrix
 *  Modify the register:
 *      A00~A44: x0~x24 State Matrix
 *        T0~T4: x25~x29 temporary calculation register
 *        Stack: 16 bytes below sp are used to park A00 and A10 while those two
 *               registers are borrowed for D[1] and D[2]; sp is back at its entry
 *               value when the macro ends.
 *  Output register:
 *      A00 and every lane of columns 1~4 hold their theta result.
 *      A10, A20, A30 and A40 still hold their PRE-theta values; their theta results
 *      are left in T0, T1, T2 and T3 (x25~x28) for the RHOPi macro to consume.
 *      T4 (x29) holds D[0].
 *  Function/Macro Call: None
 */
.macro  THETA
    // Column parity: for x in 0..4, C[x] = A[0,x] xor A[1,x] xor A[2,x] xor A[3,x] xor A[4,x]
    // T0..T4 accumulate C[0]..C[4]; the five chains are interleaved on purpose.
    eor T0, A00, A10
    eor T1, A01, A11
    eor T2, A02, A12
    eor T3, A03, A13
    eor T4, A04, A14

    stp A00, A10, [sp, #-16]!   // Park A00 and A10 so the registers can be borrowed below

    eor T0, T0, A20
    eor T1, T1, A21
    eor T2, T2, A22
    eor T3, T3, A23
    eor T4, T4, A24

    eor T0, T0, A30
    eor T1, T1, A31
    eor T2, T2, A32
    eor T3, T3, A33
    eor T4, T4, A34

    eor T0, T0, A40
    eor T1, T1, A41
    eor T2, T2, A42
    eor T3, T3, A43
    eor T4, T4, A44

    // "ror #63" is a rotate-left by 1.
    // D[1] = C[0] xor rol(C[2],1)
    eor A00, T0, T2, ror#63     // held in the borrowed A00
    // D[2] = C[1] xor rol(C[3],1)
    eor A10, T1, T3, ror#63     // held in the borrowed A10

    // for y in 0..4, A[y][1] ^= D[1]
    eor A01, A01, A00
    eor A11, A11, A00
    eor A21, A21, A00
    eor A31, A31, A00
    eor A41, A41, A00

    // D[3] = C[2] xor rol(C[4],1)   (C[2] is no longer needed afterwards)
    eor T2, T2, T4, ror#63

    // for y in 0..4, A[y][2] ^= D[2]
    eor A02, A02, A10
    eor A12, A12, A10
    eor A22, A22, A10
    eor A32, A32, A10
    eor A42, A42, A10

    // D[4] = C[3] xor rol(C[0],1)   (C[3] is no longer needed afterwards)
    eor T3, T3, T0, ror#63

    // for y in 0..4, A[y][3] ^= D[3]
    eor A03, A03, T2
    eor A13, A13, T2
    eor A23, A23, T2
    eor A33, A33, T2
    eor A43, A43, T2

    ldp A00, A10, [sp], #16     // Restore the original A00 and A10

    // D[0] = C[4] xor rol(C[1],1)
    eor T4, T4, T1, ror#63

    // for y in 0..4, A[y][4] ^= D[4]
    eor A04, A04, T3
    eor A14, A14, T3
    eor A24, A24, T3
    eor A34, A34, T3
    eor A44, A44, T3

    // for y in 0..4, A[y][0] ^= D[0]
    // Only A00 is updated in place. The other four results go to T0..T3 because RHOPi
    // overwrites A10/A20/A30/A40 before it reads their theta values.
    eor A00, A00, T4
    eor T0, A10, T4
    eor T1, A20, T4
    eor T2, A30, T4
    eor T3, A40, T4
.endm
156
/**
 *  Macro Description: RHO mapping function and PI mapping function
 *  Input register:
 *      A00~A44: x0~x24 State Matrix after THETA, except that the theta results of
 *               A10, A20, A30 and A40 are passed in T0, T1, T2 and T3.
 *        T0~T3: x25~x28 theta results of A10, A20, A30 and A40.
 *  Modify the register:
 *      A00~A44: x0~x24 State Matrix (A00 is not touched: it is neither rotated nor moved)
 *  Output register:
 *      A00~A44: x0~x24 The latest State Matrix
 *  Function/Macro Call: None
 *  Implementation part:
 *      for x in 0..4: for y in 0..4: A[x, y] = rol(A[y, 3x+y], rhotates[y, 3x+y])
 *      (column index taken mod 5). "ror #64-n" is a rotate-left by n.
 *      The statement order is significant: each group only overwrites registers whose
 *      old value has already been consumed by an earlier line, and the chain is closed
 *      by the four values parked in T0~T3.
 */
.macro  RHOPi
    // A10/A20/A30/A40 are free to overwrite: their values live in T0~T3.
    ror A10, A03, #64-28
    ror A20, A01, #64-1
    ror A30, A04, #64-27
    ror A40, A02, #64-62

    // A01~A04 were consumed by the group above.
    ror A01, A11, #64-44
    ror A02, A22, #64-43
    ror A03, A33, #64-21
    ror A04, A44, #64-14

    // A11, A22, A33, A44 were consumed by the group above.
    ror A11, A14, #64-20
    ror A22, A23, #64-25
    ror A33, A32, #64-15
    ror A44, A41, #64-2

    // A14, A23, A32, A41 were consumed by the group above.
    ror A14, A42, #64-61
    ror A23, A34, #64-8
    ror A32, A21, #64-10
    ror A41, A13, #64-55

    // A42, A34, A21, A13 were consumed by the group above.
    ror A42, A24, #64-39
    ror A34, A43, #64-56
    ror A21, A12, #64-6
    ror A13, A31, #64-45

    // Close the chain with the theta results of A40, A30, A20 and A10.
    ror A24, T3, #64-18
    ror A43, T2, #64-41
    ror A12, T1, #64-3
    ror A31, T0, #64-36
.endm
202
/**
 *  Macro Description: CHI mapping function and IOTA mapping function
 *  Macro argument:
 *      offset: round index written as an immediate with its leading '#'
 *              (for example "#5"); it selects entry offset*8 of g_roundConstant.
 *  Input register:
 *      A00~A44: x0~x24 State Matrix
 *  Modify the register:
 *      A00~A44: x0~x24 State Matrix
 *        T0~T3: x25~x28 temporary calculation register (x25 is also used as the
 *               base address of g_roundConstant)
 *  Output register:
 *      A00~A44: x0~x24 The latest State Matrix
 *  Function/Macro Call: None
 *  Implementation part:
 *      for x in 0..4: for y in 0..4: A[x, y] ^= not A[x, y+1] and A[x, y+2]
 *      if x,y = 0,0: A[x, y] = A[x, y] xor iotas[i]
 *      (second index taken mod 5). "bic d, n, m" computes n and (not m).
 *      Inside a row every bic reads a lane before that lane is updated, so all
 *      five lanes are computed from the row's original values.
 */
.macro  CHIOTA offset
    // for y in 0..4: A[0, y] ^= not A[0, y+1] and A[0, y+2]
    bic T0, A02, A01
    bic T1, A01, A00
    bic T2, A00, A04
    bic T3, A03, A02
    eor A00, A00, T0
    eor A01, A01, T3
    bic T0, A04, A03
    eor A02, A02, T0
    eor A03, A03, T2
    eor A04, A04, T1

    // T0 (x25) is free here, so it doubles as the round-constant table pointer.
    adrp x25, g_roundConstant
    add x25, x25, :lo12:g_roundConstant       // x25 === T0

    ldr T3, [x25, \offset*8]
    eor A00, A00, T3                // iota: A[0, 0] = A[0, 0] xor iotas[i]

    // for y in 0..4: A[1, y] ^= not A[1, y+1] and A[1, y+2]
    bic T0, A12, A11
    bic T1, A11, A10
    bic T2, A10, A14
    bic T3, A13, A12
    eor A10, A10, T0
    eor A11, A11, T3
    bic T0, A14, A13
    eor A12, A12, T0
    eor A13, A13, T2
    eor A14, A14, T1

    // for y in 0..4: A[2, y] ^= not A[2, y+1] and A[2, y+2]
    bic T0, A22, A21
    bic T1, A21, A20
    bic T2, A20, A24
    bic T3, A23, A22
    eor A20, A20, T0
    eor A21, A21, T3
    bic T0, A24, A23
    eor A22, A22, T0
    eor A23, A23, T2
    eor A24, A24, T1

    // for y in 0..4: A[3, y] ^= not A[3, y+1] and A[3, y+2]
    bic T0, A32, A31
    bic T1, A31, A30
    bic T2, A30, A34
    bic T3, A33, A32
    eor A30, A30, T0
    eor A31, A31, T3
    bic T0, A34, A33
    eor A32, A32, T0
    eor A33, A33, T2
    eor A34, A34, T1

    // for y in 0..4: A[4, y] ^= not A[4, y+1] and A[4, y+2]
    bic T0, A42, A41
    bic T1, A41, A40
    bic T2, A40, A44
    bic T3, A43, A42
    eor A40, A40, T0
    eor A41, A41, T3
    bic T0, A44, A43
    eor A42, A42, T0
    eor A43, A43, T2
    eor A44, A44, T1
.endm
285
/**
 *  Macro Description: one full round of the permutation (theta, rho+pi, chi+iota)
 *  Macro argument:
 *      offset: round index, passed through unchanged to CHIOTA (written with its
 *              leading '#', for example "ROUND #5").
 *  Input register:
 *      A00~A44: x0~x24 State Matrix
 *  Modify the register:
 *      A00~A44: x0~x24 State Matrix
 *        T0~T4: x25~x29 temporary calculation register
 *        Stack: THETA temporarily pushes and pops 16 bytes below sp.
 *  Output register:
 *      A00~A44: The latest State Matrix
 *  Function/Macro Call: THETA RHOPi CHIOTA
 */
.macro  ROUND offset
    THETA               // leaves the theta results of A10/A20/A30/A40 in T0~T3
    RHOPi               // consumes T0~T3
    CHIOTA \offset
.endm
303
/**
 *  Macro Description: apply the 24-round permutation to a state held in memory.
 *                     Used by SHA3_Squeeze when more output is needed than one
 *                     block of the state can supply.
 *  Input register:
 *        x25: Pointer to the address of the State Matrix (25 consecutive 64-bit lanes)
 *    x26~x28: caller values; saved on the stack and restored by this macro
 *  Modify the register:
 *     x0~x24: used as A00~A44 while the rounds run
 *        x29: used as T4 and NOT restored
 *      Stack: 32 bytes are pushed for x25~x28 and popped again
 *  Output register:
 *         x0: copy of x25 (state pointer)
 *         x3: copy of x28
 *     Memory: the permuted state is written back to [x25]
 *  Function/Macro Call: ROUND
 */
.macro Keccak
    /* x25~x28 double as T0~T3 inside ROUND, so keep the caller's values on the stack. */
    stp x25, x26, [sp, #-32]!
    stp x27, x28, [sp, #8*2]
    /* Load states: x0~x24 (offsets are lane index * 8) */
    ldp A00, A01, [x25]
    ldp A02, A03, [x25, #8*2]
    ldp A04, A10, [x25, #8*4]
    ldp A11, A12, [x25, #8*6]
    ldp A13, A14, [x25, #8*8]
    ldp A20, A21, [x25, #8*10]
    ldp A22, A23, [x25, #8*12]
    ldp A24, A30, [x25, #8*14]
    ldp A31, A32, [x25, #8*16]
    ldp A33, A34, [x25, #8*18]
    ldp A40, A41, [x25, #8*20]
    ldp A42, A43, [x25, #8*22]
    ldr A44, [x25, #8*24]
    /* Mapping */
    ROUND #0
    ROUND #1
    ROUND #2
    ROUND #3
    ROUND #4
    ROUND #5
    ROUND #6
    ROUND #7
    ROUND #8
    ROUND #9
    ROUND #10
    ROUND #11
    ROUND #12
    ROUND #13
    ROUND #14
    ROUND #15
    ROUND #16
    ROUND #17
    ROUND #18
    ROUND #19
    ROUND #20
    ROUND #21
    ROUND #22
    ROUND #23

    /* Bring back the caller's x25~x28 before x25 is used as the store base. */
    ldp x25, x26, [sp], #8*2
    ldp x27, x28, [sp], #8*2
    /* Store states: x0~x24 */
    stp A00, A01, [x25]
    stp A02, A03, [x25, #8*2]
    stp A04, A10, [x25, #8*4]
    stp A11, A12, [x25, #8*6]
    stp A13, A14, [x25, #8*8]
    stp A20, A21, [x25, #8*10]
    stp A22, A23, [x25, #8*12]
    stp A24, A30, [x25, #8*14]
    stp A31, A32, [x25, #8*16]
    stp A33, A34, [x25, #8*18]
    stp A40, A41, [x25, #8*20]
    stp A42, A43, [x25, #8*22]
    str A44, [x25, #8*24]

    /* x0 and x3 were clobbered as A00/A03; re-seed them from the restored x25/x28. */
    mov     x0, x25
    mov     x3, x28
.endm
368
/*
 * Iota round constants: one 64-bit word per round, 24 rounds.
 * CHIOTA reads entry i with "ldr T3, [table, #i*8]".
 */
.section .rodata
.balign    64
.type    g_roundConstant, %object
g_roundConstant:
    .quad   0x0000000000000001      // round 0
    .quad   0x0000000000008082      // round 1
    .quad   0x800000000000808a      // round 2
    .quad   0x8000000080008000      // round 3
    .quad   0x000000000000808b      // round 4
    .quad   0x0000000080000001      // round 5
    .quad   0x8000000080008081      // round 6
    .quad   0x8000000000008009      // round 7
    .quad   0x000000000000008a      // round 8
    .quad   0x0000000000000088      // round 9
    .quad   0x0000000080008009      // round 10
    .quad   0x000000008000000a      // round 11
    .quad   0x000000008000808b      // round 12
    .quad   0x800000000000008b      // round 13
    .quad   0x8000000000008089      // round 14
    .quad   0x8000000000008003      // round 15
    .quad   0x8000000000008002      // round 16
    .quad   0x8000000000000080      // round 17
    .quad   0x000000000000800a      // round 18
    .quad   0x800000008000000a      // round 19
    .quad   0x8000000080008081      // round 20
    .quad   0x8000000000008080      // round 21
    .quad   0x0000000080000001      // round 22
    .quad   0x8000000080008008      // round 23
    .size   g_roundConstant, .-g_roundConstant
398
/**
 *  Macro Description: absorb one 64-bit lane of input into the state.
 *  Macro argument:
 *      lane: state register (A00~A44) that receives the input word
 *  Input register:
 *        x26: input pointer; post-incremented by 8
 *  Modify the register:
 *        x25: scratch (holds the loaded word)
 *  Implementation part:
 *      lane ^= little-endian 64-bit word at [x26]; on big-endian builds the loaded
 *      word is byte-reversed first.
 */
.macro  ABSORB_LANE lane
    ldr x25, [x26], #8
#ifdef  HITLS_BIG_ENDIAN
    rev x25, x25
#endif
    eor \lane, \lane, x25
.endm

/**
 *  Function description: Perform SHA3 absorption according to the input message.
 *  Function prototype: const uint8_t *SHA3_Absorb(uint8_t *state, const uint8_t *in, uint32_t inLen, uint32_t r);
 *  Input register:
 *         x0: Pointer to the address of the State Matrix
 *         x1: Pointer to the input data address
 *         w2: Message length in bytes (only the low 32 bits are used)
 *         w3: Rate r in bytes (only the low 32 bits are used); selects the SHA3 variant.
 *             NOTE(review): the code assumes r is one of 72/104/136/144/168, or else
 *             a full 200-byte block; other values are not validated here.
 *  Register usage: A00~A44: x0~x24 State Matrix
 *                  T0~T4: x25~x29 temporary calculation register
 *                  Between rounds: x25 scratch, x26 input pointer, x27 remaining
 *                  length, x28 r.
 *  Stack usage: 96 bytes for x29/x30 and x19~x28, plus a 32-byte scratch frame
 *               [sp]=state, [sp+8]=in, [sp+16]=remaining length, [sp+24]=r,
 *               plus 16 transient bytes inside each THETA.
 *  Output register: x0 Returns the address of the first input byte that was not absorbed
 *                   (equal to "in" when inLen < r; the state is left untouched then).
 *  Function/Macro Call: ROUND, ABSORB_LANE
 */

.text
.balign 16
.global SHA3_Absorb
.type   SHA3_Absorb, %function
SHA3_Absorb:
AARCH64_PACIASP
    /* push stack protection: FP/LR and callee-saved x19~x28 (x29 is reused as T4) */
    stp x29, x30, [sp, #-96]!
    stp x19, x20, [sp, #8*2]
    stp x21, x22, [sp, #8*4]
    stp x23, x24, [sp, #8*6]
    stp x25, x26, [sp, #8*8]
    stp x27, x28, [sp, #8*10]

    mov x25, x0
    mov x26, x1
    /*
     * inLen and r are 32-bit arguments, so bits 63:32 of x2/x3 are unspecified on
     * entry. Writing the W view zero-extends them, which makes the 64-bit compares
     * and subtraction below well defined.
     */
    mov w27, w2
    mov w28, w3

    /* Scratch frame: the round macros clobber x25~x28, so keep a copy on the stack. */
    stp x25, x26, [sp, #-32]!
    stp x27, x28, [sp, #8*2]

    /* Less than one full block: nothing to absorb. */
    cmp x27, x28
    blo .Labsorb_end

    /* Load states: x0~x24 (offsets are lane index * 8) */
    ldp A00, A01, [x25]
    ldp A02, A03, [x25, #8*2]
    ldp A04, A10, [x25, #8*4]
    ldp A11, A12, [x25, #8*6]
    ldp A13, A14, [x25, #8*8]
    ldp A20, A21, [x25, #8*10]
    ldp A22, A23, [x25, #8*12]
    ldp A24, A30, [x25, #8*14]
    ldp A31, A32, [x25, #8*16]
    ldp A33, A34, [x25, #8*18]
    ldp A40, A41, [x25, #8*20]
    ldp A42, A43, [x25, #8*22]
    ldr A44, [x25, #8*24]

.Labsorb:
    /* Absorb r bytes (r/8 lanes) from the input; stop at the lane count matching r. */
    ABSORB_LANE A00
    ABSORB_LANE A01
    ABSORB_LANE A02
    ABSORB_LANE A03
    ABSORB_LANE A04
    ABSORB_LANE A10
    ABSORB_LANE A11
    ABSORB_LANE A12
    ABSORB_LANE A13

    cmp x28, #72            // SHA3_512: 72=8*9: lanes 0~8
    beq .Labsorb_mapping

    ABSORB_LANE A14
    ABSORB_LANE A20
    ABSORB_LANE A21
    ABSORB_LANE A22

    cmp x28, #104           // SHA3_384: 104=8*13: lanes 0~12
    beq .Labsorb_mapping

    ABSORB_LANE A23
    ABSORB_LANE A24
    ABSORB_LANE A30
    ABSORB_LANE A31

    cmp x28, #136           // SHA3_256: 136=8*17: lanes 0~16
    beq .Labsorb_mapping

    ABSORB_LANE A32

    cmp x28, #144           // SHA3_224: 144=8*18: lanes 0~17
    beq .Labsorb_mapping

    ABSORB_LANE A33
    ABSORB_LANE A34
    ABSORB_LANE A40

    cmp x28, #168           // SHAKE128: 168=8*21: lanes 0~20
    beq .Labsorb_mapping

    ABSORB_LANE A41
    ABSORB_LANE A42
    ABSORB_LANE A43
    ABSORB_LANE A44

.Labsorb_mapping:
    /* Updating the Input Data Pointer and Length; both are parked in the scratch frame */
    sub x27, x27, x28
    stp x26, x27, [sp, #8]
    /* Mapping */
    ROUND #0
    ROUND #1
    ROUND #2
    ROUND #3
    ROUND #4
    ROUND #5
    ROUND #6
    ROUND #7
    ROUND #8
    ROUND #9
    ROUND #10
    ROUND #11
    ROUND #12
    ROUND #13
    ROUND #14
    ROUND #15
    ROUND #16
    ROUND #17
    ROUND #18
    ROUND #19
    ROUND #20
    ROUND #21
    ROUND #22
    ROUND #23
    /* Reload input pointer, remaining length and r; loop while a full block remains. */
    ldp x26, x27, [sp, #8]
    ldr x28, [sp, #24]
    cmp x27, x28
    bhs .Labsorb

    /* Store states: x0~x24 */
    ldr x25, [sp]
    stp A00, A01, [x25]
    stp A02, A03, [x25, #8*2]
    stp A04, A10, [x25, #8*4]
    stp A11, A12, [x25, #8*6]
    stp A13, A14, [x25, #8*8]
    stp A20, A21, [x25, #8*10]
    stp A22, A23, [x25, #8*12]
    stp A24, A30, [x25, #8*14]
    stp A31, A32, [x25, #8*16]
    stp A33, A34, [x25, #8*18]
    stp A40, A41, [x25, #8*20]
    stp A42, A43, [x25, #8*22]
    str A44, [x25, #8*24]

.Labsorb_end:
    /* Return the remaining message address. */
    mov x0, x26

    /* End popping */
    add sp, sp, #32             // drop the scratch frame
    ldp x29, x30, [sp], #8*2
    ldp x19, x20, [sp], #8*2
    ldp x21, x22, [sp], #8*2
    ldp x23, x24, [sp], #8*2
    ldp x25, x26, [sp], #8*2
    ldp x27, x28, [sp], #8*2
AARCH64_AUTIASP
    ret
.size SHA3_Absorb, .-SHA3_Absorb
684
.balign 16
/**
 *  Function description: Perform SHA3 squeezing to obtain the digest message.
 *  Function prototype: void SHA3_Squeeze(uint8_t *state, uint8_t *out, uint32_t outLen, uint32_t r, bool isNeedKeccak)
 *  Input register:
 *         x0: Pointer to the address of the State Matrix
 *         x1: Pointer to the output digest address
 *         w2: Digest length in bytes (only the low 32 bits are used)
 *         w3: Rate r in bytes (only the low 32 bits are used).
 *             NOTE(review): the loop assumes r is a non-zero multiple of 8; this is
 *             not validated here.
 *         w4: isNeedKeccak (only the low 8 bits are used); when non-zero the state is
 *             permuted once more after the last output byte has been written.
 *  Register usage: A00~A44: x0~x24 State Matrix (inside the Keccak macro)
 *                  T0~T4: x25~x29 temporary calculation register (inside the Keccak macro)
 *                  Outside the macro: x0 read cursor into the state, x3 bytes left in the
 *                  current block, x4 lane being emitted, x25 state, x26 output cursor,
 *                  x27 bytes still to emit, x28 r, x30 isNeedKeccak.
 *  Output register: none (void); x0 is cleared before returning and x1 is not preserved.
 *  Function/Macro Call: Keccak
 */
.global SHA3_Squeeze
.type   SHA3_Squeeze, %function
SHA3_Squeeze:
AARCH64_PACIASP
    /* push stack protection: FP/LR and callee-saved x19~x28 (x29 is reused as T4) */
    stp x29, x30, [sp, #-96]!
    stp x19, x20, [sp, #8*2]
    stp x21, x22, [sp, #8*4]
    stp x23, x24, [sp, #8*6]
    stp x25, x26, [sp, #8*8]
    stp x27, x28, [sp, #8*10]

    mov x25, x0
    mov x26, x1
    /*
     * outLen, r and isNeedKeccak are narrower than 64 bits, so the upper bits of
     * x2/x3/x4 are unspecified on entry. Normalise them before any 64-bit use.
     */
    mov w27, w2                 // zero-extended outLen
    mov w28, w3                 // zero-extended r
    and w30, w4, #0xff          // bool lives in the low byte; x30 (LR) is already saved
    mov x3, x28                 // bytes left in the current block

    /* Nothing requested: write no output at all (the byte tail would otherwise emit 7 bytes). */
    cbz x27, .Lsqueeze_done

    /* Cyclically squeezing message summaries from the State Matrix */
.Loop_squeeze:
    ldr     x4, [x0], #8
    cmp     x27, #8
    blo     .Lsqueeze_tail      // If the remaining length is less than 8 bytes, perform single-byte extrusion.

#ifdef      HITLS_BIG_ENDIAN
    rev     x4, x4
#endif

    str     x4, [x26], #8       // Perform 8-byte squeeze
    subs    x27, x27, #8
    beq     .Lsqueeze_done

    subs    x3, x3, #8
    bhi     .Loop_squeeze       // still inside the current r-byte block
    Keccak                      // block exhausted: permute; resets x0 = state, x3 = r
    b       .Loop_squeeze

    /*
     * Single Byte Squeezing: 1..7 bytes remain. Emit the lane in x4 low byte first,
     * stopping as soon as the count reaches zero.
     */
.Lsqueeze_tail:
.rept 6
    strb    w4, [x26], #1
    lsr     x4, x4, #8
    subs    x27, x27, #1
    beq     .Lsqueeze_done
.endr
    strb    w4, [x26], #1       // seventh byte: the count was below 8, so this is the last one

.Lsqueeze_done:
    /* Optional trailing permutation requested by the caller. */
    cbz x30, .Lsqueeze_end
    Keccak
.Lsqueeze_end:
    /* End popping */
    ldp x29, x30, [sp], #8*2
    ldp x19, x20, [sp], #8*2
    ldp x21, x22, [sp], #8*2
    ldp x23, x24, [sp], #8*2
    ldp x25, x26, [sp], #8*2
    ldp x27, x28, [sp], #8*2
    mov x0, xzr
AARCH64_AUTIASP
    ret
.size SHA3_Squeeze, .-SHA3_Squeeze
780
781#endif
782