• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#ifdef HITLS_CRYPTO_SHA1
18
19#include "crypt_arm.h"
20
21.arch   armv8-a+crypto
22.extern	g_cryptArmCpuInfo
23.hidden	g_cryptArmCpuInfo
/*
 * SHA-1 round constants K(t), taken from RFC 3174:
 *   K(t) = 5A827999 ( 0 <= t <= 19)
 *   K(t) = 6ED9EBA1 (20 <= t <= 39)
 *   K(t) = 8F1BBCDC (40 <= t <= 59)
 *   K(t) = CA62C1D6 (60 <= t <= 79)
 *
 * g_k    holds one 32-bit copy of each constant (read by the scalar rounds).
 * g_kExt holds each constant replicated across four 32-bit lanes so that a
 *        single vector add can apply K to four schedule words at once.
 *
 * Both tables are only ever read, so they are placed in .rodata instead of
 * the writable .data section.
 */
.section .rodata
.balign 64         // Alignment based on the size of the read data block
.type   g_k, %object
g_k:
    .long   0x5a827999                                       // K_00_19
    .long   0x6ed9eba1                                       // K_20_39
    .long   0x8f1bbcdc                                       // K_40_59
    .long   0xca62c1d6                                       // K_60_79
.size   g_k, .-g_k

.balign 64         // Alignment based on the size of the read data block
.type   g_kExt, %object
g_kExt:
    .long   0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999   // K_00_19
    .long   0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1   // K_20_39
    .long   0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc   // K_40_59
    .long   0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6   // K_60_79
.size   g_kExt, .-g_kExt
48
/**
 *  Macro Description: Expand one 32-bit word W(i) of the message schedule.
 *  Input registers:
 *      w_m16:  W(i-16)
 *      w_m14:  W(i-14)
 *       w_m8:  W(i-8)
 *       w_m3:  W(i-3)
 *        t_a:  temporary register
 *        t_b:  temporary register
 *  Modified registers:  w_m16 t_a t_b
 *  Output register:
 *      w_m16:  the new W(i), W(i) = S^1(W(i-3) XOR W(i-8) XOR W(i-14) XOR W(i-16))
 *  Function/Macro Call: NONE
 */
.macro  MESSAGE_EXPAND w_m16, w_m14, w_m8, w_m3, t_a, t_b
    eor     \t_b, \w_m8, \w_m3          // t_b = W(i-8) XOR W(i-3)
    eor     \t_a, \w_m16, \w_m14        // t_a = W(i-16) XOR W(i-14)
    eor     \w_m16, \t_b, \t_a          // XOR of all four schedule words
    ror     \w_m16, \w_m16, #31         // rotate left by 1 == rotate right by 31
.endm
69
/**
 *  Macro Description: Common tail of every SHA-1 round; updates b and e.
 *  Input registers:
 *          a:  working variable A (read only)
 *          b:  working variable B
 *          e:  working variable E
 *        w_i:  schedule word W(i)
 *         kc:  round constant K(i)
 *         fv:  f(B, C, D), already computed by the caller from the old B
 *      t_sum:  temporary register
 *      t_rot:  temporary register
 *  Modified registers:  b e t_sum t_rot
 *  Output registers:
 *          b:  S^30(B)
 *          e:  S^5(A) + f(B, C, D) + E + W(i) + K(i)
 *  Function/Macro Call: NONE
 */
.macro  CAL_B_E a, b, e, w_i, kc, fv, t_sum, t_rot
    ror     \t_rot, \a, #27             // S^5(A): rotate left by 5 == rotate right by 27
    add     \e, \e, \fv                 // E + f(B, C, D)
    add     \t_sum, \w_i, \kc           // W(i) + K(i)

    ror     \b, \b, #2                  // b = S^30(B): rotate left by 30 == rotate right by 2
    add     \t_rot, \t_rot, \t_sum      // S^5(A) + W(i) + K(i)
    add     \e, \e, \t_rot              // E + f(B, C, D) + S^5(A) + W(i) + K(i)
.endm
96
/**
 *  Macro Description: One compression round for rounds 0..19.
 *  Input registers:
 *      a - e:  working variables of the hash state
 *        w_i:  schedule word W(i)
 *         kc:  round constant K(i)
 *       f_ch:  temporary register, receives f(B, C, D)
 *       t_nb:  temporary register
 *      t_sum:  temporary register (used by CAL_B_E)
 *      t_rot:  temporary register (used by CAL_B_E)
 *  Modified registers:  b e f_ch t_nb t_sum t_rot
 *  Output registers:
 *          b:  S^30(B)
 *          e:  S^5(A) + f(B, C, D) + E + W(i) + K(i)
 *  Macro implementation: f(B, C, D) = (B AND C) OR ((NOT B) AND D)
 *  Function/Macro Call: CAL_B_E
 */
.macro  DATA_COMPRE_0_19 a, b, c, d, e, w_i, kc, f_ch, t_nb, t_sum, t_rot
    bic     \t_nb, \d, \b           // d AND (NOT b)
    and     \f_ch, \c, \b           // b AND c
    orr     \f_ch, \t_nb, \f_ch     // f(B, C, D)

    CAL_B_E \a, \b, \e, \w_i, \kc, \f_ch, \t_sum, \t_rot
.endm
120
/**
 *  Macro Description: One compression round for rounds 20..39 and 60..79.
 *  Input registers:
 *      a - e:  working variables of the hash state
 *        w_i:  schedule word W(i)
 *         kc:  round constant K(i)
 *      f_par:  temporary register, receives f(B, C, D)
 *       t_bc:  temporary register
 *      t_sum:  temporary register (used by CAL_B_E)
 *      t_rot:  temporary register (used by CAL_B_E)
 *  Modified registers:  b e f_par t_bc t_sum t_rot
 *  Output registers:
 *          b:  S^30(B)
 *          e:  S^5(A) + f(B, C, D) + E + W(i) + K(i)
 *  Macro implementation: f(B, C, D) = B XOR C XOR D
 *  Function/Macro Call: CAL_B_E
 */
.macro  DATA_COMPRE_20_39_60_79 a, b, c, d, e, w_i, kc, f_par, t_bc, t_sum, t_rot
    eor     \t_bc, \c, \b           // b XOR c
    eor     \f_par, \d, \t_bc       // f(B, C, D) = b XOR c XOR d

    CAL_B_E \a, \b, \e, \w_i, \kc, \f_par, \t_sum, \t_rot
.endm
143
/**
 *  Macro Description: One compression round for rounds 40..59.
 *  Input registers:
 *      a - e:  working variables of the hash state
 *        w_i:  schedule word W(i)
 *         kc:  round constant K(i)
 *      f_maj:  temporary register, receives f(B, C, D)
 *       t_bd:  temporary register
 *       t_cd:  temporary register (holds C AND D, then reused by CAL_B_E)
 *      t_rot:  temporary register (used by CAL_B_E)
 *  Modified registers:  b e f_maj t_bd t_cd t_rot
 *  Output registers:
 *          b:  S^30(B)
 *          e:  S^5(A) + f(B, C, D) + E + W(i) + K(i)
 *  Macro implementation: f(B, C, D) = (B AND C) OR (B AND D) OR (C AND D)
 *  Function/Macro Call: CAL_B_E
 */
.macro  DATA_COMPRE_40_59 a, b, c, d, e, w_i, kc, f_maj, t_bd, t_cd, t_rot
    and     \t_cd, \d, \c           // c AND d
    and     \t_bd, \d, \b           // b AND d
    and     \f_maj, \c, \b          // b AND c
    orr     \f_maj, \t_bd, \f_maj   // (b AND c) OR (b AND d)
    orr     \f_maj, \t_cd, \f_maj   // f(B, C, D)

    CAL_B_E \a, \b, \e, \w_i, \kc, \f_maj, \t_cd, \t_rot
.endm
169
/**
 *  Function Description: Run the SHA1 compression function over every complete 64-byte block of the
 *                        input message and update the hash value in place.
 *  Function prototype: const uint8_t *SHA1_Step(const uint8_t *input, uint32_t len, uint32_t *h)
 *  Input register:
 *         x0:  Pointer to the input data
 *         w1:  Message length in bytes. It is a 32-bit argument, so the upper half of x1 is
 *              unspecified on entry (AAPCS64) and is cleared before x1 is used.
 *         x2:  Address of the five 32-bit hash words H0..H4
 *  Register usage:  w0-w15 hold the 16 live message schedule words; w16, w17, w28 and w29 are
 *                   temporaries; w19 holds the round constant K of the current 20-round group;
 *                   w20-w24 hold a-e; x25 is the message pointer; x26 is the remaining length;
 *                   x27 is the address of g_k; x30 is the hash value address.
 *                   x19-x30 are saved in a 96-byte frame and restored before returning.
 *  Output register:  x0 returns the address of the first message byte that was not processed.
 *                    If len is below 64, nothing is processed and x0 is returned unchanged.
 *  Function/Macro Call:  DATA_COMPRE_0_19, DATA_COMPRE_20_39_60_79, DATA_COMPRE_40_59,
 *                        MESSAGE_EXPAND, SHA1CryptoExt
 */
.text
.balign 16
.global SHA1_Step
.type   SHA1_Step, %function
SHA1_Step:
    .inst 0xd503233f            // paciasp
    mov     w1, w1              // zero-extend the 32-bit len: upper half of x1 is unspecified
    cmp     x1, #64
    b.lo    .Lend_sha1          // less than one block: nothing to do

    /*
     * If the SHA1 cryptography extension is supported, continue in SHA1CryptoExt.
     * This is a tail branch: SHA1CryptoExt returns straight to our caller, and LR is still
     * signed by the paciasp above, so SHA1CryptoExt has to authenticate LR before its ret.
     */
    adrp    x5, g_cryptArmCpuInfo
    add     x5, x5, :lo12:g_cryptArmCpuInfo
    ldr     x6, [x5]
    tst     x6, #CRYPT_ARM_SHA1
    b.ne    SHA1CryptoExt

    /* Extension not available: use base instructions. Open a 96-byte frame and save x19-x30. */
    stp     x29, x30, [sp, #-96]!
    stp     x19, x20, [sp, #8*2]
    stp     x21, x22, [sp, #8*4]
    stp     x23, x24, [sp, #8*6]
    stp     x25, x26, [sp, #8*8]
    stp     x27, x28, [sp, #8*10]

    /* load a - e */
    ldp     w20, w21, [x2]
    ldp     w22, w23, [x2, #4*2]
    ldr     w24, [x2, #4*4]

    mov     x30, x2             // x30: address of the hash value
    mov     x25, x0             // x25: message pointer
    mov     x26, x1             // x26: remaining message length

    /* The address of g_k does not change between blocks: compute it once, outside the loop. */
    adrp    x27, g_k
    add     x27, x27, :lo12:g_k

.Lloop_sha1_compress:
    ldr     w19, [x27]          // load k1 (rounds 0..19)

    ldp     w0, w1, [x25]       // load one 64-byte block into w0-w15
    ldp     w2, w3, [x25, #4*2]
    ldp     w4, w5, [x25, #4*4]
    ldp     w6, w7, [x25, #4*6]
    ldp     w8, w9, [x25, #4*8]
    ldp     w10, w11, [x25, #4*10]
    ldp     w12, w13, [x25, #4*12]
    ldp     w14, w15, [x25, #4*14]

    add     x25, x25, #64       // advance the message pointer by one block
    sub     x26, x26, #64       // update the remaining length

    /* The message words are big-endian: byte-swap them on a little-endian build. */
#ifndef   HITLS_BIG_ENDIAN
    rev     w0, w0
    rev     w1, w1
    rev     w2, w2
    rev     w3, w3
    rev     w4, w4
    rev     w5, w5
    rev     w6, w6
    rev     w7, w7
    rev     w8, w8
    rev     w9, w9
    rev     w10, w10
    rev     w11, w11
    rev     w12, w12
    rev     w13, w13
    rev     w14, w14
    rev     w15, w15
#endif
    /*
     * 0~19 round data compression.
     * Instead of moving a-e after each round, the register roles rotate by one position
     * per round, so the argument pattern repeats every five rounds.
     */
    /* a, b, c, d, e, wi, k, temp1, temp2, temp3, temp4 */
    DATA_COMPRE_0_19 w20, w21, w22, w23, w24, w0, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w24, w20, w21, w22, w23, w1, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w23, w24, w20, w21, w22, w2, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w22, w23, w24, w20, w21, w3, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w21, w22, w23, w24, w20, w4, w19, w16, w17, w28, w29

    DATA_COMPRE_0_19 w20, w21, w22, w23, w24, w5, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w24, w20, w21, w22, w23, w6, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w23, w24, w20, w21, w22, w7, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w22, w23, w24, w20, w21, w8, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w21, w22, w23, w24, w20, w9, w19, w16, w17, w28, w29

    DATA_COMPRE_0_19 w20, w21, w22, w23, w24, w10, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w24, w20, w21, w22, w23, w11, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w23, w24, w20, w21, w22, w12, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w22, w23, w24, w20, w21, w13, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19 w21, w22, w23, w24, w20, w14, w19, w16, w17, w28, w29

    DATA_COMPRE_0_19 w20, w21, w22, w23, w24, w15, w19, w16, w17, w28, w29
    /*
     * From round 16 on, W(i) is computed in place over W(i-16).
     * Message block extension calculation: wi_16, wi_14, wi_8, wi_3, temp1, temp2
     */
    MESSAGE_EXPAND w0, w2, w8, w13, w16, w17
    DATA_COMPRE_0_19 w24, w20, w21, w22, w23, w0, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w1, w3, w9, w14, w16, w17
    DATA_COMPRE_0_19 w23, w24, w20, w21, w22, w1, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w2, w4, w10, w15, w16, w17
    DATA_COMPRE_0_19 w22, w23, w24, w20, w21, w2, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w3, w5, w11, w0, w16, w17
    DATA_COMPRE_0_19 w21, w22, w23, w24, w20, w3, w19, w16, w17, w28, w29

    /* 20~39 round data compression */
    ldr     w19, [x27, #4]      // load k2
    MESSAGE_EXPAND w4, w6, w12, w1, w16, w17
    DATA_COMPRE_20_39_60_79 w20, w21, w22, w23, w24, w4, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w5, w7, w13, w2, w16, w17
    DATA_COMPRE_20_39_60_79 w24, w20, w21, w22, w23, w5, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w6, w8, w14, w3, w16, w17
    DATA_COMPRE_20_39_60_79 w23, w24, w20, w21, w22, w6, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w7, w9, w15, w4, w16, w17
    DATA_COMPRE_20_39_60_79 w22, w23, w24, w20, w21, w7, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w8, w10, w0, w5, w16, w17
    DATA_COMPRE_20_39_60_79 w21, w22, w23, w24, w20, w8, w19, w16, w17, w28, w29

    MESSAGE_EXPAND w9, w11, w1, w6, w16, w17
    DATA_COMPRE_20_39_60_79 w20, w21, w22, w23, w24, w9, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w10, w12, w2, w7, w16, w17
    DATA_COMPRE_20_39_60_79 w24, w20, w21, w22, w23, w10, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w11, w13, w3, w8, w16, w17
    DATA_COMPRE_20_39_60_79 w23, w24, w20, w21, w22, w11, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w12, w14, w4, w9, w16, w17
    DATA_COMPRE_20_39_60_79 w22, w23, w24, w20, w21, w12, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w13, w15, w5, w10, w16, w17
    DATA_COMPRE_20_39_60_79 w21, w22, w23, w24, w20, w13, w19, w16, w17, w28, w29

    MESSAGE_EXPAND w14, w0,  w6, w11, w16, w17
    DATA_COMPRE_20_39_60_79 w20, w21, w22, w23, w24, w14, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w15, w1,  w7, w12, w16, w17
    DATA_COMPRE_20_39_60_79 w24, w20, w21, w22, w23, w15, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w0, w2, w8, w13, w16, w17
    DATA_COMPRE_20_39_60_79 w23, w24, w20, w21, w22, w0, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w1, w3, w9, w14, w16, w17
    DATA_COMPRE_20_39_60_79 w22, w23, w24, w20, w21, w1, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w2, w4, w10, w15, w16, w17
    DATA_COMPRE_20_39_60_79 w21, w22, w23, w24, w20, w2, w19, w16, w17, w28, w29

    MESSAGE_EXPAND w3, w5, w11, w0, w16, w17
    DATA_COMPRE_20_39_60_79 w20, w21, w22, w23, w24, w3, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w4, w6, w12, w1, w16, w17
    DATA_COMPRE_20_39_60_79 w24, w20, w21, w22, w23, w4, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w5, w7, w13, w2, w16, w17
    DATA_COMPRE_20_39_60_79 w23, w24, w20, w21, w22, w5, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w6, w8, w14, w3, w16, w17
    DATA_COMPRE_20_39_60_79 w22, w23, w24, w20, w21, w6, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w7, w9, w15, w4, w16, w17
    DATA_COMPRE_20_39_60_79 w21, w22, w23, w24, w20, w7, w19, w16, w17, w28, w29

    /* 40~59 round data compression */
    ldr     w19, [x27, #8]      // load k3
    MESSAGE_EXPAND w8, w10, w0, w5, w16, w17
    DATA_COMPRE_40_59 w20, w21, w22, w23, w24, w8, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w9, w11, w1, w6, w16, w17
    DATA_COMPRE_40_59 w24, w20, w21, w22, w23, w9, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w10, w12, w2, w7, w16, w17
    DATA_COMPRE_40_59 w23, w24, w20, w21, w22, w10, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w11, w13, w3, w8, w16, w17
    DATA_COMPRE_40_59 w22, w23, w24, w20, w21, w11, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w12, w14, w4, w9, w16, w17
    DATA_COMPRE_40_59 w21, w22, w23, w24, w20, w12, w19, w16, w17, w28, w29

    MESSAGE_EXPAND w13, w15, w5, w10, w16, w17
    DATA_COMPRE_40_59 w20, w21, w22, w23, w24, w13, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w14, w0,  w6, w11, w16, w17
    DATA_COMPRE_40_59 w24, w20, w21, w22, w23, w14, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w15, w1,  w7, w12, w16, w17
    DATA_COMPRE_40_59 w23, w24, w20, w21, w22, w15, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w0, w2, w8, w13, w16, w17
    DATA_COMPRE_40_59 w22, w23, w24, w20, w21, w0, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w1, w3, w9, w14, w16, w17
    DATA_COMPRE_40_59 w21, w22, w23, w24, w20, w1, w19, w16, w17, w28, w29

    MESSAGE_EXPAND w2, w4, w10, w15, w16, w17
    DATA_COMPRE_40_59 w20, w21, w22, w23, w24, w2, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w3, w5, w11, w0, w16, w17
    DATA_COMPRE_40_59 w24, w20, w21, w22, w23, w3, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w4, w6, w12, w1, w16, w17
    DATA_COMPRE_40_59 w23, w24, w20, w21, w22, w4, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w5, w7, w13, w2, w16, w17
    DATA_COMPRE_40_59 w22, w23, w24, w20, w21, w5, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w6, w8, w14, w3, w16, w17
    DATA_COMPRE_40_59 w21, w22, w23, w24, w20, w6, w19, w16, w17, w28, w29

    MESSAGE_EXPAND w7, w9, w15, w4, w16, w17
    DATA_COMPRE_40_59 w20, w21, w22, w23, w24, w7, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w8, w10, w0, w5, w16, w17
    DATA_COMPRE_40_59 w24, w20, w21, w22, w23, w8, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w9, w11, w1, w6, w16, w17
    DATA_COMPRE_40_59 w23, w24, w20, w21, w22, w9, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w10, w12, w2, w7, w16, w17
    DATA_COMPRE_40_59 w22, w23, w24, w20, w21, w10, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w11, w13, w3, w8, w16, w17
    DATA_COMPRE_40_59 w21, w22, w23, w24, w20, w11, w19, w16, w17, w28, w29

    /* 60~79 round data compression */
    ldr     w19, [x27, #12]     // load k4
    MESSAGE_EXPAND w12, w14, w4, w9, w16, w17
    DATA_COMPRE_20_39_60_79 w20, w21, w22, w23, w24, w12, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w13, w15, w5, w10, w16, w17
    DATA_COMPRE_20_39_60_79 w24, w20, w21, w22, w23, w13, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w14, w0,  w6, w11, w16, w17
    DATA_COMPRE_20_39_60_79 w23, w24, w20, w21, w22, w14, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w15, w1,  w7, w12, w16, w17
    DATA_COMPRE_20_39_60_79 w22, w23, w24, w20, w21, w15, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w0, w2, w8, w13, w16, w17
    DATA_COMPRE_20_39_60_79 w21, w22, w23, w24, w20, w0, w19, w16, w17, w28, w29

    MESSAGE_EXPAND w1, w3, w9, w14, w16, w17
    DATA_COMPRE_20_39_60_79 w20, w21, w22, w23, w24, w1, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w2, w4, w10, w15, w16, w17
    DATA_COMPRE_20_39_60_79 w24, w20, w21, w22, w23, w2, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w3, w5, w11, w0, w16, w17
    DATA_COMPRE_20_39_60_79 w23, w24, w20, w21, w22, w3, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w4, w6, w12, w1, w16, w17
    DATA_COMPRE_20_39_60_79 w22, w23, w24, w20, w21, w4, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w5, w7, w13, w2, w16, w17
    DATA_COMPRE_20_39_60_79 w21, w22, w23, w24, w20, w5, w19, w16, w17, w28, w29

    MESSAGE_EXPAND w6, w8, w14, w3, w16, w17
    DATA_COMPRE_20_39_60_79 w20, w21, w22, w23, w24, w6, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w7, w9, w15, w4, w16, w17
    DATA_COMPRE_20_39_60_79 w24, w20, w21, w22, w23, w7, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w8, w10, w0, w5, w16, w17
    DATA_COMPRE_20_39_60_79 w23, w24, w20, w21, w22, w8, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w9, w11, w1, w6, w16, w17
    DATA_COMPRE_20_39_60_79 w22, w23, w24, w20, w21, w9, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w10, w12, w2, w7, w16, w17
    DATA_COMPRE_20_39_60_79 w21, w22, w23, w24, w20, w10, w19, w16, w17, w28, w29

    MESSAGE_EXPAND w11, w13, w3, w8, w16, w17
    DATA_COMPRE_20_39_60_79 w20, w21, w22, w23, w24, w11, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w12, w14, w4, w9, w16, w17
    DATA_COMPRE_20_39_60_79 w24, w20, w21, w22, w23, w12, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w13, w15, w5, w10, w16, w17
    DATA_COMPRE_20_39_60_79 w23, w24, w20, w21, w22, w13, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w14, w0,  w6, w11, w16, w17
    DATA_COMPRE_20_39_60_79 w22, w23, w24, w20, w21, w14, w19, w16, w17, w28, w29
    MESSAGE_EXPAND w15, w1,  w7, w12, w16, w17
    DATA_COMPRE_20_39_60_79 w21, w22, w23, w24, w20, w15, w19, w16, w17, w28, w29

    /* reload the hash value that was current before this block */
    ldp     w0, w1, [x30]
    ldp     w2, w3, [x30, #4*2]
    ldr     w4, [x30, #4*4]

    /* H0 = H0 + A, H1 = H1 + B, H2 = H2 + C, H3 = H3 + D, H4 = H4 + E */
    add     w20, w20, w0
    add     w21, w21, w1
    add     w22, w22, w2
    add     w23, w23, w3
    add     w24, w24, w4

    /* store the updated hash; w20-w24 also serve as a-e of the next block */
    stp     w20, w21, [x30]
    stp     w22, w23, [x30, #4*2]
    str     w24, [x30, #4*4]

    cmp     x26, #64
    b.hs    .Lloop_sha1_compress    // at least one more complete block remains

    /* returns the address of the message for which SHA1 calculation is not performed. */
    mov     x0, x25

    /* pop-stack */
    ldp     x19, x20, [sp, #8*2]
    ldp     x21, x22, [sp, #8*4]
    ldp     x23, x24, [sp, #8*6]
    ldp     x25, x26, [sp, #8*8]
    ldp     x27, x28, [sp, #8*10]
    ldp     x29, x30, [sp], #96

.Lend_sha1:
    .inst 0xd50323bf            // autiasp
    ret
.size   SHA1_Step, .-SHA1_Step
460
/**
 *  Function Description: Run the SHA1 compression function over every complete 64-byte block of the
 *                        input message using the SHA1 cryptography extension instructions, and
 *                        update the hash value.
 *  Function prototype: static const uint8_t *SHA1CryptoExt(const uint8_t *input, uint32_t len, uint32_t *h)
 *  Entry: reached only by the tail branch in SHA1_Step, which has already checked that
 *         x1 >= 64 and has executed paciasp. LR is therefore signed on entry and is
 *         authenticated with autiasp before the final ret.
 *  Input register:
 *         x0:  Pointer to the input data
 *         x1:  Message length in bytes, at least 64
 *         x2:  Address of the five 32-bit hash words H0..H4
 *  Register usage:  v0-v3 hold the lane-replicated constants k0-k3. v6 holds the working abcd and
 *                   s5/s7 alternate between the working e and the next e produced by sha1h.
 *                   v23-v26 hold w0-w15 and are recycled for w16-w79. v19-v22 hold w+k.
 *                   v17 holds the chained hash words H0..H3 and v18 (lane 0) holds H4.
 *                   v16 is kept at zero and is only used to copy v17/v18 into v6/v7.
 *  Output register:  x0 returns the address of the first message byte that was not processed.
 *  Function/Macro Call:  NONE
 */
.text
.balign 16
.type   SHA1CryptoExt, %function
SHA1CryptoExt:
    /* load k */
    adrp    x3, g_kExt
    add     x3, x3, :lo12:g_kExt
    ld1     {v0.4s-v3.4s}, [x3]

    /* load a - e: once as the chained hash (v17, v18) and once as working state (v6, v7) */
    ld1     {v17.4s}, [x2]
    ld1     {v6.4s}, [x2], #16
    ld1     {v18.s}[0], [x2]
    ld1     {v7.s}[0], [x2]
    sub     x2, x2, #16                 // point x2 back at H0 for the final store

    eor     v16.16b, v16.16b, v16.16b   // v16 = 0

.Lloop_sha1_ext_compress:

    /* load w */
    ld1     {v23.4s-v26.4s}, [x0], #64
    sub     x1, x1, #64                 // update the remaining length

    /* the message words are big-endian: byte-swap each word on a little-endian build */

#ifndef   HITLS_BIG_ENDIAN
    rev32     v23.16b, v23.16b
    rev32     v24.16b, v24.16b
    rev32     v25.16b, v25.16b
    rev32     v26.16b, v26.16b
#endif

    add     v19.4s, v0.4s, v23.4s       // k0+w[3:0]
    add     v20.4s, v0.4s, v24.4s       // k0+w[7:4]
    add     v21.4s, v0.4s, v25.4s       // k0+w[11:8]
    add     v22.4s, v0.4s, v26.4s       // k0+w[15:12]

    /*
     * Rounds 0..15. Every four-round group follows the same pattern:
     *   sha1su0/sha1su1  extend the schedule by four words,
     *   sha1h            derives the e value for the next group from the current a,
     *   sha1c/p/m        performs four rounds on abcd in q6.
     * s5 and s7 swap roles from group to group.
     */
    sha1su0 v23.4s, v24.4s, v25.4s      // start w[19:16]
    sha1h   s5, s6                      // next e from the current a
    sha1c   q6, s7, v19.4s              // rounds 0..3
    sha1su1 v23.4s, v26.4s              // finish w[19:16]

    sha1su0 v24.4s, v25.4s, v26.4s
    sha1h   s7, s6
    sha1c   q6, s5, v20.4s
    sha1su1 v24.4s, v23.4s

    sha1su0 v25.4s, v26.4s, v23.4s
    sha1h   s5, s6
    sha1c   q6, s7, v21.4s
    sha1su1 v25.4s, v24.4s

    sha1su0 v26.4s, v23.4s, v24.4s
    sha1h   s7, s6
    sha1c   q6, s5, v22.4s
    sha1su1 v26.4s, v25.4s

    add     v19.4s, v0.4s, v23.4s       // k0+w[19:16]
    add     v20.4s, v1.4s, v24.4s       // k1+w[23:20]
    add     v21.4s, v1.4s, v25.4s       // k1+w[27:24]
    add     v22.4s, v1.4s, v26.4s       // k1+w[31:28]

    /* rounds 16..19 */
    sha1su0 v23.4s, v24.4s, v25.4s
    sha1h   s5, s6
    sha1c   q6, s7, v19.4s
    sha1su1 v23.4s, v26.4s

    /* rounds 20..39 */
    sha1su0 v24.4s, v25.4s, v26.4s
    sha1h   s7, s6
    sha1p   q6, s5, v20.4s
    sha1su1 v24.4s, v23.4s

    sha1su0 v25.4s, v26.4s, v23.4s
    sha1h   s5, s6
    sha1p   q6, s7, v21.4s
    sha1su1 v25.4s, v24.4s

    sha1su0 v26.4s, v23.4s, v24.4s
    sha1h   s7, s6
    sha1p   q6, s5, v22.4s
    sha1su1 v26.4s, v25.4s

    add     v19.4s, v1.4s, v23.4s       // k1+w[35:32]
    add     v20.4s, v1.4s, v24.4s       // k1+w[39:36]
    add     v21.4s, v2.4s, v25.4s       // k2+w[43:40]
    add     v22.4s, v2.4s, v26.4s       // k2+w[47:44]

    sha1su0 v23.4s, v24.4s, v25.4s
    sha1h   s5, s6
    sha1p   q6, s7, v19.4s
    sha1su1 v23.4s, v26.4s

    sha1su0 v24.4s, v25.4s, v26.4s
    sha1h   s7, s6
    sha1p   q6, s5, v20.4s
    sha1su1 v24.4s, v23.4s

    /* rounds 40..59 */
    sha1su0 v25.4s, v26.4s, v23.4s
    sha1h   s5, s6
    sha1m   q6, s7, v21.4s
    sha1su1 v25.4s, v24.4s

    sha1su0 v26.4s, v23.4s, v24.4s
    sha1h   s7, s6
    sha1m   q6, s5, v22.4s
    sha1su1 v26.4s, v25.4s

    add     v19.4s, v2.4s, v23.4s       // k2+w[51:48]
    add     v20.4s, v2.4s, v24.4s       // k2+w[55:52]
    add     v21.4s, v2.4s, v25.4s       // k2+w[59:56]
    add     v22.4s, v3.4s, v26.4s       // k3+w[63:60]

    sha1su0 v23.4s, v24.4s, v25.4s
    sha1h   s5, s6
    sha1m   q6, s7, v19.4s
    sha1su1 v23.4s, v26.4s

    sha1su0 v24.4s, v25.4s, v26.4s
    sha1h   s7, s6
    sha1m   q6, s5, v20.4s
    sha1su1 v24.4s, v23.4s

    sha1su0 v25.4s, v26.4s, v23.4s
    sha1h   s5, s6
    sha1m   q6, s7, v21.4s
    sha1su1 v25.4s, v24.4s

    /* rounds 60..79 */
    sha1su0 v26.4s, v23.4s, v24.4s
    sha1h   s7, s6
    sha1p   q6, s5, v22.4s
    sha1su1 v26.4s, v25.4s

    add     v19.4s, v3.4s, v23.4s       // k3+w[67:64]
    add     v20.4s, v3.4s, v24.4s       // k3+w[71:68]
    add     v21.4s, v3.4s, v25.4s       // k3+w[75:72]
    add     v22.4s, v3.4s, v26.4s       // k3+w[79:76]

    /* the schedule is complete: the last four groups need no sha1su0/sha1su1 */
    sha1h   s5, s6
    sha1p   q6, s7, v19.4s

    sha1h   s7, s6
    sha1p   q6, s5, v20.4s

    sha1h   s5, s6
    sha1p   q6, s7, v21.4s

    sha1h   s7, s6                      // s7 = final e of this block
    sha1p   q6, s5, v22.4s

    /* calculate H0 H1 H2 H3 H4 */
    add     v17.4s, v17.4s, v6.4s
    add     v18.4s, v18.4s, v7.4s

    /* adding the zero register copies the new hash into the working state for the next block */
    add     v6.4s, v17.4s, v16.4s
    add     v7.4s, v18.4s, v16.4s

    cmp     x1, #64
    b.hs    .Lloop_sha1_ext_compress    // at least one more complete block remains

    st1     {v17.4s}, [x2], #16         // store H0..H3
    st1     {v18.s}[0], [x2]            // store H4

    /*
     * SHA1_Step signed LR with paciasp before branching here and this routine returns on
     * its behalf, so LR must be authenticated before it is used. SP is unchanged since entry.
     */
    .inst 0xd50323bf                    // autiasp
    ret
.size   SHA1CryptoExt, .-SHA1CryptoExt
645
646#endif
647