/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#ifdef HITLS_CRYPTO_SHA256

#include "crypt_arm.h"

    .arch    armv8-a+crypto

/* SHA-256 round constants K. For the data source, see RFC 4634. */
.extern g_cryptArmCpuInfo
.hidden g_cryptArmCpuInfo
.section .rodata
.balign 64
.K256:
    .long    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
    .long    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
    .long    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
    .long    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
    .long    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
    .long    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
    .long    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
    .long    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
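/*
 * Each K[i] above is the first 32 bits of the fractional part of the cube root
 * of the i-th prime (2, 3, 5, ..., 311), as specified in FIPS 180-4 / RFC 4634.
 */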

/*
 *  Macro description: updates one 32-bit word of the message schedule W.
 *  Input registers:
 *      wi_16: W[i-16]
 *      wi_15: W[i-15]
 *      wi_7: W[i-7]
 *      wi_2: W[i-2]
 *  Modified registers: wi_16, w17, w28
 *  Output register:
 *      wi_16: latest W[i] value, W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
 *  Function/Macro Call: None
 */
    .macro  UPDATE_W        wi_16, wi_15, wi_7, wi_2
    ror     w28, \wi_15, #7
    ror     w17, \wi_2, #17
    eor     w28, w28, \wi_15, ror#18
    eor     w17, w17, \wi_2, ror#19
    eor     w28, w28, \wi_15, lsr#3     // w28 = sigma0(W[i-15])
    eor     w17, w17, \wi_2, lsr#10     // w17 = sigma1(W[i-2])
    add     \wi_16, \wi_16, \wi_7       // + W[i-7]
    add     \wi_16, \wi_16, w28         // + sigma0(W[i-15])
    add     \wi_16, \wi_16, w17         // + sigma1(W[i-2])
    .endm
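
/*
 * For reference, a minimal C sketch of the same message-schedule step
 * (illustrative only; Ror32, SmallSigma0 and SmallSigma1 are hypothetical
 * helper names, not part of this project):
 *
 *     static inline uint32_t Ror32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }
 *     static inline uint32_t SmallSigma0(uint32_t x) { return Ror32(x, 7) ^ Ror32(x, 18) ^ (x >> 3); }
 *     static inline uint32_t SmallSigma1(uint32_t x) { return Ror32(x, 17) ^ Ror32(x, 19) ^ (x >> 10); }
 *
 *     // W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
 *     w[i] = SmallSigma1(w[i - 2]) + w[i - 7] + SmallSigma0(w[i - 15]) + w[i - 16];
 */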

/*
 *  Macro description: performs one round of the 64-round compression and updates the working variables.
 *  Input registers:
 *        x19: pointer to the current element of the .K256 constant table
 *         wi: message schedule word W[i]
 *      a - h: working variables of the hash state
 *  Modified registers: h, d, w16, w17, w28, w29
 *  Output registers:
 *          h: updated value for this round (becomes the next round's 'a' through register rotation)
 *          d: updated value for this round (becomes the next round's 'e' through register rotation)
 *  Function/Macro Call: None
 */
    .macro ONE_ROUND         wi, a, b, c, d, e, f, g, h
    ldr    w16, [x19], #4           // K[i]
    and    w17, \f, \e              // e&f
    bic    w28, \g, \e              // g&(~e)
    add    \h, \h, w16              // h += K[i]
    eor    w29, \e, \e, ror#14
    ror    w16, \e, #6
    orr    w17, w17, w28            // Ch(e, f, g) = e&f | g&(~e)
    add    \h, \h, \wi              // h += W[i]
    eor    w29, w16, w29, ror#11    // Sigma1(e) = ROR(e, 6) ^ ROR(e, 11) ^ ROR(e, 25)
    eor    w28, \a, \c              // a^c
    eor    w16, \a, \b              // a^b
    add    \h, \h, w29              // h += Sigma1(e)
    and    w28, w28, w16            // (a^b)&(a^c)
    eor    w29, \a, \a, ror#9
    add    \h, \h, w17              // h += Ch(e, f, g)
    eor    w28, w28, \a             // Maj(a, b, c) = ((a^b)&(a^c))^a = (a&b)^(b&c)^(a&c)
    ror    w16, \a, #2
    add    \d, \d, \h               // d += h
    add    \h, \h, w28              // h += Maj(a, b, c)
    eor    w29, w16, w29, ror#13    // Sigma0(a) = ROR(a, 2) ^ ROR(a, 13) ^ ROR(a, 22)
    add    \h, \h, w29              // h += Sigma0(a)
    .endm
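
/*
 * For reference, a minimal C sketch of one compression round (illustrative
 * only; Ror32 is the hypothetical rotate helper sketched above, and t1/t2 are
 * the T1/T2 temporaries from FIPS 180-4):
 *
 *     uint32_t ch   = (e & f) ^ (~e & g);
 *     uint32_t maj  = (a & b) ^ (a & c) ^ (b & c);
 *     uint32_t sig1 = Ror32(e, 6) ^ Ror32(e, 11) ^ Ror32(e, 25);
 *     uint32_t sig0 = Ror32(a, 2) ^ Ror32(a, 13) ^ Ror32(a, 22);
 *     uint32_t t1   = h + sig1 + ch + k[i] + w[i];
 *     uint32_t t2   = sig0 + maj;
 *     d += t1;        // matches "add \d, \d, \h" above
 *     h  = t1 + t2;   // becomes the next round's 'a' through register rotation
 */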

/*
 *  Function Description: performs 64 rounds of compression per 64-byte block of input data
 *                        and updates the hash value.
 *  Function prototype: void SHA256CompressMultiBlocks(uint32_t hash[8], const uint8_t *in, uint32_t num);
 *  Input registers:
 *         x0: address of the hash value
 *         x1: pointer to the input data
 *         x2: number of 64-byte blocks to process
 *  Modified registers: x0-x17 (x19-x30 are also used, but saved and restored on the stack)
 *  Output register: None
 *  Function/Macro Call: None
 *
 */
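
/*
 * A minimal usage sketch from C, assuming the caller has already padded the
 * message and seeds the state with the standard SHA-256 IV (the buffer and
 * variable names below are illustrative):
 *
 *     uint32_t hash[8] = {
 *         0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
 *         0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
 *     };
 *     // data points to num complete 64-byte blocks
 *     SHA256CompressMultiBlocks(hash, data, num);
 */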
    .text
    .balign 16
    .global SHA256CompressMultiBlocks
    .type SHA256CompressMultiBlocks, %function
SHA256CompressMultiBlocks:
    cbz     x2, .Lend_sha256
    /* If the SHA256 cryptography extension instructions are supported, branch to SHA256CryptoExt. */
    adrp    x5, g_cryptArmCpuInfo
    ldr     w6, [x5, #:lo12:g_cryptArmCpuInfo]
    tst     w6, #CRYPT_ARM_SHA256
    bne     SHA256CryptoExt
    /* The extension instructions are not supported; fall back to the base instructions. */
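    /*
     * Stack frame layout (112 bytes):
     *   [sp, #0]   x29, x30
     *   [sp, #16]  x19 - x28 (callee-saved)
     *   [sp, #96]  hash address (x0)
     *   [sp, #104] current input address
     */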
    stp     x29, x30, [sp, #-112]!
    add     x29, sp, #0
    stp     x19, x20, [sp, #8*2]
    stp     x21, x22, [sp, #8*4]
    stp     x23, x24, [sp, #8*6]
    stp     x25, x26, [sp, #8*8]
    stp     x27, x28, [sp, #8*10]

    /* load a - h */
    ldp     w20, w21, [x0]
    ldp     w22, w23, [x0, #4*2]
    ldp     w24, w25, [x0, #4*4]
    ldp     w26, w27, [x0, #4*6]

    str     x0, [sp, #96]
    mov     x16, x1     // input data address
    lsl     x30, x2, #6 // total bytes to process (num * 2^6 = num * 64)

    /* w0-w15 hold the message schedule words W[i] */
.Lloop_compress_64:

    /* Start a 64-round process */
    sub     x30, x30, #16
    adrp    x19, .K256
    add     x19, x19, :lo12:.K256
    /* Each ldp loads 8 bytes (two 32-bit message words); the whole 64-byte block is loaded up front. */
    ldp     w0, w1, [x16] // load input value
    ldp     w2, w3, [x16, #4*2]
    ldp     w4, w5, [x16, #4*4]
    ldp     w6, w7, [x16, #4*6]
    ldp     w8, w9, [x16, #4*8]
    ldp     w10, w11, [x16, #4*10]
    ldp     w12, w13, [x16, #4*12]
    ldp     w14, w15, [x16, #4*14]

    add     x16, x16, #64
    str     x16, [sp, #104]
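    /* SHA-256 interprets the message as big-endian words, so byte-swap on little-endian hosts. */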
#ifndef HITLS_BIG_ENDIAN
    rev     w0, w0
    rev     w1, w1
    rev     w2, w2
    rev     w3, w3
    rev     w4, w4
    rev     w5, w5
    rev     w6, w6
    rev     w7, w7
    rev     w8, w8
    rev     w9, w9
    rev     w10, w10
    rev     w11, w11
    rev     w12, w12
    rev     w13, w13
    rev     w14, w14
    rev     w15, w15
#endif
    /* w16, w17, w28, w29 are used as temporary registers */
    ONE_ROUND   w0, w20, w21, w22, w23, w24, w25, w26, w27
    ONE_ROUND   w1, w27, w20, w21, w22, w23, w24, w25, w26
    ONE_ROUND   w2, w26, w27, w20, w21, w22, w23, w24, w25
    ONE_ROUND   w3, w25, w26, w27, w20, w21, w22, w23, w24

    ONE_ROUND   w4, w24, w25, w26, w27, w20, w21, w22, w23
    ONE_ROUND   w5, w23, w24, w25, w26, w27, w20, w21, w22
    ONE_ROUND   w6, w22, w23, w24, w25, w26, w27, w20, w21
    ONE_ROUND   w7, w21, w22, w23, w24, w25, w26, w27, w20

    ONE_ROUND   w8, w20, w21, w22, w23, w24, w25, w26, w27
    ONE_ROUND   w9, w27, w20, w21, w22, w23, w24, w25, w26
    ONE_ROUND   w10, w26, w27, w20, w21, w22, w23, w24, w25
    ONE_ROUND   w11, w25, w26, w27, w20, w21, w22, w23, w24

    ONE_ROUND   w12, w24, w25, w26, w27, w20, w21, w22, w23
    ONE_ROUND   w13, w23, w24, w25, w26, w27, w20, w21, w22
    ONE_ROUND   w14, w22, w23, w24, w25, w26, w27, w20, w21
    ONE_ROUND   w15, w21, w22, w23, w24, w25, w26, w27, w20

.Lloop_compress_16_63:
    /* Start 16-31, 32-47, 48-63 compression */
    sub     x30, x30, #16

    /* 0 */
    UPDATE_W    w0, w1, w9, w14
    ONE_ROUND   w0, w20, w21, w22, w23, w24, w25, w26, w27

    /* 1 */
    UPDATE_W    w1, w2, w10, w15
    ONE_ROUND   w1, w27, w20, w21, w22, w23, w24, w25, w26

    /* 2 */
    UPDATE_W    w2, w3, w11, w0
    ONE_ROUND   w2, w26, w27, w20, w21, w22, w23, w24, w25

    /* 3 */
    UPDATE_W    w3, w4, w12, w1
    ONE_ROUND   w3, w25, w26, w27, w20, w21, w22, w23, w24

    /* 4 */
    UPDATE_W    w4, w5, w13, w2
    ONE_ROUND   w4, w24, w25, w26, w27, w20, w21, w22, w23

    /* 5 */
    UPDATE_W    w5, w6, w14, w3
    ONE_ROUND   w5, w23, w24, w25, w26, w27, w20, w21, w22

    /* 6 */
    UPDATE_W    w6, w7, w15, w4
    ONE_ROUND   w6, w22, w23, w24, w25, w26, w27, w20, w21

    /* 7 */
    UPDATE_W    w7, w8, w0, w5
    ONE_ROUND   w7, w21, w22, w23, w24, w25, w26, w27, w20

    /* 8 */
    UPDATE_W    w8, w9, w1, w6
    ONE_ROUND   w8, w20, w21, w22, w23, w24, w25, w26, w27

    /* 9 */
    UPDATE_W    w9, w10, w2, w7
    ONE_ROUND   w9, w27, w20, w21, w22, w23, w24, w25, w26

    /* 10 */
    UPDATE_W    w10, w11, w3, w8
    ONE_ROUND   w10, w26, w27, w20, w21, w22, w23, w24, w25

    /* 11 */
    UPDATE_W    w11, w12, w4, w9
    ONE_ROUND   w11, w25, w26, w27, w20, w21, w22, w23, w24

    /* 12 */
    UPDATE_W    w12, w13, w5, w10
    ONE_ROUND   w12, w24, w25, w26, w27, w20, w21, w22, w23

    /* 13 */
    UPDATE_W    w13, w14, w6, w11
    ONE_ROUND   w13, w23, w24, w25, w26, w27, w20, w21, w22

    /* 14 */
    UPDATE_W    w14, w15, w7, w12
    ONE_ROUND   w14, w22, w23, w24, w25, w26, w27, w20, w21

    /* 15 */
    UPDATE_W    w15, w0, w8, w13
    ONE_ROUND   w15, w21, w22, w23, w24, w25, w26, w27, w20

    /* Loop until x30 is a multiple of 64 again, i.e. rounds 16-63 of this block are complete. */
    tst     x30, #63
    bne     .Lloop_compress_16_63

    /* Add the working variables to the previous hash value and store a - h. */
    ldr     x0, [sp, #96]

    ldp     w10, w11, [x0]
    ldp     w12, w13, [x0, #4*2]
    ldp     w14, w15, [x0, #4*4]
    ldp     w16, w17, [x0, #4*6]

    add     w20, w20, w10
    add     w21, w21, w11
    add     w22, w22, w12
    add     w23, w23, w13
    stp     w20, w21, [x0]
    add     w24, w24, w14
    add     w25, w25, w15
    stp     w22, w23, [x0, #4*2]
    add     w26, w26, w16
    add     w27, w27, w17
    stp     w24, w25, [x0, #4*4]
    stp     w26, w27, [x0, #4*6]

    ldr     x16, [sp, #104]
    /* If any input blocks remain, continue with the next 64-byte block. */
    cbnz    x30, .Lloop_compress_64

    /* Restore callee-saved registers and return. */
    ldp     x19, x20, [sp, #8*2]
    ldp     x21, x22, [sp, #8*4]
    ldp     x23, x24, [sp, #8*6]
    ldp     x25, x26, [sp, #8*8]
    ldp     x27, x28, [sp, #8*10]
    ldp     x29, x30, [sp], #112
.Lend_sha256:
    ret
    .size SHA256CompressMultiBlocks, .-SHA256CompressMultiBlocks

/*
 *  Function Description: performs 64 rounds of compression per 64-byte block of input data, using the
 *                        ARMv8 SHA256 cryptography extension instructions, and updates the hash value.
 *  Function prototype: void SHA256CryptoExt(uint32_t hash[8], const uint8_t *in, uint32_t num);
 *  Input registers:
 *         x0: address of the hash value
 *         x1: pointer to the input data
 *         x2: number of 64-byte blocks to process
 *  Modified registers: x1-x4, v0-v5, v16-v23
 *  Output register: None
 *  Function/Macro Call: None
 *
 */
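
/*
 * For reference, one step of the quad-round pattern below expressed with ACLE
 * NEON intrinsics (a minimal sketch, not how this file is built; abcd, efgh,
 * wk and w0_3/w4_7/w8_11/w12_15 are illustrative variable names):
 *
 *     #include <arm_neon.h>
 *     uint32x4_t tmp = abcd;                        // mov v2.16b, v0.16b
 *     abcd = vsha256hq_u32(abcd, efgh, wk);         // sha256h  q0, q1, v20.4s
 *     efgh = vsha256h2q_u32(efgh, tmp, wk);         // sha256h2 q1, q2, v20.4s
 *     w0_3 = vsha256su1q_u32(vsha256su0q_u32(w0_3, w4_7), w8_11, w12_15);
 *                                                   // sha256su0/sha256su1 message expansion
 */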
    .text
    .balign 16
    .type SHA256CryptoExt, %function
SHA256CryptoExt:
    ld1     {v4.4s-v5.4s}, [x0]
.Lloop_compress_64_ext:
    adrp    x4, .K256
    add     x4, x4, :lo12:.K256
    sub     x2, x2, #1
    /* 0-15 */
    ld1     {v16.16b-v19.16b}, [x1], #64

    mov     v0.16b, v4.16b
    mov     v1.16b, v5.16b

    rev32       v16.16b, v16.16b
    ld1         {v20.4s}, [x4], #16
    rev32       v17.16b, v17.16b
    ld1         {v21.4s}, [x4], #16
    rev32       v18.16b, v18.16b
    ld1         {v22.4s}, [x4], #16

    add         v20.4s, v20.4s, v16.4s

    rev32       v19.16b, v19.16b
    ld1         {v23.4s}, [x4], #16

    sha256su0   v16.4s, v17.4s
    mov         v2.16b, v0.16b
    sha256h     q0, q1, v20.4s
    sha256h2    q1, q2, v20.4s
    add         v21.4s, v21.4s, v17.4s
    sha256su1   v16.4s, v18.4s, v19.4s
    ld1         {v20.4s}, [x4], #16

    sha256su0   v17.4s, v18.4s
    mov         v3.16b, v0.16b
    sha256h     q0, q1, v21.4s
    sha256h2    q1, q3, v21.4s
    add         v22.4s, v22.4s, v18.4s
    sha256su1   v17.4s, v19.4s, v16.4s
    ld1         {v21.4s}, [x4], #16

    sha256su0   v18.4s, v19.4s
    mov         v2.16b, v0.16b
    sha256h     q0, q1, v22.4s
    sha256h2    q1, q2, v22.4s
    add         v23.4s, v23.4s, v19.4s
    sha256su1   v18.4s, v16.4s, v17.4s
    ld1         {v22.4s}, [x4], #16

    sha256su0   v19.4s, v16.4s
    mov         v3.16b, v0.16b
    sha256h     q0, q1, v23.4s
    sha256h2    q1, q3, v23.4s
    add         v20.4s, v20.4s, v16.4s
    sha256su1   v19.4s, v17.4s, v18.4s
    ld1         {v23.4s}, [x4], #16

    /* 16-31 */
    sha256su0   v16.4s, v17.4s
    mov         v2.16b, v0.16b
    sha256h     q0, q1, v20.4s
    sha256h2    q1, q2, v20.4s
    add         v21.4s, v21.4s, v17.4s
    sha256su1   v16.4s, v18.4s, v19.4s
    ld1         {v20.4s}, [x4], #16

    sha256su0   v17.4s, v18.4s
    mov         v3.16b, v0.16b
    sha256h     q0, q1, v21.4s
    sha256h2    q1, q3, v21.4s
    add         v22.4s, v22.4s, v18.4s
    sha256su1   v17.4s, v19.4s, v16.4s
    ld1         {v21.4s}, [x4], #16

    mov         v2.16b, v0.16b
    sha256su0   v18.4s, v19.4s
    sha256h     q0, q1, v22.4s
    sha256h2    q1, q2, v22.4s
    add         v23.4s, v23.4s, v19.4s
    sha256su1   v18.4s, v16.4s, v17.4s
    ld1         {v22.4s}, [x4], #16

    sha256su0   v19.4s, v16.4s
    mov         v3.16b, v0.16b
    sha256h     q0, q1, v23.4s
    sha256h2    q1, q3, v23.4s
    add         v20.4s, v20.4s, v16.4s
    sha256su1   v19.4s, v17.4s, v18.4s
    ld1         {v23.4s}, [x4], #16

    /* 32-47 */
    sha256su0   v16.4s, v17.4s
    mov         v2.16b, v0.16b
    sha256h     q0, q1, v20.4s
    sha256h2    q1, q2, v20.4s
    add         v21.4s, v21.4s, v17.4s
    sha256su1   v16.4s, v18.4s, v19.4s
    ld1         {v20.4s}, [x4], #16

    sha256su0   v17.4s, v18.4s
    mov         v3.16b, v0.16b
    sha256h     q0, q1, v21.4s
    sha256h2    q1, q3, v21.4s
    add         v22.4s, v22.4s, v18.4s
    sha256su1   v17.4s, v19.4s, v16.4s
    ld1         {v21.4s}, [x4], #16

    sha256su0   v18.4s, v19.4s
    mov         v2.16b, v0.16b
    sha256h     q0, q1, v22.4s
    sha256h2    q1, q2, v22.4s
    add         v23.4s, v23.4s, v19.4s
    sha256su1   v18.4s, v16.4s, v17.4s
    ld1         {v22.4s}, [x4], #16

    sha256su0   v19.4s, v16.4s
    mov         v3.16b, v0.16b
    sha256h     q0, q1, v23.4s
    sha256h2    q1, q3, v23.4s
    add         v20.4s, v20.4s, v16.4s
    sha256su1   v19.4s, v17.4s, v18.4s
    ld1         {v23.4s}, [x4], #16
    /* 48-63 */
    mov         v2.16b, v0.16b
    sha256h     q0, q1, v20.4s
    add         v21.4s, v21.4s, v17.4s
    sha256h2    q1, q2, v20.4s

    mov         v3.16b, v0.16b
    sha256h     q0, q1, v21.4s
    add         v22.4s, v22.4s, v18.4s
    sha256h2    q1, q3, v21.4s

    mov         v2.16b, v0.16b
    sha256h     q0, q1, v22.4s
    add         v23.4s, v23.4s, v19.4s
    sha256h2    q1, q2, v22.4s

    mov         v3.16b, v0.16b
    sha256h     q0, q1, v23.4s
    sha256h2    q1, q3, v23.4s
    /* Add the working state to the previous hash value */
    add     v4.4s, v4.4s, v0.4s
    add     v5.4s, v5.4s, v1.4s
    cbnz    x2, .Lloop_compress_64_ext

    /* Store the updated hash value */
    st1     {v4.4s-v5.4s}, [x0]
    ret
    .size SHA256CryptoExt, .-SHA256CryptoExt
#endif