• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 *     http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16#include "hitls_build.h"
17#ifdef HITLS_CRYPTO_SHA512
18
.arch    armv8-a+crypto

/*
 * SHA-512 round constants K[0..79] (see RFC 4634).
 * Four 64-bit constants per row; the trailing comment gives the round indices of the row.
 * The table is read-only and starts on a 64-byte boundary.
 */
.section .rodata
.p2align 6
.K512:
    .quad   0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc  // K[0..3]
    .quad   0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118  // K[4..7]
    .quad   0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2  // K[8..11]
    .quad   0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694  // K[12..15]
    .quad   0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65  // K[16..19]
    .quad   0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5  // K[20..23]
    .quad   0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4  // K[24..27]
    .quad   0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70  // K[28..31]
    .quad   0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df  // K[32..35]
    .quad   0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b  // K[36..39]
    .quad   0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30  // K[40..43]
    .quad   0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8  // K[44..47]
    .quad   0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8  // K[48..51]
    .quad   0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3  // K[52..55]
    .quad   0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec  // K[56..59]
    .quad   0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b  // K[60..63]
    .quad   0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178  // K[64..67]
    .quad   0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b  // K[68..71]
    .quad   0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c  // K[72..75]
    .quad   0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817  // K[76..79]
44
/**
 *  Macro description: Message schedule step. Replaces W[i-16] with the new word W[i].
 *  Input register:
 *      wi_16: W[i-16]
 *      wi_15: W[i-15]
 *      wi_7:  W[i-7]
 *      wi_2:  W[i-2]
 *  Modify the register: wi_16 x17 x28.
 *      On exit x28 holds sigma0(W[i-15]) and x17 holds sigma1(W[i-2]).
 *  Output register:
 *      wi_16: W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
 *  Function/Macro Call: None
 */
    .macro  UPDATE_W        wi_16, wi_15, wi_7, wi_2
    /* x28 = sigma0(W[i-15]) = ROR(x, 1) ^ ROR(x, 8) ^ (x >> 7) */
    ror     x28, \wi_15, #1
    eor     x28, x28, \wi_15, ror #8
    eor     x28, x28, \wi_15, lsr #7

    /* x17 = sigma1(W[i-2]) = ROR(x, 19) ^ ROR(x, 61) ^ (x >> 6) */
    ror     x17, \wi_2, #19
    eor     x17, x17, \wi_2, ror #61
    eor     x17, x17, \wi_2, lsr #6

    /* Fold the three terms into W[i-16]; 64-bit wrap-around addition, so the order is free. */
    add     \wi_16, \wi_16, x28
    add     \wi_16, \wi_16, \wi_7
    add     \wi_16, \wi_16, x17
    .endm
68
/**
 *  Macro description: One SHA-512 compression round. The caller rotates the a - h register
 *  assignment between invocations instead of moving values between registers.
 *  Input register:
 *      x19:  address of the round constant K[i] inside the .K512 table
 *      wi:   message schedule word W[i]
 *      a - h: working variables of the hash state
 *  Modify the register: h d x16 x17 x28 x29, and x19 (advanced by 8 to the next constant)
 *  Output register:
 *      d: d + T1
 *      h: T1 + T2
 *      where T1 = h + Sigma1(e) + Ch(e, f, g) + K[i] + W[i]
 *            T2 = Sigma0(a) + Maj(a, b, c)
 *  Function/Macro Call: None
 */
    .macro ONE_ROUND         wi, a, b, c, d, e, f, g, h
    /* h += Sigma1(e), Sigma1(e) = ROR(e, 14) ^ ROR(e, 18) ^ ROR(e, 41) */
    ror    x16, \e, #14
    eor    x29, \e, \e, ror #23     // e ^ ROR(e, 23); rotating this by 18 gives ROR(e, 18) ^ ROR(e, 41)
    eor    x29, x16, x29, ror #18
    add    \h, \h, x29

    /* h += Ch(e, f, g), computed as ((f ^ g) & e) ^ g, equal to (e & f) | (g & ~e) */
    eor    x17, \f, \g
    and    x17, x17, \e
    eor    x17, x17, \g
    add    \h, \h, x17

    /* h += K[i] + W[i]; the post-indexed load steps x19 to the next constant */
    ldr    x16, [x19], #8
    add    x16, x16, \wi
    add    \h, \h, x16

    /* h now holds T1 */
    add    \d, \d, \h               // d += T1

    /* h += Maj(a, b, c), computed as ((a ^ b) & (b ^ c)) ^ b, equal to (a&b)^(b&c)^(a&c) */
    eor    x17, \a, \b
    eor    x28, \b, \c
    and    x28, x28, x17
    eor    x28, x28, \b
    add    \h, \h, x28

    /* h += Sigma0(a), Sigma0(a) = ROR(a, 28) ^ ROR(a, 34) ^ ROR(a, 39) */
    ror    x16, \a, #28
    eor    x29, \a, \a, ror #5      // a ^ ROR(a, 5); rotating this by 34 gives ROR(a, 34) ^ ROR(a, 39)
    eor    x29, x16, x29, ror #34
    add    \h, \h, x29
    .endm
109
/**
 *  Function description: Compresses consecutive 128-byte message blocks. For every block the
 *  80 SHA-512 rounds are run and the result is added into the hash state.
 *  Function prototype: void SHA512CompressMultiBlocks(uint64_t hash[8], const uint8_t *in, uint32_t num);
 *  Input register:
 *         x0: address of the eight 64-bit hash words; read on entry, updated after every block.
 *         x1: address of the input data.
 *         w2: number of 128-byte blocks (input data length divided by 128).
 *             Only the low 32 bits are read: the parameter is a uint32_t and the procedure call
 *             standard leaves the upper half of the argument register unspecified.
 *  Change register: x0-x17. x19-x30 are used as well, but saved on entry and restored on return.
 *  Output register: None
 *  Function/Macro Call: ONE_ROUND, UPDATE_W
 *  Stack frame (112 bytes):
 *         [sp, #0]   x29, x30
 *         [sp, #16]  x19 - x28
 *         [sp, #96]  hash address (x0 is reused for W[0])
 *         [sp, #104] address of the next input block (x16 is scratch inside the macros)
 *  Note: ONE_ROUND uses x29 as a scratch register, so x29 does not hold a frame pointer while
 *        the rounds are running. It is restored before returning.
 */
    .text
    .balign 16
    .global SHA512CompressMultiBlocks
    .type SHA512CompressMultiBlocks, %function
SHA512CompressMultiBlocks:
    cbz     w2, .Lend_sha512        // num == 0: nothing to do (test only the 32-bit argument)
    stp     x29, x30, [sp, #-112]!
    mov     x29, sp
    stp     x19, x20, [sp, #8*2]
    stp     x21, x22, [sp, #8*4]
    stp     x23, x24, [sp, #8*6]
    stp     x25, x26, [sp, #8*8]
    stp     x27, x28, [sp, #8*10]

    /* load a - h into x20 - x27 */
    ldp     x20, x21, [x0]
    ldp     x22, x23, [x0, #8*2]
    ldp     x24, x25, [x0, #8*4]
    ldp     x26, x27, [x0, #8*6]

    str     x0, [sp, #96]           // keep the hash address, x0 becomes W[0]
    mov     x16, x1                 // input Value Address
    /*
     * x30 = zero-extended num * 4. Each block takes four passes through
     * .Lloop_compress_16_79, and every pass decrements x30 by one.
     */
    ubfiz   x30, x2, #2, #32

.Lloop_compress_80:
    /* Start 80 rounds of processing: x19 walks the constant table from K[0] */
    adrp    x19, .K512
    add     x19, x19, :lo12:.K512
    ldp     x0, x1, [x16]           // Load input values W[0..15].
    ldp     x2, x3, [x16, #8*2]
    ldp     x4, x5, [x16, #8*4]
    ldp     x6, x7, [x16, #8*6]
    ldp     x8, x9, [x16, #8*8]
    ldp     x10, x11, [x16, #8*10]
    ldp     x12, x13, [x16, #8*12]
    ldp     x14, x15, [x16, #8*14]

    add     x16, x16, #8*16         // advance to the next block
    str     x16, [sp, #104]         // spill it, x16 is a temporary in the rounds
#ifndef HITLS_BIG_ENDIAN
    /* The message words are big-endian; byte-swap them on a little-endian build. */
    rev     x0, x0
    rev     x1, x1
    rev     x2, x2
    rev     x3, x3
    rev     x4, x4
    rev     x5, x5
    rev     x6, x6
    rev     x7, x7
    rev     x8, x8
    rev     x9, x9
    rev     x10, x10
    rev     x11, x11
    rev     x12, x12
    rev     x13, x13
    rev     x14, x14
    rev     x15, x15
#endif
    /* Rounds 0 - 15. x16 x17 x28 x29 used as a temporary register */
    ONE_ROUND   x0, x20, x21, x22, x23, x24, x25, x26, x27
    ONE_ROUND   x1, x27, x20, x21, x22, x23, x24, x25, x26
    ONE_ROUND   x2, x26, x27, x20, x21, x22, x23, x24, x25
    ONE_ROUND   x3, x25, x26, x27, x20, x21, x22, x23, x24

    ONE_ROUND   x4, x24, x25, x26, x27, x20, x21, x22, x23
    ONE_ROUND   x5, x23, x24, x25, x26, x27, x20, x21, x22
    ONE_ROUND   x6, x22, x23, x24, x25, x26, x27, x20, x21
    ONE_ROUND   x7, x21, x22, x23, x24, x25, x26, x27, x20

    ONE_ROUND   x8, x20, x21, x22, x23, x24, x25, x26, x27
    ONE_ROUND   x9, x27, x20, x21, x22, x23, x24, x25, x26
    ONE_ROUND   x10, x26, x27, x20, x21, x22, x23, x24, x25
    ONE_ROUND   x11, x25, x26, x27, x20, x21, x22, x23, x24

    ONE_ROUND   x12, x24, x25, x26, x27, x20, x21, x22, x23
    ONE_ROUND   x13, x23, x24, x25, x26, x27, x20, x21, x22
    ONE_ROUND   x14, x22, x23, x24, x25, x26, x27, x20, x21
    ONE_ROUND   x15, x21, x22, x23, x24, x25, x26, x27, x20

.Lloop_compress_16_79:
    /* Start 16 - 31, 32 - 47, 48 - 63, 64 - 79 compression: 16 rounds per pass */
    sub     x30, x30, #1

    /* 0 */
    UPDATE_W    x0, x1, x9, x14
    ONE_ROUND   x0, x20, x21, x22, x23, x24, x25, x26, x27

    /* 1 */
    UPDATE_W    x1, x2, x10, x15
    ONE_ROUND   x1, x27, x20, x21, x22, x23, x24, x25, x26

    /* 2 */
    UPDATE_W    x2, x3, x11, x0
    ONE_ROUND   x2, x26, x27, x20, x21, x22, x23, x24, x25

    /* 3 */
    UPDATE_W    x3, x4, x12, x1
    ONE_ROUND   x3, x25, x26, x27, x20, x21, x22, x23, x24

    /* 4 */
    UPDATE_W    x4, x5, x13, x2
    ONE_ROUND   x4, x24, x25, x26, x27, x20, x21, x22, x23

    /* 5 */
    UPDATE_W    x5, x6, x14, x3
    ONE_ROUND   x5, x23, x24, x25, x26, x27, x20, x21, x22

    /* 6 */
    UPDATE_W    x6, x7, x15, x4
    ONE_ROUND   x6, x22, x23, x24, x25, x26, x27, x20, x21

    /* 7 */
    UPDATE_W    x7, x8, x0, x5
    ONE_ROUND   x7, x21, x22, x23, x24, x25, x26, x27, x20

    /* 8 */
    UPDATE_W    x8, x9, x1, x6
    ONE_ROUND   x8, x20, x21, x22, x23, x24, x25, x26, x27

    /* 9 */
    UPDATE_W    x9, x10, x2, x7
    ONE_ROUND   x9, x27, x20, x21, x22, x23, x24, x25, x26

    /* 10 */
    UPDATE_W    x10, x11, x3, x8
    ONE_ROUND   x10, x26, x27, x20, x21, x22, x23, x24, x25

    /* 11 */
    UPDATE_W    x11, x12, x4, x9
    ONE_ROUND   x11, x25, x26, x27, x20, x21, x22, x23, x24

    /* 12 */
    UPDATE_W    x12, x13, x5, x10
    ONE_ROUND   x12, x24, x25, x26, x27, x20, x21, x22, x23

    /* 13 */
    UPDATE_W    x13, x14, x6, x11
    ONE_ROUND   x13, x23, x24, x25, x26, x27, x20, x21, x22

    /* 14 */
    UPDATE_W    x14, x15, x7, x12
    ONE_ROUND   x14, x22, x23, x24, x25, x26, x27, x20, x21

    /* 15 */
    UPDATE_W    x15, x0, x8, x13
    ONE_ROUND   x15, x21, x22, x23, x24, x25, x26, x27, x20

    /*
     * The low two bits of x30 count the passes of the current block; they reach
     * zero after the fourth pass, i.e. once round 79 is done.
     */
    tst     x30, #3
    bne     .Lloop_compress_16_79

    /* Add the working variables a - h to the stored hash words and write them back. */
    ldr     x0, [sp, #96]

    ldp     x10, x11, [x0]
    ldp     x12, x13, [x0, #8*2]
    ldp     x14, x15, [x0, #8*4]
    ldp     x16, x17, [x0, #8*6]

    add     x20, x20, x10
    add     x21, x21, x11
    add     x22, x22, x12
    add     x23, x23, x13
    add     x24, x24, x14
    add     x25, x25, x15
    add     x26, x26, x16
    add     x27, x27, x17

    stp     x20, x21, [x0]
    stp     x22, x23, [x0, #8*2]
    stp     x24, x25, [x0, #8*4]
    stp     x26, x27, [x0, #8*6]

    ldr     x16, [sp, #104]         // address of the next input block
    /* x30 != 0 means blocks remain: continue with the next 80 rounds. */
    cbnz    x30, .Lloop_compress_80

    /* The function returns */
    ldp     x19, x20, [sp, #8*2]
    ldp     x21, x22, [sp, #8*4]
    ldp     x23, x24, [sp, #8*6]
    ldp     x25, x26, [sp, #8*8]
    ldp     x27, x28, [sp, #8*10]
    ldp     x29, x30, [sp], #112
.Lend_sha512:
    ret
    .size SHA512CompressMultiBlocks, .-SHA512CompressMultiBlocks
309
310#endif
311