/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */
15
16#include "hitls_build.h"
17#ifdef HITLS_CRYPTO_MD5
18
.file   "md5_x86_64.S"

/* Function arguments as they arrive in registers (rdi, rsi, rdx). */
.set    HASH, %rdi              /* address of the hash state words */
.set    INPUT, %rsi             /* address of the current input block */
.set    NUM, %rdx               /* remaining block count */

/* Base address of the g_tMd5 constant table. */
.set    T_ORIGIN_ADDR, %rcx

/* Working copies of the four 32-bit state words. */
.set    A, %r8d
.set    B, %r9d
.set    C, %r10d
.set    D, %r11d

/* Per-step scratch registers handed to the *_ONE_ROUND macros. */
.set    W, %r12d                /* message word */
.set    T, %r13d                /* step constant */
.set    TEMP1, %r14d
.set    TEMP2, %r15d

/* Rotate-left counts; Sxy is the y-th count of round x. */
.equ    S11, 7
.equ    S12, 12
.equ    S13, 17
.equ    S14, 22

.equ    S21, 5
.equ    S22, 9
.equ    S23, 14
.equ    S24, 20

.equ    S31, 4
.equ    S32, 11
.equ    S33, 16
.equ    S34, 23

.equ    S41, 6
.equ    S42, 10
.equ    S43, 15
.equ    S44, 21
53
/*
 * MD5 step constants, 64 32-bit words in step order (16 per round).
 * For details about the data source, see the RFC1321 document.
 * The table is only ever read, so it is placed in .rodata rather than in the
 * executable .text section; code reaches it RIP-relative via g_tMd5(%rip).
 */
    .section .rodata
    .balign 64
    .type   g_tMd5, %object
g_tMd5:
    /* Round 1 (steps 0-15) */
    .long   0xD76AA478, 0xE8C7B756, 0x242070DB, 0xC1BDCEEE
    .long   0xF57C0FAF, 0x4787C62A, 0xA8304613, 0xFD469501
    .long   0x698098D8, 0x8B44F7AF, 0xFFFF5BB1, 0x895CD7BE
    .long   0x6B901122, 0xFD987193, 0xA679438E, 0x49B40821

    /* Round 2 (steps 16-31) */
    .long   0xF61E2562, 0xC040B340, 0x265E5A51, 0xE9B6C7AA
    .long   0xD62F105D, 0x02441453, 0xD8A1E681, 0xE7D3FBC8
    .long   0x21E1CDE6, 0xC33707D6, 0xF4D50D87, 0x455A14ED
    .long   0xA9E3E905, 0xFCEFA3F8, 0x676F02D9, 0x8D2A4C8A

    /* Round 3 (steps 32-47) */
    .long   0xFFFA3942, 0x8771F681, 0x6D9D6122, 0xFDE5380C
    .long   0xA4BEEA44, 0x4BDECFA9, 0xF6BB4B60, 0xBEBFBC70
    .long   0x289B7EC6, 0xEAA127FA, 0xD4EF3085, 0x04881D05
    .long   0xD9D4D039, 0xE6DB99E5, 0x1FA27CF8, 0xC4AC5665

    /* Round 4 (steps 48-63) */
    .long   0xF4292244, 0x432AFF97, 0xAB9423A7, 0xFC93A039
    .long   0x655B59C3, 0x8F0CCC92, 0xFFEFF47D, 0x85845DD1
    .long   0x6FA87E4F, 0xFE2CE6E0, 0xA3014314, 0x4E0811A1
    .long   0xF7537E82, 0xBD3AF235, 0x2AD7D2BB, 0xEB86D391
    .size   g_tMd5, .-g_tMd5
79
/*
 *  Macro description: One step of MD5 round 1 (steps 0-15), built on the function F.
 *  Input register:
 *       a - d: working state registers; a is rewritten, b, c and d are only read
 *       wAddr: byte offset of the message word, relative to INPUT
 *           s: rotate-left count (immediate)
 *       tAddr: byte offset of the step constant, relative to T_ORIGIN_ADDR
 *       w, t, temp1, temp2: scratch registers
 *  Change register: a, w, t, temp1, temp2 and the flags
 *  Output register:
 *           a: b + ROTL32(a + F(b,c,d) + w + t, s)
 *  Function/Macro Call: None
 *  Implementation description:
 *          F(X,Y,Z) = XY v not(X) Z
 *          not(b) & d is produced by a single andn, which is a BMI1 instruction.
 */
.macro FF_ONE_ROUND     a, b, c, d, wAddr, s, tAddr, w, t, temp1, temp2
    /* temp2 = F(b, c, d) = (b & c) | ((~b) & d) */
    andn    \d, \b, \temp2          /* (~b) & d */
    mov     \b, \temp1
    and     \c, \temp1              /* b & c */
    or      \temp1, \temp2

    /* Fetch the message word and the step constant. */
    mov     \wAddr(INPUT), \w
    mov     \tAddr(T_ORIGIN_ADDR), \t

    /* a = ROTL32(a + F + t + w, s) + b */
    add     \temp2, \a
    add     \t, \a
    add     \w, \a
    rol     $\s, \a
    add     \b, \a
.endm
120
/*
 *  Macro description: One step of MD5 round 2 (steps 16-31), built on the function G.
 *  Input register:
 *       a - d: working state registers; a is rewritten, b, c and d are only read
 *       wAddr: byte offset of the message word, relative to INPUT
 *           s: rotate-left count (immediate)
 *       tAddr: byte offset of the step constant, relative to T_ORIGIN_ADDR
 *       w, t, temp1, temp2: scratch registers
 *  Change register: a, w, t, temp1, temp2 and the flags
 *  Output register:
 *           a: b + ROTL32(a + G(b,c,d) + w + t, s)
 *  Function/Macro Call: None
 *  Implementation description:
 *          G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
 *          c & ~d is produced by a single andn, which is a BMI1 instruction.
 */
.macro GG_ONE_ROUND     a, b, c, d, wAddr, s, tAddr, w, t, temp1, temp2
    /* temp2 = G(b, c, d) = (b & d) | (c & (~d)) */
    andn    \c, \d, \temp2          /* c & (~d) */
    mov     \b, \temp1
    and     \d, \temp1              /* b & d */
    or      \temp1, \temp2

    /* Fetch the message word and the step constant. */
    mov     \wAddr(INPUT), \w
    mov     \tAddr(T_ORIGIN_ADDR), \t

    /* a = ROTL32(a + G + w + t, s) + b */
    add     \temp2, \a
    add     \w, \a
    add     \t, \a
    rol     $\s, \a
    add     \b, \a
.endm
161
/*
 *  Macro description: One step of MD5 round 3 (steps 32-47), built on the function H.
 *  Input register:
 *      a - d: working state registers; a is rewritten, b, c and d are only read
 *      wAddr: byte offset of the message word, relative to INPUT
 *          s: rotate-left count (immediate)
 *      tAddr: byte offset of the step constant, relative to T_ORIGIN_ADDR
 *      w, t, temp1: scratch registers
 *      temp2: accepted so all four round macros share one signature; not used here
 *  Change register: a, w, t, temp1 and the flags
 *  Output register:
 *           a: b + ROTL32(a + H(b,c,d) + w + t, s)
 *  Function/Macro Call: None
 *  Implementation description:
 *          H(x, y, z) ((x) ^ (y) ^ (z))
 */
.macro HH_ONE_ROUND     a, b, c, d, wAddr, s, tAddr, w, t, temp1, temp2
    /* temp1 = H(b, c, d) = b ^ c ^ d */
    mov     \c, \temp1
    xor     \d, \temp1
    xor     \b, \temp1

    /* Fetch the message word and the step constant. */
    mov     \wAddr(INPUT), \w
    mov     \tAddr(T_ORIGIN_ADDR), \t

    /* a = ROTL32(a + H + w + t, s) + b */
    add     \temp1, \a
    add     \w, \a
    add     \t, \a
    rol     $\s, \a
    add     \b, \a
.endm
201
/*
 *  Macro description: One step of MD5 round 4 (steps 48-63), built on the function I.
 *  Input register:
 *     a - d: working state registers; a is rewritten, b, c and d are only read
 *     wAddr: byte offset of the message word, relative to INPUT
 *         s: rotate-left count (immediate)
 *     tAddr: byte offset of the step constant, relative to T_ORIGIN_ADDR
 *     w, t, temp1: scratch registers
 *     temp2: accepted so all four round macros share one signature; not used here
 *  Change register: a, w, t, temp1 and the flags
 *  Output register:
 *         a: b + ROTL32(a + I(b,c,d) + w + t, s)
 *  Function/Macro Call: None
 *  Implementation description:
 *          I(x, y, z) ((y) ^ ((x) | (~(z))))
 */
.macro II_ONE_ROUND     a, b, c, d, wAddr, s, tAddr, w, t, temp1, temp2
    /* Fetch the message word and the step constant. */
    mov     \wAddr(INPUT), \w
    mov     \tAddr(T_ORIGIN_ADDR), \t

    /* temp1 = I(b, c, d) = c ^ (b | (~d)) */
    mov     \d, \temp1
    not     \temp1                  /* ~d */
    or      \b, \temp1              /* b | ~d */
    xor     \c, \temp1

    /* a = ROTL32(a + I + w + t, s) + b */
    add     \temp1, \a
    add     \w, \a
    add     \t, \a
    rol     $\s, \a
    add     \b, \a
.endm
243
/*
 *  Function description: Performs 64 rounds of compression calculation on each 64-byte
 *                        input block and adds the result into the hash value.
 *  Function prototype: void MD5_Compress(uint32_t hash[4], const uint8_t *in, uint32_t num);
 *  ABI: System V AMD64 (arguments in rdi, rsi, rdx).
 *  Input register:
 *      rdi: Address of the hash value: four 32-bit words at offsets 0, 4, 8 and 12.
 *      rsi: Pointer to the input data (Wi); 64 bytes are consumed per block.
 *      edx: Number of 64-byte blocks to process. Only the low 32 bits are used, because
 *           the upper half of rdx is not guaranteed to be zero for a uint32_t argument.
 *           If it is 0 the function returns without reading the input or changing the hash.
 *  Change register: rcx, rdx, rsi, r8-r11 and the flags.
 *                   r12-r15 are used as scratch but are saved and restored.
 *  Output register: None (the hash value is updated in memory through rdi).
 *  Function/Macro Call: FF_ONE_ROUND, GG_ONE_ROUND, HH_ONE_ROUND, II_ONE_ROUND
 */
.text
.globl MD5_Compress
.type MD5_Compress,%function
.p2align 4
MD5_Compress:
.cfi_startproc
    /* Zero-extend the 32-bit block count and leave early when there is nothing to do. */
    mov     %edx, %edx
    test    %edx, %edx
    jz      .Lmd5_done

    /* Save the callee-saved registers that back W, T, TEMP1 and TEMP2. */
    pushq   %r12
    .cfi_adjust_cfa_offset 8
    .cfi_offset %r12, -16
    pushq   %r13
    .cfi_adjust_cfa_offset 8
    .cfi_offset %r13, -24
    pushq   %r14
    .cfi_adjust_cfa_offset 8
    .cfi_offset %r14, -32
    pushq   %r15
    .cfi_adjust_cfa_offset 8
    .cfi_offset %r15, -40

    /* The table address never changes, so load it once outside the block loop. */
    leaq    g_tMd5(%rip), T_ORIGIN_ADDR

    /* r8d-r11d: a-d */
    mov     0(HASH), A
    mov     4(HASH), B
    mov     8(HASH), C
    mov     12(HASH), D

.Lmd5_loop:
    /* Round 1: steps 0-15 */
    /* FF_ONE_ROUND      a, b, c, d, wAddr, s, tAddr, w, t, temp1, temp2 */
    FF_ONE_ROUND         A, B, C, D, 0, S11, 0, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         D, A, B, C, 4, S12, 4, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         C, D, A, B, 8, S13, 8, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         B, C, D, A, 12, S14, 12, W, T, TEMP1, TEMP2

    FF_ONE_ROUND         A, B, C, D, 16, S11, 16, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         D, A, B, C, 20, S12, 20, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         C, D, A, B, 24, S13, 24, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         B, C, D, A, 28, S14, 28, W, T, TEMP1, TEMP2

    FF_ONE_ROUND         A, B, C, D, 32, S11, 32, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         D, A, B, C, 36, S12, 36, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         C, D, A, B, 40, S13, 40, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         B, C, D, A, 44, S14, 44, W, T, TEMP1, TEMP2

    FF_ONE_ROUND         A, B, C, D, 48, S11, 48, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         D, A, B, C, 52, S12, 52, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         C, D, A, B, 56, S13, 56, W, T, TEMP1, TEMP2
    FF_ONE_ROUND         B, C, D, A, 60, S14, 60, W, T, TEMP1, TEMP2

    /* Round 2: steps 16-31 */
    /* GG_ONE_ROUND      a, b, c, d, wAddr, s, tAddr, w, t, temp1, temp2 */
    GG_ONE_ROUND         A, B, C, D, 4, S21, 64, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         D, A, B, C, 24, S22, 68, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         C, D, A, B, 44, S23, 72, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         B, C, D, A, 0, S24, 76, W, T, TEMP1, TEMP2

    GG_ONE_ROUND         A, B, C, D, 20, S21, 80, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         D, A, B, C, 40, S22, 84, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         C, D, A, B, 60, S23, 88, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         B, C, D, A, 16, S24, 92, W, T, TEMP1, TEMP2

    GG_ONE_ROUND         A, B, C, D, 36, S21, 96, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         D, A, B, C, 56, S22, 100, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         C, D, A, B, 12, S23, 104, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         B, C, D, A, 32, S24, 108, W, T, TEMP1, TEMP2

    GG_ONE_ROUND         A, B, C, D, 52, S21, 112, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         D, A, B, C, 8, S22, 116, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         C, D, A, B, 28, S23, 120, W, T, TEMP1, TEMP2
    GG_ONE_ROUND         B, C, D, A, 48, S24, 124, W, T, TEMP1, TEMP2

    /* Round 3: steps 32-47 */
    /* HH_ONE_ROUND      a, b, c, d, wAddr, s, tAddr, w, t, temp1, temp2 */
    HH_ONE_ROUND         A, B, C, D, 20, S31, 128, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         D, A, B, C, 32, S32, 132, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         C, D, A, B, 44, S33, 136, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         B, C, D, A, 56, S34, 140, W, T, TEMP1, TEMP2

    HH_ONE_ROUND         A, B, C, D, 4, S31, 144, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         D, A, B, C, 16, S32, 148, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         C, D, A, B, 28, S33, 152, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         B, C, D, A, 40, S34, 156, W, T, TEMP1, TEMP2

    HH_ONE_ROUND         A, B, C, D, 52, S31, 160, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         D, A, B, C, 0, S32, 164, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         C, D, A, B, 12, S33, 168, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         B, C, D, A, 24, S34, 172, W, T, TEMP1, TEMP2

    HH_ONE_ROUND         A, B, C, D, 36, S31, 176, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         D, A, B, C, 48, S32, 180, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         C, D, A, B, 60, S33, 184, W, T, TEMP1, TEMP2
    HH_ONE_ROUND         B, C, D, A, 8, S34, 188, W, T, TEMP1, TEMP2

    /* Round 4: steps 48-63 */
    /* II_ONE_ROUND      a, b, c, d, wAddr, s, tAddr, w, t, temp1, temp2 */
    II_ONE_ROUND         A, B, C, D, 0, S41, 192, W, T, TEMP1, TEMP2
    II_ONE_ROUND         D, A, B, C, 28, S42, 196, W, T, TEMP1, TEMP2
    II_ONE_ROUND         C, D, A, B, 56, S43, 200, W, T, TEMP1, TEMP2
    II_ONE_ROUND         B, C, D, A, 20, S44, 204, W, T, TEMP1, TEMP2

    II_ONE_ROUND         A, B, C, D, 48, S41, 208, W, T, TEMP1, TEMP2
    II_ONE_ROUND         D, A, B, C, 12, S42, 212, W, T, TEMP1, TEMP2
    II_ONE_ROUND         C, D, A, B, 40, S43, 216, W, T, TEMP1, TEMP2
    II_ONE_ROUND         B, C, D, A, 4, S44, 220, W, T, TEMP1, TEMP2

    II_ONE_ROUND         A, B, C, D, 32, S41, 224, W, T, TEMP1, TEMP2
    II_ONE_ROUND         D, A, B, C, 60, S42, 228, W, T, TEMP1, TEMP2
    II_ONE_ROUND         C, D, A, B, 24, S43, 232, W, T, TEMP1, TEMP2
    II_ONE_ROUND         B, C, D, A, 52, S44, 236, W, T, TEMP1, TEMP2

    II_ONE_ROUND         A, B, C, D, 16, S41, 240, W, T, TEMP1, TEMP2
    II_ONE_ROUND         D, A, B, C, 44, S42, 244, W, T, TEMP1, TEMP2
    II_ONE_ROUND         C, D, A, B, 8, S43, 248, W, T, TEMP1, TEMP2
    II_ONE_ROUND         B, C, D, A, 36, S44, 252, W, T, TEMP1, TEMP2

    /* Add the previous hash value to the block result and store the new hash value. */
    add     0(HASH), A
    add     4(HASH), B
    add     8(HASH), C
    add     12(HASH), D
    mov     A, 0(HASH)
    mov     B, 4(HASH)
    mov     C, 8(HASH)
    mov     D, 12(HASH)

    /* Step to the next 64-byte block; NUM is at least 1 here, so it cannot wrap. */
    lea     64(INPUT), INPUT
    sub     $1, NUM
    jnz     .Lmd5_loop

    /* Restore the saved registers in reverse push order. */
    popq    %r15
    .cfi_adjust_cfa_offset -8
    .cfi_restore %r15
    popq    %r14
    .cfi_adjust_cfa_offset -8
    .cfi_restore %r14
    popq    %r13
    .cfi_adjust_cfa_offset -8
    .cfi_restore %r13
    popq    %r12
    .cfi_adjust_cfa_offset -8
    .cfi_restore %r12

.Lmd5_done:
    ret
.cfi_endproc
    .size   MD5_Compress, .-MD5_Compress
392
393#endif
394