1 /*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 * http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16 #include "hitls_build.h"
17 #if defined(HITLS_CRYPTO_CHACHA20) && defined(HITLS_CRYPTO_CHACHA20POLY1305)
18
19 #include "bsl_sal.h"
20 #include "crypt_utils.h"
21 #include "poly1305_core.h"
22
23 // Information required by initializing the assembly,
24 // for example, ctx->table. However, the C language does not calculate the table.
Poly1305InitForAsm(Poly1305Ctx * ctx)25 void Poly1305InitForAsm(Poly1305Ctx *ctx)
26 {
27 (void)ctx;
28 return;
29 }
30
31
// Absorbs whole blocks of input into the Poly1305 accumulator ctx->acc.
//
// For every complete POLY1305_BLOCKSIZE chunk of data the loop performs
//     acc = ((acc + block + padbit * 2^128) * r) mod (2^130 - 5)
// with acc held as five 32-bit limbs (a[0..4]) and r as four 32-bit limbs.
// The reduction inside the loop is partial: a[4] may keep a few bits above
// bit 129; the final full reduction is left to Poly1305Last.
//
// ctx     - state; acc[0..4] is read and written back, r[0..3] is only read.
// data    - input bytes, read as little-endian 32-bit words.
// dataLen - number of bytes available at data.
// padbit  - value added to the top limb for each block (weight 2^128).
//           NOTE(review): presumably 1 for full message blocks and 0 for an
//           already padded final block - confirm against the caller.
//
// Returns the number of trailing bytes that were NOT processed because they
// do not fill a whole block.
//
// NOTE(review): the "no overflow" remarks below rely on r having been clamped
// by the key setup code, which is not visible in this file - confirm there.
uint32_t Poly1305Block(Poly1305Ctx *ctx, const uint8_t *data, uint32_t dataLen, uint32_t padbit)
{
    uint32_t a[5], r[4];
    uint64_t b[8];
    // RFC 7539, section 2.5.1: work on local copies of the accumulator and r.
    a[0] = ctx->acc[0];
    a[1] = ctx->acc[1];
    a[2] = ctx->acc[2];
    a[3] = ctx->acc[3];
    a[4] = ctx->acc[4];
    r[0] = ctx->r[0];
    r[1] = ctx->r[1];
    r[2] = ctx->r[2];
    r[3] = ctx->r[3];

    const uint8_t *off = data;
    uint32_t len = dataLen;

    while (len >= POLY1305_BLOCKSIZE) {
        // a = acc + input block, propagating the carry limb by limb.
        b[0] = (uint64_t)a[0] + GET_UINT32_LE(off, 0);
        b[1] = (uint64_t)a[1] + GET_UINT32_LE(off, 4) + (b[0] >> 32);
        b[2] = (uint64_t)a[2] + GET_UINT32_LE(off, 8) + (b[1] >> 32);
        b[3] = (uint64_t)a[3] + GET_UINT32_LE(off, 12) + (b[2] >> 32);

        a[0] = (uint32_t)b[0];
        a[1] = (uint32_t)b[1];
        a[2] = (uint32_t)b[2];
        a[3] = (uint32_t)b[3];
        // The carry out of limb 3 and padbit both land in a[4] (weight 2^128).
        // a[4] is kept small by the reduction at the end of each iteration,
        // so this addition cannot overflow 32 bits.
        a[4] += (uint32_t)(b[3] >> 32) + padbit;

        /* Schoolbook product a * r, low half: b[k] collects every a[i] * r[j]
           with i + j == k for k = 0..3. With a clamped r the 64-bit column
           sums do not overflow. */
        b[0] = (uint64_t)a[0] * r[0];
        b[1] = (uint64_t)a[0] * r[1] + (uint64_t)a[1] * r[0];
        b[2] = (uint64_t)a[0] * r[2] + (uint64_t)a[1] * r[1] + (uint64_t)a[2] * r[0];
        b[3] = (uint64_t)a[0] * r[3] + (uint64_t)a[1] * r[2] + (uint64_t)a[2] * r[1] + (uint64_t)a[3] * r[0];

        /**
         * Schoolbook product a * r, high half: columns k = 4..7
         * (weights 2^128 .. 2^224). Again no 64-bit overflow with a clamped r.
         */
        // The a[4] * r[0] term also belongs to column 4, but it is added later
        // (see the a[4] update below) so that b[4] * 5 / 4 cannot overflow.
        b[4] = (uint64_t)a[1] * r[3] + (uint64_t)a[2] * r[2] + (uint64_t)a[3] * r[1];
        b[5] = (uint64_t)a[2] * r[3] + (uint64_t)a[3] * r[2] + (uint64_t)a[4] * r[1];
        b[6] = (uint64_t)a[3] * r[3] + (uint64_t)a[4] * r[2];
        b[7] = (uint64_t)a[4] * r[3];
        /**
         * Fold the high columns back: 2^130 == 5 (mod 2^130 - 5), so a value x
         * at weight 2^128 equals (x / 4) * 5 = x + x / 4 at weight 2^0.
         * Every term in b[4..7] contains r[1], r[2] or r[3] as a factor; those
         * limbs are expected to be multiples of 4 after clamping, which makes
         * the division by 4 exact.
         */
        b[4] = (b[4] >> 2) + b[4];
        b[5] = (b[5] >> 2) + b[5];
        b[6] = (b[6] >> 2) + b[6];
        b[7] = (b[7] >> 2) + b[7];
        /* Add the folded columns onto the low columns 32 bits at a time:
           the low word of b[k + 4] goes to column k, its high word to column
           k + 1, together with the running carry from the previous column. */
        b[0] += (b[4] & 0xFFFFFFFF);
        b[1] += (b[0] >> 32) + (b[4] >> 32) + (b[5] & 0xFFFFFFFF);
        b[2] += (b[1] >> 32) + (b[5] >> 32) + (b[6] & 0xFFFFFFFF);
        b[3] += (b[2] >> 32) + (b[6] >> 32) + (b[7] & 0xFFFFFFFF);
        // Column 4 (weight 2^128): the deferred a[4] * r[0] term plus the
        // carries coming out of column 3 and the high word of b[7].
        a[4] = a[4] * r[0] + (uint32_t)(b[3] >> 32) + (uint32_t)(b[7] >> 32);
        b[0] = (uint32_t)b[0];
        b[1] = (uint32_t)b[1];
        b[2] = (uint32_t)b[2];
        b[3] = (uint32_t)b[3];
        // Bits 2 and above of a[4] have weight >= 2^130; fold them as
        // (a[4] >> 2) * 5 = (a[4] >> 2) + (a[4] & ~3) into limb 0 and keep
        // only the two low bits in a[4].
        b[0] += (a[4] >> 2) + (a[4] & 0xFFFFFFFC);
        a[4] &= 0x3;

        /* Propagate the carries created by the fold through all limbs. */
        b[1] += (b[0] >> 32);
        b[2] += (b[1] >> 32);
        b[3] += (b[2] >> 32);
        a[4] += (uint32_t)(b[3] >> 32);

        a[0] = (uint32_t)b[0];
        a[1] = (uint32_t)b[1];
        a[2] = (uint32_t)b[2];
        a[3] = (uint32_t)b[3];
        len -= POLY1305_BLOCKSIZE;
        off += POLY1305_BLOCKSIZE;
    }

    // Store the updated accumulator back into the context.
    ctx->acc[0] = a[0];
    ctx->acc[1] = a[1];
    ctx->acc[2] = a[2];
    ctx->acc[3] = a[3];
    ctx->acc[4] = a[4];

    // Wipe the local copies of the accumulator, r and the intermediate products.
    BSL_SAL_CleanseData(a, sizeof(a));
    BSL_SAL_CleanseData(r, sizeof(r));
    BSL_SAL_CleanseData(b, sizeof(b));
    return len;
}
133
/**
 * Finishes the Poly1305 computation and writes the tag.
 *
 * Reads the accumulator ctx->acc[0..4], reduces it fully modulo
 * p = 2^130 - 5, adds ctx->s[0..3] modulo 2^128 and stores the result as
 * four little-endian 32-bit words in mac.
 *
 * The choice between acc and acc - p is made with a bit mask instead of a
 * branch, so control flow does not depend on the secret accumulator value.
 *
 * ctx - state; only read here.
 * mac - output buffer receiving POLY1305_TAGSIZE bytes.
 */
void Poly1305Last(Poly1305Ctx *ctx, uint8_t mac[POLY1305_TAGSIZE])
{
    uint32_t a[5];
    uint64_t b[5];
    a[0] = ctx->acc[0];
    a[1] = ctx->acc[1];
    a[2] = ctx->acc[2];
    a[3] = ctx->acc[3];
    a[4] = ctx->acc[4];
    /* Compute b = acc + 5. Bit 130 of the sum (bit 2 of b[4]) is set exactly
       when acc >= p, in which case the low 128 bits of b equal acc - p. */
    b[0] = (uint64_t)(a[0]) + 5;
    b[1] = a[1] + (b[0] >> 32);
    b[2] = a[2] + (b[1] >> 32);
    b[3] = a[3] + (b[2] >> 32);
    b[4] = a[4] + (b[3] >> 32);
    /* Turn bit 130 into a mask: all ones when acc >= p, zero otherwise.
       The mask is kept in b[4] so it is wiped together with b below. */
    b[4] = (uint64_t)0 - ((b[4] >> 2) & 1);
    /* Branch-free select: keep b[i] (acc + 5) when the mask is set, otherwise
       fall back to the unmodified accumulator limb a[i]. */
    b[0] = (b[0] & b[4]) | ((uint64_t)a[0] & ~b[4]);
    b[1] = (b[1] & b[4]) | ((uint64_t)a[1] & ~b[4]);
    b[2] = (b[2] & b[4]) | ((uint64_t)a[2] & ~b[4]);
    b[3] = (b[3] & b[4]) | ((uint64_t)a[3] & ~b[4]);
    // tag = (acc + s) mod 2^128: the carry out of the top limb is dropped,
    // so no further modular reduction is needed.
    b[0] = ctx->s[0] + (b[0] & 0xffffffff);
    b[1] = ctx->s[1] + (b[1] & 0xffffffff) + (b[0] >> 32);
    b[2] = ctx->s[2] + (b[2] & 0xffffffff) + (b[1] >> 32);
    b[3] = ctx->s[3] + (b[3] & 0xffffffff) + (b[2] >> 32);
    PUT_UINT32_LE(b[0], mac, 0);
    PUT_UINT32_LE(b[1], mac, 4);
    PUT_UINT32_LE(b[2], mac, 8);
    PUT_UINT32_LE(b[3], mac, 12);

    // Wipe the local copies of the accumulator and the intermediate sums.
    BSL_SAL_CleanseData(a, sizeof(a));
    BSL_SAL_CleanseData(b, sizeof(b));
}
170
// Hook for wiping sensitive residue left in registers. Only the assembly
// implementations provide real work here; this C version is an empty stub.
void Poly1305CleanRegister(void)
{
    // Nothing to clean in the portable C implementation.
}
177 #endif