/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */
15 
16 #include "hitls_build.h"
17 #if defined(HITLS_CRYPTO_CHACHA20) && defined(HITLS_CRYPTO_CHACHA20POLY1305)
18 
19 #include "bsl_sal.h"
20 #include "crypt_utils.h"
21 #include "poly1305_core.h"
22 
23 // Information required by initializing the assembly,
24 // for example, ctx->table. However, the C language does not calculate the table.
Poly1305InitForAsm(Poly1305Ctx * ctx)25 void Poly1305InitForAsm(Poly1305Ctx *ctx)
26 {
27     (void)ctx;
28     return;
29 }
30 
31 
32 // Operation for blocks. The return value is the length of the remaining unprocessed data.
Poly1305Block(Poly1305Ctx * ctx,const uint8_t * data,uint32_t dataLen,uint32_t padbit)33 uint32_t Poly1305Block(Poly1305Ctx *ctx, const uint8_t *data, uint32_t dataLen, uint32_t padbit)
34 {
35     uint32_t a[5], r[4];
36     uint64_t b[8];
37     // RFC_7539-2.5.1 for loop internal operation
38     a[0] = ctx->acc[0];
39     a[1] = ctx->acc[1];
40     a[2] = ctx->acc[2];
41     a[3] = ctx->acc[3];
42     a[4] = ctx->acc[4];
43     r[0] = ctx->r[0];
44     r[1] = ctx->r[1];
45     r[2] = ctx->r[2];
46     r[3] = ctx->r[3];
47 
48     const uint8_t *off = data;
49     uint32_t len = dataLen;
50 
51     while (len >= POLY1305_BLOCKSIZE) {
52         // a = acc + inputret
53         b[0] = (uint64_t)a[0] + GET_UINT32_LE(off, 0);
54         b[1] = (uint64_t)a[1] + GET_UINT32_LE(off, 4) + (b[0] >> 32);
55         b[2] = (uint64_t)a[2] + GET_UINT32_LE(off, 8) + (b[1] >> 32);
56         b[3] = (uint64_t)a[3] + GET_UINT32_LE(off, 12) + (b[2] >> 32);
57 
58         a[0] = (uint32_t)b[0];
59         a[1] = (uint32_t)b[1];
60         a[2] = (uint32_t)b[2];
61         a[3] = (uint32_t)b[3];
62         // Upper 32 bits of b[3] carry to a[4]. Because a[4] <= 4, this processing can never overflow
63         a[4] += (uint32_t)(b[3] >> 32) + padbit;
64 
65         /* Lower bits of the data product. Because the high bits of each term of r are processed,
66            there is no carry in the following polynomial multiplication and addition. */
67         b[0] = (uint64_t)a[0] * r[0];
68         b[1] = (uint64_t)a[0] * r[1] + (uint64_t)a[1] * r[0];
69         b[2] = (uint64_t)a[0] * r[2] + (uint64_t)a[1] * r[1] + (uint64_t)a[2] * r[0];
70         b[3] = (uint64_t)a[0] * r[3] + (uint64_t)a[1] * r[2] + (uint64_t)a[2] * r[1] + (uint64_t)a[3] * r[0];
71 
72         /**
73          * Higher bits of the data product. Because the high bits of each term of r are processed,
74          * there is no carry in the following polynomial multiplication and addition.
75          */
76         // (Ensure that the calculation (b[4] * 5) does not overflow, calculate (a[4] * r[0]) items later.)
77         b[4] = (uint64_t)a[1] * r[3] + (uint64_t)a[2] * r[2] + (uint64_t)a[3] * r[1];
78         b[5] = (uint64_t)a[2] * r[3] + (uint64_t)a[3] * r[2] + (uint64_t)a[4] * r[1];
79         b[6] = (uint64_t)a[3] * r[3] + (uint64_t)a[4] * r[2];
80         b[7] = (uint64_t)a[4] * r[3];
81         /**
82          * The upper bits are multiplied by 5/4, because r1, r[2], r3 is processed,
83          * so the above values are divisible by 4. Because the high bits of each term of r are processed,
84          * there is no carry in the following polynomial multiplication and addition: (3 * 5) < 0xF
85          */
86         b[4] = (b[4] >> 2) + b[4];
87         b[5] = (b[5] >> 2) + b[5];
88         b[6] = (b[6] >> 2) + b[6];
89         b[7] = (b[7] >> 2) + b[7];
90         /* After offset 130 bits, the combination is obtained a0 = b[4] * 5 + b[0]....
91            Because the high bits of each term of r are processed,
92            there is no carry in the following polynomial multiplication and addition. */
93         b[0] += (b[4] & 0xFFFFFFFF);
94         b[1] += (b[0] >> 32) + (b[4] >> 32) + (b[5] & 0xFFFFFFFF);
95         b[2] += (b[1] >> 32) + (b[5] >> 32) + (b[6] & 0xFFFFFFFF);
96         b[3] += (b[2] >> 32) + (b[6] >> 32) + (b[7] & 0xFFFFFFFF);
97         a[4] = a[4] * r[0] + (uint32_t)(b[3] >> 32) + (uint32_t)(b[7] >> 32);
98         b[0] = (uint32_t)b[0];
99         b[1] = (uint32_t)b[1];
100         b[2] = (uint32_t)b[2];
101         b[3] = (uint32_t)b[3];
102         // Shift the upper bits of a4 by 130 bits and then multiply it by 5.
103         // The amount of a4 data is small and carry cannot be occurred.
104         b[0] += (a[4] >> 2) + (a[4] & 0xFFFFFFFC);
105         a[4] &= 0x3;
106 
107         /* Process carry */
108         b[1] += (b[0] >> 32);
109         b[2] += (b[1] >> 32);
110         b[3] += (b[2] >> 32);
111         a[4] += (uint32_t)(b[3] >> 32);
112 
113         a[0] = (uint32_t)b[0];
114         a[1] = (uint32_t)b[1];
115         a[2] = (uint32_t)b[2];
116         a[3] = (uint32_t)b[3];
117         len -= POLY1305_BLOCKSIZE;
118         off += POLY1305_BLOCKSIZE;
119     }
120 
121     ctx->acc[0] = a[0];
122     ctx->acc[1] = a[1];
123     ctx->acc[2] = a[2];
124     ctx->acc[3] = a[3];
125     ctx->acc[4] = a[4];
126 
127     // Clear sensitive information.
128     BSL_SAL_CleanseData(a, sizeof(a));
129     BSL_SAL_CleanseData(r, sizeof(r));
130     BSL_SAL_CleanseData(b, sizeof(b));
131     return len;
132 }
133 
/**
 * Finalize the MAC: reduce the accumulator modulo p = 2^130 - 5, add ctx->s,
 * and write the low 128 bits of the sum to mac through PUT_UINT32_LE.
 *
 * @param ctx Poly1305 context; ctx->acc and ctx->s are read, nothing is written back.
 * @param mac Output buffer of POLY1305_TAGSIZE bytes that receives the tag.
 */
void Poly1305Last(Poly1305Ctx *ctx, uint8_t mac[POLY1305_TAGSIZE])
{
    uint32_t a[5];
    uint64_t b[5];
    uint64_t mask;
    a[0] = ctx->acc[0];
    a[1] = ctx->acc[1];
    a[2] = ctx->acc[2];
    a[3] = ctx->acc[3];
    a[4] = ctx->acc[4];
    /* Check whether acc is greater than or equal to p by computing acc + 5. */
    b[0] = (uint64_t)(a[0]) + 5;
    b[1] = a[1] + (b[0] >> 32);
    b[2] = a[2] + (b[1] >> 32);
    b[3] = a[3] + (b[2] >> 32);
    b[4] = a[4] + (b[3] >> 32);
    /*
     * Obtain the mask. Bit 2 of b[4] is bit 130 of acc + 5; if it is set, acc >= p and the
     * low 128 bits of acc + 5 are the reduced value, otherwise acc itself is already reduced.
     * The selection uses a mask instead of a branch so that timing does not depend on
     * secret-derived data.
     */
    mask = (uint64_t)0 - ((b[4] >> 2) & 0x1);
    b[0] = (b[0] & mask) | (a[0] & ~mask);
    b[1] = (b[1] & mask) | (a[1] & ~mask);
    b[2] = (b[2] & mask) | (a[2] & ~mask);
    b[3] = (b[3] & mask) | (a[3] & ~mask);
    // Adding s at the end does not require modulo processing; only carries between words are kept.
    b[0] = ctx->s[0] + (b[0] & 0xffffffff);
    b[1] = ctx->s[1] + (b[1] & 0xffffffff) + (b[0] >> 32);
    b[2] = ctx->s[2] + (b[2] & 0xffffffff) + (b[1] >> 32);
    b[3] = ctx->s[3] + (b[3] & 0xffffffff) + (b[2] >> 32);
    PUT_UINT32_LE(b[0], mac, 0);
    PUT_UINT32_LE(b[1], mac, 4);
    PUT_UINT32_LE(b[2], mac, 8);
    PUT_UINT32_LE(b[3], mac, 12);

    // Clear sensitive information.
    BSL_SAL_CleanseData(a, sizeof(a));
    BSL_SAL_CleanseData(b, sizeof(b));
    BSL_SAL_CleanseData(&mask, sizeof(mask));
}
170 
/**
 * Clear the residual sensitive information in the registers.
 * This function does real work only when the assembly implementation is enabled;
 * the C implementation is a no-op.
 */
void Poly1305CleanRegister(void)
{
}
177 #endif