1 /*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 * http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16 #include "hitls_build.h"
17 #if defined(HITLS_CRYPTO_BN) && defined(HITLS_CRYPTO_BN_COMBA)
18
19 #include <stdint.h>
20 #include "bn_bincal.h"
21
22 #define SQR_COMBA_BEGIN_1(r, a, h, m, l) do { \
23 SQRADD_A((h), (m), (l), (a)[0]); \
24 (r)[0] = (l); \
25 (l) = 0; \
26 MULADD_AB2((l), (h), (m), (a)[0], (a)[1]); \
27 (r)[1] = (m); \
28 (m) = 0; \
29 MULADD_AB2((m), (l), (h), (a)[0], (a)[2]); /* 0 + 2 = 2 */ \
30 SQRADD_A((m), (l), (h), (a)[1]); /* 1 + 1 = 2 */ \
31 (r)[2] = (h); /* 2 */ \
32 (h) = 0; \
33 MULADD_AB2((h), (m), (l), (a)[1], (a)[2]); /* 1 + 2 = 3 */ \
34 MULADD_AB2((h), (m), (l), (a)[0], (a)[3]); /* 0 + 3 = 3 */ \
35 (r)[3] = (l); /* 3 */ \
36 (l) = 0; \
37 } while (0)
38
39 #define SQR_COMBA_BEGIN_2(r, a, h, m, l) do { \
40 MULADD_AB2((l), (h), (m), (a)[0], (a)[4]); /* 0 + 4 = 4 */ \
41 MULADD_AB2((l), (h), (m), (a)[1], (a)[3]); /* 1 + 3 = 4 */ \
42 SQRADD_A((l), (h), (m), (a)[2]); /* 2 + 2 = 4 */ \
43 (r)[4] = (m); /* 4 */ \
44 (m) = 0; \
45 MULADD_AB2((m), (l), (h), (a)[2], (a)[3]); /* 2 + 3 = 5 */ \
46 MULADD_AB2((m), (l), (h), (a)[1], (a)[4]); /* 1 + 4 = 5 */ \
47 MULADD_AB2((m), (l), (h), (a)[0], (a)[5]); /* 0 + 5 = 5 */ \
48 (r)[5] = (h); /* 5 */ \
49 (h) = 0; \
50 } while (0)
51
52 #define MUL_COMBA_BEGIN_1(r, a, b, h, m, l) do { \
53 MULADD_AB((h), (m), (l), (a)[0], (b)[0]); \
54 (r)[0] = (l); \
55 (l) = 0; \
56 MULADD_AB((l), (h), (m), (a)[0], (b)[1]); \
57 MULADD_AB((l), (h), (m), (a)[1], (b)[0]); \
58 (r)[1] = (m); \
59 (m) = 0; \
60 MULADD_AB((m), (l), (h), (a)[2], (b)[0]); /* 2 + 0 = 2 */ \
61 MULADD_AB((m), (l), (h), (a)[1], (b)[1]); /* 1 + 1 = 2 */ \
62 MULADD_AB((m), (l), (h), (a)[0], (b)[2]); /* 0 + 2 = 2 */ \
63 (r)[2] = (h); \
64 (h) = 0; \
65 MULADD_AB((h), (m), (l), (a)[0], (b)[3]); /* 0 + 3 = 3 */ \
66 MULADD_AB((h), (m), (l), (a)[1], (b)[2]); /* 1 + 2 = 3 */ \
67 MULADD_AB((h), (m), (l), (a)[2], (b)[1]); /* 2 + 1 = 3 */ \
68 MULADD_AB((h), (m), (l), (a)[3], (b)[0]); /* 3 + 9 = 3 */ \
69 (r)[3] = (l); /* 3 */ \
70 (l) = 0; \
71 } while (0)
72
73 #define MUL_COMBA_BEGIN_2(r, a, b, h, m, l) do { \
74 MULADD_AB((l), (h), (m), (a)[4], (b)[0]); /* 4 + 0 = 4 */ \
75 MULADD_AB((l), (h), (m), (a)[3], (b)[1]); /* 3 + 1 = 4 */ \
76 MULADD_AB((l), (h), (m), (a)[2], (b)[2]); /* 2 + 2 = 4 */ \
77 MULADD_AB((l), (h), (m), (a)[1], (b)[3]); /* 1 + 3 = 4 */ \
78 MULADD_AB((l), (h), (m), (a)[0], (b)[4]); /* 0 + 4 = 4 */ \
79 (r)[4] = (m); /* 4 */ \
80 (m) = 0; \
81 MULADD_AB((m), (l), (h), (a)[0], (b)[5]); /* 0 + 5 = 5 */ \
82 MULADD_AB((m), (l), (h), (a)[1], (b)[4]); /* 1 + 4 = 5 */ \
83 MULADD_AB((m), (l), (h), (a)[2], (b)[3]); /* 2 + 3 = 5 */ \
84 MULADD_AB((m), (l), (h), (a)[3], (b)[2]); /* 3 + 2 = 5 */ \
85 MULADD_AB((m), (l), (h), (a)[4], (b)[1]); /* 4 + 1 = 5 */ \
86 MULADD_AB((m), (l), (h), (a)[5], (b)[0]); /* 5 + 0 = 5 */ \
87 (r)[5] = (h); /* 5 */ \
88 (h) = 0; \
89 } while (0)
90
91 #define MUL_COMBA_BEGIN_3(r, a, b, h, m, l) do { \
92 MULADD_AB((h), (m), (l), (a)[6], (b)[0]); /* 6 + 0 = 6 */ \
93 MULADD_AB((h), (m), (l), (a)[5], (b)[1]); /* 5 + 1 = 6 */ \
94 MULADD_AB((h), (m), (l), (a)[4], (b)[2]); /* 4 + 2 = 6 */ \
95 MULADD_AB((h), (m), (l), (a)[3], (b)[3]); /* 3 + 3 = 6 */ \
96 MULADD_AB((h), (m), (l), (a)[2], (b)[4]); /* 2 + 4 = 6 */ \
97 MULADD_AB((h), (m), (l), (a)[1], (b)[5]); /* 1 + 5 = 6 */ \
98 MULADD_AB((h), (m), (l), (a)[0], (b)[6]); /* 0 + 6 = 6 */ \
99 (r)[6] = (l); /* 6 */ \
100 (l) = 0; \
101 MULADD_AB((l), (h), (m), (a)[0], (b)[7]); /* 0 + 7 = 7 */ \
102 MULADD_AB((l), (h), (m), (a)[1], (b)[6]); /* 1 + 6 = 7 */ \
103 MULADD_AB((l), (h), (m), (a)[2], (b)[5]); /* 2 + 5 = 7 */ \
104 MULADD_AB((l), (h), (m), (a)[3], (b)[4]); /* 3 + 4 = 7 */ \
105 MULADD_AB((l), (h), (m), (a)[4], (b)[3]); /* 4 + 3 = 7 */ \
106 MULADD_AB((l), (h), (m), (a)[5], (b)[2]); /* 5 + 2 = 7 */ \
107 MULADD_AB((l), (h), (m), (a)[6], (b)[1]); /* 6 + 1 = 7 */ \
108 MULADD_AB((l), (h), (m), (a)[7], (b)[0]); /* 7 + 0 = 7 */ \
109 (r)[7] = (m); /* 7 */ \
110 (m) = 0; \
111 MULADD_AB((m), (l), (h), (a)[7], (b)[1]); /* 7 + 1 = 8 */ \
112 MULADD_AB((m), (l), (h), (a)[6], (b)[2]); /* 6 + 2 = 8 */ \
113 MULADD_AB((m), (l), (h), (a)[5], (b)[3]); /* 5 + 3 = 8 */ \
114 MULADD_AB((m), (l), (h), (a)[4], (b)[4]); /* 4 + 4 = 8 */ \
115 MULADD_AB((m), (l), (h), (a)[3], (b)[5]); /* 3 + 5 = 8 */ \
116 MULADD_AB((m), (l), (h), (a)[2], (b)[6]); /* 2 + 6 = 8 */ \
117 MULADD_AB((m), (l), (h), (a)[1], (b)[7]); /* 1 + 7 = 8 */ \
118 (r)[8] = (h); /* 8 */ \
119 (h) = 0; \
120 } while (0)
121
SqrComba8(BN_UINT * r,const BN_UINT * a)122 static void SqrComba8(BN_UINT *r, const BN_UINT *a)
123 {
124 BN_UINT h = 0;
125 BN_UINT m = 0;
126 BN_UINT l = 0;
127
128 SQR_COMBA_BEGIN_1(r, a, h, m, l);
129 SQR_COMBA_BEGIN_2(r, a, h, m, l);
130
131 MULADD_AB2(h, m, l, a[0], a[6]); /* 0 + 6 = 6 */
132 MULADD_AB2(h, m, l, a[1], a[5]); /* 1 + 5 = 6 */
133 MULADD_AB2(h, m, l, a[2], a[4]); /* 2 + 4 = 6 */
134 SQRADD_A(h, m, l, a[3]); /* 3 + 3 = 6 */
135 r[6] = l; /* 6 */
136 l = 0;
137
138 MULADD_AB2(l, h, m, a[3], a[4]); /* 3 + 4 = 7 */
139 MULADD_AB2(l, h, m, a[2], a[5]); /* 2 + 5 = 7 */
140 MULADD_AB2(l, h, m, a[1], a[6]); /* 1 + 6 = 7 */
141 MULADD_AB2(l, h, m, a[0], a[7]); /* 0 + 7 = 7 */
142 r[7] = m; /* 7 */
143 m = 0;
144
145 MULADD_AB2(m, l, h, a[1], a[7]); /* 1 + 7 = 8 */
146 MULADD_AB2(m, l, h, a[2], a[6]); /* 2 + 6 = 8 */
147 MULADD_AB2(m, l, h, a[3], a[5]); /* 3 + 5 = 8 */
148 SQRADD_A(m, l, h, a[4]); /* 4 + 4 = 8 */
149 r[8] = h; /* 8 */
150 h = 0;
151
152 MULADD_AB2(h, m, l, a[4], a[5]); /* 4 + 5 = 9 */
153 MULADD_AB2(h, m, l, a[3], a[6]); /* 3 + 6 = 9 */
154 MULADD_AB2(h, m, l, a[2], a[7]); /* 2 + 7 = 9 */
155 r[9] = l; /* 9 */
156 l = 0;
157
158 MULADD_AB2(l, h, m, a[3], a[7]); /* 3 + 7 = 10 */
159 MULADD_AB2(l, h, m, a[4], a[6]); /* 4 + 6 = 10 */
160 SQRADD_A(l, h, m, a[5]); /* 5 + 5 = 10 */
161 r[10] = m; /* 10 */
162 m = 0;
163
164 MULADD_AB2(m, l, h, a[5], a[6]); /* 5 + 6 = 11 */
165 MULADD_AB2(m, l, h, a[4], a[7]); /* 4 + 7 = 11 */
166 r[11] = h; /* 11 */
167 h = 0;
168
169 MULADD_AB2(h, m, l, a[5], a[7]); /* 5 + 7 = 12 */
170 SQRADD_A(h, m, l, a[6]); /* 6 + 6 = 12 */
171 r[12] = l; /* 12 */
172 l = 0;
173
174 MULADD_AB2(l, h, m, a[6], a[7]); /* 6 + 7 = 13 */
175 r[13] = m; /* 13 */
176 m = 0;
177
178 SQRADD_A(m, l, h, a[7]); /* 7 + 7 = 14 */
179 r[14] = h; /* 14 */
180 r[15] = l; /* 15 */
181 }
182
SqrComba6(BN_UINT * r,const BN_UINT * a)183 void SqrComba6(BN_UINT *r, const BN_UINT *a)
184 {
185 BN_UINT h = 0;
186 BN_UINT m = 0;
187 BN_UINT l = 0;
188
189 SQR_COMBA_BEGIN_1(r, a, h, m, l);
190 SQR_COMBA_BEGIN_2(r, a, h, m, l);
191
192 MULADD_AB2(h, m, l, a[1], a[5]); /* 1 + 5 = 6 */
193 MULADD_AB2(h, m, l, a[2], a[4]); /* 2 + 4 = 6 */
194 SQRADD_A(h, m, l, a[3]); /* 3 + 3 = 6 */
195 r[6] = l; /* 6 */
196 l = 0;
197
198 MULADD_AB2(l, h, m, a[3], a[4]); /* 3 + 4 = 7 */
199 MULADD_AB2(l, h, m, a[2], a[5]); /* 2 + 5 = 7 */
200 r[7] = m; /* 7 */
201 m = 0;
202
203 MULADD_AB2(m, l, h, a[3], a[5]); /* 3 + 5 = 8 */
204 SQRADD_A(m, l, h, a[4]); /* 4 + 4 = 8 */
205 r[8] = h; /* 8 */
206 h = 0;
207
208 MULADD_AB2(h, m, l, a[4], a[5]); /* 4 + 5 = 9 */
209 r[9] = l; /* 9 */
210 l = 0;
211
212 SQRADD_A(l, h, m, a[5]); /* 5 + 5 = 10 */
213 r[10] = m; /* 10 */
214 r[11] = h; /* 11 */
215 }
216
SqrComba4(BN_UINT * r,const BN_UINT * a)217 void SqrComba4(BN_UINT *r, const BN_UINT *a)
218 {
219 BN_UINT h = 0;
220 BN_UINT m = 0;
221 BN_UINT l = 0;
222
223 SQR_COMBA_BEGIN_1(r, a, h, m, l);
224
225 MULADD_AB2(l, h, m, a[1], a[3]); /* 1 + 3 = 4 */
226 SQRADD_A(l, h, m, a[2]); /* 2 + 2 = 4 */
227 r[4] = m; /* 4 */
228 m = 0;
229
230 MULADD_AB2(m, l, h, a[2], a[3]); /* 2 + 3 = 5 */
231 r[5] = h; /* 5 */
232 h = 0;
233
234 SQRADD_A(h, m, l, a[3]); /* 3 + 3 = 6 */
235 r[6] = l; /* 6 */
236 r[7] = m; /* 7 */
237 }
238
SqrComba(BN_UINT * r,const BN_UINT * a,uint32_t size)239 static void SqrComba(BN_UINT *r, const BN_UINT *a, uint32_t size)
240 {
241 BN_UINT h = 0;
242 BN_UINT m = 0;
243 BN_UINT l = 0;
244 if (size == 3) { /* 3 */
245 SQRADD_A(h, m, l, a[0]);
246 r[0] = l;
247 l = 0;
248
249 MULADD_AB2(l, h, m, a[0], a[1]);
250 r[1] = m;
251 m = 0;
252
253 MULADD_AB2(m, l, h, a[0], a[2]); /* 0 + 2 = 2 */
254 SQRADD_A(m, l, h, a[1]); /* 1 + 1 = 2 */
255 r[2] = h; /* 2 */
256 h = 0;
257
258 MULADD_AB2(h, m, l, a[1], a[2]); /* 1 + 2 = 3 */
259 r[3] = l; /* 3 */
260 l = 0;
261
262 SQRADD_A(l, h, m, a[2]); /* 2 + 2 = 4 */
263 r[4] = m; /* 4 */
264 r[5] = h; /* 5 */
265 return;
266 }
267 if (size == 2) { /* 2 */
268 SQRADD_A(h, m, l, a[0]);
269 r[0] = l;
270 l = 0;
271
272 MULADD_AB2(l, h, m, a[0], a[1]);
273 r[1] = m;
274 m = 0;
275
276 SQRADD_A(m, l, h, a[1]); /* 1 + 1 = 2 */
277 r[2] = h; /* 2 */
278 r[3] = l; /* 3 */
279 return;
280 }
281 SQR_A(r[1], r[0], a[0]); /* size == 1 */
282 }
283
MulComba8(BN_UINT * r,const BN_UINT * a,const BN_UINT * b)284 void MulComba8(BN_UINT *r, const BN_UINT *a, const BN_UINT *b)
285 {
286 BN_UINT h = 0;
287 BN_UINT m = 0;
288 BN_UINT l = 0;
289
290 MUL_COMBA_BEGIN_1(r, a, b, h, m, l);
291 MUL_COMBA_BEGIN_2(r, a, b, h, m, l);
292 MUL_COMBA_BEGIN_3(r, a, b, h, m, l);
293
294 MULADD_AB(h, m, l, a[2], b[7]); /* 2 + 7 = 9 */
295 MULADD_AB(h, m, l, a[3], b[6]); /* 3 + 6 = 9 */
296 MULADD_AB(h, m, l, a[4], b[5]); /* 4 + 5 = 9 */
297 MULADD_AB(h, m, l, a[5], b[4]); /* 5 + 4 = 9 */
298 MULADD_AB(h, m, l, a[6], b[3]); /* 6 + 3 = 9 */
299 MULADD_AB(h, m, l, a[7], b[2]); /* 7 + 2 = 9 */
300 r[9] = l; /* 9 */
301 l = 0;
302
303 MULADD_AB(l, h, m, a[7], b[3]); /* 7 + 3 = 10 */
304 MULADD_AB(l, h, m, a[6], b[4]); /* 6 + 4 = 10 */
305 MULADD_AB(l, h, m, a[5], b[5]); /* 5 + 5 = 10 */
306 MULADD_AB(l, h, m, a[4], b[6]); /* 4 + 6 = 10 */
307 MULADD_AB(l, h, m, a[3], b[7]); /* 3 + 7 = 10 */
308 r[10] = m; /* 10 */
309 m = 0;
310
311 MULADD_AB(m, l, h, a[4], b[7]); /* 4 + 7 = 11 */
312 MULADD_AB(m, l, h, a[5], b[6]); /* 5 + 6 = 11 */
313 MULADD_AB(m, l, h, a[6], b[5]); /* 6 + 5 = 11 */
314 MULADD_AB(m, l, h, a[7], b[4]); /* 7 + 4 = 11 */
315 r[11] = h; /* 11 */
316 h = 0;
317
318 MULADD_AB(h, m, l, a[7], b[5]); /* 7 + 5 = 12 */
319 MULADD_AB(h, m, l, a[6], b[6]); /* 6 + 6 = 12 */
320 MULADD_AB(h, m, l, a[5], b[7]); /* 5 + 7 = 12 */
321 r[12] = l; /* 12 */
322 l = 0;
323
324 MULADD_AB(l, h, m, a[6], b[7]); /* 6 + 7 = 13 */
325 MULADD_AB(l, h, m, a[7], b[6]); /* 7 + 6 = 13 */
326 r[13] = m; /* 13 */
327 m = 0;
328
329 MULADD_AB(m, l, h, a[7], b[7]); /* 7 + 7 = 14 */
330 r[14] = h; /* 14 */
331 r[15] = l; /* 15 */
332 }
333
MulComba6(BN_UINT * r,const BN_UINT * a,const BN_UINT * b)334 void MulComba6(BN_UINT *r, const BN_UINT *a, const BN_UINT *b)
335 {
336 BN_UINT h = 0;
337 BN_UINT m = 0;
338 BN_UINT l = 0;
339
340 MUL_COMBA_BEGIN_1(r, a, b, h, m, l);
341 MUL_COMBA_BEGIN_2(r, a, b, h, m, l);
342
343 MULADD_AB(h, m, l, a[5], b[1]); /* 5 + 1 = 6 */
344 MULADD_AB(h, m, l, a[4], b[2]); /* 4 + 2 = 6 */
345 MULADD_AB(h, m, l, a[3], b[3]); /* 3 + 3 = 6 */
346 MULADD_AB(h, m, l, a[2], b[4]); /* 2 + 4 = 6 */
347 MULADD_AB(h, m, l, a[1], b[5]); /* 1 + 5 = 6 */
348 r[6] = l; /* 6 */
349 l = 0;
350
351 MULADD_AB(l, h, m, a[2], b[5]); /* 2 + 5 = 7 */
352 MULADD_AB(l, h, m, a[3], b[4]); /* 3 + 4 = 7 */
353 MULADD_AB(l, h, m, a[4], b[3]); /* 4 + 3 = 7 */
354 MULADD_AB(l, h, m, a[5], b[2]); /* 5 + 2 = 7 */
355 r[7] = m; /* 7 */
356 m = 0;
357
358 MULADD_AB(m, l, h, a[5], b[3]); /* 5 + 3 = 8 */
359 MULADD_AB(m, l, h, a[4], b[4]); /* 4 + 4 = 8 */
360 MULADD_AB(m, l, h, a[3], b[5]); /* 3 + 5 = 8 */
361 r[8] = h; /* 8 */
362 h = 0;
363
364 MULADD_AB(h, m, l, a[4], b[5]); /* 4 + 5 = 9 */
365 MULADD_AB(h, m, l, a[5], b[4]); /* 5 + 4 = 9 */
366 r[9] = l; /* 9 */
367 l = 0;
368
369 MULADD_AB(l, h, m, a[5], b[5]); /* 5 + 5 = 10 */
370 r[10] = m; /* 10 */
371 r[11] = h; /* 11 */
372 }
373
MulComba4(BN_UINT * r,const BN_UINT * a,const BN_UINT * b)374 void MulComba4(BN_UINT *r, const BN_UINT *a, const BN_UINT *b)
375 {
376 BN_UINT h = 0;
377 BN_UINT m = 0;
378 BN_UINT l = 0;
379
380 MUL_COMBA_BEGIN_1(r, a, b, h, m, l);
381
382 MULADD_AB(l, h, m, a[3], b[1]); /* 3 + 1 = 4 */
383 MULADD_AB(l, h, m, a[2], b[2]); /* 2 + 2 = 4 */
384 MULADD_AB(l, h, m, a[1], b[3]); /* 1 + 3 = 4 */
385 r[4] = m; /* 4 */
386 m = 0;
387
388 MULADD_AB(m, l, h, a[2], b[3]); /* 2 + 3 = 5 */
389 MULADD_AB(m, l, h, a[3], b[2]); /* 3 + 2 = 5 */
390 r[5] = h; /* 5 */
391 h = 0;
392
393 MULADD_AB(h, m, l, a[3], b[3]); /* 3 + 3 = 6 */
394 r[6] = l; /* 6 */
395 r[7] = m; /* 7 */
396 }
397
MulComba(BN_UINT * r,const BN_UINT * a,const BN_UINT * b,uint32_t size)398 static void MulComba(BN_UINT *r, const BN_UINT *a, const BN_UINT *b, uint32_t size)
399 {
400 BN_UINT h = 0;
401 BN_UINT m = 0;
402 BN_UINT l = 0;
403 if (size == 3) { /* 3 */
404 MULADD_AB(h, m, l, a[0], b[0]);
405 r[0] = l;
406 l = 0;
407
408 MULADD_AB(l, h, m, a[0], b[1]);
409 MULADD_AB(l, h, m, a[1], b[0]);
410 r[1] = m;
411 m = 0;
412
413 MULADD_AB(m, l, h, a[2], b[0]); /* 2 + 0 = 2 */
414 MULADD_AB(m, l, h, a[1], b[1]); /* 1 + 1 = 2 */
415 MULADD_AB(m, l, h, a[0], b[2]); /* 0 + 2 = 2 */
416 r[2] = h;
417 h = 0;
418
419 MULADD_AB(h, m, l, a[1], b[2]); /* 1 + 2 = 3 */
420 MULADD_AB(h, m, l, a[2], b[1]); /* 2 + 1 = 3 */
421 r[3] = l; /* 3 */
422 l = 0;
423
424 MULADD_AB(l, h, m, a[2], b[2]); /* 2 + 2 = 4 */
425 r[4] = m; /* 4 */
426 r[5] = h; /* 5 */
427 return;
428 }
429 if (size == 2) { /* 2 */
430 MULADD_AB(h, m, l, a[0], b[0]);
431 r[0] = l;
432 l = 0;
433
434 MULADD_AB(l, h, m, a[0], b[1]);
435 MULADD_AB(l, h, m, a[1], b[0]);
436 r[1] = m;
437 m = 0;
438
439 MULADD_AB(m, l, h, a[1], b[1]);
440 r[2] = h; /* 2 */
441 r[3] = l; /* 3 */
442 return;
443 }
444 MUL_AB(r[1], r[0], a[0], b[0]); /* size == 1 */
445 }
446
SpaceSize(uint32_t size)447 uint32_t SpaceSize(uint32_t size)
448 {
449 uint32_t base = 8; /* Perform 8x batch processing */
450 while (size > base) {
451 base <<= 1;
452 }
453 return base * 4; /* 2x expansion. Each layer requires 2 * size temporary space, 2 * 2 = 4 */
454 }
455
456 /* compare BN array.
457 * return 0, if a == b
458 * return 1, if a > b
459 * return -1, if a < b
460 */
BinCmpSame(const BN_UINT * a,const BN_UINT * b,uint32_t size)461 static int32_t BinCmpSame(const BN_UINT *a, const BN_UINT *b, uint32_t size)
462 {
463 int64_t idx = (int64_t)size;
464 while (--idx >= 0) {
465 if (a[idx] != b[idx]) {
466 return a[idx] > b[idx] ? 1 : -1;
467 }
468 }
469 return 0;
470 }
471
472 /* The caller promised that aSize >= bSize.
473 * r = ABS(a - b). Need to ensure that aSize == bSize + (0 || 1).
474 * return 0 if a > b
475 * return 1 if a <= b
476 */
ABS_Sub(BN_UINT * t,const BN_UINT * a,uint32_t aSize,const BN_UINT * b,uint32_t bSize)477 static uint32_t ABS_Sub(BN_UINT *t, const BN_UINT *a, uint32_t aSize, const BN_UINT *b, uint32_t bSize)
478 {
479 int32_t ret;
480 do {
481 if (aSize > bSize) {
482 t[bSize] = a[bSize]; /* bSize = aSize - 1 */
483 if (a[bSize] > 0) {
484 ret = 1;
485 break;
486 }
487 }
488 ret = BinCmpSame(a, b, bSize);
489 } while (0);
490 if (ret > 0) {
491 BN_UINT borrow = BinSub(t, a, b, bSize);
492 if (aSize > bSize) { /* When the length difference exists and a > b exists, the borrowing is processed. */
493 t[bSize] -= borrow;
494 }
495 return 0;
496 } else {
497 BinSub(t, b, a, bSize);
498 return 1;
499 }
500 return 0;
501 }
502
503 /** Only aSize == bSize is supported. This interface will recurse. The recursion depth is O(deep) = log2(size)
504 * Ensure that space >= SpaceSize(size)
505 * (ah|al * bh|bl) = (((ah*bh) << 2) + (((ah*bh) + (al*bl) - (ah - al)(bh - bl)) << 1) + (al*bl))
506 */
MulConquer(BN_UINT * r,const BN_UINT * a,const BN_UINT * b,uint32_t size,BN_UINT * space,bool consttime)507 void MulConquer(BN_UINT *r, const BN_UINT *a, const BN_UINT *b, uint32_t size, BN_UINT *space, bool consttime)
508 {
509 if (!consttime) {
510 if (size == 8) { /* Perform 8x batch processing */
511 MulComba8(r, a, b);
512 return;
513 }
514 if (size == 6) { /* Perform 6x batch processing */
515 MulComba6(r, a, b);
516 return;
517 }
518 if (size == 4) { /* Perform 4x batch processing */
519 MulComba4(r, a, b);
520 return;
521 }
522 if (size < 4) { /* Less than 4, simple processing */
523 MulComba(r, a, b, size);
524 return;
525 }
526 } else if (size <= 8) { /* Calculate if the block size is smaller than 8. */
527 BinMul(r, size << 1, a, size, b, size);
528 return;
529 }
530
531 /* truncates the length of the low bits of the BigNum, that is the length of al bl. */
532 const uint32_t sizeLo = size >> 1;
533 const uint32_t sizeLo2 = sizeLo << 1;
534 const uint32_t shift1 = sizeLo; /* (((ah*bh) + (al*bl) - (ah - al)(bh - bl)) << 1) location */
535 const uint32_t shift2 = shift1 << 1; /* ((ah*bh) << 2) location */
536 /* truncates the length of the high bits of the BigNum, that is the length of ah bh. */
537 const uint32_t sizeHi = size - sizeLo;
538 const uint32_t sizeHi2 = sizeHi << 1;
539
540 /* Split the input 'space'. The current function uses tmp1 and tmp2,
541 * and the remaining newspace is used by the lower layer.
542 * space = tmp1_lo..tmp1_hi | tmp2_lo..tmp2_hi | newSpace, sizeof(tmp1_lo) == sizeHi.
543 */
544 BN_UINT *tmp1 = space;
545 BN_UINT *tmp2 = tmp1 + sizeHi2;
546 BN_UINT *newSpace = tmp2 + sizeHi2;
547
548 /* tmp2_lo = (ah-al) */
549 uint32_t sign = ABS_Sub(tmp2, a + shift1, sizeHi, a, sizeLo);
550 /* tmp2_hi = (bh-bl) */
551 sign ^= ABS_Sub(tmp2 + sizeHi, b + shift1, sizeHi, b, sizeLo);
552
553 MulConquer(r, a, b, sizeLo, newSpace, consttime); /* calculate (al*bl) */
554 MulConquer(r + shift2, a + shift1, b + shift1, sizeHi, newSpace, consttime); /* calculate (ah*bh) */
555 MulConquer(tmp1, tmp2, tmp2 + sizeHi, sizeHi, newSpace, consttime); /* calculate (ah-al)(bh-bl) */
556 /* At this time r has stored ((ah*bh) << 2) and (al*bl) */
557 /* carry should be added in (r + shift1)[sizeHi * 2] */
558 /* tmp2 is (ah*bh) + (al*bl), but the processing length here is sizeLo * 2 */
559 BN_UINT carry = BinAdd(tmp2, r, r + shift2, sizeLo2);
560 if (sizeHi > sizeLo) {
561 /* If there is a length difference, the length of (ah*bh) is sizeLo * 2 + 2,
562 and the tail of (ah*bh) needs to be processed. */
563 /* point to (r + shift2)[sizeLo * 2], the unprocessed tail of (ah*bh) */
564 const uint32_t position = shift2 + (sizeLo2);
565 tmp2[sizeLo2] = r[position] + carry;
566 carry = (tmp2[sizeLo2] < carry) ? 1 : 0;
567 /* continue the processing */
568 tmp2[(sizeLo2) + 1] = r[position + 1] + carry;
569 carry = (tmp2[sizeLo2 + 1] < carry) ? 1 : 0;
570 }
571 /* tmp1 = (ah*bh) + (al*bl) - (ah-al)(bh-bl), tmp2 is (ah*bh) + (al*bl) */
572 if (sign == 1) {
573 carry += BinAdd(tmp1, tmp2, tmp1, sizeHi2);
574 } else {
575 carry -= BinSub(tmp1, tmp2, tmp1, sizeHi2);
576 }
577 /* finally r adds tmp1, that is (ah*bh) + (al*bl) - (ah - al)(bh - bl) */
578 carry += BinAdd(r + shift1, r + shift1, tmp1, sizeHi2);
579 for (uint32_t i = shift1 + sizeHi2; carry > 0 && i < (size << 1); i++) {
580 ADD_AB(carry, r[i], r[i], carry);
581 }
582 }
583
584 /** This interface will recurse. The recursion depth is O(deep) = log2(size)
585 * Ensure that space >= SpaceSize(size)
586 * (x|y)^2 = ((x^2 << 2) + ((x^2 + y^2 - (x - y)^2)) << 1) + y^2)
587 */
SqrConquer(BN_UINT * r,const BN_UINT * a,uint32_t size,BN_UINT * space,bool consttime)588 void SqrConquer(BN_UINT *r, const BN_UINT *a, uint32_t size, BN_UINT *space, bool consttime)
589 {
590 if (!consttime) {
591 if (size == 8) { /* Perform 8x batch processing */
592 SqrComba8(r, a);
593 return;
594 }
595 if (size == 6) { /* Perform 6x batch processing */
596 SqrComba6(r, a);
597 return;
598 }
599 if (size == 4) { /* Perform 4x batch processing */
600 SqrComba4(r, a);
601 return;
602 }
603 if (size < 4) { /* Less than 4, simple processing */
604 SqrComba(r, a, size);
605 return;
606 }
607 } else if (size <= 8) { /* Calculate if the block size is smaller than 8. */
608 BinSqr(r, size << 1, a, size);
609 return;
610 }
611
612 /* truncates the length of the high bits of the BigNum, that is the length of x. */
613 const uint32_t sizeHi = (size + 1) >> 1;
614 /* truncates the length of the low bits of the BigNum, that is the length of y. */
615 const uint32_t sizeLo = size >> 1;
616 const uint32_t shift1 = sizeLo; /* ((x^2 + y^2 - (x - y)^2)) << 1) location */
617 const uint32_t shift2 = shift1 << 1; /* ((x^2 << 2) location */
618
619 /* Split the input 'space'. The current function uses tmp1 and tmp2,
620 and the remaining newspace is used by the lower layer. */
621 BN_UINT *tmp1 = space;
622 BN_UINT *tmp2 = tmp1 + (sizeHi << 1);
623 BN_UINT *newSpace = tmp2 + (sizeHi << 1);
624
625 /* tmp2 is the upper bits of num minus the lower bits of num, (x-y) */
626 (void)ABS_Sub(tmp2, a + shift1, sizeHi, a, sizeLo);
627
628 SqrConquer(r, a, sizeLo, newSpace, consttime); /* calculate y^2 */
629 SqrConquer(r + shift2, a + shift1, sizeHi, newSpace, consttime); /* calculate x^2 */
630 SqrConquer(tmp1, tmp2, sizeHi, newSpace, consttime); /* calculate (x-y)^2 */
631
632 /* At this time r has stored (x^2 << 2) and y^2 */
633 /* carry should be added in (r + shift1)[sizeHi * 2] */
634 /* tmp2 = x^2 + y^2, but the processing length here is sizeLo * 2 */
635 BN_UINT carry = BinAdd(tmp2, r, r + shift2, sizeLo << 1);
636 if (sizeHi > sizeLo) {
637 /* If there is a length difference, the length of x^2 is sizeLo * 2 + 2,
638 and the tail of x^2 needs to be processed. */
639 /* point to (r + shift2)[sizeLo * 2], the unprocessed tail of x^2 */
640 const uint32_t position = shift2 + (sizeLo << 1);
641 tmp2[sizeLo << 1] = r[position] + carry;
642 carry = (tmp2[sizeLo << 1] < carry) ? 1 : 0;
643 /* continue the processing */
644 tmp2[(sizeLo << 1) + 1] = r[position + 1] + carry;
645 carry = (tmp2[(sizeLo << 1) + 1] < carry) ? 1 : 0;
646 }
647 /* tmp1 = x^2 + y^2 - (x-y)^2, tmp2 is x^2 + y^2 */
648 carry -= BinSub(tmp1, tmp2, tmp1, sizeHi << 1);
649 /* finally r adds x^2 + y^2 - (x-y)^2 */
650 carry += BinAdd(r + shift1, r + shift1, tmp1, sizeHi << 1);
651
652 uint32_t i;
653 for (i = shift1 + (sizeHi << 1); carry > 0 && i < (size << 1); i++) {
654 ADD_AB(carry, r[i], r[i], carry);
655 }
656 }
657 #endif /* HITLS_CRYPTO_BN_COMBA */
658