• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file is part of the openHiTLS project.
3  *
4  * openHiTLS is licensed under the Mulan PSL v2.
5  * You can use this software according to the terms and conditions of the Mulan PSL v2.
6  * You may obtain a copy of Mulan PSL v2 at:
7  *
8  *     http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  * See the Mulan PSL v2 for more details.
14  */
15 
16 #include "hitls_build.h"
17 #if defined(HITLS_CRYPTO_BN) && defined(HITLS_CRYPTO_BN_COMBA)
18 
19 #include <stdint.h>
20 #include "bn_bincal.h"
21 
22 #define SQR_COMBA_BEGIN_1(r, a, h, m, l) do {                   \
23     SQRADD_A((h), (m), (l), (a)[0]);                            \
24     (r)[0] = (l);                                               \
25     (l) = 0;                                                    \
26     MULADD_AB2((l), (h), (m), (a)[0], (a)[1]);                  \
27     (r)[1] = (m);                                               \
28     (m) = 0;                                                    \
29     MULADD_AB2((m), (l), (h), (a)[0], (a)[2]);  /* 0 + 2 = 2 */ \
30     SQRADD_A((m), (l), (h), (a)[1]);            /* 1 + 1 = 2 */ \
31     (r)[2] = (h);                               /* 2 */         \
32     (h) = 0;                                                    \
33     MULADD_AB2((h), (m), (l), (a)[1], (a)[2]);  /* 1 + 2 = 3 */ \
34     MULADD_AB2((h), (m), (l), (a)[0], (a)[3]);  /* 0 + 3 = 3 */ \
35     (r)[3] = (l);                               /* 3 */         \
36     (l) = 0;                                                    \
37 } while (0)
38 
39 #define SQR_COMBA_BEGIN_2(r, a, h, m, l) do {                   \
40     MULADD_AB2((l), (h), (m), (a)[0], (a)[4]);  /* 0 + 4 = 4 */ \
41     MULADD_AB2((l), (h), (m), (a)[1], (a)[3]);  /* 1 + 3 = 4 */ \
42     SQRADD_A((l), (h), (m), (a)[2]);            /* 2 + 2 = 4 */ \
43     (r)[4] = (m);                               /* 4 */         \
44     (m) = 0;                                                    \
45     MULADD_AB2((m), (l), (h), (a)[2], (a)[3]);  /* 2 + 3 = 5 */ \
46     MULADD_AB2((m), (l), (h), (a)[1], (a)[4]);  /* 1 + 4 = 5 */ \
47     MULADD_AB2((m), (l), (h), (a)[0], (a)[5]);  /* 0 + 5 = 5 */ \
48     (r)[5] = (h);                               /* 5 */         \
49     (h) = 0;                                                    \
50 } while (0)
51 
52 #define MUL_COMBA_BEGIN_1(r, a, b, h, m, l) do {                \
53     MULADD_AB((h), (m), (l), (a)[0], (b)[0]);                   \
54     (r)[0] = (l);                                               \
55     (l) = 0;                                                    \
56     MULADD_AB((l), (h), (m), (a)[0], (b)[1]);                   \
57     MULADD_AB((l), (h), (m), (a)[1], (b)[0]);                   \
58     (r)[1] = (m);                                               \
59     (m) = 0;                                                    \
60     MULADD_AB((m), (l), (h), (a)[2], (b)[0]);   /* 2 + 0 = 2 */ \
61     MULADD_AB((m), (l), (h), (a)[1], (b)[1]);   /* 1 + 1 = 2 */ \
62     MULADD_AB((m), (l), (h), (a)[0], (b)[2]);   /* 0 + 2 = 2 */ \
63     (r)[2] = (h);                                               \
64     (h) = 0;                                                    \
65     MULADD_AB((h), (m), (l), (a)[0], (b)[3]);   /* 0 + 3 = 3 */ \
66     MULADD_AB((h), (m), (l), (a)[1], (b)[2]);   /* 1 + 2 = 3 */ \
67     MULADD_AB((h), (m), (l), (a)[2], (b)[1]);   /* 2 + 1 = 3 */ \
68     MULADD_AB((h), (m), (l), (a)[3], (b)[0]);   /* 3 + 9 = 3 */ \
69     (r)[3] = (l);                               /* 3 */         \
70     (l) = 0;                                                    \
71 } while (0)
72 
73 #define MUL_COMBA_BEGIN_2(r, a, b, h, m, l) do {                \
74     MULADD_AB((l), (h), (m), (a)[4], (b)[0]);   /* 4 + 0 = 4 */ \
75     MULADD_AB((l), (h), (m), (a)[3], (b)[1]);   /* 3 + 1 = 4 */ \
76     MULADD_AB((l), (h), (m), (a)[2], (b)[2]);   /* 2 + 2 = 4 */ \
77     MULADD_AB((l), (h), (m), (a)[1], (b)[3]);   /* 1 + 3 = 4 */ \
78     MULADD_AB((l), (h), (m), (a)[0], (b)[4]);   /* 0 + 4 = 4 */ \
79     (r)[4] = (m);                               /* 4 */         \
80     (m) = 0;                                                    \
81     MULADD_AB((m), (l), (h), (a)[0], (b)[5]);   /* 0 + 5 = 5 */ \
82     MULADD_AB((m), (l), (h), (a)[1], (b)[4]);   /* 1 + 4 = 5 */ \
83     MULADD_AB((m), (l), (h), (a)[2], (b)[3]);   /* 2 + 3 = 5 */ \
84     MULADD_AB((m), (l), (h), (a)[3], (b)[2]);   /* 3 + 2 = 5 */ \
85     MULADD_AB((m), (l), (h), (a)[4], (b)[1]);   /* 4 + 1 = 5 */ \
86     MULADD_AB((m), (l), (h), (a)[5], (b)[0]);   /* 5 + 0 = 5 */ \
87     (r)[5] = (h);                               /* 5 */         \
88     (h) = 0;                                                    \
89 } while (0)
90 
91 #define MUL_COMBA_BEGIN_3(r, a, b, h, m, l) do {                \
92     MULADD_AB((h), (m), (l), (a)[6], (b)[0]);   /* 6 + 0 = 6 */ \
93     MULADD_AB((h), (m), (l), (a)[5], (b)[1]);   /* 5 + 1 = 6 */ \
94     MULADD_AB((h), (m), (l), (a)[4], (b)[2]);   /* 4 + 2 = 6 */ \
95     MULADD_AB((h), (m), (l), (a)[3], (b)[3]);   /* 3 + 3 = 6 */ \
96     MULADD_AB((h), (m), (l), (a)[2], (b)[4]);   /* 2 + 4 = 6 */ \
97     MULADD_AB((h), (m), (l), (a)[1], (b)[5]);   /* 1 + 5 = 6 */ \
98     MULADD_AB((h), (m), (l), (a)[0], (b)[6]);   /* 0 + 6 = 6 */ \
99     (r)[6] = (l);                               /* 6 */         \
100     (l) = 0;                                                    \
101     MULADD_AB((l), (h), (m), (a)[0], (b)[7]);   /* 0 + 7 = 7 */ \
102     MULADD_AB((l), (h), (m), (a)[1], (b)[6]);   /* 1 + 6 = 7 */ \
103     MULADD_AB((l), (h), (m), (a)[2], (b)[5]);   /* 2 + 5 = 7 */ \
104     MULADD_AB((l), (h), (m), (a)[3], (b)[4]);   /* 3 + 4 = 7 */ \
105     MULADD_AB((l), (h), (m), (a)[4], (b)[3]);   /* 4 + 3 = 7 */ \
106     MULADD_AB((l), (h), (m), (a)[5], (b)[2]);   /* 5 + 2 = 7 */ \
107     MULADD_AB((l), (h), (m), (a)[6], (b)[1]);   /* 6 + 1 = 7 */ \
108     MULADD_AB((l), (h), (m), (a)[7], (b)[0]);   /* 7 + 0 = 7 */ \
109     (r)[7] = (m);                               /* 7 */         \
110     (m) = 0;                                                    \
111     MULADD_AB((m), (l), (h), (a)[7], (b)[1]);   /* 7 + 1 = 8 */ \
112     MULADD_AB((m), (l), (h), (a)[6], (b)[2]);   /* 6 + 2 = 8 */ \
113     MULADD_AB((m), (l), (h), (a)[5], (b)[3]);   /* 5 + 3 = 8 */ \
114     MULADD_AB((m), (l), (h), (a)[4], (b)[4]);   /* 4 + 4 = 8 */ \
115     MULADD_AB((m), (l), (h), (a)[3], (b)[5]);   /* 3 + 5 = 8 */ \
116     MULADD_AB((m), (l), (h), (a)[2], (b)[6]);   /* 2 + 6 = 8 */ \
117     MULADD_AB((m), (l), (h), (a)[1], (b)[7]);   /* 1 + 7 = 8 */ \
118     (r)[8] = (h);                               /* 8 */         \
119     (h) = 0;                                                    \
120 } while (0)
121 
SqrComba8(BN_UINT * r,const BN_UINT * a)122 static void SqrComba8(BN_UINT *r, const BN_UINT *a)
123 {
124     BN_UINT h = 0;
125     BN_UINT m = 0;
126     BN_UINT l = 0;
127 
128     SQR_COMBA_BEGIN_1(r, a, h, m, l);
129     SQR_COMBA_BEGIN_2(r, a, h, m, l);
130 
131     MULADD_AB2(h, m, l, a[0], a[6]);    /* 0 + 6 = 6 */
132     MULADD_AB2(h, m, l, a[1], a[5]);    /* 1 + 5 = 6 */
133     MULADD_AB2(h, m, l, a[2], a[4]);    /* 2 + 4 = 6 */
134     SQRADD_A(h, m, l, a[3]);            /* 3 + 3 = 6 */
135     r[6] = l;                           /* 6 */
136     l = 0;
137 
138     MULADD_AB2(l, h, m, a[3], a[4]);    /* 3 + 4 = 7 */
139     MULADD_AB2(l, h, m, a[2], a[5]);    /* 2 + 5 = 7 */
140     MULADD_AB2(l, h, m, a[1], a[6]);    /* 1 + 6 = 7 */
141     MULADD_AB2(l, h, m, a[0], a[7]);    /* 0 + 7 = 7 */
142     r[7] = m;                           /* 7 */
143     m = 0;
144 
145     MULADD_AB2(m, l, h, a[1], a[7]);    /* 1 + 7 = 8 */
146     MULADD_AB2(m, l, h, a[2], a[6]);    /* 2 + 6 = 8 */
147     MULADD_AB2(m, l, h, a[3], a[5]);    /* 3 + 5 = 8 */
148     SQRADD_A(m, l, h, a[4]);            /* 4 + 4 = 8 */
149     r[8] = h;                           /* 8 */
150     h = 0;
151 
152     MULADD_AB2(h, m, l, a[4], a[5]);    /* 4 + 5 = 9 */
153     MULADD_AB2(h, m, l, a[3], a[6]);    /* 3 + 6 = 9 */
154     MULADD_AB2(h, m, l, a[2], a[7]);    /* 2 + 7 = 9 */
155     r[9] = l;                           /* 9 */
156     l = 0;
157 
158     MULADD_AB2(l, h, m, a[3], a[7]);    /* 3 + 7 = 10 */
159     MULADD_AB2(l, h, m, a[4], a[6]);    /* 4 + 6 = 10 */
160     SQRADD_A(l, h, m, a[5]);            /* 5 + 5 = 10 */
161     r[10] = m;                          /* 10 */
162     m = 0;
163 
164     MULADD_AB2(m, l, h, a[5], a[6]);    /* 5 + 6 = 11 */
165     MULADD_AB2(m, l, h, a[4], a[7]);    /* 4 + 7 = 11 */
166     r[11] = h;                          /* 11 */
167     h = 0;
168 
169     MULADD_AB2(h, m, l, a[5], a[7]);    /* 5 + 7 = 12 */
170     SQRADD_A(h, m, l, a[6]);            /* 6 + 6 = 12 */
171     r[12] = l;                          /* 12 */
172     l = 0;
173 
174     MULADD_AB2(l, h, m, a[6], a[7]);    /* 6 + 7 = 13 */
175     r[13] = m;                          /* 13 */
176     m = 0;
177 
178     SQRADD_A(m, l, h, a[7]);            /* 7 + 7 = 14 */
179     r[14] = h;                          /* 14 */
180     r[15] = l;                          /* 15 */
181 }
182 
SqrComba6(BN_UINT * r,const BN_UINT * a)183 void SqrComba6(BN_UINT *r, const BN_UINT *a)
184 {
185     BN_UINT h = 0;
186     BN_UINT m = 0;
187     BN_UINT l = 0;
188 
189     SQR_COMBA_BEGIN_1(r, a, h, m, l);
190     SQR_COMBA_BEGIN_2(r, a, h, m, l);
191 
192     MULADD_AB2(h, m, l, a[1], a[5]);    /* 1 + 5 = 6 */
193     MULADD_AB2(h, m, l, a[2], a[4]);    /* 2 + 4 = 6 */
194     SQRADD_A(h, m, l, a[3]);            /* 3 + 3 = 6 */
195     r[6] = l;                           /* 6 */
196     l = 0;
197 
198     MULADD_AB2(l, h, m, a[3], a[4]);    /* 3 + 4 = 7 */
199     MULADD_AB2(l, h, m, a[2], a[5]);    /* 2 + 5 = 7 */
200     r[7] = m;                           /* 7 */
201     m = 0;
202 
203     MULADD_AB2(m, l, h, a[3], a[5]);    /* 3 + 5 = 8 */
204     SQRADD_A(m, l, h, a[4]);            /* 4 + 4 = 8 */
205     r[8] = h;                           /* 8 */
206     h = 0;
207 
208     MULADD_AB2(h, m, l, a[4], a[5]);    /* 4 + 5 = 9 */
209     r[9] = l;                           /* 9 */
210     l = 0;
211 
212     SQRADD_A(l, h, m, a[5]);            /* 5 + 5 = 10 */
213     r[10] = m;                          /* 10 */
214     r[11] = h;                          /* 11 */
215 }
216 
SqrComba4(BN_UINT * r,const BN_UINT * a)217 void SqrComba4(BN_UINT *r, const BN_UINT *a)
218 {
219     BN_UINT h = 0;
220     BN_UINT m = 0;
221     BN_UINT l = 0;
222 
223     SQR_COMBA_BEGIN_1(r, a, h, m, l);
224 
225     MULADD_AB2(l, h, m, a[1], a[3]);    /* 1 + 3 = 4 */
226     SQRADD_A(l, h, m, a[2]);            /* 2 + 2 = 4 */
227     r[4] = m;                           /* 4 */
228     m = 0;
229 
230     MULADD_AB2(m, l, h, a[2], a[3]);    /* 2 + 3 = 5 */
231     r[5] = h;                           /* 5 */
232     h = 0;
233 
234     SQRADD_A(h, m, l, a[3]);            /* 3 + 3 = 6 */
235     r[6] = l;                           /* 6 */
236     r[7] = m;                           /* 7 */
237 }
238 
SqrComba(BN_UINT * r,const BN_UINT * a,uint32_t size)239 static void SqrComba(BN_UINT *r, const BN_UINT *a, uint32_t size)
240 {
241     BN_UINT h = 0;
242     BN_UINT m = 0;
243     BN_UINT l = 0;
244     if (size == 3) {                        /* 3 */
245         SQRADD_A(h, m, l, a[0]);
246         r[0] = l;
247         l = 0;
248 
249         MULADD_AB2(l, h, m, a[0], a[1]);
250         r[1] = m;
251         m = 0;
252 
253         MULADD_AB2(m, l, h, a[0], a[2]);    /* 0 + 2 = 2 */
254         SQRADD_A(m, l, h, a[1]);            /* 1 + 1 = 2 */
255         r[2] = h;                           /* 2 */
256         h = 0;
257 
258         MULADD_AB2(h, m, l, a[1], a[2]);    /* 1 + 2 = 3 */
259         r[3] = l;                           /* 3 */
260         l = 0;
261 
262         SQRADD_A(l, h, m, a[2]);            /* 2 + 2 = 4 */
263         r[4] = m;                           /* 4 */
264         r[5] = h;                           /* 5 */
265         return;
266     }
267     if (size == 2) {                        /* 2 */
268         SQRADD_A(h, m, l, a[0]);
269         r[0] = l;
270         l = 0;
271 
272         MULADD_AB2(l, h, m, a[0], a[1]);
273         r[1] = m;
274         m = 0;
275 
276         SQRADD_A(m, l, h, a[1]);            /* 1 + 1 = 2 */
277         r[2] = h;                           /* 2 */
278         r[3] = l;                           /* 3 */
279         return;
280     }
281     SQR_A(r[1], r[0], a[0]);                /* size == 1 */
282 }
283 
MulComba8(BN_UINT * r,const BN_UINT * a,const BN_UINT * b)284 void MulComba8(BN_UINT *r, const BN_UINT *a, const BN_UINT *b)
285 {
286     BN_UINT h = 0;
287     BN_UINT m = 0;
288     BN_UINT l = 0;
289 
290     MUL_COMBA_BEGIN_1(r, a, b, h, m, l);
291     MUL_COMBA_BEGIN_2(r, a, b, h, m, l);
292     MUL_COMBA_BEGIN_3(r, a, b, h, m, l);
293 
294     MULADD_AB(h, m, l, a[2], b[7]);     /* 2 + 7 = 9 */
295     MULADD_AB(h, m, l, a[3], b[6]);     /* 3 + 6 = 9 */
296     MULADD_AB(h, m, l, a[4], b[5]);     /* 4 + 5 = 9 */
297     MULADD_AB(h, m, l, a[5], b[4]);     /* 5 + 4 = 9 */
298     MULADD_AB(h, m, l, a[6], b[3]);     /* 6 + 3 = 9 */
299     MULADD_AB(h, m, l, a[7], b[2]);     /* 7 + 2 = 9 */
300     r[9] = l;                           /* 9 */
301     l = 0;
302 
303     MULADD_AB(l, h, m, a[7], b[3]);     /* 7 + 3 = 10 */
304     MULADD_AB(l, h, m, a[6], b[4]);     /* 6 + 4 = 10 */
305     MULADD_AB(l, h, m, a[5], b[5]);     /* 5 + 5 = 10 */
306     MULADD_AB(l, h, m, a[4], b[6]);     /* 4 + 6 = 10 */
307     MULADD_AB(l, h, m, a[3], b[7]);     /* 3 + 7 = 10 */
308     r[10] = m;                          /* 10 */
309     m = 0;
310 
311     MULADD_AB(m, l, h, a[4], b[7]);     /* 4 + 7 = 11 */
312     MULADD_AB(m, l, h, a[5], b[6]);     /* 5 + 6 = 11 */
313     MULADD_AB(m, l, h, a[6], b[5]);     /* 6 + 5 = 11 */
314     MULADD_AB(m, l, h, a[7], b[4]);     /* 7 + 4 = 11 */
315     r[11] = h;                          /* 11 */
316     h = 0;
317 
318     MULADD_AB(h, m, l, a[7], b[5]);     /* 7 + 5 = 12 */
319     MULADD_AB(h, m, l, a[6], b[6]);     /* 6 + 6 = 12 */
320     MULADD_AB(h, m, l, a[5], b[7]);     /* 5 + 7 = 12 */
321     r[12] = l;                          /* 12 */
322     l = 0;
323 
324     MULADD_AB(l, h, m, a[6], b[7]);     /* 6 + 7 = 13 */
325     MULADD_AB(l, h, m, a[7], b[6]);     /* 7 + 6 = 13 */
326     r[13] = m;                          /* 13 */
327     m = 0;
328 
329     MULADD_AB(m, l, h, a[7], b[7]);     /* 7 + 7 = 14 */
330     r[14] = h;                          /* 14 */
331     r[15] = l;                          /* 15 */
332 }
333 
MulComba6(BN_UINT * r,const BN_UINT * a,const BN_UINT * b)334 void MulComba6(BN_UINT *r, const BN_UINT *a, const BN_UINT *b)
335 {
336     BN_UINT h = 0;
337     BN_UINT m = 0;
338     BN_UINT l = 0;
339 
340     MUL_COMBA_BEGIN_1(r, a, b, h, m, l);
341     MUL_COMBA_BEGIN_2(r, a, b, h, m, l);
342 
343     MULADD_AB(h, m, l, a[5], b[1]);     /* 5 + 1 = 6 */
344     MULADD_AB(h, m, l, a[4], b[2]);     /* 4 + 2 = 6 */
345     MULADD_AB(h, m, l, a[3], b[3]);     /* 3 + 3 = 6 */
346     MULADD_AB(h, m, l, a[2], b[4]);     /* 2 + 4 = 6 */
347     MULADD_AB(h, m, l, a[1], b[5]);     /* 1 + 5 = 6 */
348     r[6] = l;                           /* 6 */
349     l = 0;
350 
351     MULADD_AB(l, h, m, a[2], b[5]);     /* 2 + 5 = 7 */
352     MULADD_AB(l, h, m, a[3], b[4]);     /* 3 + 4 = 7 */
353     MULADD_AB(l, h, m, a[4], b[3]);     /* 4 + 3 = 7 */
354     MULADD_AB(l, h, m, a[5], b[2]);     /* 5 + 2 = 7 */
355     r[7] = m;                           /* 7 */
356     m = 0;
357 
358     MULADD_AB(m, l, h, a[5], b[3]);     /* 5 + 3 = 8 */
359     MULADD_AB(m, l, h, a[4], b[4]);     /* 4 + 4 = 8 */
360     MULADD_AB(m, l, h, a[3], b[5]);     /* 3 + 5 = 8 */
361     r[8] = h;                           /* 8 */
362     h = 0;
363 
364     MULADD_AB(h, m, l, a[4], b[5]);     /* 4 + 5 = 9 */
365     MULADD_AB(h, m, l, a[5], b[4]);     /* 5 + 4 = 9 */
366     r[9] = l;                           /* 9 */
367     l = 0;
368 
369     MULADD_AB(l, h, m, a[5], b[5]);     /* 5 + 5 = 10 */
370     r[10] = m;                          /* 10 */
371     r[11] = h;                          /* 11 */
372 }
373 
MulComba4(BN_UINT * r,const BN_UINT * a,const BN_UINT * b)374 void MulComba4(BN_UINT *r, const BN_UINT *a, const BN_UINT *b)
375 {
376     BN_UINT h = 0;
377     BN_UINT m = 0;
378     BN_UINT l = 0;
379 
380     MUL_COMBA_BEGIN_1(r, a, b, h, m, l);
381 
382     MULADD_AB(l, h, m, a[3], b[1]);    /* 3 + 1 = 4 */
383     MULADD_AB(l, h, m, a[2], b[2]);    /* 2 + 2 = 4 */
384     MULADD_AB(l, h, m, a[1], b[3]);    /* 1 + 3 = 4 */
385     r[4] = m;                          /* 4 */
386     m = 0;
387 
388     MULADD_AB(m, l, h, a[2], b[3]);    /* 2 + 3 = 5 */
389     MULADD_AB(m, l, h, a[3], b[2]);    /* 3 + 2 = 5 */
390     r[5] = h;                          /* 5 */
391     h = 0;
392 
393     MULADD_AB(h, m, l, a[3], b[3]);    /* 3 + 3 = 6 */
394     r[6] = l;                          /* 6 */
395     r[7] = m;                          /* 7 */
396 }
397 
MulComba(BN_UINT * r,const BN_UINT * a,const BN_UINT * b,uint32_t size)398 static void MulComba(BN_UINT *r, const BN_UINT *a, const BN_UINT *b, uint32_t size)
399 {
400     BN_UINT h = 0;
401     BN_UINT m = 0;
402     BN_UINT l = 0;
403     if (size == 3) {                        /* 3 */
404         MULADD_AB(h, m, l, a[0], b[0]);
405         r[0] = l;
406         l = 0;
407 
408         MULADD_AB(l, h, m, a[0], b[1]);
409         MULADD_AB(l, h, m, a[1], b[0]);
410         r[1] = m;
411         m = 0;
412 
413         MULADD_AB(m, l, h, a[2], b[0]);     /* 2 + 0 = 2 */
414         MULADD_AB(m, l, h, a[1], b[1]);     /* 1 + 1 = 2 */
415         MULADD_AB(m, l, h, a[0], b[2]);     /* 0 + 2 = 2 */
416         r[2] = h;
417         h = 0;
418 
419         MULADD_AB(h, m, l, a[1], b[2]);     /* 1 + 2 = 3 */
420         MULADD_AB(h, m, l, a[2], b[1]);     /* 2 + 1 = 3 */
421         r[3] = l;                           /* 3 */
422         l = 0;
423 
424         MULADD_AB(l, h, m, a[2], b[2]);     /* 2 + 2 = 4 */
425         r[4] = m;                           /* 4 */
426         r[5] = h;                           /* 5 */
427         return;
428     }
429     if (size == 2) {                        /* 2 */
430         MULADD_AB(h, m, l, a[0], b[0]);
431         r[0] = l;
432         l = 0;
433 
434         MULADD_AB(l, h, m, a[0], b[1]);
435         MULADD_AB(l, h, m, a[1], b[0]);
436         r[1] = m;
437         m = 0;
438 
439         MULADD_AB(m, l, h, a[1], b[1]);
440         r[2] = h;                           /* 2 */
441         r[3] = l;                           /* 3 */
442         return;
443     }
444     MUL_AB(r[1], r[0], a[0], b[0]);         /* size == 1 */
445 }
446 
SpaceSize(uint32_t size)447 uint32_t SpaceSize(uint32_t size)
448 {
449     uint32_t base = 8; /* Perform 8x batch processing */
450     while (size > base) {
451         base <<= 1;
452     }
453     return base * 4; /* 2x expansion. Each layer requires 2 * size temporary space, 2 * 2 = 4 */
454 }
455 
456 /* compare BN array.
457  * return 0, if a == b
458  * return 1, if a > b
459  * return -1, if a < b
460  */
BinCmpSame(const BN_UINT * a,const BN_UINT * b,uint32_t size)461 static int32_t BinCmpSame(const BN_UINT *a, const BN_UINT *b, uint32_t size)
462 {
463     int64_t idx = (int64_t)size;
464     while (--idx >= 0) {
465         if (a[idx] != b[idx]) {
466             return a[idx] > b[idx] ? 1 : -1;
467         }
468     }
469     return 0;
470 }
471 
472 /* The caller promised that aSize >= bSize.
473  * r = ABS(a - b). Need to ensure that aSize == bSize + (0 || 1).
474  * return 0 if a > b
475  * return 1 if a <= b
476  */
ABS_Sub(BN_UINT * t,const BN_UINT * a,uint32_t aSize,const BN_UINT * b,uint32_t bSize)477 static uint32_t ABS_Sub(BN_UINT *t, const BN_UINT *a, uint32_t aSize, const BN_UINT *b, uint32_t bSize)
478 {
479     int32_t ret;
480     do {
481         if (aSize > bSize) {
482             t[bSize] = a[bSize]; /* bSize = aSize - 1 */
483             if (a[bSize] > 0) {
484                 ret = 1;
485                 break;
486             }
487         }
488         ret = BinCmpSame(a, b, bSize);
489     } while (0);
490     if (ret > 0) {
491         BN_UINT borrow = BinSub(t, a, b, bSize);
492         if (aSize > bSize) { /* When the length difference exists and a > b exists, the borrowing is processed. */
493             t[bSize] -= borrow;
494         }
495         return 0;
496     } else {
497         BinSub(t, b, a, bSize);
498         return 1;
499     }
500     return 0;
501 }
502 
503 /** Only aSize == bSize is supported. This interface will recurse. The recursion depth is O(deep) = log2(size)
504  *  Ensure that space >= SpaceSize(size)
505  *  (ah|al * bh|bl) = (((ah*bh) << 2) + (((ah*bh) + (al*bl) - (ah - al)(bh - bl)) << 1) + (al*bl))
506  */
MulConquer(BN_UINT * r,const BN_UINT * a,const BN_UINT * b,uint32_t size,BN_UINT * space,bool consttime)507 void MulConquer(BN_UINT *r, const BN_UINT *a, const BN_UINT *b, uint32_t size, BN_UINT *space, bool consttime)
508 {
509     if (!consttime) {
510         if (size == 8) { /* Perform 8x batch processing */
511             MulComba8(r, a, b);
512             return;
513         }
514         if (size == 6) { /* Perform 6x batch processing */
515             MulComba6(r, a, b);
516             return;
517         }
518         if (size == 4) { /* Perform 4x batch processing */
519             MulComba4(r, a, b);
520             return;
521         }
522         if (size < 4) { /* Less than 4, simple processing */
523             MulComba(r, a, b, size);
524             return;
525         }
526     } else if (size <= 8) { /* Calculate if the block size is smaller than 8. */
527         BinMul(r, size << 1, a, size, b, size);
528         return;
529     }
530 
531     /* truncates the length of the low bits of the BigNum, that is the length of al bl. */
532     const uint32_t sizeLo = size >> 1;
533     const uint32_t sizeLo2 = sizeLo << 1;
534     const uint32_t shift1 = sizeLo;          /* (((ah*bh) + (al*bl) - (ah - al)(bh - bl)) << 1) location */
535     const uint32_t shift2 = shift1 << 1;     /* ((ah*bh) << 2) location */
536     /* truncates the length of the high bits of the BigNum, that is the length of ah bh. */
537     const uint32_t sizeHi = size - sizeLo;
538     const uint32_t sizeHi2 = sizeHi << 1;
539 
540     /* Split the input 'space'. The current function uses tmp1 and tmp2,
541      * and the remaining newspace is used by the lower layer.
542      * space = tmp1_lo..tmp1_hi | tmp2_lo..tmp2_hi | newSpace, sizeof(tmp1_lo) == sizeHi.
543      */
544     BN_UINT *tmp1 = space;
545     BN_UINT *tmp2 = tmp1 + sizeHi2;
546     BN_UINT *newSpace = tmp2 + sizeHi2;
547 
548     /* tmp2_lo = (ah-al) */
549     uint32_t sign = ABS_Sub(tmp2, a + shift1, sizeHi, a, sizeLo);
550     /* tmp2_hi = (bh-bl) */
551     sign ^= ABS_Sub(tmp2 + sizeHi, b + shift1, sizeHi, b, sizeLo);
552 
553     MulConquer(r,          a,          b,             sizeLo, newSpace, consttime); /* calculate (al*bl) */
554     MulConquer(r + shift2, a + shift1, b + shift1,    sizeHi, newSpace, consttime); /* calculate (ah*bh) */
555     MulConquer(tmp1,       tmp2,       tmp2 + sizeHi, sizeHi, newSpace, consttime); /* calculate (ah-al)(bh-bl) */
556     /* At this time r has stored ((ah*bh) << 2) and (al*bl) */
557     /* carry should be added in (r + shift1)[sizeHi * 2] */
558     /* tmp2 is (ah*bh) + (al*bl), but the processing length here is sizeLo * 2 */
559     BN_UINT carry = BinAdd(tmp2, r, r + shift2, sizeLo2);
560     if (sizeHi > sizeLo) {
561         /* If there is a length difference, the length of (ah*bh) is sizeLo * 2 + 2,
562            and the tail of (ah*bh) needs to be processed. */
563         /* point to (r + shift2)[sizeLo * 2], the unprocessed tail of (ah*bh) */
564         const uint32_t position = shift2 + (sizeLo2);
565         tmp2[sizeLo2] = r[position] + carry;
566         carry = (tmp2[sizeLo2] < carry) ? 1 : 0;
567         /* continue the processing */
568         tmp2[(sizeLo2) + 1] = r[position + 1] + carry;
569         carry = (tmp2[sizeLo2 + 1] < carry) ? 1 : 0;
570     }
571     /* tmp1 = (ah*bh) + (al*bl) - (ah-al)(bh-bl), tmp2 is (ah*bh) + (al*bl) */
572     if (sign == 1) {
573         carry += BinAdd(tmp1, tmp2, tmp1, sizeHi2);
574     } else {
575         carry -= BinSub(tmp1, tmp2, tmp1, sizeHi2);
576     }
577     /* finally r adds tmp1, that is (ah*bh) + (al*bl) - (ah - al)(bh - bl) */
578     carry += BinAdd(r + shift1, r + shift1, tmp1, sizeHi2);
579     for (uint32_t i = shift1 + sizeHi2; carry > 0 && i < (size << 1); i++) {
580         ADD_AB(carry, r[i], r[i], carry);
581     }
582 }
583 
584 /** This interface will recurse. The recursion depth is O(deep) = log2(size)
585  *  Ensure that space >= SpaceSize(size)
586  *  (x|y)^2 = ((x^2 << 2) + ((x^2 + y^2 - (x - y)^2)) << 1) + y^2)
587  */
SqrConquer(BN_UINT * r,const BN_UINT * a,uint32_t size,BN_UINT * space,bool consttime)588 void SqrConquer(BN_UINT *r, const BN_UINT *a, uint32_t size, BN_UINT *space, bool consttime)
589 {
590     if (!consttime) {
591         if (size == 8) { /* Perform 8x batch processing */
592             SqrComba8(r, a);
593             return;
594         }
595         if (size == 6) { /* Perform 6x batch processing */
596             SqrComba6(r, a);
597             return;
598         }
599         if (size == 4) { /* Perform 4x batch processing */
600             SqrComba4(r, a);
601             return;
602         }
603         if (size < 4) { /* Less than 4, simple processing */
604             SqrComba(r, a, size);
605             return;
606         }
607     } else if (size <= 8) { /* Calculate if the block size is smaller than 8. */
608         BinSqr(r, size << 1, a, size);
609         return;
610     }
611 
612     /* truncates the length of the high bits of the BigNum, that is the length of x. */
613     const uint32_t sizeHi = (size + 1) >> 1;
614     /* truncates the length of the low bits of the BigNum, that is the length of y. */
615     const uint32_t sizeLo = size >> 1;
616     const uint32_t shift1 = sizeLo;          /* ((x^2 + y^2 - (x - y)^2)) << 1) location */
617     const uint32_t shift2 = shift1 << 1;     /* ((x^2 << 2) location */
618 
619     /* Split the input 'space'. The current function uses tmp1 and tmp2,
620        and the remaining newspace is used by the lower layer. */
621     BN_UINT *tmp1 = space;
622     BN_UINT *tmp2 = tmp1 + (sizeHi << 1);
623     BN_UINT *newSpace = tmp2 + (sizeHi << 1);
624 
625     /* tmp2 is the upper bits of num minus the lower bits of num, (x-y) */
626     (void)ABS_Sub(tmp2, a + shift1, sizeHi, a, sizeLo);
627 
628     SqrConquer(r,          a,          sizeLo, newSpace, consttime); /* calculate y^2 */
629     SqrConquer(r + shift2, a + shift1, sizeHi, newSpace, consttime); /* calculate x^2 */
630     SqrConquer(tmp1,       tmp2,       sizeHi, newSpace, consttime); /* calculate (x-y)^2 */
631 
632     /* At this time r has stored (x^2 << 2) and y^2 */
633     /* carry should be added in (r + shift1)[sizeHi * 2] */
634     /* tmp2 = x^2 + y^2, but the processing length here is sizeLo * 2 */
635     BN_UINT carry = BinAdd(tmp2, r, r + shift2, sizeLo << 1);
636     if (sizeHi > sizeLo) {
637         /* If there is a length difference, the length of x^2 is sizeLo * 2 + 2,
638            and the tail of x^2 needs to be processed. */
639         /* point to (r + shift2)[sizeLo * 2], the unprocessed tail of x^2 */
640         const uint32_t position = shift2 + (sizeLo << 1);
641         tmp2[sizeLo << 1] = r[position] + carry;
642         carry = (tmp2[sizeLo << 1] < carry) ? 1 : 0;
643         /* continue the processing */
644         tmp2[(sizeLo << 1) + 1] = r[position + 1] + carry;
645         carry = (tmp2[(sizeLo << 1) + 1] < carry) ? 1 : 0;
646     }
647     /* tmp1 = x^2 + y^2 - (x-y)^2, tmp2 is x^2 + y^2 */
648     carry -= BinSub(tmp1, tmp2, tmp1, sizeHi << 1);
649     /* finally r adds x^2 + y^2 - (x-y)^2 */
650     carry += BinAdd(r + shift1, r + shift1, tmp1, sizeHi << 1);
651 
652     uint32_t i;
653     for (i = shift1 + (sizeHi << 1); carry > 0 && i < (size << 1); i++) {
654         ADD_AB(carry, r[i], r[i], carry);
655     }
656 }
657 #endif /* HITLS_CRYPTO_BN_COMBA */
658