1 /*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 * http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16 #include "hitls_build.h"
17 #if defined(HITLS_CRYPTO_CURVE_NISTP521) && defined(HITLS_CRYPTO_NIST_USE_ACCEL)
18
19 #include <stdint.h>
20 #include "bsl_err_internal.h"
21 #include "crypt_bn.h"
22 #include "crypt_errno.h"
23 #include "crypt_utils.h"
24 #include "crypt_ecc.h"
25 #include "ecc_local.h"
26 #include "ecc_utils.h"
27 #include "bsl_util_internal.h"
28
29 #ifndef __SIZEOF_INT128__
30 #error "This nistp521 implementation require the compiler support 128-bits integer."
31 #endif
32
33 #define FELEM_BITS 521
34 /* Each element of a BigNum array is carried by 2 ^ 58 */
35 #define BASE_BITS 58
36 /* The length of a BigNum array is 9: 58 * 9 = 522 > 521 */
37 #define NUM_LIMBS 9
38 /* Mask with 58 bits */
39 #define MASK_58BITS ((uint64_t)0x3FFFFFFFFFFFFFF)
40 /* Mask with 57 bits */
41 #define MASK_57BITS ((uint64_t)0x1FFFFFFFFFFFFFF)
42 /* The pre-calculation table of the G table has 16 points. */
43 #define TABLE_G_SIZE 16
44 /* The pre-calculation table of the P table has 17 points. */
45 #define TABLE_P_SIZE 17
46 /* Forcibly convert to uint128_t */
47 #define U128(x) ((uint128_t)(x))
48
49 /* Obtain the nth bit of a BigNum. The BigNum is stored in the uint64_t array in little-endian order. */
50 #define GET_ARRAY64_BIT(k, n) ((((k)->data)[(n) / 64] >> ((n) & 63)) & 1)
51
52 typedef struct {
53 uint64_t data[NUM_LIMBS];
54 } Array64;
55
56 typedef struct {
57 uint64_t data[NUM_LIMBS];
58 } Felem;
59
60 typedef struct {
61 uint128_t data[NUM_LIMBS];
62 } LongFelem;
63
64 typedef struct {
65 Felem x, y, z; /* Each point contains three coordinates x, y, and z. */
66 } Point;
67
68 /*
69 * The point type (Point) contains three field elements (Felem) (x, y, and z).
70 * Each field element consists of nine 64-bit data blocks (uint64_t).
71 * Point :pt
72 * Felem :x
73 * uint64_t :x[9]
74 * Felem :y
75 * uint64_t :y[9]
76 * Felem :z
77 * uint64_t :z[9]
78 */
79
FelemToArray64(Array64 * array,const Felem * felem)80 static inline void FelemToArray64(Array64 *array, const Felem *felem)
81 {
82 uint32_t shift = 0;
83 for (int32_t i = 0; i + 1 < NUM_LIMBS; i++) {
84 array->data[i] = (felem->data[i] >> shift) | (felem->data[i + 1] << (BASE_BITS - shift)); // i < 8, shift < 48
85 /* Felem is carried 1 every 58 bits, and array is carried 1 every 64 bits. The difference is 6 bits. */
86 shift += 6;
87 }
88 array->data[8] = felem->data[8] >> shift; /* felem->data[8] is the last data block. */
89 }
90
Array64ToFelem(Felem * felem,const Array64 * array)91 static inline void Array64ToFelem(Felem *felem, const Array64 *array)
92 {
93 uint32_t shift = 0;
94 felem->data[0] = array->data[0] & MASK_58BITS;
95 for (int32_t i = 1; i < NUM_LIMBS; i++) {
96 /* Felem is carried 1 every 58 bits, and array is carried 1 every 64 bits. The difference is 6 bits. */
97 shift += 6;
98 felem->data[i] = ((array->data[i - 1] >> (64 - shift)) | (array->data[i] << shift)) & MASK_58BITS;
99 }
100 }
101
FelemAssign(Felem * r,const Felem * a)102 static inline void FelemAssign(Felem *r, const Felem *a)
103 {
104 r->data[0] = a->data[0]; // r->data[0] take the value
105 r->data[1] = a->data[1]; // r->data[1] take the value
106 r->data[2] = a->data[2]; // r->data[2] take the value
107 r->data[3] = a->data[3]; // r->data[3] take the value
108 r->data[4] = a->data[4]; // r->data[4] take the value
109 r->data[5] = a->data[5]; // r->data[5] take the value
110 r->data[6] = a->data[6]; // r->data[6] take the value
111 r->data[7] = a->data[7]; // r->data[7] take the value
112 r->data[8] = a->data[8]; // r->data[8] take the value
113 }
114
FelemPointAssign(Point * ptR,const Point * ptIn)115 static inline void FelemPointAssign(Point *ptR, const Point *ptIn)
116 {
117 FelemAssign(&ptR->x, &ptIn->x);
118 FelemAssign(&ptR->y, &ptIn->y);
119 FelemAssign(&ptR->z, &ptIn->z);
120 }
121
FelemAssignWithMask(Felem * r,const Felem * a,uint64_t mask)122 static inline void FelemAssignWithMask(Felem *r, const Felem *a, uint64_t mask)
123 {
124 uint64_t rmask = ~mask;
125 r->data[0] = (a->data[0] & mask) | (r->data[0] & rmask); // r->data[0] Obtain a new value or remain unchanged.
126 r->data[1] = (a->data[1] & mask) | (r->data[1] & rmask); // r->data[1] Obtain a new value or remain unchanged.
127 r->data[2] = (a->data[2] & mask) | (r->data[2] & rmask); // r->data[2] Obtain a new value or remain unchanged.
128 r->data[3] = (a->data[3] & mask) | (r->data[3] & rmask); // r->data[3] Obtain a new value or remain unchanged.
129 r->data[4] = (a->data[4] & mask) | (r->data[4] & rmask); // r->data[4] Obtain a new value or remain unchanged.
130 r->data[5] = (a->data[5] & mask) | (r->data[5] & rmask); // r->data[5] Obtain a new value or remain unchanged.
131 r->data[6] = (a->data[6] & mask) | (r->data[6] & rmask); // r->data[6] Obtain a new value or remain unchanged.
132 r->data[7] = (a->data[7] & mask) | (r->data[7] & rmask); // r->data[7] Obtain a new value or remain unchanged.
133 r->data[8] = (a->data[8] & mask) | (r->data[8] & rmask); // r->data[8] Obtain a new value or remain unchanged.
134 }
135
FelemPointAssignWithMask(Point * ptR,const Point * ptIn,uint64_t mask)136 static inline void FelemPointAssignWithMask(Point *ptR, const Point *ptIn, uint64_t mask)
137 {
138 FelemAssignWithMask(&ptR->x, &ptIn->x, mask);
139 FelemAssignWithMask(&ptR->y, &ptIn->y, mask);
140 FelemAssignWithMask(&ptR->z, &ptIn->z, mask);
141 }
142
FelemSetLimb(Felem * felem,const uint64_t a)143 static inline void FelemSetLimb(Felem *felem, const uint64_t a)
144 {
145 felem->data[0] = a; // r->data[0] take the value of a
146 felem->data[1] = 0; // r->data[1] clear to 0
147 felem->data[2] = 0; // r->data[2] clear to 0
148 felem->data[3] = 0; // r->data[3] clear to 0
149 felem->data[4] = 0; // r->data[4] clear to 0
150 felem->data[5] = 0; // r->data[5] clear to 0
151 felem->data[6] = 0; // r->data[6] clear to 0
152 felem->data[7] = 0; // r->data[7] clear to 0
153 felem->data[8] = 0; // r->data[8] clear to 0
154 }
155
LongFelemMulLimb(LongFelem * r,const LongFelem * a,uint64_t limb)156 static inline void LongFelemMulLimb(LongFelem *r, const LongFelem *a, uint64_t limb)
157 {
158 r->data[0] = a->data[0] * limb; // r->data[0] take the value
159 r->data[1] = a->data[1] * limb; // r->data[1] take the value
160 r->data[2] = a->data[2] * limb; // r->data[2] take the value
161 r->data[3] = a->data[3] * limb; // r->data[3] take the value
162 r->data[4] = a->data[4] * limb; // r->data[4] take the value
163 r->data[5] = a->data[5] * limb; // r->data[5] take the value
164 r->data[6] = a->data[6] * limb; // r->data[6] take the value
165 r->data[7] = a->data[7] * limb; // r->data[7] take the value
166 r->data[8] = a->data[8] * limb; // r->data[8] take the value
167 }
168
FelemMulLimb(Felem * r,const Felem * a,uint64_t b)169 static inline void FelemMulLimb(Felem *r, const Felem *a, uint64_t b)
170 {
171 r->data[0] = a->data[0] * b; // r->data[0] take the value
172 r->data[1] = a->data[1] * b; // r->data[1] take the value
173 r->data[2] = a->data[2] * b; // r->data[2] take the value
174 r->data[3] = a->data[3] * b; // r->data[3] take the value
175 r->data[4] = a->data[4] * b; // r->data[4] take the value
176 r->data[5] = a->data[5] * b; // r->data[5] take the value
177 r->data[6] = a->data[6] * b; // r->data[6] take the value
178 r->data[7] = a->data[7] * b; // r->data[7] take the value
179 r->data[8] = a->data[8] * b; // r->data[8] take the value
180 }
181
FelemAdd(Felem * r,const Felem * a,const Felem * b)182 static inline void FelemAdd(Felem *r, const Felem *a, const Felem *b)
183 {
184 r->data[0] = a->data[0] + b->data[0]; // r->data[0] take the value
185 r->data[1] = a->data[1] + b->data[1]; // r->data[1] take the value
186 r->data[2] = a->data[2] + b->data[2]; // r->data[2] take the value
187 r->data[3] = a->data[3] + b->data[3]; // r->data[3] take the value
188 r->data[4] = a->data[4] + b->data[4]; // r->data[4] take the value
189 r->data[5] = a->data[5] + b->data[5]; // r->data[5] take the value
190 r->data[6] = a->data[6] + b->data[6]; // r->data[6] take the value
191 r->data[7] = a->data[7] + b->data[7]; // r->data[7] take the value
192 r->data[8] = a->data[8] + b->data[8]; // r->data[8] take the value
193 }
194
195 /*
196 * input:
197 * a->data[i] < 7*2^61
198 * b->data[i] < 2^60 - 2^3
199 * output:
200 * r->data[i] <= max(a->data[i]) + 2^61 - 2^3
201 */
FelemSub(Felem * r,const Felem * a,const Felem * b)202 static inline void FelemSub(Felem *r, const Felem *a, const Felem *b)
203 {
204 r->data[0] = a->data[0] + (MASK_58BITS << 3) - b->data[0]; // b->data[0] < 2^61 - 2^3
205 r->data[1] = a->data[1] + (MASK_58BITS << 3) - b->data[1]; // b->data[1] < 2^61 - 2^3
206 r->data[2] = a->data[2] + (MASK_58BITS << 3) - b->data[2]; // b->data[2] < 2^61 - 2^3
207 r->data[3] = a->data[3] + (MASK_58BITS << 3) - b->data[3]; // b->data[3] < 2^61 - 2^3
208 r->data[4] = a->data[4] + (MASK_58BITS << 3) - b->data[4]; // b->data[4] < 2^61 - 2^3
209 r->data[5] = a->data[5] + (MASK_58BITS << 3) - b->data[5]; // b->data[5] < 2^61 - 2^3
210 r->data[6] = a->data[6] + (MASK_58BITS << 3) - b->data[6]; // b->data[6] < 2^61 - 2^3
211 r->data[7] = a->data[7] + (MASK_58BITS << 3) - b->data[7]; // b->data[7] < 2^61 - 2^3
212 r->data[8] = a->data[8] + (MASK_57BITS << 3) - b->data[8]; // b->data[8] < 2^60 - 2^3
213 }
214 /*
215 * input:
216 * r->data[i] < 2^127
217 * a->data[i] < 2^64 - 2^6
218 * output:
219 * r->data[i] <= r->data[i] + 2^64 - 2^6
220 */
LongFelemDiff(LongFelem * r,const Felem * a)221 static inline void LongFelemDiff(LongFelem *r, const Felem *a)
222 {
223 r->data[0] += (MASK_58BITS << 6) - a->data[0]; // a->data[0] < 2^64 - 2^6
224 r->data[1] += (MASK_58BITS << 6) - a->data[1]; // a->data[1] < 2^64 - 2^6
225 r->data[2] += (MASK_58BITS << 6) - a->data[2]; // a->data[2] < 2^64 - 2^6
226 r->data[3] += (MASK_58BITS << 6) - a->data[3]; // a->data[3] < 2^64 - 2^6
227 r->data[4] += (MASK_58BITS << 6) - a->data[4]; // a->data[4] < 2^64 - 2^6
228 r->data[5] += (MASK_58BITS << 6) - a->data[5]; // a->data[5] < 2^64 - 2^6
229 r->data[6] += (MASK_58BITS << 6) - a->data[6]; // a->data[6] < 2^64 - 2^6
230 r->data[7] += (MASK_58BITS << 6) - a->data[7]; // a->data[7] < 2^64 - 2^6
231 r->data[8] += (MASK_57BITS << 6) - a->data[8]; // a->data[8] < 2^63 - 2^6
232 }
233
FelemNeg(Felem * r,const Felem * a)234 static inline void FelemNeg(Felem *r, const Felem *a)
235 {
236 r->data[0] = (MASK_58BITS << 3) - a->data[0]; // a->data[0], r->data[0] < 2^61 - 2^3
237 r->data[1] = (MASK_58BITS << 3) - a->data[1]; // a->data[1], r->data[1] < 2^61 - 2^3
238 r->data[2] = (MASK_58BITS << 3) - a->data[2]; // a->data[2], r->data[2] < 2^61 - 2^3
239 r->data[3] = (MASK_58BITS << 3) - a->data[3]; // a->data[3], r->data[3] < 2^61 - 2^3
240 r->data[4] = (MASK_58BITS << 3) - a->data[4]; // a->data[4], r->data[4] < 2^61 - 2^3
241 r->data[5] = (MASK_58BITS << 3) - a->data[5]; // a->data[5], r->data[5] < 2^61 - 2^3
242 r->data[6] = (MASK_58BITS << 3) - a->data[6]; // a->data[6], r->data[6] < 2^61 - 2^3
243 r->data[7] = (MASK_58BITS << 3) - a->data[7]; // a->data[7], r->data[7] < 2^61 - 2^3
244 r->data[8] = (MASK_57BITS << 3) - a->data[8]; // a->data[8], r->data[8] < 2^60 - 2^3
245 }
246
247 /* Calculate r = a / 2, which can be regarded as a cyclic right shift by one bit in the modulo P(2^521-1) field. */
FelemDivTwo(Felem * r,const Felem * a)248 static inline void FelemDivTwo(Felem *r, const Felem *a)
249 {
250 const uint64_t *pa = a->data;
251 r->data[0] = (pa[0] >> 1) + ((pa[1] & 1) << 57); // The 57th bit plus the LSB of pa[1]
252 r->data[1] = (pa[1] >> 1) + ((pa[2] & 1) << 57); // The 57th bit plus the LSB of pa[2]
253 r->data[2] = (pa[2] >> 1) + ((pa[3] & 1) << 57); // The 57th bit plus the LSB of pa[3]
254 r->data[3] = (pa[3] >> 1) + ((pa[4] & 1) << 57); // The 57th bit plus the LSB of pa[4]
255 r->data[4] = (pa[4] >> 1) + ((pa[5] & 1) << 57); // The 57th bit plus the LSB of pa[5]
256 r->data[5] = (pa[5] >> 1) + ((pa[6] & 1) << 57); // The 57th bit plus the LSB of pa[6]
257 r->data[6] = (pa[6] >> 1) + ((pa[7] & 1) << 57); // The 57th bit plus the LSB of pa[7]
258 r->data[7] = (pa[7] >> 1) + ((pa[8] & 1) << 57); // The 57th bit plus the LSB of pa[8]
259 r->data[8] = (pa[8] >> 1) + ((pa[0] & 1) << 56); // The 56th bit plus the LSB of pa[0]
260 }
261
262 /*
263 * reduce to prevent subsequent calculation overflow.
264 * input:
265 * in[i] < 2^128
266 * output:
267 * out[i] < 2^59 + 2^14
268 */
FelemReduce(Felem * r,const LongFelem * a)269 static void FelemReduce(Felem *r, const LongFelem *a)
270 {
271 uint64_t tmp;
272 const uint32_t shift2 = BASE_BITS * 2; // 116 = 58 * 2
273 r->data[0] = (uint64_t)(a->data[0] & MASK_58BITS); // r->data[0] < 2^58
274 r->data[1] = (uint64_t)(a->data[1] & MASK_58BITS); // r->data[1] < 2^58
275 r->data[2] = (uint64_t)(a->data[2] & MASK_58BITS); // r->data[2] < 2^58
276 r->data[3] = (uint64_t)(a->data[3] & MASK_58BITS); // r->data[3] < 2^58
277 r->data[4] = (uint64_t)(a->data[4] & MASK_58BITS); // r->data[4] < 2^58
278 r->data[5] = (uint64_t)(a->data[5] & MASK_58BITS); // r->data[5] < 2^58
279 r->data[6] = (uint64_t)(a->data[6] & MASK_58BITS); // r->data[6] < 2^58
280 r->data[7] = (uint64_t)(a->data[7] & MASK_58BITS); // r->data[7] < 2^58
281 r->data[8] = (uint64_t)(a->data[8] & MASK_58BITS); // r->data[8] < 2^58
282
283 r->data[1] += (uint64_t)(a->data[0] >> BASE_BITS) & MASK_58BITS; // r->data[1] < 2^59
284 r->data[2] += (uint64_t)(a->data[1] >> BASE_BITS) & MASK_58BITS; // r->data[2] < 2^59
285 r->data[3] += (uint64_t)(a->data[2] >> BASE_BITS) & MASK_58BITS; // r->data[3] < 2^59
286 r->data[4] += (uint64_t)(a->data[3] >> BASE_BITS) & MASK_58BITS; // r->data[4] < 2^59
287 r->data[5] += (uint64_t)(a->data[4] >> BASE_BITS) & MASK_58BITS; // r->data[5] < 2^59
288 r->data[6] += (uint64_t)(a->data[5] >> BASE_BITS) & MASK_58BITS; // r->data[6] < 2^59
289 r->data[7] += (uint64_t)(a->data[6] >> BASE_BITS) & MASK_58BITS; // r->data[7] < 2^59
290 r->data[8] += (uint64_t)(a->data[7] >> BASE_BITS) & MASK_58BITS; // r->data[8] < 2^59
291 // a->data[8] The most significant bits above 58bits correspond to the most significant bits above 522 bits.
292 tmp = (uint64_t)(a->data[8] >> BASE_BITS) & MASK_58BITS;
293 r->data[0] += tmp << 1; // r->data[0] < 3*2^58
294
295 r->data[2] += (uint64_t)(a->data[0] >> shift2); // r->data[2] < 2^59 + 2^6
296 r->data[3] += (uint64_t)(a->data[1] >> shift2); // r->data[3] < 2^59 + 2^6
297 r->data[4] += (uint64_t)(a->data[2] >> shift2); // r->data[4] < 2^59 + 2^6, add the upper bits of a[2]116 bits.
298 r->data[5] += (uint64_t)(a->data[3] >> shift2); // r->data[5] < 2^59 + 2^6, add the upper bits of a[3]116 bits.
299 r->data[6] += (uint64_t)(a->data[4] >> shift2); // r->data[6] < 2^59 + 2^6, add the upper bits of a[4]116 bits.
300 r->data[7] += (uint64_t)(a->data[5] >> shift2); // r->data[7] < 2^59 + 2^6, add the upper bits of a[5]116 bits.
301 r->data[8] += (uint64_t)(a->data[6] >> shift2); // r->data[8] < 2^59 + 2^6, add the upper bits of a[6]116 bits.
302 // a->data[7] The most significant bits above 116bits correspond to the most significant bits above 522 bits.
303 tmp = (uint64_t)(a->data[7] >> shift2);
304 r->data[0] += tmp << 1; // r->data[0] < 3*2^58 + 2^13
305 // a->data[8] The most significant bits above 116bits correspond to the most significant bits above 580 bits.
306 tmp = (uint64_t)(a->data[8] >> shift2);
307 r->data[1] += tmp << 1; // r->data[1] < 2^59 + 2^13
308
309 /* Considering that out[0] may be too large, carry needs to be continued. */
310 r->data[1] += r->data[0] >> BASE_BITS; // r->data[1] < 2^59 + 2^14
311 r->data[0] &= MASK_58BITS; // r->data[0] < 2^58
312 }
313 /*
314 * Reduce the number of bits to less than 58 to prevent subsequent operation overflow.
315 * input:
316 * felem->data[i] < 2^64 - 2^6
317 * output:
318 * felem->data[i] < 2^58
319 */
FelemShrink(Felem * felem)320 static void FelemShrink(Felem *felem)
321 {
322 uint64_t carry = 0; /* Carry value between Limb */
323
324 /* Reduce each limb to less than 58 bits */
325 for (int32_t i = 0; i < NUM_LIMBS - 1; i++) {
326 felem->data[i] += carry; /* plus the carry of the previous block */
327 carry = (uint64_t)(felem->data[i] >> BASE_BITS); /* Take the carry of this block. */
328 felem->data[i] &= MASK_58BITS; /* 58 bits reserved */
329 }
330 felem->data[NUM_LIMBS - 1] += carry;
331 carry = felem->data[NUM_LIMBS - 1] >> 57; /* 521 = 58 * 8 + 57, carry the upper bits to the lower bits */
332 felem->data[NUM_LIMBS - 1] &= MASK_57BITS; /* The upper bits are discarded, only the lower 521 bits are retained */
333
334 /* Add the bits above 521 to the lower bits. */
335 for (int32_t i = 0; i < NUM_LIMBS; i++) {
336 felem->data[i] += carry; /* plus the carry of the previous block */
337 carry = (uint64_t)(felem->data[i] >> BASE_BITS); /* Take the carry of this block. */
338 felem->data[i] &= MASK_58BITS; /* 58 bits reserved */
339 }
340 }
341
342 /*
343 * Reduce felem to a unique value less than P.
344 * input:
345 * felem->data[i] < 2^64 - 2^6
346 * output:
347 * felem->data[i] < 2^58
348 * Return value:
349 * If felem is 0, the mask is all 1s. Otherwise, the mask is 0.
350 */
FelemContract(Felem * felem)351 static uint64_t FelemContract(Felem *felem)
352 {
353 uint64_t notP = 0;
354 uint64_t notZero = 0;
355 uint64_t mask;
356 uint64_t carry;
357 FelemShrink(felem);
358
359 /* After the shrink command is executed, felem->data[i] < 2^58, but it may still be greater than 521 bits. */
360 carry = felem->data[NUM_LIMBS - 1] >> 57; /* 521 = 58 * 8 + 57, carry the upper bits to the lower bits */
361 felem->data[NUM_LIMBS - 1] &= MASK_57BITS;
362 /* Add the bits above 521 to the lower bits. */
363 for (int32_t i = 0; i < NUM_LIMBS; i++) {
364 felem->data[i] += carry; /* plus the carry of the previous block */
365 carry = (uint64_t)(felem->data[i] >> BASE_BITS); /* Take the carry of this block. */
366 felem->data[i] &= MASK_58BITS; /* 58 bits reserved */
367 }
368
369 /* Check whether the value is P. */
370 for (int32_t i = 0; i < NUM_LIMBS - 1; i++) {
371 notP |= felem->data[i] ^ MASK_58BITS; /* If the value of felem is P, the value remains 0. */
372 }
373 notP |= felem->data[NUM_LIMBS - 1] ^ MASK_57BITS;
374 /* The most significant bit is 1 only when notP = 0. In this case, the mask is 0. */
375 mask = 0 - ((0 - notP) >> 63); /* Shift rightwards by 63 bits and get the most significant bit. */
376
377 for (int32_t i = 0; i < NUM_LIMBS; i++) {
378 felem->data[i] &= mask; /* If the value is P, clear all the bits to 0 */
379 notZero |= felem->data[i]; /* an determine whether the value is zero. */
380 }
381 /* only when notZero == 0, the most significant bit is 1. In this case, each bit of the mask is all 1s. */
382 mask = ((0 - notZero) >> 63) - 1; /* Shift rightwards by 63 bits and get the most significant bit. */
383 return mask;
384 }
385
386 /* Convert a BigNum to the Felem *. Note that the value cannot be a negative number and cannot be greater than P. */
BN2Felem(Felem * r,const BN_BigNum * a)387 static int32_t BN2Felem(Felem *r, const BN_BigNum *a)
388 {
389 int32_t ret;
390 Array64 array = {0};
391 uint32_t len = NUM_LIMBS;
392 ret = BN_Bn2U64Array(a, array.data, &len);
393 if (ret != CRYPT_SUCCESS) {
394 return ret;
395 }
396 Array64ToFelem(r, &array);
397 return CRYPT_SUCCESS;
398 }
399
400 /* Felem * Convert to BigNum */
Felem2BN(BN_BigNum * r,const Felem * a)401 static int32_t Felem2BN(BN_BigNum *r, const Felem *a)
402 {
403 Array64 array = {0};
404 uint32_t len = NUM_LIMBS;
405 Felem felem;
406 FelemAssign(&felem, a);
407 FelemContract(&felem);
408 FelemToArray64(&array, &felem);
409 return BN_U64Array2Bn(r, array.data, len);
410 }
411
412 /*
413 * Calculate r = a * b
414 * input:
415 * a->data[i] < 2^63
416 * b->data[i] < 2^128 / (17*a[i])
417 * output:
418 * r->data[i] < 17(max(a[i]*b[i]))
419 */
FelemMul(LongFelem * r,const Felem * a,const Felem * b)420 static void FelemMul(LongFelem *r, const Felem *a, const Felem *b)
421 {
422 Felem ax2;
423 const uint64_t *pa = a->data;
424 const uint64_t *pb = b->data;
425 const uint64_t *p2 = ax2.data;
426 /* Because the modulo P is 2^521 - 1, the higher bits above the 521 bits
427 can be truncated and then added to the lower bits. */
428 FelemMulLimb(&ax2, a, 2);
429
430 r->data[0] = U128(pa[0]) * pb[0] + U128(p2[1]) * pb[8] + U128(p2[2]) * pb[7] + // 0 = 0+0, 1+8, 2+7 (mod 9)
431 U128(p2[3]) * pb[6] + U128(p2[4]) * pb[5] + U128(p2[5]) * pb[4] + // 0 = 3+6, 4+5, 5+4 (mod 9)
432 U128(p2[6]) * pb[3] + U128(p2[7]) * pb[2] + U128(p2[8]) * pb[1]; // 0 = 6+3, 7+2, 8+1 (mod 9)
433
434 r->data[1] = U128(pa[0]) * pb[1] + U128(pa[1]) * pb[0] + U128(p2[2]) * pb[8] + // 1 = 0+1, 1+0, 2+8 (mod 9)
435 U128(p2[3]) * pb[7] + U128(p2[4]) * pb[6] + U128(p2[5]) * pb[5] + // 1 = 3+7, 4+6, 5+5 (mod 9)
436 U128(p2[6]) * pb[4] + U128(p2[7]) * pb[3] + U128(p2[8]) * pb[2]; // 1 = 6+4, 7+3, 8+2 (mod 9)
437
438 r->data[2] = U128(pa[0]) * pb[2] + U128(pa[1]) * pb[1] + U128(pa[2]) * pb[0] + // 2 = 0+2, 1+1, 2+0 (mod 9)
439 U128(p2[3]) * pb[8] + U128(p2[4]) * pb[7] + U128(p2[5]) * pb[6] + // 2 = 3+8, 4+7, 5+6 (mod 9)
440 U128(p2[6]) * pb[5] + U128(p2[7]) * pb[4] + U128(p2[8]) * pb[3]; // 2 = 6+5, 7+4, 8+3 (mod 9)
441
442 r->data[3] = U128(pa[0]) * pb[3] + U128(pa[1]) * pb[2] + U128(pa[2]) * pb[1] + // 3 = 0+3, 1+2, 2+1 (mod 9)
443 U128(pa[3]) * pb[0] + U128(p2[4]) * pb[8] + U128(p2[5]) * pb[7] + // 3 = 3+0, 4+8, 5+7 (mod 9)
444 U128(p2[6]) * pb[6] + U128(p2[7]) * pb[5] + U128(p2[8]) * pb[4]; // 3 = 6+6, 7+5, 8+4 (mod 9)
445
446 r->data[4] = U128(pa[0]) * pb[4] + U128(pa[1]) * pb[3] + U128(pa[2]) * pb[2] + // 4 = 0+4, 1+3, 2+2 (mod 9)
447 U128(pa[3]) * pb[1] + U128(pa[4]) * pb[0] + U128(p2[5]) * pb[8] + // 4 = 3+1, 4+0, 5+8 (mod 9)
448 U128(p2[6]) * pb[7] + U128(p2[7]) * pb[6] + U128(p2[8]) * pb[5]; // 4 = 6+7, 7+6, 8+5 (mod 9)
449
450 r->data[5] = U128(pa[0]) * pb[5] + U128(pa[1]) * pb[4] + U128(pa[2]) * pb[3] + // 5 = 0+5, 1+4, 2+3 (mod 9)
451 U128(pa[3]) * pb[2] + U128(pa[4]) * pb[1] + U128(pa[5]) * pb[0] + // 5 = 3+2, 4+1, 5+0 (mod 9)
452 U128(p2[6]) * pb[8] + U128(p2[7]) * pb[7] + U128(p2[8]) * pb[6]; // 5 = 6+8, 7+7, 8+6 (mod 9)
453
454 r->data[6] = U128(pa[0]) * pb[6] + U128(pa[1]) * pb[5] + U128(pa[2]) * pb[4] + // 6 = 0+6, 1+5, 2+4 (mod 9)
455 U128(pa[3]) * pb[3] + U128(pa[4]) * pb[2] + U128(pa[5]) * pb[1] + // 6 = 3+3, 4+2, 5+1 (mod 9)
456 U128(pa[6]) * pb[0] + U128(p2[7]) * pb[8] + U128(p2[8]) * pb[7]; // 6 = 6+0, 7+8, 8+7 (mod 9)
457
458 r->data[7] = U128(pa[0]) * pb[7] + U128(pa[1]) * pb[6] + U128(pa[2]) * pb[5] + // 7 = 0+7, 1+6, 2+5 (mod 9)
459 U128(pa[3]) * pb[4] + U128(pa[4]) * pb[3] + U128(pa[5]) * pb[2] + // 7 = 3+4, 4+3, 5+2 (mod 9)
460 U128(pa[6]) * pb[1] + U128(pa[7]) * pb[0] + U128(p2[8]) * pb[8]; // 7 = 6+1, 7+0, 8+8 (mod 9)
461
462 r->data[8] = U128(pa[0]) * pb[8] + U128(pa[1]) * pb[7] + U128(pa[2]) * pb[6] + // 8 = 0+8, 1+7, 2+6 (mod 9)
463 U128(pa[3]) * pb[5] + U128(pa[4]) * pb[4] + U128(pa[5]) * pb[3] + // 8 = 3+5, 4+4, 5+3 (mod 9)
464 U128(pa[6]) * pb[2] + U128(pa[7]) * pb[1] + U128(pa[8]) * pb[0]; // 8 = 6+2, 7+1, 8+0 (mod 9)
465 }
466
467 /*
468 * Calculate r = a^2
469 * input:
470 * a->data[i] < 2^62 - 2^57
471 * output:
472 * r->data[i] < 17*max(a[i]*a[i])
473 */
FelemSqr(LongFelem * r,const Felem * a)474 static void FelemSqr(LongFelem *r, const Felem *a)
475 {
476 Felem ax2;
477 Felem ax4;
478 const uint64_t *pa = a->data;
479 const uint64_t *p2 = ax2.data;
480 const uint64_t *p4 = ax4.data;
481 /* Because the modulo P is 2^521 - 1, the higher bits above the 521 bits
482 can be truncated and then added to the lower bits. */
483 FelemMulLimb(&ax2, a, 2); /* ax2 is twice the value of a */
484 FelemMulLimb(&ax4, a, 4); /* ax4 is 4 times the value of a */
485
486 r->data[0] = U128(pa[0]) * pa[0]; // 0 = 0+0 (mod 9)
487 r->data[1] = U128(p2[5]) * pa[5]; // 1 = 5+5 (mod 9)
488 r->data[2] = U128(pa[1]) * pa[1]; // 2 = 1+1 (mod 9)
489 r->data[3] = U128(p2[6]) * pa[6]; // 3 = 6+6 (mod 9)
490 r->data[4] = U128(pa[2]) * pa[2]; // 4 = 2+2 (mod 9)
491 r->data[5] = U128(p2[7]) * pa[7]; // 5 = 7+7 (mod 9)
492 r->data[6] = U128(pa[3]) * pa[3]; // 6 = 3+3 (mod 9)
493 r->data[7] = U128(p2[8]) * pa[8]; // 7 = 8+8 (mod 9)
494 r->data[8] = U128(pa[4]) * pa[4]; // 8 = 4+4 (mod 9)
495
496 // r->data[0] < 17*49*2^114 < 2^124
497 // 0 = 1+8, 2+7, 3+6, 4+5 (mod 9)
498 r->data[0] += U128(p4[1]) * pa[8] + U128(p4[2]) * pa[7] + U128(p4[3]) * pa[6] + U128(p4[4]) * pa[5];
499 // 1 = 0+1, 2+8, 3+7, 4+6 (mod 9)
500 r->data[1] += U128(p2[0]) * pa[1] + U128(p4[2]) * pa[8] + U128(p4[3]) * pa[7] + U128(p4[4]) * pa[6];
501 // 2 = 0+2, 3+8, 4+7, 5+6 (mod 9)
502 r->data[2] += U128(p2[0]) * pa[2] + U128(p4[3]) * pa[8] + U128(p4[4]) * pa[7] + U128(p4[5]) * pa[6];
503 // 3 = 0+3, 1+2, 4+8, 5+7 (mod 9)
504 r->data[3] += U128(p2[0]) * pa[3] + U128(p2[1]) * pa[2] + U128(p4[4]) * pa[8] + U128(p4[5]) * pa[7];
505 // 4 = 0+4, 1+3, 5+8, 6+7 (mod 9)
506 r->data[4] += U128(p2[0]) * pa[4] + U128(p2[1]) * pa[3] + U128(p4[5]) * pa[8] + U128(p4[6]) * pa[7];
507 // 5 = 0+5, 1+4, 2+3, 6+8 (mod 9)
508 r->data[5] += U128(p2[0]) * pa[5] + U128(p2[1]) * pa[4] + U128(p2[2]) * pa[3] + U128(p4[6]) * pa[8];
509 // 6 = 0+6, 1+5, 2+4, 7+8 (mod 9)
510 r->data[6] += U128(p2[0]) * pa[6] + U128(p2[1]) * pa[5] + U128(p2[2]) * pa[4] + U128(p4[7]) * pa[8];
511 // 7 = 0+7, 1+6, 2+5, 3+4 (mod 9)
512 r->data[7] += U128(p2[0]) * pa[7] + U128(p2[1]) * pa[6] + U128(p2[2]) * pa[5] + U128(p2[3]) * pa[4];
513 // 8 = 0+8, 1+7, 2+6, 3+5 (mod 9)
514 r->data[8] += U128(p2[0]) * pa[8] + U128(p2[1]) * pa[7] + U128(p2[2]) * pa[6] + U128(p2[3]) * pa[5];
515 }
516
517 // Multiply and reduce
FelemMulReduce(Felem * r,const Felem * a,const Felem * b)518 static inline void FelemMulReduce(Felem *r, const Felem *a, const Felem *b)
519 {
520 LongFelem tmp;
521 FelemMul(&tmp, a, b);
522 FelemReduce(r, &tmp);
523 }
524
525 // Square and reduce
FelemSqrReduce(Felem * r,const Felem * in)526 static inline void FelemSqrReduce(Felem *r, const Felem *in)
527 {
528 LongFelem tmp;
529 FelemSqr(&tmp, in);
530 FelemReduce(r, &tmp);
531 }
532
533 /*
534 * Calculate r = 1/a (mod P)
535 * Fermat's Little Theorem:
536 * a^p = a mod p
537 * a^(p-1) = 1 mod p
538 * a^(p-2) = a^(-1) mod p
539 * Calculate the inverse modulus value according to this formula and:
540 * p = 2^521 - 1
541 * p - 2 = 2^521 - 3 = (2^519 - 1) << 2 + 1
542 */
FelemInv(Felem * r,const Felem * a)543 static void FelemInv(Felem *r, const Felem *a)
544 {
545 Felem tmp1, tmp2, tmp3;
546 int32_t bits;
547 /* Calculate a^e and update the e value until e = p - 2 */
548 FelemSqrReduce(&tmp1, a); /* (10) */
549 FelemMulReduce(&tmp1, &tmp1, a); /* (11) */
550
551 FelemSqrReduce(&tmp2, &tmp1); /* (110) */
552 FelemMulReduce(&tmp3, &tmp2, a); /* (111) is stored in tmp3 */
553 FelemSqrReduce(&tmp2, &tmp2); /* (1100) */
554 FelemMulReduce(&tmp1, &tmp2, &tmp1); /* (1111) */
555
556 FelemSqrReduce(&tmp2, &tmp1); /* (0001 1110) */
557 FelemSqrReduce(&tmp2, &tmp2); /* (0011 1100) */
558 FelemSqrReduce(&tmp2, &tmp2); /* (0111 1000) */
559 FelemMulReduce(&tmp3, &tmp2, &tmp3); /* (0111 1111) is stored in tmp3 */
560 FelemSqrReduce(&tmp2, &tmp2); /* (1111 0000) */
561 FelemMulReduce(&tmp1, &tmp2, &tmp1); /* 2^8 - 1 */
562
563 /* The current value of e is (11111111) The value consists of 8 bits */
564 bits = 8;
565
566 /* Perform the square & multiplication until the value of e becomes 2 ^ 512 - 1, that is, 512 bits */
567 while (bits < 512) {
568 FelemAssign(&tmp2, &tmp1);
569 for (int32_t i = 0; i < bits; i++) { /* e value shifts to the left */
570 FelemSqrReduce(&tmp2, &tmp2);
571 }
572 FelemMulReduce(&tmp1, &tmp2, &tmp1); /* e Change the lower bits 0 to 1, e = 2^bits - 1 */
573 /* In this case, the bit length of the e value becomes twice (* 2). */
574 bits *= 2;
575 }
576 /* In this case, the value of e is 2^512-1 */
577 for (int32_t i = 0; i < 7; i++) { /* e value shifts to the left by 7 bits */
578 FelemSqrReduce(&tmp1, &tmp1);
579 }
580 FelemMulReduce(&tmp1, &tmp1, &tmp3); /* 2^519 - 1, plus the previous &tmp3 */
581
582 FelemSqrReduce(&tmp1, &tmp1);
583 FelemSqrReduce(&tmp1, &tmp1); /* (2^519 - 1) << 2 */
584 FelemMulReduce(r, &tmp1, a); /* p - 2 */
585 }
586
587 /*
588 * "dbl-2001-b"
589 * delta = Z1^2
590 * gamma = Y1^2
591 * beta = X1*gamma
592 * alpha = 3*(X1-delta)*(X1+delta)
593 * X3 = alpha^2-8*beta
594 * Z3 = (Y1+Z1)^2-gamma-delta
595 * Y3 = alpha*(4*beta-X3)-8*gamma^2
596 */
597 /* Calculate the double point coordinates. */
FelemPointDouble(Point * pointOut,const Point * pointIn)598 static void FelemPointDouble(Point *pointOut, const Point *pointIn)
599 {
600 Felem delta, gamma, beta, alpha;
601 Felem tmp1, tmp2;
602 LongFelem ltmp1;
603
604 /* delta = Z1^2 */
605 FelemSqrReduce(&delta, &pointIn->z); // delta[i] < 2^59 + 2^14
606 /* gamma = Y1^2 */
607 FelemSqrReduce(&gamma, &pointIn->y); // gamma[i] < 2^59 + 2^14
608 /* beta = X1*gamma */
609 FelemMulReduce(&beta, &pointIn->x, &gamma); // beta[i] < 2^59 + 2^14
610
611 /* X1 - delta */
612 FelemSub(&tmp1, &pointIn->x, &delta); // tmp1[i] < 9*2^59 + 2^14
613 /* X1 + delta */
614 FelemAdd(&tmp2, &pointIn->x, &delta); // tmp2[i] < 2^60 + 2^15
615 /* 3*(X1 + delta) */
616 FelemMulLimb(&tmp2, &tmp2, 3); // tmp2[i] < 6*(2^59 + 2^14)
617 /* alpha = 3*(X1-delta)*(X1+delta) */
618 FelemMulReduce(&alpha, &tmp1, &tmp2); // alpha[i] < 2^59 + 2^14
619
620 /* alpha^2 */
621 FelemSqr(<mp1, &alpha); // ltmp1[i] < 2^125
622 /* 8*beta */
623 FelemMulLimb(&tmp2, &beta, 8); // tmp2[i] < 2^62 + 2^17
624 /* alpha^2-8*beta */
625 LongFelemDiff(<mp1, &tmp2); // ltmp1[i] < 2^126
626 /* X3 = alpha^2-8*beta */
627 FelemReduce(&pointOut->x, <mp1);
628
629 /* Y1+Z1 */
630 FelemAdd(&tmp1, &pointIn->y, &pointIn->z); // tmp1[i] < 2^60 + 2^15
631 /* (Y1+Z1)^2 */
632 FelemSqr(<mp1, &tmp1); // ltmp1[i] < 17*(2^60 + 2^15)
633 /* gamma+delta */
634 FelemAdd(&tmp2, &gamma, &delta); // tmp2[i] < 2^60 + 2^15
635 /* (Y1+Z1)^2 - gamma - delta */
636 LongFelemDiff(<mp1, &tmp2);
637 /* Z3 = (Y1+Z1)^2-gamma-delta */
638 FelemReduce(&pointOut->z, <mp1);
639
640 /* 4*beta */
641 FelemMulLimb(&tmp2, &beta, 4); // tmp2[i] < 2^61 + 2^16
642 /* 4*beta-X3 */
643 FelemSub(&tmp1, &tmp2, &pointOut->x); // tmp1[i] < 2^62 + 2^16
644 FelemShrink(&tmp1); // Subtraction and reduction process can be optimized
645 /* alpha*(4*beta-X3) */
646 FelemMul(<mp1, &alpha, &tmp1); // ltmp1[i] < 2^128
647 /* gamma^2 */
648 FelemSqrReduce(&tmp2, &gamma); // tmp2[i] < 2^59 + 2^14
649 /* 8*gamma^2 */
650 FelemMulLimb(&tmp1, &tmp2, 8); // tmp1[i] < 2^62 + 2^17
651 /* alpha*(4*beta-X3)-8*gamma^2 */
652 LongFelemDiff(<mp1, &tmp1);
653 /* Y3 = alpha*(4*beta-X3)-8*gamma^2 */
654 FelemReduce(&pointOut->y, <mp1);
655 }
656
657 /*
658 * "add-2007-bl"
659 * Z1Z1 = Z1^2
660 * Z2Z2 = Z2^2
661 * U1 = X1*Z2Z2
662 * S1 = Y1*Z2*Z2Z2
663 * U2 = X2*Z1Z1
664 * S2 = Y2*Z1*Z1Z1
665 * H = U2-U1
666 * r = 2*(S2-S1)
667 * I = (2*H)^2
668 * J = H*I
669 * V = U1*I
670 * X3 = r^2-J-2*V
671 * Y3 = r*(V-X3)-2*S1*J
672 * Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H
673 */
674 /* Calculate the point addition coordinates, pt3 = pt1 + pt2 */
FelemPointAdd(Point * pt3,const Point * pt1,const Point * pt2)675 static void FelemPointAdd(Point *pt3, const Point *pt1, const Point *pt2)
676 {
677 uint64_t pointEqual, xEqual, yEqual, z1Zero, z2Zero;
678 Felem z1z1, z2z2, u1, u2, s1, s2, h, r, i, j, v, tmp1;
679 Point res;
680 LongFelem ltmp1;
681
682 /* Z1Z1 = Z1^2 */
683 FelemSqrReduce(&z1z1, &pt1->z);
684 z1Zero = FelemContract(&z1z1); // z1z1[i] < 2^58
685
686 /* Z2Z2 = Z2^2 */
687 FelemSqrReduce(&z2z2, &pt2->z);
688 z2Zero = FelemContract(&z2z2); // z2z2[i] < 2^58
689
690 /* U1 = X1*Z2Z2 */
691 FelemMulReduce(&u1, &pt1->x, &z2z2); // u1[i] < 2^59 + 2^14
692
693 /* S1 = Y1*Z2*Z2Z2 */
694 FelemMulReduce(&tmp1, &pt1->y, &pt2->z);
695 FelemMulReduce(&s1, &tmp1, &z2z2); // s1[i] < 2^59 + 2^14
696
697 /* U2 = X2*Z1Z1 */
698 FelemMulReduce(&u2, &pt2->x, &z1z1); // u2[i] < 2^59 + 2^14
699
700 /* S2 = Y2*Z1*Z1Z1 */
701 FelemMulReduce(&tmp1, &pt2->y, &pt1->z);
702 FelemMulReduce(&s2, &tmp1, &z1z1); // s2[i] < 2^59 + 2^14
703
704 /* H = U2-U1 */
705 FelemSub(&h, &u2, &u1);
706 xEqual = FelemContract(&h); // h[i] < 2^58
707
708 /* r = 2*(S2-S1) */
709 FelemSub(&tmp1, &s2, &s1);
710 yEqual = FelemContract(&tmp1);
711 /* If the coordinates are equal, use the double point formula. */
712 pointEqual = (xEqual & yEqual & (~z1Zero) & (~z2Zero));
713 if (pointEqual != 0) {
714 FelemPointDouble(pt3, pt1);
715 return;
716 }
717 FelemMulLimb(&r, &tmp1, 2); // r[i] < 2^59
718
719 /* I = (2*h)^2 */
720 FelemMulLimb(&tmp1, &h, 2); // tmp1[i] < 2^59
721 FelemSqrReduce(&i, &tmp1);
722
723 /* J = H*I */
724 FelemMulReduce(&j, &h, &i); // j[i] < 2^59 + 2^14
725
726 /* v = U1*I */
727 FelemMulReduce(&v, &u1, &i); // v[i] < 2^59 + 2^14
728
729 /* X3 = r^2-j-2*v */
730 FelemSqr(<mp1, &r); // ltmp1[i] < 17*2^118
731 FelemMulLimb(&tmp1, &v, 2); // tmp1[i] < 2^60 + 2^15
732 FelemAdd(&tmp1, &tmp1, &j); // tmp1[i] < 3*(2^59 + 2^14)
733 LongFelemDiff(<mp1, &tmp1); // ltmp1 < 2^123
734 FelemReduce(&res.x, <mp1); // x[i] < 2^59 + 2^14
735
736 /* Y3 = r*(v-X3)-2*S1*j */
737 FelemSub(&tmp1, &v, &res.x); // tmp1[i] < 5*2^59 + 2^14
738 FelemMul(<mp1, &r, &tmp1); // ltmp1[i] < 17*(5*2^59 + 2^14)*(2^59) < 2^125
739 FelemMulReduce(&tmp1, &s1, &j); // tmp1[i] < 2^59 + 2^14
740 FelemMulLimb(&tmp1, &tmp1, 2); // tmp1[i] < 2^60 + 2^15
741 LongFelemDiff(<mp1, &tmp1); // ltmp1[i] < 2^125 + 2^64 - 2^6
742 FelemReduce(&res.y, <mp1); // y3[i] < 2^59 + 2^14
743
744 /* Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H */
745 FelemAdd(&tmp1, &pt1->z, &pt2->z); // tmp1[i] < 2^60 + 2^15
746 FelemSqr(<mp1, &tmp1); // ltmp1[i] < 17*(2^120 + 2^76 + 2^30)
747 FelemAdd(&tmp1, &z1z1, &z2z2); // tmp1[i] < 2^59
748 LongFelemDiff(<mp1, &tmp1); // ltmp1[i] < 2^125
749 FelemReduce(&tmp1, <mp1); // tmp1[i] < 2^59 + 2^14
750 FelemMulReduce(&res.z, &tmp1, &h); // z3[i] < 2^59 + 2^14
751
752 FelemPointAssignWithMask(&res, pt2, z1Zero);
753 FelemPointAssignWithMask(&res, pt1, z2Zero);
754 FelemPointAssign(pt3, &res);
755 }
756
757 /*
758 * "madd-2007-bl"
759 * Z1Z1 = Z1^2
760 * U2 = X2*Z1Z1
761 * S2 = Y2*Z1*Z1Z1
762 * H = U2-X1
763 * r = 2*(S2-Y1)
764 * HH = H^2
765 * I = 4*HH
766 * J = H*I
767 * V = X1*I
768 * X3 = r^2-J-2*V
769 * Y3 = r*(V-X3)-2*Y1*J
770 * Z3 = (Z1+H)^2-Z1Z1-HH
771 */
772 /* Calculate the points coordinates addition in the mixed coordinate system, pt3 = pt1 + pt2, z2 == 1 */
FelemPointMixAdd(Point * pt3,const Point * pt1,const Point * pt2)773 static void FelemPointMixAdd(Point *pt3, const Point *pt1, const Point *pt2)
774 {
775 uint64_t pointEqual, xEqual, yEqual, z1Zero, z2Zero;
776 Felem z1z1, h, hh, r, i, j, v, tmp1, tmp2;
777 Point res;
778 LongFelem ltmp1;
779
780 /* Z1Z1 = Z1^2 */
781 FelemSqrReduce(&z1z1, &pt1->z);
782 z1Zero = FelemContract(&z1z1); // z1z1[i] < 2^58
783
784 FelemAssign(&tmp1, &pt2->z);
785 z2Zero = FelemContract(&tmp1);
786
787 /* U2 = X2*Z1Z1 */
788 FelemMulReduce(&tmp2, &pt2->x, &z1z1); // tmp2[i] < 2^59 + 2^14
789
790 /* S2 = Y2*Z1*Z1Z1 */
791 FelemMulReduce(&tmp1, &pt2->y, &pt1->z);
792 FelemMulReduce(&tmp1, &tmp1, &z1z1); // tmp1[i] < 2^59 + 2^14
793
794 /* H = U2-X1 */
795 FelemSub(&h, &tmp2, &pt1->x);
796 xEqual = FelemContract(&h); // h[i] < 2^58
797
798 /* r = 2*(S2-Y1) */
799 FelemSub(&tmp1, &tmp1, &pt1->y);
800 yEqual = FelemContract(&tmp1);
801 /* If the coordinates are equal, use the double point formula. */
802 pointEqual = (xEqual & yEqual & (~z1Zero) & (~z2Zero));
803 if (pointEqual != 0) {
804 FelemPointDouble(pt3, pt1);
805 return;
806 }
807 FelemMulLimb(&r, &tmp1, 2); // r[i] < 2^59
808
809 /* HH = H^2 */
810 FelemSqrReduce(&hh, &h); // hh[i] < 2^59 + 2^14
811
812 /* I = 4*HH */
813 FelemMulLimb(&i, &hh, 4); // i[i] < 2^61 + 2^16
814
815 /* J = H*I */
816 FelemMulReduce(&j, &h, &i); // j[i] < 2^59 + 2^14
817
818 /* V = X1*I */
819 FelemMulReduce(&v, &pt1->x, &i); // v[i] < 2^59 + 2^14
820
821 /* X3 = r^2-J-2*V */
822 FelemMulLimb(&tmp1, &v, 2); // tmp1[i] < 2^60 + 2^15
823 FelemAdd(&tmp2, &j, &tmp1); // tmp2[i] < 3*2^59 + 3*2^14
824 FelemSqr(<mp1, &r); // ltmp1[i] < 17*2^118
825 LongFelemDiff(<mp1, &tmp2); // ltmp1[i] < 2^123
826 FelemReduce(&res.x, <mp1); // x3[i] < 2^59 + 2^14
827
828 /* Y3 = r*(V-X3)-2*Y1*J */
829 FelemSub(&tmp1, &v, &res.x); // tmp1[i] < 5*2^59 + 2^14
830 FelemMul(<mp1, &r, &tmp1); // ltmp1[i] < 17*(5*2^59 + 2^14)*(2^59) < 2^125
831 FelemMulReduce(&tmp2, &pt1->y, &j); // tmp2[i] < 2^59 + 2^14
832 FelemMulLimb(&tmp1, &tmp2, 2); // tmp1[i] < 2^60 + 2^15
833 LongFelemDiff(<mp1, &tmp1); // ltmp1[i] < 2^125 + 2^64 - 2^6
834 FelemReduce(&res.y, <mp1); // y3[i] < 2^59 + 2^14
835
836 /* Z3 = (Z1+H)^2-Z1Z1-HH */
837 FelemAdd(&tmp1, &pt1->z, &h); // tmp1[i] < 3*2^59 + 2^14
838 FelemSqr(<mp1, &tmp1); // ltmp1[i] < 17*(9*2^118 + 3*2^74 + 2^28)
839 FelemAdd(&tmp2, &z1z1, &hh); // tmp2[i] < 3*2^59 + 2^14
840 LongFelemDiff(<mp1, &tmp2); // ltmp1[i] < 2^126
841 FelemReduce(&res.z, <mp1); // z3[i] < 2^59 + 2^14
842
843 FelemPointAssignWithMask(&res, pt2, z1Zero);
844 FelemPointAssignWithMask(&res, pt1, z2Zero);
845 FelemPointAssign(pt3, &res);
846 }
847
848 /*
849 * Y = 2*Y
850 * W = Z^4
851 * while (m > 0) {
852 * A = 3*(X^2-W)
853 * B = X*Y^2
854 * X = A^2-2*B
855 * Z = Z*Y
856 * m = m-1
857 * if (m > 0) {
858 * W = W*Y^4
859 * }
860 * Y = 2*A*(B-X)-Y^4
861 * }
862 * Y = Y/2
863 */
FelemPointMultDouble(Point * pointOut,const Point * pointIn,int32_t m)864 static void FelemPointMultDouble(Point *pointOut, const Point *pointIn, int32_t m)
865 {
866 Felem x, y, z;
867 Felem w, a, b;
868 Felem tmp1, tmp2, tmp3;
869 LongFelem ltmp1;
870 int32_t left = m;
871 if (left == 1) {
872 FelemPointDouble(pointOut, pointIn);
873 return;
874 }
875
876 FelemAssign(&x, &pointIn->x);
877 FelemAssign(&z, &pointIn->z);
878 /* Y = 2*Y */
879 FelemMulLimb(&y, &pointIn->y, 2); // y[i] < 2^60 + 2^15
880 /* W = Z^4 */
881 FelemSqrReduce(&tmp1, &pointIn->z);
882 FelemSqrReduce(&w, &tmp1); // w[i] < 2^59 + 2^14
883 while (left > 0) {
884 /* A = 3*(X^2-W) */
885 FelemSqr(<mp1, &x); // ltmp1[i] < 17*(2^118 + 2^74 + 2^28) < 17*(2^118 + 2^75)
886 LongFelemDiff(<mp1, &w); // ltmp1[i] < 18*2^118
887 LongFelemMulLimb(<mp1, <mp1, 3); // ltmp1[i] < 3*18*2^118 < 2^124
888 FelemReduce(&a, <mp1); // a[i] < 2^59 + 2^14
889
890 /* B = X*Y^2 */
891 FelemSqrReduce(&tmp3, &y); // tmp3[i] < 2^59 + 2^14, tmp3 = Y^2
892 FelemMulReduce(&b, &x, &tmp3); // b[i] < 2^59 + 2^14
893
894 /* X = A^2-2*B */
895 FelemSqr(<mp1, &a); // ltmp1[i] < 17*(2^118 + 2^74 + 2^28) < 17*(2^118 + 2^75)
896 FelemMulLimb(&tmp1, &b, 2); // tmp1[i] < 2^60 + 2^15
897 LongFelemDiff(<mp1, &tmp1); // ltmp1[i] < 18*2^118
898 FelemReduce(&x, <mp1); // x[i] < 2^59 + 2^14
899
900 /* Z = Z*Y */
901 FelemMulReduce(&z, &z, &y); // z[i] < 2^59 + 2^14
902 FelemSqrReduce(&tmp3, &tmp3); // tmp3[i] < 2^59 + 2^14, tmp3 = Y^4
903 left--;
904 if (left > 0) {
905 /* W = W*Y^4 */
906 FelemMulReduce(&w, &tmp3, &w); // w[i] < 2^59 + 2^14
907 }
908
909 /* Y = 2*A*(B-X)-Y^4 */
910 FelemMulLimb(&tmp1, &a, 2); // tmp1[i] < 2^60 + 2^15
911 FelemSub(&tmp2, &b, &x); // b[i] < 5*2^59 + 2^14
912 FelemMul(<mp1, &tmp1, &tmp2); // ltmp1[i] < 17*(5*2^119 + 6*2^74 + 2^29) < 2^126
913 LongFelemDiff(<mp1, &tmp3); // ltmp1[i] < 2^126 + 2^64
914 FelemReduce(&y, <mp1);
915 }
916
917 /* Y = Y/2 */
918 FelemDivTwo(&pointOut->y, &y);
919 FelemAssign(&pointOut->x, &x);
920 FelemAssign(&pointOut->z, &z);
921 }
922
923 /*
924 * Pre-computation table of base point G, which contains the X, Y, Z coordinates of n*G.
925 *
926 * index corresponding bit Value of n
927 * 0 0 0 0 0 0 + 0 + 0 + 0
928 * 1 0 0 0 1 0 + 0 + 0 + 1
929 * 2 0 0 1 0 0 + 0 + 2^130 + 0
930 * 3 0 0 1 1 0 + 0 + 2^130 + 1
931 * 4 0 1 0 0 0 + 2^260 + 0 + 0
932 * 5 0 1 0 1 0 + 2^260 + 0 + 1
933 * 6 0 1 1 0 0 + 2^260 + 2^130 + 0
934 * 7 0 1 1 1 0 + 2^260 + 2^130 + 1
935 * 8 1 0 0 0 2^390 + 0 + 0 + 0
936 * 9 1 0 0 1 2^390 + 0 + 0 + 1
937 * 10 1 0 1 0 2^390 + 0 + 2^130 + 0
938 * 11 1 0 1 1 2^390 + 0 + 2^130 + 1
939 * 12 1 1 0 0 2^390 + 2^260 + 0 + 0
940 * 13 1 1 0 1 2^390 + 2^260 + 0 + 1
941 * 14 1 1 1 0 2^390 + 2^260 + 2^130 + 0
942 * 15 1 1 1 1 2^390 + 2^260 + 2^130 + 1
943 */
944 static const Point PRE_COMPUTE_G[TABLE_G_SIZE] = {
945 {
946 {{0, 0, 0, 0, 0, 0, 0, 0, 0}},
947 {{0, 0, 0, 0, 0, 0, 0, 0, 0}},
948 {{0, 0, 0, 0, 0, 0, 0, 0, 0}}
949 }, {
950 {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
951 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
952 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404}},
953 {{0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
954 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
955 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b}},
956 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
957 }, {
958 {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
959 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
960 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5}},
961 {{0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
962 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
963 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7}},
964 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
965 }, {
966 {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
967 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
968 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9}},
969 {{0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
970 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
971 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe}},
972 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
973 }, {
974 {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
975 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
976 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065}},
977 {{0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
978 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
979 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524}},
980 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
981 }, {
982 {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
983 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
984 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe}},
985 {{0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
986 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
987 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7}},
988 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
989 }, {
990 {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
991 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
992 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256}},
993 {{0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
994 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
995 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd}},
996 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
997 }, {
998 {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
999 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
1000 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23}},
1001 {{0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1002 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
1003 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e}},
1004 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1005 }, {
1006 {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1007 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
1008 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5}},
1009 {{0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1010 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
1011 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242}},
1012 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1013 }, {
1014 {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1015 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
1016 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203}},
1017 {{0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1018 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
1019 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f}},
1020 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1021 }, {
1022 {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1023 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
1024 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a}},
1025 {{0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1026 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
1027 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a}},
1028 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1029 }, {
1030 {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1031 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
1032 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b}},
1033 {{0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1034 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
1035 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f}},
1036 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1037 }, {
1038 {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1039 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
1040 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf}},
1041 {{0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1042 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
1043 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d}},
1044 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1045 }, {
1046 {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1047 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
1048 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684}},
1049 {{0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1050 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
1051 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81}},
1052 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1053 }, {
1054 {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1055 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
1056 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d}},
1057 {{0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1058 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
1059 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42}},
1060 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1061 }, {
1062 {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1063 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
1064 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f}},
1065 {{0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1066 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
1067 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055}},
1068 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1069 }
1070 };
1071
1072 /*
1073 * Pre-computation table of base point G, which contains the X, Y, Z coordinates of n*G.
1074 *
1075 * index corresponding bit value of n
1076 * 0 0 0 0 0 0 + 0 + 0 + 0
1077 * 1 0 0 0 1 0 + 0 + 0 + 2^65
1078 * 2 0 0 1 0 0 + 0 + 2^195 + 0
1079 * 3 0 0 1 1 0 + 0 + 2^195 + 2^65
1080 * 4 0 1 0 0 0 + 2^325 + 0 + 0
1081 * 5 0 1 0 1 0 + 2^325 + 0 + 2^65
1082 * 6 0 1 1 0 0 + 2^325 + 2^195 + 0
1083 * 7 0 1 1 1 0 + 2^325 + 2^195 + 2^65
1084 * 8 1 0 0 0 2^455 + 0 + 0 + 0
1085 * 9 1 0 0 1 2^455 + 0 + 0 + 2^65
1086 * 10 1 0 1 0 2^455 + 0 + 2^195 + 0
1087 * 11 1 0 1 1 2^455 + 0 + 2^195 + 2^65
1088 * 12 1 1 0 0 2^455 + 2^325 + 0 + 0
1089 * 13 1 1 0 1 2^455 + 2^325 + 0 + 2^65
1090 * 14 1 1 1 0 2^455 + 2^325 + 2^195 + 0
1091 * 15 1 1 1 1 2^455 + 2^325 + 2^195 + 2^65
1092 */
1093 static const Point PRE_COMPUTE_G2[TABLE_G_SIZE] = {
1094 {
1095 {{0, 0, 0, 0, 0, 0, 0, 0, 0}},
1096 {{0, 0, 0, 0, 0, 0, 0, 0, 0}},
1097 {{0, 0, 0, 0, 0, 0, 0, 0, 0}}
1098 }, {
1099 {{0x0192b0164b374ff4, 0x037b520497f54a7c, 0x00ac45dfa717d3aa,
1100 0x012692d390795d21, 0x013153d4af815b65, 0x01dda688f88c3a92,
1101 0x0205e32bd883b127, 0x025156b962597ab5, 0x00a54cc9cfcf7717}},
1102 {{0x00fe2ea43f30741f, 0x0144a9495978f5d7, 0x035adaea005bd79c,
1103 0x009dff281db66901, 0x00166a36786b2593, 0x01d7f68c07aa0052,
1104 0x013e05225075d36d, 0x03181b67caeea6b5, 0x009004fc6adc182a}},
1105 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1106 }, {
1107 {{0x0287bbca1236bec1, 0x03aaf618239ad718, 0x013ef15fcd3c6c16,
1108 0x031f697f988c94c6, 0x01ac806cb8d4ee71, 0x0035f8035894c512,
1109 0x00a16689152cf169, 0x02236a87815c0f48, 0x014f6480d486bbf5}},
1110 {{0x03f70ab3fe2753e3, 0x03d291808faf7e0d, 0x00d7d89caf63a562,
1111 0x029ead2c77ee5cd6, 0x022c8c3421387422, 0x02e384f360359525,
1112 0x01901927d338b4bd, 0x0010c294d54a76b1, 0x00c739a28761a676}},
1113 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1114 }, {
1115 {{0x0321984bf604e26a, 0x00a0a4346e1beaa6, 0x03959055560b38f4,
1116 0x001c383384c9b58c, 0x013212bf16c0badc, 0x00fc4f13c1530004,
1117 0x0297632bcdf70503, 0x0306dbd604f5574d, 0x016c53a4d13a129b}},
1118 {{0x03534a0ccd1d6c44, 0x02279af4660bfa03, 0x030eb700f21771d7,
1119 0x01134017e2c6529e, 0x0237abadf41d7409, 0x03547fae79ff1ce6,
1120 0x027b74026ac60650, 0x038912af6d6a8213, 0x00c3257758f97db5}},
1121 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1122 }, {
1123 {{0x037d9850fb6f765b, 0x01b6f2b4333f3817, 0x025e9d97d6f42afe,
1124 0x00a0ddfdfa42799a, 0x02bfc71aab1b4029, 0x0378d9bd912c361e,
1125 0x012c4f53cffd5151, 0x03a0621175f5d2ca, 0x0017e0822ef93f88}},
1126 {{0x03f7c1d7104d2069, 0x03848b7b03f6c63f, 0x003395646b614e53,
1127 0x0342d1dd97dbc1e9, 0x022cb3def43f2341, 0x02a5f4833f79b757,
1128 0x037b25687d324787, 0x031f409c8d51daf2, 0x010bb03f98dc9303}},
1129 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1130 }, {
1131 {{0x03efcaf76c0f7bd7, 0x015b1ffdf6ccf484, 0x0263903e662a439d,
1132 0x036dfdd7c185fe97, 0x015b51f55e640b08, 0x01b1764270cbce73,
1133 0x01e346d1bb5c8f2a, 0x03199be2199e0b68, 0x004adb8d3d68e650}},
1134 {{0x019bce039c0da6bf, 0x0280560629ade3b2, 0x01418eb6001c82e3,
1135 0x01e464b38910b655, 0x02b21034d1a402e4, 0x028c2df0b056c5fa,
1136 0x032be9714380fe04, 0x01f9ecd5a9a2fcca, 0x015aa21ec32e0387}},
1137 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1138 }, {
1139 {{0x036ad61ba5561dd5, 0x02dab309a8810a66, 0x037393ceee004b75,
1140 0x001e6bf8a4921c61, 0x0316b2aa5307a051, 0x0014c93b7032e644,
1141 0x03f6b33b796d11e2, 0x023d7387badaa578, 0x003387854547b6ca}},
1142 {{0x002d4c5b57434eda, 0x01d6e1888a73d938, 0x0018f0f64605d2fa,
1143 0x028a20eeb35b0cc6, 0x03b68c858d509955, 0x01141d740c8bd567,
1144 0x010750725080144c, 0x023d6ac06393f441, 0x0042923f464fb5d1}},
1145 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1146 }, {
1147 {{0x037549a3c618e088, 0x008b414778fa66e4, 0x00723b6b05db1367,
1148 0x013e930419c79520, 0x0191ed1c4447ff41, 0x00bee132be6a81cb,
1149 0x02fa7516973beafc, 0x02e25b501cead6d6, 0x01fdb7d1dc08792c}},
1150 {{0x039cb1f8f679b9d1, 0x0083db2827d85eaf, 0x03b023aa80726182,
1151 0x022a7457eb1c3efa, 0x03caef438de54158, 0x033997a18583466f,
1152 0x02d7bffa14e33c59, 0x001b92a9cd69ce59, 0x0113258b03a75ad8}},
1153 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1154 }, {
1155 {{0x0324c5b4c56caae5, 0x0151cedc6869fdbe, 0x039370cf6ff1d385,
1156 0x00d2d6b3a7948969, 0x0126b6384f3cdb06, 0x02f045c111b79e63,
1157 0x00519f9f1ede134e, 0x03baffa03938dd55, 0x0179812e76db6349}},
1158 {{0x00b69f323b354956, 0x01e0bb3f034a976c, 0x02befa0dff80f27c,
1159 0x0098b221eb08aecb, 0x030ca3bf38ae8e58, 0x01327945cb922185,
1160 0x0308de377b1b7b43, 0x03ab15750b28636d, 0x0091c1b0482a4305}},
1161 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1162 }, {
1163 {{0x02c1b0be6b746613, 0x00478b27bfbe1387, 0x03ac86e5d9c6a2d3,
1164 0x034f25d578d34127, 0x014e05b75ec6fecc, 0x01f44f38b4e2189f,
1165 0x00660fddda38b664, 0x03d587c9195d6412, 0x00e9dcec7d477b78}},
1166 {{0x03321366097b5fe6, 0x011364f5be162f87, 0x03d074359e750aaa,
1167 0x01d55171921585a2, 0x022527bb5c6eb7c7, 0x01428f6af0426fe3,
1168 0x0036bb94e1d4d74e, 0x03c7c757a44dbe6a, 0x0088a86c9ed6cbef}},
1169 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1170 }, {
1171 {{0x02bff33e6cbff097, 0x019cccbe703a64a6, 0x01e7d4c24e09e350,
1172 0x021908a33eaf46a0, 0x01a07762f8cc7516, 0x01e12df29d8644c9,
1173 0x0098c656997c8284, 0x0373d9622e713265, 0x01ff6b101932f0be}},
1174 {{0x0048f9e92e2c1256, 0x033fae66bf45eb34, 0x0341ddb09e352f2b,
1175 0x0019a6a6560f97fe, 0x02cda473f1bd03ab, 0x013c344018f55636,
1176 0x00329598b2276e7e, 0x0388a96e2249b63f, 0x00b6d123f38483d2}},
1177 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1178 }, {
1179 {{0x02a97232bae87062, 0x0069587df4826cd5, 0x021402a5fdf14035,
1180 0x026f406d0b49dc31, 0x01efc862b95739c4, 0x00a6e35dc23a4083,
1181 0x0385b4e2faa85fd8, 0x01deae552ff5231e, 0x019e03275123852a}},
1182 {{0x0120d17ebd7a996d, 0x00a56e635a2ab069, 0x03a2d775353348fe,
1183 0x02c60edc1e521033, 0x01078fbf7ab9fefc, 0x0375262d1601e76e,
1184 0x00d963629d272a65, 0x027c82575888f1bb, 0x013629c8c2a9841f}},
1185 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1186 }, {
1187 {{0x02634027d95abe73, 0x01c2e2ef1799e9c6, 0x02eedf3cf13b5ffc,
1188 0x0060e6a5de211043, 0x01a7806f233bb516, 0x0355633a88a8638c,
1189 0x01dcbcc58d7d5dcc, 0x02071903acda896f, 0x01dce602b80ca444}},
1190 {{0x013c47920922ddb6, 0x013f221e68d728d8, 0x0128ca5192ab3cb8,
1191 0x002a19a405f6d544, 0x0074330020d40403, 0x0085611df0ce1a97,
1192 0x028fda4edff5fc93, 0x0303b834136862a5, 0x00f443f3b7cd86cf}},
1193 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1194 }, {
1195 {{0x00f5614d1673ed2e, 0x03e442a78b43dbb3, 0x02408ebf00c8324c,
1196 0x0043b6f94f69ea8b, 0x02a32bb5a7c8f6ac, 0x02b7758b243883fa,
1197 0x00f4bd68881089bf, 0x03f61eb91693a587, 0x001d298cf9f11b0b}},
1198 {{0x00ee97751d8d6f36, 0x0318dcb929941397, 0x022cf9840311e590,
1199 0x02fc6b1da06aae09, 0x0134298323032dcf, 0x00d7b9072d9bb059,
1200 0x01a099906260485b, 0x037d9ca3796ce405, 0x0147a49ba1ca4467}},
1201 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1202 }, {
1203 {{0x030993f7ba6f7b2c, 0x019720e705ec5bc1, 0x001c9ee10167839e,
1204 0x0378869753d92351, 0x02bb1ace9f456b2e, 0x0336d504d809599d,
1205 0x02d549f9910bffd0, 0x019c6284b1ec6150, 0x00c67a7fcc4ffb2c}},
1206 {{0x022fe778c100a1dc, 0x01d14e5e8e693cb1, 0x03c139f63d3a44d9,
1207 0x01d0b45344a8a5c9, 0x0253f5e630be559d, 0x01eaad81980912b1,
1208 0x003febb5458d1ece, 0x01c6d59feaae8cfd, 0x01c3558976ca7dd7}},
1209 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1210 }, {
1211 {{0x01ec0d67348526ae, 0x0334bb61a85f8ed5, 0x0286ba7fecf2d764,
1212 0x01600344518c0c0c, 0x034e83852188ae46, 0x023d71754d3c015c,
1213 0x010eeccfb5a5a825, 0x004247e9a02cded9, 0x0187b9aa607ca24c}},
1214 {{0x00e77b967bc701ac, 0x022a2a00ef91bdc3, 0x01fa7bfaf46148d2,
1215 0x003feb6276929d54, 0x028ad7f3a3f075ca, 0x035f6ba48b87bd53,
1216 0x03fd400e74a80040, 0x0150a714837d88b5, 0x003969fa95c4e093}},
1217 {{1, 0, 0, 0, 0, 0, 0, 0, 0}}
1218 }
1219 };
1220
1221 /* Select the point with subscript index in the table and place it in the point.
1222 The anti-side channel processing exists. */
GetPointFromTable(Point * point,const Point table[],uint32_t pointNum,const uint64_t index)1223 static void GetPointFromTable(Point *point, const Point table[],
1224 uint32_t pointNum, const uint64_t index)
1225 {
1226 uint64_t mask, i;
1227 for (i = 0; i < pointNum; i++) {
1228 /* If i is equal to index, the last mask is all Fs. Otherwise, the last mask is all 0s. */
1229 /* Shift rightwards by 63 bits and get the most significant bit. */
1230 mask = (0 - (i ^ index)) >> 63;
1231 mask--;
1232 /* Conditionally assign a value, which takes effect only when i = index. */
1233 FelemPointAssignWithMask(point, &table[i], mask);
1234 }
1235 }
1236
1237 /*
1238 * Four bits at a fixed interval are intercepted from the scalar k1,
1239 and then decoded to obtain the index of the precomputation table G
1240 * input:
1241 * k1 indicates a array of scalars, consisting of nine 64-bit data in little-endian order.
1242 * i Corresponding bit. The value is an integer ranging [0, 65]
1243 * output:
1244 * Value range: 0–15, indicating the index of the pre-computation table.
1245 */
GetIndexOfTableG(uint64_t * value1,uint64_t * value2,const Array64 * k1,uint32_t i)1246 static void GetIndexOfTableG(uint64_t *value1, uint64_t *value2, const Array64 *k1, uint32_t i)
1247 {
1248 // The scalar k1 contains a maximum of 521 bits. 521 = 65 * 4 * 2 + 1. Therefore, one bit needs special processing.
1249 if (i == 0) {
1250 *value1 = k1->data[0] & 1; // get the least significant bit of the scalar
1251 *value2 = 0;
1252 } else {
1253 uint64_t bits1, bits2;
1254 bits1 = GET_ARRAY64_BIT(k1, i + 390) << 3; // 3rd corresponds to the scalar k1 bit: [391, 455]
1255 bits1 |= GET_ARRAY64_BIT(k1, i + 260) << 2; // 2nd corresponds to the scalar k1 bit: [261, 325]
1256 bits1 |= GET_ARRAY64_BIT(k1, i + 130) << 1; // 1st corresponds to the scalar k1 bit: [131, 195]
1257 bits1 |= GET_ARRAY64_BIT(k1, i); // 0th corresponds to the scalar k1 bit: [1 , 65]
1258 *value1 = bits1;
1259 bits2 = GET_ARRAY64_BIT(k1, i + 455) << 3; // 3rd corresponds to the scalar k1 bit: [456, 520]
1260 bits2 |= GET_ARRAY64_BIT(k1, i + 325) << 2; // 2nd corresponds to the scalar k1 bit: [326, 390]
1261 bits2 |= GET_ARRAY64_BIT(k1, i + 195) << 1; // 1st corresponds to the scalar k1 bit: [196, 260]
1262 bits2 |= GET_ARRAY64_BIT(k1, i + 65); // 0th corresponds to the scalar k1 bit: [66 , 130]
1263 *value2 = bits2;
1264 }
1265 }
1266
1267 /*
1268 * Six consecutive bits (i-1 to i+4) are intercepted from the scalar k2,
1269 and then decoded to obtain an index of the precomputation table P and a sign of a point
1270 * input:
1271 * k2 indicates a array of scalars, consisting of nine 64-bit data in little-endian order.
1272 * i Corresponding bit. The value range is [0, 520], which can be exactly divisible by 5.
1273 * output:
1274 * sign 0 or 1: indicates whether the corresponding point needs negation.
1275 * value 0-16: indicates the index of the pre-computation table.
1276 */
GetIndexOfTableP(uint64_t * sign,uint64_t * value,const Array64 * k2,uint32_t i)1277 static void GetIndexOfTableP(uint64_t *sign, uint64_t *value, const Array64 *k2, uint32_t i)
1278 {
1279 uint32_t s, v;
1280 uint64_t bits;
1281 if (i == 0) {
1282 // When i is the least significant bit, only the four least significant bits of k2 are truncated.
1283 bits = k2->data[0] << 1;
1284 } else {
1285 uint32_t num = (i - 1) / 64; // Each uint64_t contains 64 bits.
1286 uint32_t shift = (i - 1) % 64; // Each uint64_t contains 64 bits.
1287 bits = (k2->data[num] >> shift);
1288 if (shift + 6 > 64) { // (64 - shift) bits have been truncated. If it is less than 6 bits, continue truncating.
1289 bits |= k2->data[num + 1] << (64 - shift);
1290 }
1291 }
1292 // truncates six bits. (5-bit signed number complement + 1-bit low-order carry flag)
1293 bits &= (1 << (WINDOW_SIZE + 1)) - 1;
1294
1295 DecodeScalarCode(&s, &v, (uint32_t)bits);
1296 *sign = s;
1297 *value = v;
1298 }
1299
1300 /*
1301 * Calculation point coordinate r = k1 * G + k2 * P
1302 * input:
1303 * k1 a scalar multiplied by point G. If k1 is null, it will be not calculated.
1304 * k2 a scalar multiplied by point P. If k1 is null, it will be not calculated.
1305 * preCompute P-point precalculation table (0P, 1P, ... 16P) 17 points in total.
1306 * output:
1307 * r Point of the calculation result
1308 */
FelemPointMul(Point * r,const Array64 * k1,const Array64 * k2,const Point preCompute[TABLE_P_SIZE])1309 static void FelemPointMul(Point *r, const Array64 *k1, const Array64 *k2,
1310 const Point preCompute[TABLE_P_SIZE])
1311 {
1312 Point res = {0}; // res is initialized to 0.
1313 Point tmp = {0};
1314 Felem negY;
1315 uint64_t mask, sign, index, index2;
1316 bool computeG = k1 != NULL;
1317 bool computeP = k2 != NULL;
1318 bool isZero = true; // Whether the res point is zero.
1319 int32_t step = computeP ? 5 : 1; // Times of one cycle multiple the point
1320 /* P point multiplication requires 520 times, and G point multiplication requires 65 times. */
1321 for (int32_t i = computeP ? 520 : 65; i >= 0; i -= step) {
1322 /* If the point out remains zero, the double point operation has no effect, skipping */
1323 if (!isZero) {
1324 FelemPointMultDouble(&res, &res, step);
1325 }
1326 // Calculate the multiplication of point G. i starts calculation in the range [0, 65].
1327 if (computeG && (i <= 65)) {
1328 /* If the G-point multiplication starts, the step of the multiple point needs to be changed to 1. */
1329 step = 1;
1330 /* Obtain the corresponding bits. */
1331 GetIndexOfTableG(&index, &index2, k1, (uint32_t)i);
1332 /* Add the points in Table 1 */
1333 if (isZero) {
1334 /* If the point out is zero, the point addition operation is equivalent to direct assignment. */
1335 GetPointFromTable(&res, PRE_COMPUTE_G, TABLE_G_SIZE, index);
1336 isZero = false;
1337 } else {
1338 GetPointFromTable(&tmp, PRE_COMPUTE_G, TABLE_G_SIZE, index);
1339 // precomputation table G is all affine coordinates, use the hybrid coordinates for acceleration.
1340 FelemPointMixAdd(&res, &res, &tmp);
1341 }
1342 /* Add the points in Table 2 */
1343 if (i != 0) {
1344 GetPointFromTable(&tmp, PRE_COMPUTE_G2, TABLE_G_SIZE, index2);
1345 // precomputation table G2 is all affine coordinates, use the hybrid coordinates for acceleration.
1346 FelemPointMixAdd(&res, &res, &tmp);
1347 }
1348 }
1349 // Calculate the multiplication of point P. The calculation is performed every 5 bits.
1350 if (computeP && (i % 5 == 0)) {
1351 /* Obtain the corresponding bits. */
1352 GetIndexOfTableP(&sign, &index, k2, (uint32_t)i);
1353 GetPointFromTable(&tmp, preCompute, TABLE_P_SIZE, index);
1354 /* If the value is a negative number, the point is also negative. */
1355 FelemNeg(&negY, &tmp.y);
1356 mask = 0 - sign;
1357 FelemAssignWithMask(&tmp.y, &negY, mask);
1358 /* execute point addition */
1359 if (isZero) {
1360 /* If the point out is zero, the point addition operation is equivalent to direct assignment. */
1361 FelemPointAssign(&res, &tmp);
1362 isZero = false;
1363 } else {
1364 // precomputation table P is not necessarily affine coordinates, using Jacobian coordinates addition.
1365 FelemPointAdd(&res, &res, &tmp);
1366 }
1367 }
1368 }
1369 FelemPointAssign(r, &res);
1370 }
1371
1372 /*
1373 * calculate pre-calculation table for the P point
1374 * input:
1375 * pt P point
1376 * output:
1377 * preCompute precalculation table of P point, (0P, 1P, ... 16P) 17 points in total
1378 */
InitPreComputeTable(Point preCompute[TABLE_P_SIZE],const ECC_Point * pt)1379 static int32_t InitPreComputeTable(Point preCompute[TABLE_P_SIZE], const ECC_Point *pt)
1380 {
1381 int32_t ret;
1382 /* zero point */
1383 FelemSetLimb(&preCompute[0].x, 0);
1384 FelemSetLimb(&preCompute[0].y, 0);
1385 FelemSetLimb(&preCompute[0].z, 0);
1386 /* 1x point */
1387 GOTO_ERR_IF_EX(BN2Felem(&preCompute[1].x, pt->x), ret);
1388 GOTO_ERR_IF_EX(BN2Felem(&preCompute[1].y, pt->y), ret);
1389 GOTO_ERR_IF_EX(BN2Felem(&preCompute[1].z, pt->z), ret);
1390 /* 2 to 16x points */
1391 for (uint32_t i = 2; i < TABLE_P_SIZE; i++) {
1392 if ((i & 1) == 0) {
1393 /* If multiple for even times, use the multiple point formula (2n)*P = 2*(n*P), where i == 2n */
1394 FelemPointDouble(&preCompute[i], &preCompute[i / 2]);
1395 } else {
1396 /* If multiple for odd times, use the point addition formula n*P = P + (n-1)*P, where i == n */
1397 FelemPointAdd(&preCompute[i], &preCompute[1], &preCompute[i - 1]);
1398 }
1399 }
1400 ERR:
1401 return ret;
1402 }
1403
ComputePointMulAdd(Point * out,const Array64 * binG,const Array64 * binP,const ECC_Point * pt)1404 static int32_t ComputePointMulAdd(Point *out, const Array64 *binG, const Array64 *binP, const ECC_Point *pt)
1405 {
1406 // The stack space of a function cannot exceed 4096 bytes.
1407 // Therefore, the precomputation table is allocated by the function.
1408 Point preCompute[TABLE_P_SIZE]; /* Pre-calculation table of point pt */
1409 int32_t ret = InitPreComputeTable(preCompute, pt);
1410 if (ret != CRYPT_SUCCESS) {
1411 return ret;
1412 }
1413 FelemPointMul(out, binG, binP, preCompute);
1414 return CRYPT_SUCCESS;
1415 }
1416
1417 /* Calculate r = k1 * G + k2 * pt */
ECP521_PointMulAdd(ECC_Para * para,ECC_Point * r,const BN_BigNum * k1,const BN_BigNum * k2,const ECC_Point * pt)1418 int32_t ECP521_PointMulAdd(ECC_Para *para, ECC_Point *r,
1419 const BN_BigNum *k1, const BN_BigNum *k2, const ECC_Point *pt)
1420 {
1421 int32_t ret;
1422 Array64 binG = {0};
1423 Array64 binP = {0};
1424 Point out;
1425 uint32_t len;
1426 /* Input parameter check */
1427 GOTO_ERR_IF(CheckParaValid(para, CRYPT_ECC_NISTP521), ret);
1428 GOTO_ERR_IF(CheckPointValid(r, CRYPT_ECC_NISTP521), ret);
1429 GOTO_ERR_IF(CheckBnValid(k1, FELEM_BITS), ret);
1430 GOTO_ERR_IF(CheckBnValid(k2, FELEM_BITS), ret);
1431 GOTO_ERR_IF(CheckPointValid(pt, CRYPT_ECC_NISTP521), ret);
1432 if (BN_IsZero(pt->z)) {
1433 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_AT_INFINITY);
1434 return CRYPT_ECC_POINT_AT_INFINITY;
1435 }
1436 /* Convert the input BigNum */
1437 len = NUM_LIMBS;
1438 GOTO_ERR_IF(BN_Bn2U64Array(k1, binG.data, &len), ret);
1439 len = NUM_LIMBS;
1440 GOTO_ERR_IF(BN_Bn2U64Array(k2, binP.data, &len), ret);
1441 /* Calculate */
1442 GOTO_ERR_IF_EX(ComputePointMulAdd(&out, &binG, &binP, pt), ret);
1443 /* Output result */
1444 GOTO_ERR_IF_EX(Felem2BN(r->x, &out.x), ret);
1445 GOTO_ERR_IF_EX(Felem2BN(r->y, &out.y), ret);
1446 GOTO_ERR_IF_EX(Felem2BN(r->z, &out.z), ret);
1447 ERR:
1448 return ret;
1449 }
1450
1451 /* Calculate r = k * pt; If pt is NULL, calculate r = k * G. This is the ConstTime processing function. */
ECP521_PointMul(ECC_Para * para,ECC_Point * r,const BN_BigNum * k,const ECC_Point * pt)1452 int32_t ECP521_PointMul(ECC_Para *para, ECC_Point *r, const BN_BigNum *k, const ECC_Point *pt)
1453 {
1454 int32_t ret;
1455 Array64 bin = {0};
1456 uint32_t len = NUM_LIMBS;
1457 Point preCompute[TABLE_P_SIZE]; /* Pre-calculation table of Point pt */
1458 Point out;
1459 /* Input parameter check */
1460 GOTO_ERR_IF(CheckParaValid(para, CRYPT_ECC_NISTP521), ret);
1461 GOTO_ERR_IF(CheckPointValid(r, CRYPT_ECC_NISTP521), ret);
1462 GOTO_ERR_IF(CheckBnValid(k, FELEM_BITS), ret);
1463 if (pt != NULL) {
1464 if (pt->id != CRYPT_ECC_NISTP521) {
1465 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_ERR_CURVE_ID);
1466 return CRYPT_ECC_POINT_ERR_CURVE_ID;
1467 }
1468 if (BN_IsZero(pt->z)) {
1469 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_AT_INFINITY);
1470 return CRYPT_ECC_POINT_AT_INFINITY;
1471 }
1472 }
1473 /* Convert the input BigNum */
1474 GOTO_ERR_IF(BN_Bn2U64Array(k, bin.data, &len), ret);
1475 /* Calculate */
1476 if (pt != NULL) {
1477 GOTO_ERR_IF_EX(InitPreComputeTable(preCompute, pt), ret);
1478 FelemPointMul(&out, NULL, &bin, preCompute);
1479 } else {
1480 FelemPointMul(&out, &bin, NULL, NULL);
1481 }
1482 /* Output result */
1483 GOTO_ERR_IF_EX(Felem2BN(r->x, &out.x), ret);
1484 GOTO_ERR_IF_EX(Felem2BN(r->y, &out.y), ret);
1485 GOTO_ERR_IF_EX(Felem2BN(r->z, &out.z), ret);
1486 ERR:
1487 return ret;
1488 }
1489
MakeAffineWithInv(ECC_Point * r,const ECC_Point * a,const Felem * zInv)1490 static int32_t MakeAffineWithInv(ECC_Point *r, const ECC_Point *a, const Felem *zInv)
1491 {
1492 int32_t ret;
1493 Felem x, y, tmp;
1494 GOTO_ERR_IF_EX(BN2Felem(&x, a->x), ret);
1495 GOTO_ERR_IF_EX(BN2Felem(&y, a->y), ret);
1496 FelemMulReduce(&y, &y, zInv); // y/z
1497 FelemSqrReduce(&tmp, zInv); // 1/(z^2)
1498 FelemMulReduce(&x, &x, &tmp); // x/(z^2)
1499 FelemMulReduce(&y, &y, &tmp); // y/(z^3)
1500 GOTO_ERR_IF_EX(Felem2BN(r->x, &x), ret);
1501 GOTO_ERR_IF_EX(Felem2BN(r->y, &y), ret);
1502 GOTO_ERR_IF_EX(BN_SetLimb(r->z, 1), ret);
1503 ERR:
1504 return ret;
1505 }
1506
1507 /* Convert a point to affine coordinates. */
ECP521_Point2Affine(const ECC_Para * para,ECC_Point * r,const ECC_Point * pt)1508 int32_t ECP521_Point2Affine(const ECC_Para *para, ECC_Point *r, const ECC_Point *pt)
1509 {
1510 int32_t ret;
1511 Felem z, zInv;
1512 /* Input parameter check */
1513 GOTO_ERR_IF(CheckParaValid(para, CRYPT_ECC_NISTP521), ret);
1514 GOTO_ERR_IF(CheckPointValid(r, CRYPT_ECC_NISTP521), ret);
1515 GOTO_ERR_IF(CheckPointValid(pt, CRYPT_ECC_NISTP521), ret);
1516 /* Special data processing */
1517 if (BN_IsZero(pt->z)) {
1518 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_AT_INFINITY);
1519 return CRYPT_ECC_POINT_AT_INFINITY;
1520 }
1521 /* Convert the input data. */
1522 GOTO_ERR_IF_EX(BN2Felem(&z, pt->z), ret);
1523 /* Calculate and output result */
1524 FelemInv(&zInv, &z);
1525 GOTO_ERR_IF_EX(MakeAffineWithInv(r, pt, &zInv), ret);
1526 ERR:
1527 return ret;
1528 }
1529
1530 #endif /* defined(HITLS_CRYPTO_CURVE_NISTP521) && defined(HITLS_CRYPTO_NIST_USE_ACCEL) */
1531