1 /*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 * http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15 #include "hitls_build.h"
16 #if defined(HITLS_CRYPTO_CURVE_NISTP256) && defined(HITLS_CRYPTO_NIST_USE_ACCEL)
17
18 #include <stdbool.h>
19 #include "securec.h"
20 #include "bsl_err_internal.h"
21 #include "crypt_errno.h"
22 #include "crypt_utils.h"
23 #include "crypt_bn.h"
24 #include "crypt_ecc.h"
25 #include "ecc_local.h"
26 #include "ecc_utils.h"
27 #include "bsl_util_internal.h"
28
29 #ifndef __SIZEOF_INT128__
30 #error "This nistp256 implementation require the compiler support 128-bits integer."
31 #endif
32
33 /* field element definition */
34 #define FELEM_BITS 256
35 #define FELEM_BYTES 32
36 #define LIMB_BITS (sizeof(uint128_t) << 3)
37 #define LIMB_NUM 4
38 #define BASE_BITS 64
39 /* The pre-calculation table of the G table has 16 points. */
40 #define TABLE_G_SIZE 16
41 /* The pre-calculation table of the P table has 17 points. */
42 #define TABLE_P_SIZE 17
43
44 /*
45 * Field elements, stored as arrays, all represented in little endian.
46 * Each element of the array is called a digit. Each digit represents an extended 2 ^ 64-bit.
47 * That is, Felem can be expressed as:
48 * f_0 + f_1 * 2^64 + f_2 * 2^128 + f_3 * 2^192
49 * LongFelem is used to store the result of multiplication of field elements and is twice the width of Felem.
50 * Point is a point represented as a Jacobian coordinate
51 */
52 typedef struct {
53 uint128_t data[LIMB_NUM];
54 } Felem;
55
56 typedef struct {
57 uint128_t data[LIMB_NUM * 2];
58 } LongFelem;
59
60 typedef struct {
61 Felem x;
62 Felem y;
63 Felem z;
64 } Point;
65
66 /* ------------------------------------------------------------ */
67 /* ECP256 field order p. The value is 2^256 - 2^224 + 2^192 + 2^96 - 1, little endian */
68 static const Felem FIELD_ORDER = {
69 {0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001}
70 };
71
72 /*
73 * Pre-computation table of the base point G, which contains the (X, Y, Z) coordinates of k*G
74 *
75 * PRE_MUL_G divides all bits into four equal parts.
76 * index corresponding bit value of k
77 * 0 0 0 0 0 0 + 0 + 0 + 0
78 * 1 0 0 0 1 0 + 0 + 0 + 1
79 * 2 0 0 1 0 0 + 0 + 2^64 + 0
80 * 3 0 0 1 1 0 + 0 + 2^64 + 1
81 * 4 0 1 0 0 0 + 2^128 + 0 + 0
82 * 5 0 1 0 1 0 + 2^128 + 0 + 1
83 * 6 0 1 1 0 0 + 2^128 + 2^64 + 0
84 * 7 0 1 1 1 0 + 2^128 + 2^64 + 1
85 * 8 1 0 0 0 2^192 + 0 + 0 + 0
86 * 9 1 0 0 1 2^192 + 0 + 0 + 1
87 * 10 1 0 1 0 2^192 + 0 + 2^64 + 0
88 * 11 1 0 1 1 2^192 + 0 + 2^64 + 1
89 * 12 1 1 0 0 2^192 + 2^128 + 0 + 0
90 * 13 1 1 0 1 2^192 + 2^128 + 0 + 1
91 * 14 1 1 1 0 2^192 + 2^128 + 2^64 + 0
92 * 15 1 1 1 1 2^192 + 2^128 + 2^64 + 1
93 *
94 */
95 static const Point PRE_MUL_G[TABLE_G_SIZE] = {
96 {
97 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
98 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
99 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
100 }, {
101 {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247}},
102 {{0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b}},
103 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
104 }, {
105 {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5}},
106 {{0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d}},
107 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
108 }, {
109 {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f}},
110 {{0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644}},
111 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
112 }, {
113 {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67}},
114 {{0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee}},
115 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
116 }, {
117 {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff}},
118 {{0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b}},
119 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
120 }, {
121 {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8}},
122 {{0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851}},
123 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
124 }, {
125 {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea}},
126 {{0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b}},
127 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
128 }, {
129 {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276}},
130 {{0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816}},
131 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
132 }, {
133 {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad}},
134 {{0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663}},
135 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
136 }, {
137 {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d}},
138 {{0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321}},
139 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
140 }, {
141 {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287}},
142 {{0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6}},
143 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
144 }, {
145 {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466}},
146 {{0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20}},
147 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
148 }, {
149 {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9}},
150 {{0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61}},
151 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
152 }, {
153 {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a}},
154 {{0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc}},
155 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
156 }, {
157 {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c}},
158 {{0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab}},
159 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
160 }
161 };
162
163 /*
164 * Pre-computation table of the base point G, which contains the (X, Y, Z) coordinates of k*G
165 *
166 * PRE_MUL_G2[] = PRE_MUL_G[] * 2^32
167 * index corresponding bit value of k
168 * 0 0 0 0 0 0 + 0 + 0 + 0
169 * 1 0 0 0 1 0 + 0 + 0 + 2^32
170 * 2 0 0 1 0 0 + 0 + 2^96 + 0
171 * 3 0 0 1 1 0 + 0 + 2^96 + 2^32
172 * 4 0 1 0 0 0 + 2^160 + 0 + 0
173 * 5 0 1 0 1 0 + 2^160 + 0 + 2^32
174 * 6 0 1 1 0 0 + 2^160 + 2^96 + 0
175 * 7 0 1 1 1 0 + 2^160 + 2^96 + 2^32
176 * 8 1 0 0 0 2^224 + 0 + 0 + 0
177 * 9 1 0 0 1 2^224 + 0 + 0 + 2^32
178 * 10 1 0 1 0 2^224 + 0 + 2^96 + 0
179 * 11 1 0 1 1 2^224 + 0 + 2^96 + 2^32
180 * 12 1 1 0 0 2^224 + 2^160 + 0 + 0
181 * 13 1 1 0 1 2^224 + 2^160 + 0 + 2^32
182 * 14 1 1 1 0 2^224 + 2^160 + 2^96 + 0
183 * 15 1 1 1 1 2^224 + 2^160 + 2^96 + 2^32
184 */
185 static const Point PRE_MUL_G2[TABLE_G_SIZE] = {
186 {
187 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
188 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
189 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
190 }, {
191 {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89}},
192 {{0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624}},
193 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
194 }, {
195 {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6}},
196 {{0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1}},
197 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
198 }, {
199 {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a}},
200 {{0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593}},
201 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
202 }, {
203 {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617}},
204 {{0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7}},
205 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
206 }, {
207 {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276}},
208 {{0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a}},
209 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
210 }, {
211 {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908}},
212 {{0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e}},
213 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
214 }, {
215 {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7}},
216 {{0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec}},
217 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
218 }, {
219 {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee}},
220 {{0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6}},
221 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
222 }, {
223 {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109}},
224 {{0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5}},
225 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
226 }, {
227 {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba}},
228 {{0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44}},
229 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
230 }, {
231 {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b}},
232 {{0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc}},
233 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
234 }, {
235 {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107}},
236 {{0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387}},
237 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
238 }, {
239 {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503}},
240 {{0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be}},
241 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
242 }, {
243 {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9}},
244 {{0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a}},
245 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
246 }, {
247 {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6}},
248 {{0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81}},
249 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
250 }
251 };
252
253 /* --------------------------helper function-------------------------- */
254 /*
255 * Convert big-endian byte stream to Felem
256 */
Bin2Felem(Felem * out,const uint8_t in[FELEM_BYTES])257 static inline void Bin2Felem(Felem *out, const uint8_t in[FELEM_BYTES])
258 {
259 // Write the input data to 128-bit digits every 64 bits.
260 out->data[3] = (uint128_t)Uint64FromBeBytes(in); // Index 3 read 0~7 bytes
261 out->data[2] = (uint128_t)Uint64FromBeBytes(in + 8); // Index 2 read 8~15 bytes
262 out->data[1] = (uint128_t)Uint64FromBeBytes(in + 16); // Index 1 read 16~23 bytes
263 out->data[0] = (uint128_t)Uint64FromBeBytes(in + 24); // Index 0 read 24~31 bytes
264 }
265
266 /*
267 * Convert Felem to big-endian byte stream
268 * Input:
269 * in[] < 2^64
270 * Output:
271 * out length is 32
272 */
Felem2Bin(uint8_t out[FELEM_BYTES],const Felem * in)273 static inline void Felem2Bin(uint8_t out[FELEM_BYTES], const Felem *in)
274 {
275 Uint64ToBeBytes((uint64_t)in->data[3], out); // Index 3 write 0~7 bytes
276 Uint64ToBeBytes((uint64_t)in->data[2], out + 8); // Index 2 write 8~15 bytes
277 Uint64ToBeBytes((uint64_t)in->data[1], out + 16); // Index 1 write 16~23 bytes
278 Uint64ToBeBytes((uint64_t)in->data[0], out + 24); // Index 0 write 24~31 bytes
279 }
280
281 /*
282 * Convert BN to Felem
283 * Output:
284 * out[] < 2^64
285 */
BN2Felem(Felem * out,const BN_BigNum * in)286 static int32_t BN2Felem(Felem *out, const BN_BigNum *in)
287 {
288 int32_t ret;
289 uint8_t bin[FELEM_BYTES];
290 uint32_t len = FELEM_BYTES;
291
292 GOTO_ERR_IF(BN_Bn2Bin(in, bin, &len), ret);
293
294 for (uint32_t i = 0; i < FELEM_BYTES; ++i) {
295 bin[FELEM_BYTES - 1 - i] = i < len ? bin[len - 1 - i] : 0;
296 }
297
298 Bin2Felem(out, bin);
299 ERR:
300 return ret;
301 }
302
303 /*
304 * Convert Felem to BN
305 * Input:
306 * in[] < 2^64
307 */
Felem2BN(BN_BigNum * out,const Felem * in)308 static int32_t Felem2BN(BN_BigNum *out, const Felem *in)
309 {
310 int32_t ret;
311 uint8_t bin[FELEM_BYTES];
312
313 Felem2Bin(bin, in);
314
315 GOTO_ERR_IF(BN_Bin2Bn(out, bin, FELEM_BYTES), ret);
316 ERR:
317 return ret;
318 }
319
320 /* ---------------------------field operation--------------------------- */
321 /*
322 * Assignment
323 */
FelemAssign(Felem * out,const Felem * in)324 static inline void FelemAssign(Felem *out, const Felem *in)
325 {
326 out->data[0] = in->data[0]; // out->data[0] get the value
327 out->data[1] = in->data[1]; // out->data[1] get the value
328 out->data[2] = in->data[2]; // out->data[2] get the value
329 out->data[3] = in->data[3]; // out->data[3] get the value
330 }
331
332 /*
333 * Copy each digit by mask. If the corresponding bit is 1, copy it. If the corresponding bit is 0, retain it.
334 */
FelemAssignWithMask(Felem * out,const Felem * in,const uint128_t mask)335 static inline void FelemAssignWithMask(Felem *out, const Felem *in, const uint128_t mask)
336 {
337 uint128_t rmask = ~mask;
338 // The value of out->data[0] is changed or remains unchanged.
339 out->data[0] = (in->data[0] & mask) | (out->data[0] & rmask);
340 // The value of out->data[1] is changed or remains unchanged.
341 out->data[1] = (in->data[1] & mask) | (out->data[1] & rmask);
342 // The value of out->data[2] is changed or remains unchanged.
343 out->data[2] = (in->data[2] & mask) | (out->data[2] & rmask);
344 // The value of out->data[3] is changed or remains unchanged.
345 out->data[3] = (in->data[3] & mask) | (out->data[3] & rmask);
346 }
347
348 /*
349 * Set the lowest digit
350 */
FelemSetLimb(Felem * out,const uint128_t in)351 static inline void FelemSetLimb(Felem *out, const uint128_t in)
352 {
353 out->data[0] = in; // out->data[0] get the value
354 out->data[1] = 0; // out->data[1] clear to 0
355 out->data[2] = 0; // out->data[2] clear to 0
356 out->data[3] = 0; // out->data[3] clear to 0
357 }
358
359 /*
360 * Zero judgment: input less than 2 ^ 256, only 0 and p need to be judged.
361 * Input:
362 * in[] < 2^64
363 */
FelemIsZero(const Felem * in)364 static inline uint128_t FelemIsZero(const Felem *in)
365 {
366 uint128_t isZero, isP;
367
368 // Check whether digits 0, 1, 2, and 3 are all 0.
369 isZero = in->data[0] | in->data[1] | in->data[2] | in->data[3];
370 isZero -= 1; // If in == 0, the most significant bit is 1.
371
372 // Determine that in is equal to the field order.
373 isP = (in->data[0] ^ FIELD_ORDER.data[0]) | // Determine whether the digits 0 is equal to the order
374 (in->data[1] ^ FIELD_ORDER.data[1]) | // Determine whether the digits 1 is equal to the order
375 (in->data[2] ^ FIELD_ORDER.data[2]) | // Determine whether the digits 2 is equal to the order
376 (in->data[3] ^ FIELD_ORDER.data[3]); // Determine whether the digits 3 is equal to the order
377 isP -= 1; // If in == p, the most significant bit is 1.
378
379 return (isZero | isP) >> (LIMB_BITS - 1);
380 }
381
382 /*
383 * Obtain the bit string whose length is len at idx in Felem.
384 * Input:
385 * in[] < 2^64
386 * 0 < len <= 64
387 */
FelemGetBits(const Felem * in,int32_t idx,uint32_t len)388 static uint64_t FelemGetBits(const Felem *in, int32_t idx, uint32_t len)
389 {
390 uint128_t ret;
391 uint32_t lower, upper;
392 uint64_t mask;
393
394 lower = (uint32_t)idx;
395 // When 0 <= lower < 256, the most significant bit is 1, obtain the most significant bit by right shifted by 31 bits
396 mask = (uint64_t)0 - ((~lower & (lower - 256)) >> 31);
397 ret = (uint64_t)in->data[(lower / BASE_BITS) & mask] & mask;
398
399 upper = (uint32_t)idx + BASE_BITS; // next unary block
400 // When 0 <= upper < 256, the most significant bit is 1, obtain the most significant bit by right shifted by 31 bits
401 mask = (uint64_t)0 - ((~upper & (upper - 256)) >> 31);
402 ret |= (uint128_t)((uint64_t)in->data[(upper / BASE_BITS) & mask] & mask) << BASE_BITS;
403
404 // Take the lower six bits, that is, mod 64, regardless of the positive and negative values of "lower".
405 ret >>= lower & (BASE_BITS - 1);
406 ret &= ((uint64_t)1 << len) - 1; // All 1-bit string with the len length
407
408 return (uint64_t)ret;
409 }
410
411 /*
412 * Field element reduction, retains the base bit (64 bits) of each digit in the 4-ary Felem, and reduce the high bit.
413 * Should be called before modular multiplication, modulus square, or modulus
414 * Input:
415 * in[] < 2^127
416 * Output:
417 * out[] < 2^64
418 */
FelemReduce(Felem * out,const Felem * in)419 static void FelemReduce(Felem *out, const Felem *in)
420 {
421 uint128_t *po = out->data;
422 const uint128_t *pi = in->data;
423 const uint128_t borrow = (uint128_t)1 << 96; // 2 ^ 96 borrowed from low to high
424 const uint128_t lend = (uint128_t)1 << 32; // 2 ^ 32 lent from high to low
425 uint128_t carryLimb, carryLimbTotal;
426
427 // Process the carry of each digit first: 0 -> 1 -> 2 -> 3
428 po[1] = pi[1] + (pi[0] >> BASE_BITS); // po[1] takes the input value and adds the carry of digit 0.
429 po[2] = pi[2] + (po[1] >> BASE_BITS); // po[2] takes the input value and adds the carry of digit 1.
430 po[3] = pi[3] + (po[2] >> BASE_BITS); // po[3] takes the input value and adds the carry of digit 2.
431
432 po[0] = (uint64_t)pi[0]; // po[0] takes the basic digit.
433 po[1] = (uint64_t)po[1]; // po[1] takes the basic digit.
434 po[2] = (uint64_t)po[2]; // po[2] takes the basic digit.
435
436 // Now reduce the highest digit. Only need to reduce the carry of the highest digit three times.
437 // Note the carry bit of out[3] as carryLimb.
438 // It can be known from the equation:
439 // carryLimb * 2^256 = carryLimb * (2^224 - 2^192 - 2^96 + 1) (mod 2^256 - 2^224 + 2^192 + 2^96 - 1)
440 // The part of carryLimb * (2^224 - 2^192) needs to be placed in out[3]
441 //
442 // First reduction: po[3] < 2^128 ---> po[3] < 2^96
443 // Assume that po[3] = 0x ffffffffffffffff ffffffffffffffff
444 // 0x 0000000000000000 ffffffffffffffff -> po[3] basic digit(unit)
445 // + 0x 00000000ffffffff ffffffff00000000 -> carryLimb * 2^32
446 // - 0x 0000000000000000 ffffffffffffffff -> carryLimb
447 // -----------------------------------------
448 // = 0x 00000000ffffffff ffffffff00000000 -> po[3] <= 2^96 - 2^32 < 2^96
449 // Second reduction: po[3] < 2^96 ---> po[3] < 2^65 - 2^33
450 // Assume that po[3] = 0x 00000000ffffffff ffffffffffffffff
451 // 0x 0000000000000000 ffffffffffffffff
452 // + 0x 0000000000000000 ffffffff00000000
453 // - 0x 0000000000000000 00000000ffffffff
454 // -----------------------------------------
455 // = 0x 0000000000000001 fffffffe00000000 -> po[3] <= 2^65 - 2^33 < 2^65 - 2^33
456 // Third reduction: po[3] < 2^65 - 2^32 ---> po[3] < 2^64
457 // Assume that po[3] = 0x 0000000000000001 ffffffff00000000
458 // 0x 0000000000000000 ffffffff00000000
459 // + 0x 0000000000000000 0000000100000000
460 // - 0x 0000000000000000 0000000000000001
461 // -----------------------------------------
462 // = 0x 0000000000000000 ffffffffffffffff -> po[3] < 2^64
463 //
464 // In addition, use carryLimbTotal to store the sum of carry bits.
465 // In this way, carryLimbTotal*(-2^96 + 1) can be calculated at a time.
466 // The simple conclusion is that when out[3] is the maximum, carryLimbTotal is the maximum.
467 // Consider the maximum value of out[3].
468 // 0x 7fffffffffffffff ffffffffffffffff -> Maximum value of pi[3] (pi[3] < 2^127)
469 // + 0x 0000000000000000 8000000000000000 -> Maximum value of carry of pi[2]
470 // -----------------------------------------
471 // = 0x 8000000000000000 7fffffffffffffff -> Maximum value of po[3]
472 // Use the value to perform three reduction:
473 // carryLimbTotal = 0x8000000000000000 + 0x7fffffff + 0x1 = 0x8000000080000000 < 2^64
474 carryLimbTotal = carryLimb = po[3] >> BASE_BITS; // Reduce carryLimb and take the carry of po[3].
475 po[3] = (uint64_t)po[3] + (carryLimb << 32) - carryLimb; // The reduction factor on po[3] is (2 ^ 32 - 1).
476 carryLimbTotal += (carryLimb = po[3] >> BASE_BITS); // Reduce carryLimb and take the carry of po[3].
477 po[3] = (uint64_t)po[3] + (carryLimb << 32) - carryLimb; // The reduction factor on po[3] is (2 ^ 32 - 1).
478 carryLimbTotal += (carryLimb = po[3] >> BASE_BITS); // Reduce carryLimb and take the carry of po[3].
479 po[3] = (uint64_t)po[3] + (carryLimb << 32) - carryLimb; // The reduction factor on po[3] is (2 ^ 32 - 1).
480
481 // Calculate the remaining carryLimbTotal * (-2^96 + 1) <= 0
482 // In this case, it is impossible to carry out[4]. Therefore, po[3] < 2^64.
483 // If carryLimbTotal > 0, po[3] must be equal to at least 2 ^ 32 - 1. (i.e., po[3] = 2^64 in the last step)
484 // In this case, it is obvious that (2^32 - 1) * 2^192 > |carryLimbTotal * (-2^96 + 1)|
485 po[0] += carryLimbTotal; // reduction of carryLimbTotal which the coefficient is 1
486 // po[1] + po[0] carry + Borrowed Bits - Reduction Factor 2^32
487 po[1] += ((po[0] >> BASE_BITS) ^ borrow) - (carryLimbTotal << 32);
488 // po[2] + po[1] carry + Borrowing Bits - Lending Bits
489 po[2] += ((po[1] >> BASE_BITS) ^ borrow) - lend;
490 // po[3] + po[2] carry - Lending Bits
491 po[3] += (po[2] >> BASE_BITS) - lend;
492
493 po[0] = (uint64_t)po[0]; // po[0] takes the basic digit.
494 po[1] = (uint64_t)po[1]; // po[1] takes the basic digit.
495 po[2] = (uint64_t)po[2]; // po[2] takes the basic digit.
496 }
497
498 /*
499 * Field element reduction, converting 8-ary LongFelem to 4-ary Felem
500 * Input:
501 * in[] < 2^80
502 * Output:
503 * out[] < 2^115
504 * - out[0] < (2^114 + 2^82 - 2^50) + 2^(80 + 32) + 2 * 2^80 < 2^115
505 * - out[1] < (2^114 + 2^82 - 2^50) + 2^(80 + 33) + 2 * 2^80 < 2^115
506 * - out[2] < (2^114 + 2^82 - 2^50) + 2^(80 + 33) + 4 * 2^80 < 2^115
507 * - out[3] < (2^114 + 2^82 - 2^50) + 2^(80 + 32) + 4 * 2^80 < 2^115
508 * out[] > 0
509 * - out[0] > (2^114 - 2^82) - 2 * 2^(80 + 32) - 2 * 2^80 > 0
510 * - out[1] > (2^114 - 2^82) - 2^(80 + 32) - 2^80 > 0
511 * - out[2] > (2^114 - 2^82) - 2^(80 + 32) - 2^80 > 0
512 * - out[3] > (2^114 - 2^82) - 2 * 2^(80 + 32) - 2^80 > 0
513 */
LongFelemReduce(Felem * out,const LongFelem * in)514 static void LongFelemReduce(Felem *out, const LongFelem *in)
515 {
516 // n ≡ n / a * (a - b) ≡ n / a * b (mod (a - b))
517 // The following formula can be obtained:
518 // 2^n ≡ 2^(n - 256) * (2^224 - 2^192 - 2^96 + 1) (mod (2^256 - 2^224 + 2^192 + 2^96 - 1))
519 //
520 // 2^256 mod p
521 // = 2^224 - 2^192 - 2^96 + 1
522 //
523 // 2^288 mod p
524 // = 2^256 - 2^224 - 2^128 + 2^32
525 // = -2^192 - 2^128 - 2^96 + 2^32 + 1
526 //
527 // 2^320 mod p
528 // = 2^288 - 2^256 - 2^160 + 2^64
529 // = -2^224 - 2^160 - 2^128 + 2^64 + 2^32
530 //
531 // 2^352 mod p
532 // = 2^320 - 2^288 - 2^192 + 2^96
533 // = -2^224 - 2^160 + 2 * 2^96 + 2^64 - 1
534 //
535 // 2^384 mod p
536 // = 2^352 - 2^320 - 2^224 + 2^128
537 // = -2^224 + 2 * 2^128 + 2 * 2^96 - 2^32 - 1
538 //
539 // 2^416 mod p
540 // = 2^384 - 2^352 - 2^256 + 2^160
541 // = -2^224 + 2^192 + 2 * 2^160 + 2 * 2^128 + 2^96 - 2^64 - 2^32 - 1
542 //
543 // 2^448 mod p
544 // = 2^416 - 2^384 - 2^288 + 2^192
545 // = 3 * 2^192 + 2 * 2^160 + 2^128 - 2^64 - 2^32 - 1
546 //
547 // 2^480 mod p
548 // = 2^448 - 2^416 - 2^320 + 2^224
549 // = 3 * 2^224 + 2 * 2^192 + 2^160 - 2^96 - 2^64 - 2^32
550
551 static const Felem zeroBase = {
552 {
553 (((uint128_t)1 << 64) - 1) << 50,
554 (((uint128_t)1 << 64) + ((uint128_t)1 << 32) - 1) << 50,
555 (((uint128_t)1 << 64) - 1) << 50,
556 (((uint128_t)1 << 64) - ((uint128_t)1 << 32)) << 50,
557 }
558 };
559
560 uint128_t *po = out->data;
561 const uint128_t *pi = in->data;
562 // Add the term reduced by pi[4]*2^256, pi[5]*2^320, pi[6]*2^384, pi[7]*2^448 to the corresponding digit.
563 // Assign a value after zero adjustment.
564 // Adjust to zero of digit 0. Take the input value pi[0].
565 // The reduction item list is (pi[4], pi[5]*2^32, -pi[6] - pi[6]*2^32, -pi[7] - pi[7]*2^32)
566 po[0] = zeroBase.data[0] + pi[0] + pi[4] + (pi[5] << 32) - pi[6] - (pi[6] << 32) - pi[7] - (pi[7] << 32);
567 // Adjust to zero of digit 1. Take the input value pi[1].
568 // The reduction item list is (-pi[4]*2^32, pi[5], pi[6]*2^33, -pi[7])
569 po[1] = zeroBase.data[1] + pi[1] - (pi[4] << 32) + pi[5] + (pi[6] << 33) - pi[7];
570 // Adjust to zero of digit 2. Take the input value pi[2].
571 // The reduction item list is (0, -pi[5] - pi[5]*2^32, -pi[6]*2, pi[7] + pi[7]*2^33)
572 po[2] = zeroBase.data[2] + pi[2] - pi[5] - (pi[5] << 32) + (pi[6] << 1) + pi[7] + (pi[7] << 33);
573 // Adjust to zero of digit 3. Take the input value pi[3].
574 // The reduction item list is (-pi[4] + pi[4]*2^32, -pi[5]*2^32, -pi[6]*2^32, pi[7]*3)
575 po[3] = zeroBase.data[3] + pi[3] - pi[4] + (pi[4] << 32) - (pi[5] << 32) - (pi[6] << 32) + (pi[7] * 3);
576 }
577
578 /*
579 * field element modulo: convert the field element to a unique value within [0, p)
580 * Input:
581 * in[] < 2^64
582 * Output:
583 * out < p, out[] < 2^64
584 */
FelemContract(Felem * out,const Felem * in)585 static void FelemContract(Felem *out, const Felem *in)
586 {
587 uint128_t *po = out->data;
588 const uint128_t *pi = in->data;
589 const uint128_t borrow = (uint128_t)1 << BASE_BITS;
590 const uint128_t lend = 1;
591
592 bool isGreaterOrEqual = true;
593 for (int32_t i = 3; i >= 0; i--) {
594 if (pi[i] > FIELD_ORDER.data[i]) {
595 break;
596 } else if (pi[i] < FIELD_ORDER.data[i]) {
597 isGreaterOrEqual = false;
598 break;
599 }
600 }
601
602 if (isGreaterOrEqual) {
603 // p_3 = 0xffffffff00000001
604 po[3] = (pi[3] - lend) - FIELD_ORDER.data[3];
605 // p_2 = 0x0000000000000000
606 po[2] = (borrow ^ pi[2]) - lend;
607 // p_1 = 0x00000000ffffffff
608 po[1] = ((borrow ^ pi[1]) - lend) - FIELD_ORDER.data[1];
609 // p_0 = 0xffffffffffffffff
610 po[0] = (borrow ^ pi[0]) - FIELD_ORDER.data[0];
611 }
612
613 // Process carry 0 -> 1 -> 2 -> 3
614 po[1] += po[0] >> BASE_BITS; // po[1] + po[0] carry
615 po[2] += po[1] >> BASE_BITS; // po[2] + po[1] carry
616 po[3] += po[2] >> BASE_BITS; // po[3] + po[2] carry
617
618 po[0] = (uint64_t)po[0]; // po[0] takes the basic digit.
619 po[1] = (uint64_t)po[1]; // po[1] takes the basic digit.
620 po[2] = (uint64_t)po[2]; // po[2] takes the basic digit.
621 }
622
623 /*
624 * Field Addition
625 */
FelemAdd(Felem * out,const Felem * a,const Felem * b)626 static inline void FelemAdd(Felem *out, const Felem *a, const Felem *b)
627 {
628 out->data[0] = a->data[0] + b->data[0]; // out->data[0] get the value
629 out->data[1] = a->data[1] + b->data[1]; // out->data[1] get the value
630 out->data[2] = a->data[2] + b->data[2]; // out->data[2] get the value
631 out->data[3] = a->data[3] + b->data[3]; // out->data[3] get the value
632 }
633
634 /*
635 * Field element negation
636 * Input:
637 * in[] <= 2^124 - 2^92
638 * Output:
639 * out[] <= 2^124 + 2^92 - 2^60 + in[]
640 */
FelemNeg(Felem * out,const Felem * in)641 static inline void FelemNeg(Felem *out, const Felem *in)
642 {
643 static const Felem zeroBase = {{
644 (((uint128_t)1 << 64) - 1) << 60,
645 (((uint128_t)1 << 64) + ((uint128_t)1 << 32) - 1) << 60,
646 (((uint128_t)1 << 64) - 1) << 60,
647 (((uint128_t)1 << 64) - ((uint128_t)1 << 32)) << 60,
648 }};
649
650 out->data[0] = zeroBase.data[0] - in->data[0]; // out->data[0] get the value
651 out->data[1] = zeroBase.data[1] - in->data[1]; // out->data[1] get the value
652 out->data[2] = zeroBase.data[2] - in->data[2]; // out->data[2] get the value
653 out->data[3] = zeroBase.data[3] - in->data[3]; // out->data[3] get the value
654 }
655
656 /*
657 * Field subtraction
658 * Input:
659 * a[] < 2^128 - 2^124 - 2^92 + 2^60,b[] <= 2^124 - 2^92
660 * Output:
661 * out[] <= 2^124 + 2^92 - 2^60 + a[] < 2^128
662 */
FelemSub(Felem * out,const Felem * a,const Felem * b)663 static inline void FelemSub(Felem *out, const Felem *a, const Felem *b)
664 {
665 static const Felem zeroBase = {{
666 (((uint128_t)1 << 64) - 1) << 60,
667 (((uint128_t)1 << 64) + ((uint128_t)1 << 32) - 1) << 60,
668 (((uint128_t)1 << 64) - 1) << 60,
669 (((uint128_t)1 << 64) - ((uint128_t)1 << 32)) << 60,
670 }};
671
672 out->data[0] = zeroBase.data[0] + a->data[0] - b->data[0]; // out->data[0] get the value
673 out->data[1] = zeroBase.data[1] + a->data[1] - b->data[1]; // out->data[1] get the value
674 out->data[2] = zeroBase.data[2] + a->data[2] - b->data[2]; // out->data[2] get the value
675 out->data[3] = zeroBase.data[3] + a->data[3] - b->data[3]; // out->data[3] get the value
676 }
677
678 /*
679 * Field subtraction. Input LongFelem directly.
680 * Input:
681 * a[] < 2^128 - 2^74 - 2^42 + 2^10,b[] <= 2^74 - 2^42
682 * Output:
683 * out[] <= 2^74 + 2^42 - 2^10 + a[] < 2^128
684 */
LongFelemSub(LongFelem * out,const LongFelem * a,const LongFelem * b)685 static void LongFelemSub(LongFelem *out, const LongFelem *a, const LongFelem *b)
686 {
687 static const LongFelem zeroBase = {{
688 ((uint128_t)1 << 64) << 10,
689 (((uint128_t)1 << 64) - 1) << 10,
690 (((uint128_t)1 << 64) - 1) << 10,
691 (((uint128_t)1 << 64) - 1) << 10,
692 (((uint128_t)1 << 64) - 2) << 10,
693 (((uint128_t)1 << 64) + ((uint128_t)1 << 32) - 1) << 10,
694 (((uint128_t)1 << 64) - 1) << 10,
695 (((uint128_t)1 << 64) - ((uint128_t)1 << 32)) << 10,
696 }};
697
698 out->data[0] = zeroBase.data[0] + a->data[0] - b->data[0]; // out->data[0] get the value
699 out->data[1] = zeroBase.data[1] + a->data[1] - b->data[1]; // out->data[1] get the value
700 out->data[2] = zeroBase.data[2] + a->data[2] - b->data[2]; // out->data[2] get the value
701 out->data[3] = zeroBase.data[3] + a->data[3] - b->data[3]; // out->data[3] get the value
702 out->data[4] = zeroBase.data[4] + a->data[4] - b->data[4]; // out->data[4] get the value
703 out->data[5] = zeroBase.data[5] + a->data[5] - b->data[5]; // out->data[5] get the value
704 out->data[6] = zeroBase.data[6] + a->data[6] - b->data[6]; // out->data[6] get the value
705 out->data[7] = zeroBase.data[7] + a->data[7] - b->data[7]; // out->data[7] get the value
706 }
707
708 /*
709 * Scale the field element
710 * Use only a small magnification(scale) factor to ensure that in[] * scalar does not overflow.
711 */
FelemScale(Felem * out,const Felem * in,const uint32_t scalar)712 static inline void FelemScale(Felem *out, const Felem *in, const uint32_t scalar)
713 {
714 out->data[0] = in->data[0] * scalar; // out->data[0] get the value
715 out->data[1] = in->data[1] * scalar; // out->data[1] get the value
716 out->data[2] = in->data[2] * scalar; // out->data[2] get the value
717 out->data[3] = in->data[3] * scalar; // out->data[3] get the value
718 }
719
720 /*
721 * Scale the field element. Input LongFelem directly.
722 * Use only a small magnification(scale) factor to ensure that in[] * scalar does not overflow.
723 */
LongFelemScale(LongFelem * out,const LongFelem * in,const uint32_t scalar)724 static inline void LongFelemScale(LongFelem *out, const LongFelem *in, const uint32_t scalar)
725 {
726 out->data[0] = in->data[0] * scalar; // out->data[0] get the value
727 out->data[1] = in->data[1] * scalar; // out->data[1] get the value
728 out->data[2] = in->data[2] * scalar; // out->data[2] get the value
729 out->data[3] = in->data[3] * scalar; // out->data[3] get the value
730 out->data[4] = in->data[4] * scalar; // out->data[4] get the value
731 out->data[5] = in->data[5] * scalar; // out->data[5] get the value
732 out->data[6] = in->data[6] * scalar; // out->data[6] get the value
733 out->data[7] = in->data[7] * scalar; // out->data[7] get the value
734 }
735
736 /*
737 * Field Multiplication
738 * Input:
739 * a[] < 2^64, b[] < 2^64
740 * Output:
741 * out[] < 2^67
742 * - out[0] < 2^64
743 * - out[1] < 2^64 * 3
744 * - out[2] < 2^64 * 5
745 * - out[3] < 2^64 * 7
746 * - out[4] < 2^64 * 7
747 * - out[5] < 2^64 * 5
748 * - out[6] < 2^64 * 3
749 * - out[7] < 2^64
750 */
FelemMul(LongFelem * out,const Felem * a,const Felem * b)751 static void FelemMul(LongFelem *out, const Felem *a, const Felem *b)
752 {
753 // out[0] = a[0]*b[0]
754 // out[1] = a[0]*b[1] + a[1]*b[0]
755 // out[2] = a[0]*b[2] + a[1]*b[1] + a[2]*b[0]
756 // out[3] = a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0]
757 // out[4] = a[1]*b[3] + a[2]*b[2] + a[3]*b[1]
758 // out[5] = a[2]*b[3] + a[3]*b[2]
759 // out[6] = a[3]*b[3]
760
761 const uint64_t a64[4] = {(uint64_t)a->data[0], (uint64_t)a->data[1], (uint64_t)a->data[2], (uint64_t)a->data[3]};
762 const uint64_t b64[4] = {(uint64_t)b->data[0], (uint64_t)b->data[1], (uint64_t)b->data[2], (uint64_t)b->data[3]};
763 uint128_t limbMul;
764
765 // out[0] = a[0]*b[0]
766 limbMul = (uint128_t)a64[0] * b64[0]; // a[0] * b[0]
767 out->data[0] = (uint64_t)limbMul; // Digit 0 plus the basic digit
768 out->data[1] = limbMul >> BASE_BITS; // Digit 1 plus the carry
769
770 // out[1] = a[0]*b[1] + a[1]*b[0]
771 limbMul = (uint128_t)a64[0] * b64[1]; // a[0] * b[1]
772 out->data[1] += (uint64_t)limbMul; // Digit 1 plus the basic digit
773 out->data[2] = limbMul >> BASE_BITS; // Digit 2 plus the carry
774
775 limbMul = (uint128_t)a64[1] * b64[0]; // a[1] * b[0]
776 out->data[1] += (uint64_t)limbMul; // Digit 1 plus the basic digit
777 out->data[2] += limbMul >> BASE_BITS; // Digit 2 plus the carry
778
779 // out[2] = a[0]*b[2] + a[1]*b[1] + a[2]*b[0]
780 limbMul = (uint128_t)a64[0] * b64[2]; // a[0] * b[2]
781 out->data[2] += (uint64_t)limbMul; // Digit 2 plus the basic digit
782 out->data[3] = limbMul >> BASE_BITS; // Digit 3 plus the carry
783
784 limbMul = (uint128_t)a64[1] * b64[1]; // a[1] * a[1]
785 out->data[2] += (uint64_t)limbMul; // Digit 2 plus the basic digit
786 out->data[3] += limbMul >> BASE_BITS; // Digit 3 plus the carry
787
788 limbMul = (uint128_t)a64[2] * b64[0]; // a[2] * b[0]
789 out->data[2] += (uint64_t)limbMul; // Digit 2 plus the basic digit
790 out->data[3] += limbMul >> BASE_BITS; // Digit 3 plus the carry
791
792 // out[3] = a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0]
793 limbMul = (uint128_t)a64[0] * b64[3]; // a[0] * b[3]
794 out->data[3] += (uint64_t)limbMul; // Digit 3 plus the basic digit
795 out->data[4] = limbMul >> BASE_BITS; // Digit 4 plus the carry
796
797 limbMul = (uint128_t)a64[1] * b64[2]; // a[1] * b[2]
798 out->data[3] += (uint64_t)limbMul; // Digit 3 plus the basic digit
799 out->data[4] += limbMul >> BASE_BITS; // Digit 4 plus the carry
800
801 limbMul = (uint128_t)a64[2] * b64[1]; // a[2] * b[1]
802 out->data[3] += (uint64_t)limbMul; // Digit 3 plus the basic digit
803 out->data[4] += limbMul >> BASE_BITS; // Digit 4 plus the carry
804
805 limbMul = (uint128_t)a64[3] * b64[0]; // a[3] * b[0]
806 out->data[3] += (uint64_t)limbMul; // Digit 3 plus the basic digit
807 out->data[4] += limbMul >> BASE_BITS; // Digit 4 plus the carry
808
809 // out[4] = a[1]*b[3] + a[2]*b[2] + a[3]*b[1]
810 limbMul = (uint128_t)a64[1] * b64[3]; // a[1] * b[3]
811 out->data[4] += (uint64_t)limbMul; // Digit 4 plus the basic digit
812 out->data[5] = limbMul >> BASE_BITS; // Digit 5 plus the carry
813
814 limbMul = (uint128_t)a64[2] * b64[2]; // a[2] * b[2]
815 out->data[4] += (uint64_t)limbMul; // Digit 4 plus the basic digit
816 out->data[5] += limbMul >> BASE_BITS; // Digit 5 plus the carry
817
818 limbMul = (uint128_t)a64[3] * b64[1]; // a[3] * b[1]
819 out->data[4] += (uint64_t)limbMul; // Digit 4 plus the basic digit
820 out->data[5] += limbMul >> BASE_BITS; // Digit 5 plus the carry
821
822 // out[5] = a[2]*b[3] + a[3]*b[2]
823 limbMul = (uint128_t)a64[2] * b64[3]; // a[2] * b[3]
824 out->data[5] += (uint64_t)limbMul; // Digit 5 plus the basic digit
825 out->data[6] = limbMul >> BASE_BITS; // Digit 6 plus the carry
826
827 limbMul = (uint128_t)a64[3] * b64[2]; // a[3] * b[2]
828 out->data[5] += (uint64_t)limbMul; // Digit 5 plus the basic digit
829 out->data[6] += limbMul >> BASE_BITS; // Digit 6 plus the carry
830
831 // out[6] = a[3]*b[3]
832 limbMul = (uint128_t)a64[3] * b64[3]; // a[3] * b[3]
833 out->data[6] += (uint64_t)limbMul; // Digit 6 plus the basic digit
834 out->data[7] = limbMul >> BASE_BITS; // Digit 7 plus the carry
835 }
836
837 /*
838 * Field square
839 * Input:
840 * a[] < 2^64
841 * Output:
842 * out[] < 2^67
843 * - out[0] < 2^64
844 * - out[1] < 2^64 * 2
845 * - out[2] < 2^64 * 4
846 * - out[3] < 2^64 * 5
847 * - out[4] < 2^64 * 6
848 * - out[5] < 2^64 * 4
849 * - out[6] < 2^64 * 3
850 * - out[7] < 2^64
851 */
FelemSqr(LongFelem * out,const Felem * a)852 static void FelemSqr(LongFelem *out, const Felem *a)
853 {
854 // out[0] = a[0]*a[0]
855 // out[1] = a[0]*a[1]*2
856 // out[2] = a[0]*a[2]*2 + a[1]*a[1]
857 // out[3] = a[0]*a[3]*2 + a[1]*a[2]*2
858 // out[4] = a[1]*a[3]*2 + a[2]*a[2]
859 // out[5] = a[2]*a[3]*2
860 // out[6] = a[3]*a[3]
861
862 const uint64_t a64[4] = {(uint64_t)a->data[0], (uint64_t)a->data[1], (uint64_t)a->data[2], (uint64_t)a->data[3]};
863 uint128_t limbMul;
864
865 // out[0] = a[0]*a[0]
866 limbMul = (uint128_t)a64[0] * a64[0]; // a[0] * a[0]
867 out->data[0] = (uint64_t)limbMul; // Digit 0 plus the basic digit
868 out->data[1] = limbMul >> BASE_BITS; // Digit 1 plus the carry
869
870 // out[1] = a[0]*a[1]*2
871 limbMul = (uint128_t)a64[0] * a64[1]; // a[0] * a[1]
872 out->data[1] += (uint64_t)limbMul << 1; // basic digit after the product is left shifted by 1bit, add to digit 1
873 out->data[2] = limbMul >> (BASE_BITS - 1); // carry after product shift, add to digit 2
874
875 // out[2] = a[0]*a[2]*2 + a[1]*a[1]
876 limbMul = (uint128_t)a64[0] * a64[2]; // a[0] * a[2]
877 out->data[2] += (uint64_t)limbMul << 1; // basic digit after the product is left shifted by 1bit, add to digit 2
878 out->data[3] = limbMul >> (BASE_BITS - 1); // carry after product shift, add to digit 3
879
880 limbMul = (uint128_t)a64[1] * a64[1]; // a[1] * a[1]
881 out->data[2] += (uint64_t)limbMul; // Digit 2 plus the basic digit
882 out->data[3] += limbMul >> BASE_BITS; // Digit 3 plus the carry
883
884 // out[3] = a[0]*a[3]*2 + a[1]*a[2]*2
885 limbMul = (uint128_t)a64[0] * a64[3]; // a[0] * a[3]
886 out->data[3] += (uint64_t)limbMul << 1; // basic digit after the product is left shifted by 1bit, add to digit 3
887 out->data[4] = limbMul >> (BASE_BITS - 1); // carry after product shift, add to digit 4
888
889 limbMul = (uint128_t)a64[1] * a64[2]; // a[1] * a[2]
890 out->data[3] += (uint64_t)limbMul << 1; // basic digit after the product is left shifted by 1bit, add to digit 3
891 out->data[4] += limbMul >> (BASE_BITS - 1); // carry after product shift, add to digit 4
892
893 // out[4] = a[1]*a[3]*2 + a[2]*a[2]
894 limbMul = (uint128_t)a64[1] * a64[3]; // a[1] * a[3]
895 out->data[4] += (uint64_t)limbMul << 1; // basic digit after the product is left shifted by 1bit, add to digit 4
896 out->data[5] = limbMul >> (BASE_BITS - 1); // carry after product shift, add to digit 5
897
898 limbMul = (uint128_t)a64[2] * a64[2]; // a[2] * a[2]
899 out->data[4] += (uint64_t)limbMul; // Digit 4 plus the basic digit
900 out->data[5] += limbMul >> BASE_BITS; // Digit 5 plus the carry
901
902 // out[5] = a[2]*a[3]*2
903 limbMul = (uint128_t)a64[2] * a64[3]; // a[2] * a[3]
904 out->data[5] += (uint64_t)limbMul << 1; // basic digit after the product is left shifted by 1bit, add to digit 5
905 out->data[6] = limbMul >> (BASE_BITS - 1); // carry after product shift, add to digit 6
906
907 // out[6] = a[3]*a[3]
908 limbMul = (uint128_t)a64[3] * a64[3]; // a[3] * a[3]
909 out->data[6] += (uint64_t)limbMul; // Digit 6 plus the basic digit
910 out->data[7] = limbMul >> BASE_BITS; // Digit 7 plus the carry
911 }
912
FelemMulReduce(Felem * out,const Felem * a,const Felem * b)913 static inline void FelemMulReduce(Felem *out, const Felem *a, const Felem *b)
914 {
915 LongFelem ltmp;
916 FelemMul(<mp, a, b);
917 LongFelemReduce(out, <mp);
918 }
919
FelemSqrReduce(Felem * out,const Felem * in)920 static inline void FelemSqrReduce(Felem *out, const Felem *in)
921 {
922 LongFelem ltmp;
923 FelemSqr(<mp, in);
924 LongFelemReduce(out, <mp);
925 }
926
FelemMulReduceToBase(Felem * out,const Felem * a,const Felem * b)927 static inline void FelemMulReduceToBase(Felem *out, const Felem *a, const Felem *b)
928 {
929 LongFelem ltmp;
930 FelemMul(<mp, a, b);
931 LongFelemReduce(out, <mp);
932 FelemReduce(out, out);
933 }
934
FelemSqrReduceToBase(Felem * out,const Felem * in)935 static inline void FelemSqrReduceToBase(Felem *out, const Felem *in)
936 {
937 LongFelem ltmp;
938 FelemSqr(<mp, in);
939 LongFelemReduce(out, <mp);
940 FelemReduce(out, out);
941 }
942
943 /*
944 * Field element inversion
945 * From Fermat's little theorem, in^(p - 2) = in^(-1) (mod p)
946 * in^(-1) = in^(2^256 - 2^224 + 2^192 + 2^96 - 1 - 2) (mod p)
947 * Input:
948 * in[] < 2^64
949 * Output:
950 * out[] < 2^64
951 */
FelemInv(Felem * out,const Felem * in)952 static void FelemInv(Felem *out, const Felem *in)
953 {
954 Felem inE3, inEf, inEff, inEffff, inEffffffff, inEfx16Lsh32, inEfffffffd;
955
956 // Construct in^(p - 2) by moving left and adding.
957 // The value of p - 2 is as follows:
958 // ffffffff 00000001
959 // 00000000 00000000
960 // 00000000 ffffffff
961 // ffffffff fffffffd
962
963 // Construct the {1, 3, f, ff, ffff, ffffffff} power of in by left shift and addition.
964 // Construct in^1
965 FelemAssign(out, in); // in^1
966 // Construct in^3
967 FelemSqrReduceToBase(out, out); // in^2
968 FelemMulReduceToBase(out, out, in); // in^3
969 FelemAssign(&inE3, out);
970 // Construct in^f
971 FelemSqrReduceToBase(out, out); // in^6
972 FelemSqrReduceToBase(out, out); // in^c
973 FelemMulReduceToBase(&inEfffffffd, out, in); // inEfffffffd = in^d
974 FelemMulReduceToBase(out, out, &inE3); // in^f
975 FelemAssign(&inEf, out);
976 // Construct in^ff
977 FelemSqrReduceToBase(out, out); // in^1e
978 FelemSqrReduceToBase(out, out); // in^3c
979 FelemSqrReduceToBase(out, out); // in^78
980 FelemSqrReduceToBase(out, out); // in^f0
981 FelemMulReduceToBase(&inEfffffffd, out, &inEfffffffd); // inEfffffffd = in^fd
982 FelemMulReduceToBase(out, out, &inEf); // in^ff
983 FelemAssign(&inEff, out);
984 // Construct in^ffff and shift ff to the left by 8 bits to obtain ff00
985 for (int32_t i = 0; i < 8; ++i) {
986 FelemSqrReduceToBase(out, out);
987 } // in^ff00
988 FelemMulReduceToBase(&inEfffffffd, out, &inEfffffffd); // inEfffffffd = in^fffd
989 FelemMulReduceToBase(out, out, &inEff); // in^ffff
990 FelemAssign(&inEffff, out);
991 // Construct in^ffffffff and shift ffff to the left by 16 bits to obtain ffff0000
992 for (int32_t i = 0; i < 16; ++i) {
993 FelemSqrReduceToBase(out, out);
994 } // in^ffff0000
995 FelemMulReduceToBase(&inEfffffffd, out, &inEfffffffd); // inEfffffffd = in^fffffffd
996 FelemMulReduceToBase(out, out, &inEffff); // in^ffffffff
997 FelemAssign(&inEffffffff, out);
998
999 // Construct in^ffffffff ffffffff 00000000
1000 // Obtain in^ffffffff 00000000 and shift ffffffff to the left by 32 bits.
1001 for (int32_t i = 0; i < 32; ++i) {
1002 FelemSqrReduceToBase(out, out);
1003 } // in^ffffffff 00000000
1004 FelemAssign(&inEfx16Lsh32, out);
1005 // Then obtain ffffffff 00000000 00000000 and shift it leftwards by 32 bits.
1006 for (int32_t i = 0; i < 32; ++i) {
1007 FelemSqrReduceToBase(out, out);
1008 } // out = in^ffffffff 00000000 00000000
1009 FelemMulReduceToBase(&inEfx16Lsh32, out, &inEfx16Lsh32); // inEfx16Lsh32 = in^ffffffff ffffffff 00000000
1010
1011 // Construct in^ffffffff 00000001 00000000
1012 FelemMulReduceToBase(out, out, &inEffffffff); // in^ffffffff 00000000 ffffffff
1013 FelemMulReduceToBase(out, out, in); // in^ffffffff 00000001 00000000
1014
1015 // Shift leftward by 160 bits to the top.
1016 for (int32_t i = 0; i < 160; ++i) {
1017 FelemSqrReduceToBase(out, out);
1018 } // in^ffffffff 00000001 00000000 00000000 00000000 00000000 00000000 00000000
1019
1020 // Construct in^ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff 00000000
1021 FelemMulReduceToBase(out, out, &inEfx16Lsh32);
1022 // Construct in^ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff fffffffd
1023 FelemMulReduceToBase(out, out, &inEfffffffd);
1024 }
1025
1026 /* --------------------------Point group operation-------------------------- */
PtAssign(Point * out,const Point * in)1027 static inline void PtAssign(Point *out, const Point *in)
1028 {
1029 FelemAssign(&out->x, &in->x);
1030 FelemAssign(&out->y, &in->y);
1031 FelemAssign(&out->z, &in->z);
1032 }
1033
PtAssignWithMask(Point * out,const Point * in,const uint128_t mask)1034 static inline void PtAssignWithMask(Point *out, const Point *in, const uint128_t mask)
1035 {
1036 FelemAssignWithMask(&out->x, &in->x, mask);
1037 FelemAssignWithMask(&out->y, &in->y, mask);
1038 FelemAssignWithMask(&out->z, &in->z, mask);
1039 }
1040
1041 /*
1042 * point double
1043 * Algorithm reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1044 * Number of field operations: 3M + 5S
1045 * delta = Z^2
1046 * gamma = Y^2
1047 * beta = X * gamma
1048 * alpha = 3 * (X - delta) * (X + delta)
1049 * X' = alpha^2 - 8 * beta
1050 * Z' = (Y + Z)^2 - gamma - delta
1051 * Y' = alpha * (4 * beta - X') - 8 * gamma^2
1052 * Input:
1053 * in->x[], in->y[], in->z[] < 2^64
1054 * Output:
1055 * out->x[], out->y[], out->z[] < 2^64
1056 */
PtDouble(Point * out,const Point * in)1057 static void PtDouble(Point *out, const Point *in)
1058 {
1059 Felem delta, gamma, beta, alpha;
1060 Felem tmp, tmp2;
1061 LongFelem ltmp, ltmp2;
1062
1063 // delta = Z^2
1064 FelemSqrReduce(&delta, &in->z); // delta[] < 2^115
1065
1066 // gamma = Y^2
1067 FelemSqrReduceToBase(&gamma, &in->y);
1068
1069 // beta = X * gamma
1070 FelemMulReduce(&beta, &in->x, &gamma); // beta[] < 2^115
1071
1072 // alpha = 3 * (X - delta) * (X + delta)
1073 FelemAdd(&tmp, &in->x, &delta); // tmp[] < 2^64 + 2^115
1074 FelemScale(&tmp, &tmp, 3); // 3 * (X + delta), tmp[] < (2^64 + 2^115) * 3 < 2^117
1075 FelemSub(&tmp2, &in->x, &delta); // tmp2[] < (2^124 + 2^92 - 2^60) + 2^64 < 2^125
1076 FelemReduce(&tmp, &tmp);
1077 FelemReduce(&tmp2, &tmp2);
1078 FelemMulReduceToBase(&alpha, &tmp, &tmp2);
1079
1080 // X' = alpha^2 - 8 * beta
1081 FelemSqrReduce(&tmp, &alpha); // alpha^2, tmp[] < 2^115
1082 FelemScale(&tmp2, &beta, 8); // 8 * beta, tmp2[] < 2^115 * 8 = 2^119
1083 FelemSub(&out->x, &tmp, &tmp2); // out->x[] < (2^124 + 2^92 - 2^60) + 2^115 < 2^125
1084 FelemReduce(&out->x, &out->x); // out->x[] < 2^64
1085
1086 // Z' = (Y + Z)^2 - gamma - delta
1087 FelemAdd(&tmp, &in->y, &in->z);
1088 FelemReduce(&tmp, &tmp);
1089 FelemSqrReduce(&tmp, &tmp); // (Y + Z)^2, tmp[] < 2^115
1090 FelemAdd(&tmp2, &gamma, &delta); // (gamma + delta), tmp2[] < 2^64 + 2^115 < 2^116
1091 FelemSub(&out->z, &tmp, &tmp2); // out->z[] < (2^124 + 2^92 - 2^60) + 2^115 < 2^125
1092 FelemReduce(&out->z, &out->z); // out->z[] < 2^64
1093
1094 // Y' = alpha * (4 * beta - X') - 8 * gamma^2
1095 FelemScale(&beta, &beta, 4); // beta[] < 2^115 * 4 = 2^117
1096 FelemSub(&tmp, &beta, &out->x);
1097 FelemReduce(&tmp, &tmp);
1098 FelemMul(<mp, &alpha, &tmp); // alpha * (4 * beta - X'), ltmp[] < 2^67
1099 FelemSqr(<mp2, &gamma);
1100 LongFelemScale(<mp2, <mp2, 8); // 8 * gamma^2, ltmp2[] < 2^67 * 8 < 2^70
1101 LongFelemSub(<mp, <mp, <mp2); // ltmp[] < (2^74 + 2^42 - 2^10) + 2^67 < 2^75
1102 LongFelemReduce(&out->y, <mp); // out->y[] < 2^115
1103 FelemReduce(&out->y, &out->y); // out->y[] < 2^64
1104 }
1105
1106 /*
1107 * point addition
1108 * Algorithm reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
1109 * Infinity point calculation is not supported.
1110 * Number of field operations: 11M + 5S
1111 * Z1Z1 = Z1^2
1112 * Z2Z2 = Z2^2
1113 * U1 = X1 * Z2Z2
1114 * U2 = X2 * Z1Z1
1115 * S1 = Y1 * Z2 * Z2Z2
1116 * S2 = Y2 * Z1 * Z1Z1
1117 * H = U2 - U1
1118 * I = (2 * H)^2
1119 * J = H * I
1120 * r = 2 * (S2 - S1)
1121 * V = U1 * I
1122 * X3 = r^2 - J - 2 * V
1123 * Y3 = r * (V - X3) - 2 * S1 * J
1124 * Z3 = ((Z1 + Z2)^2 - Z1Z1 - Z2Z2) * H
1125 * Input:
1126 * a->x[], a->y[], a->z[] < 2^64
1127 * b->x[], b->y[], b->z[] < 2^64
1128 * Output:
1129 * out->x[], out->y[], out->z[] < 2^64
1130 */
PtAdd(Point * out,const Point * a,const Point * b)1131 static void PtAdd(Point *out, const Point *a, const Point *b)
1132 {
1133 Point result;
1134 Felem z1sqr, z2sqr, u1, u2, s1, s2, h, i, j, r, v;
1135 Felem tmp;
1136 LongFelem ltmp, ltmp2;
1137 uint128_t isZ1Zero, isZ2Zero;
1138
1139 // Z1Z1 = Z1^2
1140 FelemSqrReduceToBase(&z1sqr, &a->z);
1141
1142 // Z2Z2 = Z2^2
1143 FelemSqrReduceToBase(&z2sqr, &b->z);
1144
1145 isZ1Zero = 0 - FelemIsZero(&z1sqr);
1146 isZ2Zero = 0 - FelemIsZero(&z2sqr);
1147
1148 // U1 = X1 * Z2Z2
1149 FelemMulReduceToBase(&u1, &a->x, &z2sqr);
1150
1151 // U2 = X2 * Z1Z1
1152 FelemMulReduce(&u2, &b->x, &z1sqr); // u2[] < 2^115
1153
1154 // S1 = Y1 * Z2 * Z2Z2
1155 FelemMulReduceToBase(&s1, &b->z, &z2sqr);
1156 FelemMulReduceToBase(&s1, &a->y, &s1);
1157
1158 // S2 = Y2 * Z1 * Z1Z1
1159 FelemMulReduceToBase(&s2, &a->z, &z1sqr);
1160 FelemMulReduce(&s2, &b->y, &s2); // s2[] < 2^115
1161
1162 // H = U2 - U1
1163 FelemSub(&h, &u2, &u1); // h[] < (2^124 + 2^92 - 2^60) + 2^115 < 2^125
1164 FelemReduce(&h, &h);
1165
1166 // r = 2 * (S2 - S1)
1167 FelemSub(&r, &s2, &s1); // r[] < (2^124 + 2^92 - 2^60) + 2^115 < 2^125
1168 FelemScale(&r, &r, 2); // r[] < 2^126
1169 FelemReduce(&r, &r);
1170
1171 // H and r can determine whether x and y of the affine coordinates of two points are equal.
1172 // If the values are equal, use double().
1173 if (isZ1Zero == 0 && isZ2Zero == 0 && FelemIsZero(&h) != 0 && FelemIsZero(&r) != 0) {
1174 // Use a smaller b point
1175 PtDouble(out, b);
1176 return;
1177 }
1178
1179 // I = (2 * H)^2
1180 FelemSqrReduce(&i, &h); // H^2, i[] < 2^115
1181 FelemScale(&i, &i, 4); // 4 * H^2, i[] < 2^117
1182 FelemReduce(&i, &i);
1183
1184 // J = H * I
1185 FelemMulReduceToBase(&j, &h, &i);
1186
1187 // V = U1 * I
1188 FelemMulReduce(&v, &u1, &i); // v[] < 2^115
1189
1190 // X3 = r^2 - (J + 2 * V)
1191 FelemSqrReduce(&result.x, &r); // result.x[] < 2^115
1192 FelemScale(&tmp, &v, 2); // tmp[] < 2^115 * 2 = 2^116
1193 FelemAdd(&tmp, &j, &tmp); // tmp[] < 2^64 + 2^116 < 2^117
1194 FelemSub(&result.x, &result.x, &tmp); // result.x[] < (2^124 + 2^90 - 2^60) + 2^115 < 2^125
1195 FelemReduce(&result.x, &result.x); // result.x[] < 2^64
1196
1197 // Y3 = r * (V - X3) - 2 * S1 * J
1198 FelemSub(&tmp, &v, &result.x);
1199 FelemReduce(&tmp, &tmp);
1200 FelemMul(<mp, &r, &tmp); // r * (V - X3), ltmp[] < 2^67
1201 FelemMul(<mp2, &s1, &j); // ltmp2[] < 2^67
1202 LongFelemScale(<mp2, <mp2, 2); // 2 * S1 * J, ltmp2[] < 2^68
1203 LongFelemSub(<mp, <mp, <mp2); // ltmp[] < (2^74 + 2^42 - 2^10) + 2^67 < 2^75
1204 LongFelemReduce(&result.y, <mp); // result.y[] < 2^115
1205 FelemReduce(&result.y, &result.y); // result.y[] < 2^64
1206
1207 // Z3 = ((Z1 + Z2)^2 - Z1Z1 - Z2Z2) * H
1208 FelemAdd(&result.z, &a->z, &b->z);
1209 FelemReduce(&result.z, &result.z);
1210 FelemSqrReduce(&result.z, &result.z); // (Z1 + Z2)^2
1211 FelemAdd(&tmp, &z1sqr, &z2sqr); // Z1Z1 + Z2Z2
1212 FelemSub(&result.z, &result.z, &tmp); // ((Z1 + Z2)^2 - Z1Z1 - Z2Z2)
1213 FelemReduce(&result.z, &result.z);
1214 FelemMulReduceToBase(&result.z, &result.z, &h); // result.z[] < 2^64
1215
1216 // Special case processing for infinity points
1217 PtAssignWithMask(&result, a, isZ2Zero);
1218 PtAssignWithMask(&result, b, isZ1Zero);
1219 PtAssign(out, &result);
1220 }
1221
1222 /*
1223 * Mixed point addition
1224 * Algorithm reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-madd-2007-bl
1225 * Infinity point calculation is not supported.
1226 * Number of field operations: 7M + 4S
1227 * Z1Z1 = Z1^2
1228 * U2 = X2 * Z1Z1
1229 * S2 = Y2 * Z1 * Z1Z1
1230 * H = U2 - X1
1231 * HH = H^2
1232 * I = 4 * HH
1233 * J = H * I
1234 * r = 2 * (S2 - Y1)
1235 * V = X1 * I
1236 * X3 = r^2 - J - 2 * V
1237 * Y3 = r * (V - X3) - 2 * Y1 * J
1238 * Z3 = (Z1 + H)^2 - Z1Z1 - HH
1239 * Input:
1240 * a->x[] < 2^64, a->y[] < 2^64, a->z[] < 2^64
1241 * b->x < p, b->y < p, b->z = 0 或 1
1242 * Output:
1243 * out->x[] < 2^64
1244 * out->y[] < 2^64
1245 * out->z[] < 2^64
1246 */
PtAddMixed(Point * out,const Point * a,const Point * b)1247 static void PtAddMixed(Point *out, const Point *a, const Point *b)
1248 {
1249 Point result;
1250 Felem z1sqr, u2, s2, h, hsqr, i, j, r, v;
1251 Felem tmp;
1252 LongFelem ltmp, ltmp2;
1253 uint128_t isZ1Zero, isZ2Zero;
1254
1255 // Z1Z1 = Z1^2
1256 FelemSqrReduceToBase(&z1sqr, &a->z);
1257
1258 isZ1Zero = 0 - FelemIsZero(&z1sqr);
1259 // The Z coordinate of point b can only be 0 or 1, that is, digit 0 can only be 0 or 1, and the other digits are 0.
1260 isZ2Zero = b->z.data[0] - 1;
1261
1262 // U2 = X2 * Z1Z1
1263 FelemMulReduce(&u2, &b->x, &z1sqr); // u2[] < 2^115
1264
1265 // S2 = Y2 * Z1 * Z1Z1
1266 FelemMulReduceToBase(&s2, &a->z, &z1sqr);
1267 FelemMulReduce(&s2, &b->y, &s2); // s2[] < 2^115
1268
1269 // H = U2 - X1
1270 FelemSub(&h, &u2, &a->x); // h[] < (2^124 + 2^92 - 2^60) + 2^115 < 2^125
1271 FelemReduce(&h, &h);
1272
1273 // r = 2 * (S2 - Y1)
1274 FelemSub(&r, &s2, &a->y); // r[] < (2^124 + 2^92 - 2^60) + 2^115 < 2^125
1275 FelemScale(&r, &r, 2); // r[] < 2^126
1276 FelemReduce(&r, &r);
1277
1278 // H and r can determine whether x and y of the affine coordinates of two points are equal.
1279 // If the values are equal, use double().
1280 if (isZ1Zero == 0 && isZ2Zero == 0 && FelemIsZero(&h) != 0 && FelemIsZero(&r) != 0) {
1281 // Use a smaller b point
1282 PtDouble(out, b);
1283 return;
1284 }
1285
1286 // HH = H^2
1287 FelemSqrReduce(&hsqr, &h); // hsqr[] < 2^115
1288
1289 // I = 4 * HH
1290 FelemScale(&i, &hsqr, 4); // i[] < 2^117
1291 FelemReduce(&i, &i);
1292
1293 // J = H * I
1294 FelemMulReduceToBase(&j, &h, &i);
1295
1296 // V = X1 * I
1297 FelemMulReduce(&v, &a->x, &i); // v[] < 2^115
1298
1299 // X3 = r^2 - J - 2 * V
1300 FelemSqrReduce(&result.x, &r);
1301 FelemScale(&tmp, &v, 2); // tmp[] < 2^116
1302 FelemAdd(&tmp, &j, &tmp); // tmp[] < 2^64 + 2^116 < 2^117
1303 FelemSub(&result.x, &result.x, &tmp); // result.x[] < (2^124 + 2^92 - 2^60) + 2^115 < 2^125
1304 FelemReduce(&result.x, &result.x); // result.x[] < 2^64
1305
1306 // Y3 = r * (V - X3) - 2 * Y1 * J
1307 FelemSub(&tmp, &v, &result.x); // tmp[] < (2^124 + 2^92 - 2^60) + 2^115 < 2^125
1308 FelemReduce(&tmp, &tmp);
1309 FelemMul(<mp, &r, &tmp); // ltmp[] < 2^67
1310 FelemMul(<mp2, &a->y, &j); // ltmp2[] < 2^67
1311 LongFelemScale(<mp2, <mp2, 2); // ltmp2[] < 2^68
1312 LongFelemSub(<mp, <mp, <mp2); // ltmp[] < (2^74 + 2^42 - 2^10) + 2^67 < 2^75
1313 LongFelemReduce(&result.y, <mp); // result.y[] < 2^115
1314 FelemReduce(&result.y, &result.y); // result.y[] < 2^64
1315
1316 // Z3 = (Z1 + H)^2 - Z1Z1 - HH
1317 FelemAdd(&result.z, &a->z, &h); // result.z[] < 2^64 + 2^64 = 2^65
1318 FelemReduce(&result.z, &result.z);
1319 FelemSqrReduce(&result.z, &result.z); // result.z[] < 2^115
1320 FelemAdd(&tmp, &z1sqr, &hsqr); // tmp[] < 2^64 + 2^115 < 2^116
1321 FelemSub(&result.z, &result.z, &tmp); // result.z[] < (2^124 + 2^92 - 2^60) + 2^115 < 2^125
1322 FelemReduce(&result.z, &result.z); // result.z[] < 2^64
1323
1324 // Special case processing for infinity points
1325 PtAssignWithMask(&result, a, isZ2Zero);
1326 PtAssignWithMask(&result, b, isZ1Zero);
1327 PtAssign(out, &result);
1328 }
1329
1330 /* Select the point with subscript index in the table and place it in the point. Anti-side channel process is exists. */
GetPointFromTable(Point * point,const Point table[],uint32_t pointNum,const uint32_t index)1331 static inline void GetPointFromTable(Point *point, const Point table[], uint32_t pointNum, const uint32_t index)
1332 {
1333 uint128_t mask;
1334 for (uint32_t i = 0; i < pointNum; i++) {
1335 /* If i is equal to index, the last mask is all Fs. Otherwise, the last mask is all 0s. */
1336 mask = (0 - (i ^ index)) >> 31; // shifted rightwards by 31 bits and obtain the most significant bit.
1337 mask--;
1338 /* Conditional value assignment, valid only when i == index */
1339 PtAssignWithMask(point, &table[i], mask);
1340 }
1341 }
1342
1343 /*
1344 * Input:
1345 * k1 < n
1346 * 0 <= i < 32
1347 * Output:
1348 * out->x < p, out->y < p, out->z = 0 或 1
1349 */
GetLowerPrecomputePtOfG(Point * out,const Felem * k1,int32_t curBit)1350 static inline void GetLowerPrecomputePtOfG(Point *out, const Felem *k1, int32_t curBit)
1351 {
1352 uint32_t bits;
1353 uint32_t i = (uint32_t)curBit;
1354
1355 bits = (uint32_t)(k1->data[0] >> i) & 1; // i-bit of the lower half of the digit 0
1356 bits |= ((uint32_t)(k1->data[1] >> i) & 1) << 1; // i-bit of the lower half of the digit 1
1357 bits |= ((uint32_t)(k1->data[2] >> i) & 1) << 2; // i-bit of the lower half of the digit 2
1358 bits |= ((uint32_t)(k1->data[3] >> i) & 1) << 3; // i-bit of the lower half of the digit 3
1359
1360 GetPointFromTable(out, PRE_MUL_G, TABLE_G_SIZE, bits);
1361 }
1362
1363 /*
1364 * Input:
1365 * k1 < n
1366 * 0 <= i < 32
1367 * Output:
1368 * out->x < p, out->y < p, out->z = 0 或 1
1369 */
GetUpperPrecomputePtOfG(Point * out,const Felem * k1,int32_t curBit)1370 static inline void GetUpperPrecomputePtOfG(Point *out, const Felem *k1, int32_t curBit)
1371 {
1372 uint32_t bits;
1373 uint32_t i = (uint32_t)curBit;
1374
1375 // i-bit of the upper half of the digit 0. (BASE_BITS/2) is the half width.
1376 bits = (uint32_t)(k1->data[0] >> (i + BASE_BITS / 2)) & 1;
1377 // i-bit of the upper half of the digit 1. (BASE_BITS/2) is the half width.
1378 bits |= ((uint32_t)(k1->data[1] >> (i + BASE_BITS / 2)) & 1) << 1;
1379 // i-bit of the upper half of the digit 2. (BASE_BITS/2) is the half width.
1380 bits |= ((uint32_t)(k1->data[2] >> (i + BASE_BITS / 2)) & 1) << 2;
1381 // i-bit of the upper half of the digit 3. (BASE_BITS/2) is the half width.
1382 bits |= ((uint32_t)(k1->data[3] >> (i + BASE_BITS / 2)) & 1) << 3;
1383
1384 GetPointFromTable(out, PRE_MUL_G2, TABLE_G_SIZE, bits);
1385 }
1386
1387 /*
1388 * Input:
1389 * k2 < n
1390 * 0 <= i <= 255
1391 * The coordinates of each point of preMulPt are reduced.
1392 * Output:
1393 * out->x[] < 2^64, out->y[] < 2^64, out->z[] < 2^64
1394 */
GetPrecomputePtOfP(Point * out,const Felem * k2,int32_t curBit,const Point preMulPt[TABLE_P_SIZE])1395 static inline void GetPrecomputePtOfP(Point *out, const Felem *k2, int32_t curBit, const Point preMulPt[TABLE_P_SIZE])
1396 {
1397 uint32_t bits;
1398 uint32_t sign, value; // Indicates the grouping sign and actual value.
1399 Felem negY;
1400 // Obtain the 5-bit signed code. Read the sign bits of the next group of numbers
1401 // to determine whether there is a carry. The total length is 6.
1402 bits = (uint32_t)FelemGetBits(k2, curBit - 1, WINDOW_SIZE + 1);
1403 DecodeScalarCode(&sign, &value, bits);
1404
1405 GetPointFromTable(out, preMulPt, TABLE_P_SIZE, value);
1406
1407 FelemNeg(&negY, &out->y);
1408 FelemReduce(&negY, &negY);
1409 FelemAssignWithMask(&out->y, &negY, (uint128_t)0 - sign);
1410 }
1411
1412 /*
1413 * Calculate k1 * G + k2 * P
1414 * Input:
1415 * k1 < n
1416 * k2 < n
1417 * The coordinates of each point of preMulPt are reduced.
1418 * Output:
1419 * out->x < p, out->y < p, out->z < p
1420 */
PtMul(Point * out,const Felem * k1,const Felem * k2,const Point preMulPt[TABLE_P_SIZE])1421 static void PtMul(Point *out, const Felem *k1, const Felem *k2, const Point preMulPt[TABLE_P_SIZE])
1422 {
1423 Point ptQ = {}; // ptQ stores the result
1424 Point ptPre = {}; // ptPre stores the points obtained from the table.
1425 bool isGMul = k1 != NULL;
1426 bool isPtMul = k2 != NULL && preMulPt != NULL;
1427 int32_t curBit;
1428
1429 // Initialize the Q point.
1430 if (isPtMul) {
1431 curBit = 255; // Start from 255th bit.
1432 // Select the initial point from bit coding (_, _, _, _, 255, 254) of k2
1433 GetPrecomputePtOfP(&ptQ, k2, curBit, preMulPt);
1434 } else if (isGMul) {
1435 curBit = 31; // Start from 31.
1436 // Select the initial point from bit coding (223, 159, 95, 31) of k1
1437 GetLowerPrecomputePtOfG(&ptQ, k1, curBit);
1438 // Select a precomputation point from the (223 + 32, 159 + 32, 95 + 32, 31 + 32) bit of k1
1439 // and add the precomputation point to the point Q
1440 GetUpperPrecomputePtOfG(&ptPre, k1, curBit);
1441 PtAddMixed(&ptQ, &ptQ, &ptPre);
1442 } else {
1443 // k1 and k2 are NULL, output the infinite point.
1444 (void)memset_s((void *)out, sizeof(Point), 0, sizeof(Point));
1445 return;
1446 }
1447
1448 // Operation chain: Q point output range:
1449 // x[] y[] z[]
1450 // Init value < 2^64 < 2^64 < 2^64
1451 // ↓
1452 // double ←↑ < 2^64 < 2^64 < 2^64
1453 // ↓ ↑
1454 // mixed add ↑ < 2^64 < 2^64 < 2^64
1455 // ↓ ↑
1456 // mixed add →↑ < 2^64 < 2^64 < 2^64
1457 // ↓ ↑
1458 // Y negation & reduction ↑ < 2^64 < 2^64 < 2^64
1459 // ↓ ↑
1460 // add →↑ < 2^64 < 2^64 < 2^64
1461
1462 while (--curBit >= 0) {
1463 // Start to shift right bit by bit. Because the most significant bit is initialized,
1464 // common point multiplication starts from 254th bit and base point multiplication starts from 30th bit.
1465 // Whether G-point multiplication is performed in the current cycle.
1466 // It is calculated once in each cycle starting from bit 31.
1467 bool isStepGMul = curBit <= 31;
1468 // Whether the current cycle is a common point multiplication, calculated once every 5 cycles
1469 bool isStepPtMul = curBit % WINDOW_SIZE == 0;
1470
1471 PtDouble(&ptQ, &ptQ);
1472
1473 // Generator G-point multiplication part.
1474 // Divide k1 into 8 segments, from high bits to low bits, select bits from each segment
1475 // and combine them together, then read the pre-computation table.
1476 // Specially, to shrink the precomputation table, the divided 8 segments are combined
1477 // according to the upper half and the lower half.
1478 if (isGMul && isStepGMul) {
1479 // Add the point multiplication result of the current bit of the eight-segment packet to the point Q
1480 GetLowerPrecomputePtOfG(&ptPre, k1, curBit);
1481 PtAddMixed(&ptQ, &ptQ, &ptPre);
1482
1483 GetUpperPrecomputePtOfG(&ptPre, k1, curBit);
1484 PtAddMixed(&ptQ, &ptQ, &ptPre);
1485 }
1486
1487 // Common point multiplication part.
1488 // Use the sliding window signed encoding method
1489 // to group the most significant bits to the least significant bits every five bits.
1490 // Each group of numbers is regarded as a signed number. 00000 to 01111 are decimal numbers 0 to 15,
1491 // and 10000 to 11111 are decimal numbers (32-16) to (32-1).
1492 // This is equivalent to the set of numbers in the complement form after carry 1.
1493 // for example:
1494 // 11011(complement) = 100000 - 00101
1495 if (isPtMul && isStepPtMul) {
1496 // Add the point multiplication result of the current group to point Q.
1497 GetPrecomputePtOfP(&ptPre, k2, curBit, preMulPt);
1498 PtAdd(&ptQ, &ptQ, &ptPre);
1499 }
1500 }
1501
1502 // Output the modulo operation.
1503 FelemContract(&ptQ.x, &ptQ.x);
1504 FelemContract(&ptQ.y, &ptQ.y);
1505 FelemContract(&ptQ.z, &ptQ.z);
1506
1507 PtAssign(out, &ptQ);
1508 }
1509
1510 /*
1511 * Convert Jacobian coordinates to affine coordinates by a given module inverse
1512 * Input:
1513 * in->x[] < 2^64, in->y[] < 2^64
1514 * zInv < 2^64
1515 * Output:
1516 * out->x < p, out->y < p, out->z = 1
1517 */
PtMakeAffineWithInv(Point * out,const Point * in,const Felem * zInv)1518 static void PtMakeAffineWithInv(Point *out, const Point *in, const Felem *zInv)
1519 {
1520 Felem tmp;
1521
1522 // 1/Z^2
1523 FelemSqrReduceToBase(&tmp, zInv);
1524 // X/Z^2
1525 FelemMulReduceToBase(&out->x, &in->x, &tmp);
1526 FelemContract(&out->x, &out->x);
1527
1528 // 1/Z^3
1529 FelemMulReduceToBase(&tmp, &tmp, zInv);
1530 // Y/Z^3
1531 FelemMulReduceToBase(&out->y, &in->y, &tmp);
1532 FelemContract(&out->y, &out->y);
1533
1534 FelemSetLimb(&out->z, 1);
1535 }
1536
1537 /* --------------------------other functions-------------------------- */
1538 /*
1539 * Obtain the pre-multiplication table of the input point pt, including 0pt-16pt. All points are reduced.
1540 */
GetPreMulPt(Point preMulPt[TABLE_P_SIZE],const ECC_Point * pt)1541 static int32_t GetPreMulPt(Point preMulPt[TABLE_P_SIZE], const ECC_Point *pt)
1542 {
1543 int32_t ret;
1544
1545 // 0pt
1546 (void)memset_s((void *)&preMulPt[0], sizeof(Point), 0, sizeof(Point));
1547 // 1pt
1548 GOTO_ERR_IF_EX(BN2Felem(&preMulPt[1].x, pt->x), ret);
1549 GOTO_ERR_IF_EX(BN2Felem(&preMulPt[1].y, pt->y), ret);
1550 GOTO_ERR_IF_EX(BN2Felem(&preMulPt[1].z, pt->z), ret);
1551 // 2pt ~ 15pt
1552 for (uint32_t i = 2; i < 15; i += 2) {
1553 PtDouble(&preMulPt[i], &preMulPt[i >> 1]);
1554 PtAdd(&preMulPt[i + 1], &preMulPt[i], &preMulPt[1]);
1555 }
1556 // 16pt
1557 PtDouble(&preMulPt[16], &preMulPt[16 >> 1]);
1558 ERR:
1559 return ret;
1560 }
1561
ECP256_PointMulAdd(ECC_Para * para,ECC_Point * r,const BN_BigNum * k1,const BN_BigNum * k2,const ECC_Point * pt)1562 int32_t ECP256_PointMulAdd(
1563 ECC_Para *para, ECC_Point *r, const BN_BigNum *k1, const BN_BigNum *k2, const ECC_Point *pt)
1564 {
1565 int32_t ret;
1566 Felem felemK1;
1567 Felem felemK2;
1568 Point preMulPt[TABLE_P_SIZE];
1569 Point out;
1570
1571 // Check the input parameters.
1572 GOTO_ERR_IF(CheckParaValid(para, CRYPT_ECC_NISTP256), ret);
1573 GOTO_ERR_IF(CheckPointValid(r, CRYPT_ECC_NISTP256), ret);
1574 GOTO_ERR_IF(CheckBnValid(k1, FELEM_BITS), ret);
1575 GOTO_ERR_IF(CheckBnValid(k2, FELEM_BITS), ret);
1576 GOTO_ERR_IF(CheckPointValid(pt, CRYPT_ECC_NISTP256), ret);
1577 // Special treatment of infinity points
1578 if (BN_IsZero(pt->z)) {
1579 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_AT_INFINITY);
1580 return CRYPT_ECC_POINT_AT_INFINITY;
1581 }
1582
1583 GOTO_ERR_IF_EX(BN2Felem(&felemK1, k1), ret);
1584 GOTO_ERR_IF_EX(BN2Felem(&felemK2, k2), ret);
1585 GOTO_ERR_IF_EX(GetPreMulPt(preMulPt, pt), ret);
1586
1587 PtMul(&out, &felemK1, &felemK2, preMulPt);
1588
1589 GOTO_ERR_IF_EX(Felem2BN(r->x, &out.x), ret);
1590 GOTO_ERR_IF_EX(Felem2BN(r->y, &out.y), ret);
1591 GOTO_ERR_IF_EX(Felem2BN(r->z, &out.z), ret);
1592 ERR:
1593 return ret;
1594 }
1595
ECP256_PointMul(ECC_Para * para,ECC_Point * r,const BN_BigNum * k,const ECC_Point * pt)1596 int32_t ECP256_PointMul(ECC_Para *para, ECC_Point *r, const BN_BigNum *k, const ECC_Point *pt)
1597 {
1598 int32_t ret;
1599 Felem felemK;
1600 Point preMulPt[TABLE_P_SIZE];
1601 Point out;
1602
1603 // Check the input parameters.
1604 GOTO_ERR_IF(CheckParaValid(para, CRYPT_ECC_NISTP256), ret);
1605 GOTO_ERR_IF(CheckPointValid(r, CRYPT_ECC_NISTP256), ret);
1606 GOTO_ERR_IF(CheckBnValid(k, FELEM_BITS), ret);
1607 if (pt != NULL) {
1608 GOTO_ERR_IF(CheckPointValid(pt, CRYPT_ECC_NISTP256), ret);
1609 // Special treatment of infinity points
1610 if (BN_IsZero(pt->z)) {
1611 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_AT_INFINITY);
1612 return CRYPT_ECC_POINT_AT_INFINITY;
1613 }
1614 }
1615
1616 GOTO_ERR_IF_EX(BN2Felem(&felemK, k), ret);
1617 if (pt != NULL) {
1618 GOTO_ERR_IF_EX(GetPreMulPt(preMulPt, pt), ret);
1619 PtMul(&out, NULL, &felemK, preMulPt);
1620 } else {
1621 PtMul(&out, &felemK, NULL, NULL);
1622 }
1623
1624 GOTO_ERR_IF_EX(Felem2BN(r->x, &out.x), ret);
1625 GOTO_ERR_IF_EX(Felem2BN(r->y, &out.y), ret);
1626 GOTO_ERR_IF_EX(Felem2BN(r->z, &out.z), ret);
1627 ERR:
1628 return ret;
1629 }
1630
ECP256_Point2Affine(const ECC_Para * para,ECC_Point * r,const ECC_Point * pt)1631 int32_t ECP256_Point2Affine(const ECC_Para *para, ECC_Point *r, const ECC_Point *pt)
1632 {
1633 int32_t ret;
1634 Point out;
1635 Felem zInv;
1636
1637 // Check the input parameters.
1638 GOTO_ERR_IF(CheckParaValid(para, CRYPT_ECC_NISTP256), ret);
1639 GOTO_ERR_IF(CheckPointValid(r, CRYPT_ECC_NISTP256), ret);
1640 GOTO_ERR_IF(CheckPointValid(pt, CRYPT_ECC_NISTP256), ret);
1641 // Special treatment of infinity points
1642 if (BN_IsZero(pt->z)) {
1643 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_AT_INFINITY);
1644 return CRYPT_ECC_POINT_AT_INFINITY;
1645 }
1646
1647 GOTO_ERR_IF_EX(BN2Felem(&out.x, pt->x), ret);
1648 GOTO_ERR_IF_EX(BN2Felem(&out.y, pt->y), ret);
1649 GOTO_ERR_IF_EX(BN2Felem(&out.z, pt->z), ret);
1650
1651 FelemInv(&zInv, &out.z);
1652 PtMakeAffineWithInv(&out, &out, &zInv);
1653
1654 GOTO_ERR_IF_EX(Felem2BN(r->x, &out.x), ret);
1655 GOTO_ERR_IF_EX(Felem2BN(r->y, &out.y), ret);
1656 GOTO_ERR_IF_EX(Felem2BN(r->z, &out.z), ret);
1657 ERR:
1658 return ret;
1659 }
1660
ECP256_ModOrderInv(const ECC_Para * para,BN_BigNum * r,const BN_BigNum * a)1661 int32_t ECP256_ModOrderInv(const ECC_Para *para, BN_BigNum *r, const BN_BigNum *a)
1662 {
1663 if (para == NULL || r == NULL || a == NULL) {
1664 BSL_ERR_PUSH_ERROR(CRYPT_NULL_INPUT);
1665 return CRYPT_NULL_INPUT;
1666 }
1667
1668 if (para->id != CRYPT_ECC_NISTP256) {
1669 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_ERR_CURVE_ID);
1670 return CRYPT_ECC_POINT_ERR_CURVE_ID;
1671 }
1672
1673 if (BN_IsZero(a)) {
1674 BSL_ERR_PUSH_ERROR(CRYPT_ECC_INVERSE_INPUT_ZERO);
1675 return CRYPT_ECC_INVERSE_INPUT_ZERO;
1676 }
1677 return ECP_ModOrderInv(para, r, a);
1678 }
1679
1680 #endif /* defined(HITLS_CRYPTO_CURVE_NISTP256) && defined(HITLS_CRYPTO_NIST_USE_ACCEL) */
1681