1 /*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 * http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16 #include "hitls_build.h"
17 #if defined(HITLS_CRYPTO_CURVE_NISTP224) && defined(HITLS_CRYPTO_NIST_USE_ACCEL)
18
19 #include <stdbool.h>
20 #include "securec.h"
21 #include "bsl_err_internal.h"
22 #include "crypt_errno.h"
23 #include "crypt_utils.h"
24 #include "crypt_bn.h"
25 #include "crypt_ecc.h"
26 #include "ecc_local.h"
27 #include "ecc_utils.h"
28 #include "bsl_util_internal.h"
29
30 #ifndef __SIZEOF_INT128__
31 #error "This nistp224 implementation require the compiler support 128-bits integer."
32 #endif
33
34 /* field element definition */
35 #define FELEM_BITS 224
36 #define FELEM_BYTES 28
37 #define LIMB_BITS (sizeof(uint64_t) << 3)
38 #define LIMB_NUM 4
39 #define BASE_BITS 56
40 #define BASE_MASK 0x00ffffffffffffff
41 /* The pre-calculation table of the G table has 16 points. */
42 #define TABLE_G_SIZE 16
43 /* The pre-calculation table of the P table has 17 points. */
44 #define TABLE_P_SIZE 17
45
46 /*
47 * field elements, stored as arrays, all represented in little endian.
48 * Each element of the array is called a digit, and each digit represents an extended 2^56-number-system digit,
49 * that is, Felem can be expressed as:
50 * f_0 + f_1 * 2^56 + f_2 * 2^112 + f_3 * 2^168
51 * LongFelem is the same, but twice the width of Felem.
52 * It is used to store the result of multiplication of field elements.
53 * Point is a point represented as a Jacobian coordinate
54 */
55 typedef struct {
56 uint64_t data[LIMB_NUM];
57 } Felem;
58
59 typedef struct {
60 uint128_t data[LIMB_NUM * 2 - 1];
61 } LongFelem;
62
63 typedef struct {
64 Felem x;
65 Felem y;
66 Felem z;
67 } Point;
68
69 /* ------------------------------------------------------------ */
70 /* ECP224 field order p, the value is 2^224 - 2^96 + 1, little endian. */
71 static const Felem FIELD_ORDER = {
72 {0x0000000000000001, 0x00ffff0000000000, 0x00ffffffffffffff, 0x00ffffffffffffff}
73 };
74
75 /*
76 * A pre-calculated table of the base point G, which contains the (X, Y, Z) coordinates of k*G
77 *
78 * PRE_MUL_G divides all bits into four equal parts.
79 * index Corresponding bit value of k
80 * 0 0 0 0 0 0 + 0 + 0 + 0
81 * 1 0 0 0 1 0 + 0 + 0 + 1
82 * 2 0 0 1 0 0 + 0 + 2^56 + 0
83 * 3 0 0 1 1 0 + 0 + 2^56 + 1
84 * 4 0 1 0 0 0 + 2^112 + 0 + 0
85 * 5 0 1 0 1 0 + 2^112 + 0 + 1
86 * 6 0 1 1 0 0 + 2^112 + 2^56 + 0
87 * 7 0 1 1 1 0 + 2^112 + 2^56 + 1
88 * 8 1 0 0 0 2^168 + 0 + 0 + 0
89 * 9 1 0 0 1 2^168 + 0 + 0 + 1
90 * 10 1 0 1 0 2^168 + 0 + 2^56 + 0
91 * 11 1 0 1 1 2^168 + 0 + 2^56 + 1
92 * 12 1 1 0 0 2^168 + 2^112 + 0 + 0
93 * 13 1 1 0 1 2^168 + 2^112 + 0 + 1
94 * 14 1 1 1 0 2^168 + 2^112 + 2^56 + 0
95 * 15 1 1 1 1 2^168 + 2^112 + 2^56 + 1
96 */
97 static const Point PRE_MUL_G[TABLE_G_SIZE] = {
98 {
99 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
100 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
101 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
102 }, {
103 {{0x003280d6115c1d21, 0x00c1d356c2112234, 0x007f321390b94a03, 0x00b70e0cbd6bb4bf}},
104 {{0x00d5819985007e34, 0x0075a05a07476444, 0x00fb4c22dfe6cd43, 0x00bd376388b5f723}},
105 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
106 }, {
107 {{0x00fd9675666ebbe9, 0x00bca7664d40ce5e, 0x002242df8d8a2a43, 0x001f49bbb0f99bc5}},
108 {{0x0029e0b892dc9c43, 0x00ece8608436e662, 0x00dc858f185310d0, 0x009812dd4eb8d321}},
109 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
110 }, {
111 {{0x006d3e678d5d8eb8, 0x00559eed1cb362f1, 0x0016e9a3bbce8a3f, 0x00eedcccd8c2a748}},
112 {{0x00f19f90ed50266d, 0x00abf2b4bf65f9df, 0x00313865468fafec, 0x005cb379ba910a17}},
113 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
114 }, {
115 {{0x000641966cab26e3, 0x0091fb2991fab0a0, 0x00efec27a4e13a0b, 0x000499aa8a5f8ebe}},
116 {{0x007510407766af5d, 0x0084d929610d5450, 0x0081d77aae82f706, 0x006916f6d4338c5b}},
117 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
118 }, {
119 {{0x00ea95ac3b1f15c6, 0x00086000905e82d4, 0x00dd323ae4d1c8b1, 0x00932b56be7685a3}},
120 {{0x009ef93dea25dbbf, 0x0041665960f390f0, 0x00fdec76dbe2a8a7, 0x00523e80f019062a}},
121 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
122 }, {
123 {{0x00822fdd26732c73, 0x00a01c83531b5d0f, 0x00363f37347c1ba4, 0x00c391b45c84725c}},
124 {{0x00bbd5e1b2d6ad24, 0x00ddfbcde19dfaec, 0x00c393da7e222a7f, 0x001efb7890ede244}},
125 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
126 }, {
127 {{0x004c9e90ca217da1, 0x00d11beca79159bb, 0x00ff8d33c2c98b7c, 0x002610b39409f849}},
128 {{0x0044d1352ac64da0, 0x00cdbb7b2c46b4fb, 0x00966c079b753c89, 0x00fe67e4e820b112}},
129 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
130 }, {
131 {{0x00e28cae2df5312d, 0x00c71b61d16f5c6e, 0x0079b7619a3e7c4c, 0x0005c73240899b47}},
132 {{0x009f7f6382c73e3a, 0x0018615165c56bda, 0x00641fab2116fd56, 0x0072855882b08394}},
133 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
134 }, {
135 {{0x000469182f161c09, 0x0074a98ca8d00fb5, 0x00b89da93489a3e0, 0x0041c98768fb0c1d}},
136 {{0x00e5ea05fb32da81, 0x003dce9ffbca6855, 0x001cfe2d3fbf59e6, 0x000e5e03408738a7}},
137 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
138 }, {
139 {{0x00dab22b2333e87f, 0x004430137a5dd2f6, 0x00e03ab9f738beb8, 0x00cb0c5d0dc34f24}},
140 {{0x00764a7df0c8fda5, 0x00185ba5c3fa2044, 0x009281d688bcbe50, 0x00c40331df893881}},
141 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
142 }, {
143 {{0x00b89530796f0f60, 0x00ade92bd26909a3, 0x001a0c83fb4884da, 0x001765bf22a5a984}},
144 {{0x00772a9ee75db09e, 0x0023bc6c67cec16f, 0x004c1edba8b14e2f, 0x00e2a215d9611369}},
145 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
146 }, {
147 {{0x00571e509fb5efb3, 0x00ade88696410552, 0x00c8ae85fada74fe, 0x006c7e4be83bbde3}},
148 {{0x00ff9f51160f4652, 0x00b47ce2495a6539, 0x00a2946c53b582f4, 0x00286d2db3ee9a60}},
149 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
150 }, {
151 {{0x0040bbd5081a44af, 0x000995183b13926c, 0x00bcefba6f47f6d0, 0x00215619e9cc0057}},
152 {{0x008bc94d3b0df45e, 0x00f11c54a3694f6f, 0x008631b93cdfe8b5, 0x00e7e3f4b0982db9}},
153 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
154 }, {
155 {{0x00b17048ab3e1c7b, 0x00ac38f36ff8a1d8, 0x001c29819435d2c6, 0x00c813132f4c07e9}},
156 {{0x002891425503b11f, 0x0008781030579fea, 0x00f5426ba5cc9674, 0x001e28ebf18562bc}},
157 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
158 }, {
159 {{0x009f31997cc864eb, 0x0006cd91d28b5e4c, 0x00ff17036691a973, 0x00f1aef351497c58}},
160 {{0x00dd1f2d600564ff, 0x00dead073b1402db, 0x0074a684435bd693, 0x00eea7471f962558}},
161 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
162 }
163 };
164
165 /*
166 * A pre-calculated table of the base point G, which contains the (X, Y, Z) coordinates of k*G
167 *
168 * PRE_MUL_G2[] = PRE_MUL_G[] * 2^28
169 * index Corresponding bit value of k
170 * 0 0 0 0 0 0 + 0 + 0 + 0
171 * 1 0 0 0 1 0 + 0 + 0 + 2^28
172 * 2 0 0 1 0 0 + 0 + 2^84 + 0
173 * 3 0 0 1 1 0 + 0 + 2^84 + 2^28
174 * 4 0 1 0 0 0 + 2^140 + 0 + 0
175 * 5 0 1 0 1 0 + 2^140 + 0 + 2^28
176 * 6 0 1 1 0 0 + 2^140 + 2^84 + 0
177 * 7 0 1 1 1 0 + 2^140 + 2^84 + 2^28
178 * 8 1 0 0 0 2^196 + 0 + 0 + 0
179 * 9 1 0 0 1 2^196 + 0 + 0 + 2^28
180 * 10 1 0 1 0 2^196 + 0 + 2^84 + 0
181 * 11 1 0 1 1 2^196 + 0 + 2^84 + 2^28
182 * 12 1 1 0 0 2^196 + 2^140 + 0 + 0
183 * 13 1 1 0 1 2^196 + 2^140 + 0 + 2^28
184 * 14 1 1 1 0 2^196 + 2^140 + 2^84 + 0
185 * 15 1 1 1 1 2^196 + 2^140 + 2^84 + 2^28
186 */
187 static const Point PRE_MUL_G2[TABLE_G_SIZE] = {
188 {
189 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
190 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}},
191 {{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
192 }, {
193 {{0x009665266dddf554, 0x009613d78b60ef2d, 0x00ce27a34cdba417, 0x00d35ab74d6afc31}},
194 {{0x0085ccdd22deb15e, 0x002137e5783a6aab, 0x00a141cffd8c93c6, 0x00355a1830e90f2d}},
195 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
196 }, {
197 {{0x001a494eadaade65, 0x00d6da4da77fe53c, 0x00e7992996abec86, 0x0065c3553c6090e3}},
198 {{0x00fa610b1fb09346, 0x00f1c6540b8a4aaf, 0x00c51a13ccd3cbab, 0x0002995b1b18c28a}},
199 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
200 }, {
201 {{0x007874568e7295ef, 0x0086b419fbe38d04, 0x00dc0690a7550d9a, 0x00d3966a44beac33}},
202 {{0x002b7280ec29132f, 0x00beaa3b6a032df3, 0x00dc7dd88ae41200, 0x00d25e2513e3a100}},
203 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
204 }, {
205 {{0x00924857eb2efafd, 0x00ac2bce41223190, 0x008edaa1445553fc, 0x00825800fd3562d5}},
206 {{0x008d79148ea96621, 0x0023a01c3dd9ed8d, 0x00af8b219f9416b5, 0x00d8db0cc277daea}},
207 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
208 }, {
209 {{0x0076a9c3b1a700f0, 0x00e9acd29bc7e691, 0x0069212d1a6b0327, 0x006322e97fe154be}},
210 {{0x00469fc5465d62aa, 0x008d41ed18883b05, 0x001f8eae66c52b88, 0x00e4fcbe9325be51}},
211 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
212 }, {
213 {{0x00825fdf583cac16, 0x00020b857c7b023a, 0x00683c17744b0165, 0x0014ffd0a2daf2f1}},
214 {{0x00323b36184218f9, 0x004944ec4e3b47d4, 0x00c15b3080841acf, 0x000bced4b01a28bb}},
215 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
216 }, {
217 {{0x0092ac22230df5c4, 0x0052f33b4063eda8, 0x00cb3f19870c0c93, 0x0040064f2ba65233}},
218 {{0x00fe16f0924f8992, 0x00012da25af5b517, 0x001a57bb24f723a6, 0x0006f8bc76760def}},
219 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
220 }, {
221 {{0x004a7084f7817cb9, 0x00bcab0738ee9a78, 0x003ec11e11d9c326, 0x00dc0fe90e0f1aae}},
222 {{0x00cf639ea5f98390, 0x005c350aa22ffb74, 0x009afae98a4047b7, 0x00956ec2d617fc45}},
223 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
224 }, {
225 {{0x004306d648c1be6a, 0x009247cd8bc9a462, 0x00f5595e377d2f2e, 0x00bd1c3caff1a52e}},
226 {{0x00045e14472409d0, 0x0029f3e17078f773, 0x00745a602b2d4f7d, 0x00191837685cdfbb}},
227 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
228 }, {
229 {{0x005b6ee254a8cb79, 0x004953433f5e7026, 0x00e21faeb1d1def4, 0x00c4c225785c09de}},
230 {{0x00307ce7bba1e518, 0x0031b125b1036db8, 0x0047e91868839e8f, 0x00c765866e33b9f3}},
231 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
232 }, {
233 {{0x003bfece24f96906, 0x004794da641e5093, 0x00de5df64f95db26, 0x00297ecd89714b05}},
234 {{0x00701bd3ebb2c3aa, 0x007073b4f53cb1d5, 0x0013c5665658af16, 0x009895089d66fe58}},
235 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
236 }, {
237 {{0x000fef05f78c4790, 0x002d773633b05d2e, 0x0094229c3a951c94, 0x00bbbd70df4911bb}},
238 {{0x00b2c6963d2c1168, 0x00105f47a72b0d73, 0x009fdf6111614080, 0x007b7e94b39e67b0}},
239 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
240 }, {
241 {{0x00ad1a7d6efbe2b3, 0x00f012482c0da69d, 0x006b3bdf12438345, 0x0040d7558d7aa4d9}},
242 {{0x008a09fffb5c6d3d, 0x009a356e5d9ffd38, 0x005973f15f4f9b1c, 0x00dcd5f59f63c3ea}},
243 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
244 }, {
245 {{0x00acf39f4c5ca7ab, 0x004c8071cc5fd737, 0x00c64e3602cd1184, 0x000acd4644c9abba}},
246 {{0x006c011a36d8bf6e, 0x00fecd87ba24e32a, 0x0019f6f56574fad8, 0x00050b204ced9405}},
247 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
248 }, {
249 {{0x00ed4f1cae7d9a96, 0x005ceef7ad94c40a, 0x00778e4a3bf3ef9b, 0x007405783dc3b55e}},
250 {{0x0032477c61b6e8c6, 0x00b46a97570f018b, 0x0091176d0a7e95d1, 0x003df90fbc4c7d0e}},
251 {{0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}
252 }
253 };
254
255 /* --------------------------helper function-------------------------- */
256 /*
257 * Convert big endian byte stream to Felem
258 */
Bin2Felem(Felem * out,const uint8_t in[FELEM_BYTES])259 static void Bin2Felem(Felem *out, const uint8_t in[FELEM_BYTES])
260 {
261 int32_t offset;
262 for (int32_t i = LIMB_NUM - 1; i >= 0; --i) {
263 offset = 7 * (LIMB_NUM - 1 - i); // 56bits occupy 7 bytes.
264 out->data[i] = ((uint64_t)in[offset + 0] << 48) | // Byte 0 is shifted by 48 bits to the left.
265 ((uint64_t)in[offset + 1] << 40) | // The 1st byte is shifted leftward by 40 bits.
266 ((uint64_t)in[offset + 2] << 32) | // The 2nd byte is shifted leftward by 32 bits.
267 ((uint64_t)in[offset + 3] << 24) | // The 3rd byte is shifted leftward by 24 bits.
268 ((uint64_t)in[offset + 4] << 16) | // The 4th byte is shifted leftward by 16 bits.
269 ((uint64_t)in[offset + 5] << 8) | // The 5th byte is shifted leftward by 8 bits.
270 ((uint64_t)in[offset + 6]); // No shift is required for the 6th byte.
271 }
272 }
273
274 /*
275 * Convert Felem to big-endian byte stream
276 * Input:
277 * in[] < 2^56
278 * Output:
279 * the length of out is 28
280 */
Felem2Bin(uint8_t out[FELEM_BYTES],const Felem * in)281 static void Felem2Bin(uint8_t out[FELEM_BYTES], const Felem *in)
282 {
283 int32_t offset;
284 for (int32_t i = LIMB_NUM - 1; i >= 0; --i) {
285 offset = 7 * (LIMB_NUM - 1 - i); // 56bits occupy 7 bytes.
286 out[offset + 0] = (uint8_t)(in->data[i] >> 48); // out[i + 0] get 48~55 bits
287 out[offset + 1] = (uint8_t)(in->data[i] >> 40); // out[i + 1] get 40~47 bits
288 out[offset + 2] = (uint8_t)(in->data[i] >> 32); // out[i + 2] get 32~39 bits
289 out[offset + 3] = (uint8_t)(in->data[i] >> 24); // out[i + 3] get 24~31 bits
290 out[offset + 4] = (uint8_t)(in->data[i] >> 16); // out[i + 4] get 16~23 bits
291 out[offset + 5] = (uint8_t)(in->data[i] >> 8); // out[i + 5] get 8~15 bits
292 out[offset + 6] = (uint8_t)in->data[i]; // out[i + 6] get 0~7 bits
293 }
294 }
295
296 /*
297 * Convert BN to Felem
298 * Output:
299 * out[] < 2^56
300 */
BN2Felem(Felem * out,const BN_BigNum * in)301 static int32_t BN2Felem(Felem *out, const BN_BigNum *in)
302 {
303 int32_t retVal;
304 uint8_t bin[FELEM_BYTES];
305 uint32_t len = FELEM_BYTES;
306
307 GOTO_ERR_IF(BN_Bn2Bin(in, bin, &len), retVal);
308
309 for (uint32_t i = 0; i < FELEM_BYTES; ++i) {
310 bin[FELEM_BYTES - 1 - i] = i < len ? bin[len - 1 - i] : 0;
311 }
312
313 Bin2Felem(out, bin);
314 ERR:
315 return retVal;
316 }
317
318 /*
319 * Convert Felem to BN
320 * Input:
321 * in[] < 2^56
322 */
Felem2BN(BN_BigNum * out,const Felem * in)323 static int32_t Felem2BN(BN_BigNum *out, const Felem *in)
324 {
325 int32_t retVal;
326 uint8_t bin[FELEM_BYTES];
327
328 Felem2Bin(bin, in);
329
330 GOTO_ERR_IF(BN_Bin2Bn(out, bin, FELEM_BYTES), retVal);
331 ERR:
332 return retVal;
333 }
334
335 /* ---------------------------field operation--------------------------- */
336 /*
337 * Assignment
338 */
FelemAssign(Felem * out,const Felem * in)339 static inline void FelemAssign(Felem *out, const Felem *in)
340 {
341 out->data[0] = in->data[0]; // out->data[0] takes the value
342 out->data[1] = in->data[1]; // out->data[1] takes the value
343 out->data[2] = in->data[2]; // out->data[2] takes the value
344 out->data[3] = in->data[3]; // out->data[3] takes the value
345 }
346
347 /*
348 * Copy bits by mask. If the corresponding bit is 1, copy the bit. If the corresponding bit is 0, retain the bit.
349 */
FelemAssignWithMask(Felem * out,const Felem * in,const uint64_t mask)350 static inline void FelemAssignWithMask(Felem *out, const Felem *in, const uint64_t mask)
351 {
352 uint64_t rmask = ~mask;
353 out->data[0] = (in->data[0] & mask) | (out->data[0] & rmask); // out->data[0] get a new value or remain unchanged.
354 out->data[1] = (in->data[1] & mask) | (out->data[1] & rmask); // out->data[1] get a new value or remain unchanged.
355 out->data[2] = (in->data[2] & mask) | (out->data[2] & rmask); // out->data[2] get a new value or remain unchanged.
356 out->data[3] = (in->data[3] & mask) | (out->data[3] & rmask); // out->data[3] get a new value or remain unchanged.
357 }
358
359 /*
360 * Set the lowest digit
361 */
FelemSetLimb(Felem * out,const uint64_t in)362 static inline void FelemSetLimb(Felem *out, const uint64_t in)
363 {
364 out->data[0] = in; // out->data[0] takes the value
365 out->data[1] = 0; // out->data[1] clear to 0
366 out->data[2] = 0; // out->data[2] clear to 0
367 out->data[3] = 0; // out->data[3] clear to 0
368 }
369
370 /*
371 * Zero judgment: is input less than(2^224 + 2^(16 + 192)). Only 0 and p need to be judged.
372 * Input:
373 * in[] < 2^56 + 2^16
374 * - in[0] < 2^56
375 * - in[1] < 2^56
376 * - in[2] < 2^56
377 * - in[3] < 2^56 + 2^16
378 */
FelemIsZero(const Felem * in)379 static inline uint64_t FelemIsZero(const Felem *in)
380 {
381 uint64_t isZero, isP;
382
383 // Check whether digits 0, 1, 2, and 3 of in are all 0.
384 isZero = in->data[0] | in->data[1] | in->data[2] | in->data[3];
385 isZero -= 1; // If in == 0, the most significant bit is 1.
386
387 // Determine that in is equal to the field order.
388 isP = (in->data[0] ^ FIELD_ORDER.data[0]) | // Determines whether the digits 0 are equal.
389 (in->data[1] ^ FIELD_ORDER.data[1]) | // Determines whether the digits 1 are equal.
390 (in->data[2] ^ FIELD_ORDER.data[2]) | // Determines whether the digits 2 are equal.
391 (in->data[3] ^ FIELD_ORDER.data[3]); // Determines whether the digits 3 are equal.
392 isP -= 1; // If in == p, the most significant bit is 1
393
394 return (isZero | isP) >> (LIMB_BITS - 1);
395 }
396
397 /*
398 * Obtain the bit string whose length is len at idx in Felem.
399 * Input:
400 * in[i] < 2^56
401 * 0 < len <= 64
402 */
FelemGetBits(const Felem * in,int32_t idx,uint32_t len)403 static uint64_t FelemGetBits(const Felem *in, int32_t idx, uint32_t len)
404 {
405 uint64_t ret;
406 uint32_t lower, upper;
407 uint64_t mask;
408
409 lower = (uint32_t)idx;
410 // when 0 <= lower < 224, the most significant bit is 1. Obtain the most significant bit by right shifted by 31 bits
411 mask = (uint64_t)0 - ((~lower & (lower - 224)) >> 31);
412 ret = (in->data[(lower / BASE_BITS) & mask] & BASE_MASK & mask) >> (lower % BASE_BITS);
413
414 upper = (uint32_t)idx + BASE_BITS; // Next Unary Block
415 // when 0 <= upper < 224, the most significant bit is 1. Obtain the most significant bit by right shifted by 31 bits
416 mask = (uint64_t)0 - ((~upper & (upper - 224)) >> 31);
417 ret |= (in->data[(upper / BASE_BITS) & mask] & BASE_MASK & mask) << (BASE_BITS - upper % BASE_BITS);
418
419 ret &= ((uint64_t)1 << len) - 1; // All 1s of the len length
420
421 return ret;
422 }
423
424 /*
425 * field element reduction, retain the lower 56 bits of each Limb in the 4-ary Felem,
426 * perform reduction on the upper bits, and perform modulo operation to reduce all Limbs to [0, 2 ^ 56 + 2 ^ 7).
427 * Input:
428 * in[] < 2^63
429 * Output:
430 * out[] < 2^56 + 2^7
431 * - out[0] < 2^56
432 * - out[1] < 2^56
433 * - out[2] < 2^56
434 * - out[3] < 2^56 + 2^7
435 */
FelemReduce(Felem * out,const Felem * in)436 static void FelemReduce(Felem *out, const Felem *in)
437 {
438 uint64_t carryLimb = in->data[3] >> BASE_BITS;
439 const uint64_t borrow = (uint64_t)1 << 56;
440 const uint64_t lend = 1;
441
442 // reduction carryLimb * (2^96 - 1), because it's the non-negative number, it must can be borrowed.
443 // Calculate out->data[0]
444 out->data[0] = in->data[0] + borrow - carryLimb; // carryLimb * (-1)
445 // Calculate out->data[1]
446 out->data[1] = in->data[1] + (out->data[0] >> BASE_BITS) + (carryLimb << 40) - lend; // carryLimb * 2^(56 + 40)
447 // Calculate out->data[2]
448 out->data[2] = in->data[2] + (out->data[1] >> BASE_BITS);
449 // Calculate out->data[3]
450 out->data[3] = (in->data[3] & BASE_MASK) + (out->data[2] >> BASE_BITS); // < 2^56 + 2^7
451
452 out->data[0] &= BASE_MASK; // out->data[0] Take the lower bits.
453 out->data[1] &= BASE_MASK; // out->data[1] Take the lower bits.
454 out->data[2] &= BASE_MASK; // out->data[2] Take the lower bits.
455 }
456
457 /*
458 * field element reduction, convert 7-ary LongFelem to 4-ary Felem,
459 * and modulo reduction to all Limbs to [0, 2 ^ 56 + 2 ^ 16)
460 * Input:
461 * in[] < 2^126
462 * Output:
463 * out[] < 2^56 + 2^16
464 * - out[0] < 2^56
465 * - out[1] < 2^56
466 * - out[2] < 2^56
467 * - out[3] < 2^56 + 2^16
468 */
LongFelemReduce(Felem * out,const LongFelem * in)469 static void LongFelemReduce(Felem *out, const LongFelem *in)
470 {
471 const uint128_t *pi = in->data;
472 // p shifts left by 15
473 static const LongFelem zeroBase = {
474 {
475 (((uint128_t)1 << 112) - ((uint128_t)1 << 96) + 1) << 15, // 2^127 - 2^111 + 2^15
476 (((uint128_t)1 << 112) - ((uint128_t)1 << 56)) << 15, // 2^127 - 2^71
477 (((uint128_t)1 << 112) - ((uint128_t)1 << 56)) << 15, // 2^127 - 2^71
478 0,
479 0,
480 0,
481 0,
482 }
483 };
484
485 uint128_t lout[4] = {
486 zeroBase.data[0] + pi[0], // < 2^127 + 2^126 - 2^111 + 2^15
487 zeroBase.data[1] + pi[1], // < 2^127 + 2^126 - 2^71
488 zeroBase.data[2] + pi[2], // < 2^127 + 2^126 - 2^71
489 pi[3] // < 2^126
490 };
491 uint128_t carryLimb = pi[4]; // < 2^126
492
493 // n = n / a * (a - b) ≡ n / a * b (mod (a - b))
494 // It can be obtained from the above formula:
495 // 2^n = 2^(n - 128) - 2^(n - 224) (mod (2^224 - 2^96 + 1))
496 //
497 // The following equation can be listed:
498 // 2^224 mod p
499 // = 2^96 - 1
500 //
501 // 2^280 mod p
502 // = 2^152 - 2^56
503 //
504 // 2^336 mod p
505 // = 2^208 - 2^112
506
507 // reduce pi[6] and obtain the part higher 16 bits.
508 carryLimb += pi[6] >> 16;
509 // lout[3], reduce pi[6], shift leftwards by 40 bits then truncate; reduce pi[5] and obtain the part higher 16 bits.
510 lout[3] += ((pi[6] << 40) & BASE_MASK) + (pi[5] >> 16);
511 // lout[2], reduce pi[5], leftshift by 40bit then truncate; reduce carryLimb, obtain the higher 16bits. reduce pi[6]
512 lout[2] += ((pi[5] << 40) & BASE_MASK) + (carryLimb >> 16) - pi[6];
513 // lout[1], reduce carryLimb, shift leftwards by 40 bits then truncate; reduce pi[5]
514 lout[1] += ((carryLimb << 40) & BASE_MASK) - pi[5];
515 // lout[0], reduce carryLimb
516 lout[0] -= carryLimb;
517
518 // Range after reduction:
519 // carryLimb < 2^126 + 2^110
520 // lout[3] < 2^126 + 2^56 + 2^110 < 2^127
521 // lout[2] < (2^127 + 2^126 - 2^71) + 2^56 + (2^110 + 2^94) < 2^128
522 // lout[1] < (2^127 + 2^126 - 2^71) + 2^56
523 // lout[0] < 2^127 + 2^126 - 2^111 + 2^15 < 2^128
524
525 // carry
526 lout[3] += lout[2] >> BASE_BITS; // lout[3] < 2^127 + 2^72 < 2^128
527 carryLimb = lout[3] >> BASE_BITS; // carryLimb < 2^72
528
529 lout[2] &= BASE_MASK; // lout[2] < 2^56
530 lout[3] &= BASE_MASK; // lout[3] < 2^56
531
532 // reduce carryLimb
533 // lout[2],reduce carryLimb and obtain the part higher 16 bits.
534 lout[2] += carryLimb >> 16;
535 // lout[1],reduce carryLimb, shift leftwards by 40 bits then truncate;
536 lout[1] += (carryLimb << 40) & BASE_MASK;
537 // lout[0],reduce carryLimb
538 lout[0] -= carryLimb;
539
540 // Range after reduction:
541 // lout[2] < 2^56 + 2^56
542 // lout[1] < (2^127 + 2^126 - 2^71 + 2^56) + 2^56
543 // lout[0] < 2^128
544
545 // carry
546 lout[1] += lout[0] >> BASE_BITS; // lout[1] < (2^127 + 2^126 - 2^71 + 2^57 - 2^41) + 2^72
547 lout[2] += lout[1] >> BASE_BITS; // lout[2] < 2^57 + (2^71 + 2^70 + 2^16 - 2^15 + 1) < 2^72
548
549 out->data[0] = (uint64_t)lout[0] & BASE_MASK; // out->data[0] < 2^56
550 out->data[1] = (uint64_t)lout[1] & BASE_MASK; // out->data[1] < 2^56
551 out->data[2] = (uint64_t)lout[2] & BASE_MASK; // out->data[2] < 2^56
552 out->data[3] = (uint64_t)lout[3] + (uint64_t)(lout[2] >> BASE_BITS); // out->data[3] < 2^56 + 2^16
553 }
554
555 /*
556 * field element modulo in [0, p), call FelemReduce or LongFelemReduce in advance.
557 * Input:
558 * in[] < 2^56 + 2^16
559 * - in[0] < 2^56
560 * - in[1] < 2^56
561 * - in[2] < 2^56
562 * - in[3] < 2^56 + 2^16
563 * Output:
564 * out < p, out[i] < 2^56
565 */
FelemContract(Felem * out,const Felem * in)566 static void FelemContract(Felem *out, const Felem *in)
567 {
568 Felem tmp = {{in->data[0], in->data[1], in->data[2], in->data[3] & BASE_MASK}}; // tmp[] < 2^56
569 uint64_t carryLimb = in->data[3] >> BASE_BITS; // 0 or 1, because of in[3] < 2^56 + 2^16
570 uint64_t mask; // Check the mask greater than or equal to p.
571
572 const uint64_t lower40Mask = 0x000000ffffffffff; // Lower 40-bit mask
573 const uint64_t borrow = (uint64_t)1 << BASE_BITS;
574 const uint64_t lend = 1;
575
576 // Check whether tmp is greater than or equal to p. The upper 128 bits of p are all 1s and the lower 96 bits are 1.
577 // If the upper 128 bits (digit 3, 2, and 1) are all 1s, the value is 1. Otherwise, the value is 0.
578 mask = ((tmp.data[3] & tmp.data[2] & (tmp.data[1] | lower40Mask)) + 1) >> BASE_BITS;
579 // If the lower 96 bits are not zero, the value is 1. Otherwise, the value is 0.
580 mask &= (0 - ((tmp.data[1] & lower40Mask) | tmp.data[0])) >> (LIMB_BITS - 1);
581 mask = 0 - mask; // If tmp is greater than or equal to p, the value is 1. Otherwise, the value is 0.
582
583 // reduce carryLimb or subtract p. Note that carryLimb and mask cannot be true at the same time.
584 // p_0 = 0x0000000000000001
585 tmp.data[0] = (tmp.data[0] ^ borrow) - carryLimb - (mask & FIELD_ORDER.data[0]);
586 // p_1 = 0x00ffff0000000000
587 tmp.data[1] += carryLimb << 40; // reduction carryLimb * 2^(56 + 40)
588 // If the value is less than p, remains unchanged. If it's greater than or equal to p, subtract 0x00ffff0000000000.
589 // That is, the lower 40 bits remain unchanged, and other bits are set to 0.
590 tmp.data[1] &= ~mask | lower40Mask;
591 // Lend to the low bit: if reduce it, then 2^96 - 1 > 0, if minus p, then tmp[1] + tmp[0] > p_1 + p_2
592 tmp.data[1] -= lend;
593
594 // If the value is less than p, remains unchanged. If it's greater than or equal to p, subtract 0x00ffffffffffffff.
595 // That is, set to 0.
596 tmp.data[2] &= ~mask; // p_2 = 0x00ffffffffffffff
597
598 // If the value is less than p, remains unchanged. If it's greater than or equal to p, subtract 0x00ffffffffffffff.
599 // That is, set to 0.
600 tmp.data[3] &= ~mask; // p_3 = 0x00ffffffffffffff
601
602 // carry 0 -> 1 -> 2 -> 3
603 out->data[0] = tmp.data[0] & BASE_MASK; // tmp.data[0] get the lower bits.
604 out->data[1] = (tmp.data[1] += tmp.data[0] >> BASE_BITS) & BASE_MASK; // tmp.data[1] plus carry & get the lower bits
605 out->data[2] = (tmp.data[2] += tmp.data[1] >> BASE_BITS) & BASE_MASK; // tmp.data[2] plus carry & get the lower bits
606 out->data[3] = (tmp.data[3] += tmp.data[2] >> BASE_BITS) & BASE_MASK; // tmp.data[3] plus carry & get the lower bits
607 }
608
609 /*
610 * field Addition
611 */
FelemAdd(Felem * out,const Felem * a,const Felem * b)612 static inline void FelemAdd(Felem *out, const Felem *a, const Felem *b)
613 {
614 out->data[0] = a->data[0] + b->data[0]; // out->data[0] takes the value
615 out->data[1] = a->data[1] + b->data[1]; // out->data[1] takes the value
616 out->data[2] = a->data[2] + b->data[2]; // out->data[2] takes the value
617 out->data[3] = a->data[3] + b->data[3]; // out->data[3] takes the value
618 }
619
620 /*
621 * field element negation(NOT)
622 * Input:
623 * in[] <= 2^57 - 2^41 - 2^1
624 * Output:
625 * out[] <= 2^57 + 2^1 + in[] < 2^64
626 */
FelemNeg(Felem * out,const Felem * in)627 static inline void FelemNeg(Felem *out, const Felem *in)
628 {
629 static const Felem zeroBase = {{
630 (((uint64_t)1 << 56) + 1) << 1,
631 (((uint64_t)1 << 56) - ((uint64_t)1 << 40) - 1) << 1,
632 (((uint64_t)1 << 56) - 1) << 1,
633 (((uint64_t)1 << 56) - 1) << 1,
634 }};
635
636 out->data[0] = zeroBase.data[0] - in->data[0]; // out->data[0] takes the value
637 out->data[1] = zeroBase.data[1] - in->data[1]; // out->data[1] takes the value
638 out->data[2] = zeroBase.data[2] - in->data[2]; // out->data[2] takes the value
639 out->data[3] = zeroBase.data[3] - in->data[3]; // out->data[3] takes the value
640 }
641
642 /*
643 * field subtraction
644 * Input:
645 * a[] < 2^64 - 2^60 - 2^4,b[] <= 2^60 - 2^44 - 2^4
646 * Output:
647 * out[] <= 2^60 + 2^4 + a[] < 2^64
648 */
FelemSub(Felem * out,const Felem * a,const Felem * b)649 static inline void FelemSub(Felem *out, const Felem *a, const Felem *b)
650 {
651 static const Felem zeroBase = {{
652 (((uint64_t)1 << 56) + 1) << 4,
653 (((uint64_t)1 << 56) - ((uint64_t)1 << 40) - 1) << 4,
654 (((uint64_t)1 << 56) - 1) << 4,
655 (((uint64_t)1 << 56) - 1) << 4,
656 }};
657
658 out->data[0] = zeroBase.data[0] + a->data[0] - b->data[0]; // out->data[0] takes the value
659 out->data[1] = zeroBase.data[1] + a->data[1] - b->data[1]; // out->data[1] takes the value
660 out->data[2] = zeroBase.data[2] + a->data[2] - b->data[2]; // out->data[2] takes the value
661 out->data[3] = zeroBase.data[3] + a->data[3] - b->data[3]; // out->data[3] takes the value
662 }
663
664 /*
665 * field subtraction, input LongFelem directly.
666 * Input:
667 * a[] < 2^128 - 2^124,b[] <= 2^124 - 2^68 - 2^52
668 * Output:
669 * out[] <= 2^124 + a[] < 2^128
670 */
LongFelemSub(LongFelem * out,const LongFelem * a,const LongFelem * b)671 static void LongFelemSub(LongFelem *out, const LongFelem *a, const LongFelem *b)
672 {
673 // p shift left (8 + 56 * 3)
674 static const LongFelem zeroBase = {{
675 ((uint128_t)1 << 112) << 12,
676 (((uint128_t)1 << 112) - ((uint128_t)1 << 56)) << 12,
677 (((uint128_t)1 << 112) - ((uint128_t)1 << 56)) << 12,
678 (((uint128_t)1 << 112) - ((uint128_t)1 << 56)) << 12,
679 (((uint128_t)1 << 112) - ((uint128_t)1 << 56) + 1) << 12, // 1
680 (((uint128_t)1 << 112) - ((uint128_t)1 << 56) - ((uint128_t)1 << 40)) << 12, // -2^96
681 (((uint128_t)1 << 112) - ((uint128_t)1 << 56)) << 12, // 2^224
682 }};
683
684 out->data[0] = zeroBase.data[0] + a->data[0] - b->data[0]; // out->data[0] takes the value
685 out->data[1] = zeroBase.data[1] + a->data[1] - b->data[1]; // out->data[1] takes the value
686 out->data[2] = zeroBase.data[2] + a->data[2] - b->data[2]; // out->data[2] takes the value
687 out->data[3] = zeroBase.data[3] + a->data[3] - b->data[3]; // out->data[3] takes the value
688 out->data[4] = zeroBase.data[4] + a->data[4] - b->data[4]; // out->data[4] takes the value
689 out->data[5] = zeroBase.data[5] + a->data[5] - b->data[5]; // out->data[5] takes the value
690 out->data[6] = zeroBase.data[6] + a->data[6] - b->data[6]; // out->data[6] takes the value
691 }
692
693 /*
694 * field element magnification
695 * Use only a small magnification factor to ensure that in[]*scalar does not overflow.
696 */
FelemScale(Felem * out,const Felem * in,const uint32_t scalar)697 static inline void FelemScale(Felem *out, const Felem *in, const uint32_t scalar)
698 {
699 out->data[0] = in->data[0] * scalar; // out->data[0] takes the value
700 out->data[1] = in->data[1] * scalar; // out->data[1] takes the value
701 out->data[2] = in->data[2] * scalar; // out->data[2] takes the value
702 out->data[3] = in->data[3] * scalar; // out->data[3] takes the value
703 }
704
705 /*
706 * field element magnification, input LongFelem directly.
707 * Use only a small magnification factor to ensure that in[]*scalar does not overflow.
708 */
LongFelemScale(LongFelem * out,const LongFelem * in,const uint32_t scalar)709 static inline void LongFelemScale(LongFelem *out, const LongFelem *in, const uint32_t scalar)
710 {
711 out->data[0] = in->data[0] * scalar; // out->data[0] takes the value
712 out->data[1] = in->data[1] * scalar; // out->data[1] takes the value
713 out->data[2] = in->data[2] * scalar; // out->data[2] takes the value
714 out->data[3] = in->data[3] * scalar; // out->data[3] takes the value
715 out->data[4] = in->data[4] * scalar; // out->data[4] takes the value
716 out->data[5] = in->data[5] * scalar; // out->data[5] takes the value
717 out->data[6] = in->data[6] * scalar; // out->data[6] takes the value
718 }
719
720 /*
721 * field Multiplication
722 * Input:
723 * a[] < 2^62, b[] < 2^62
724 * output:
725 * out[] < a[] * b[] * 4 < 2^126
726 * - out[0] < 2^124
727 * - out[1] < 2^124 * 2
728 * - out[2] < 2^124 * 3
729 * - out[3] < 2^124 * 4
730 * - out[4] < 2^124 * 3
731 * - out[5] < 2^124 * 2
732 * - out[6] < 2^124
733 */
FelemMul(LongFelem * out,const Felem * a,const Felem * b)734 static void FelemMul(LongFelem *out, const Felem *a, const Felem *b)
735 {
736 // out[0] = a[0]*b[0]
737 // out[1] = a[0]*b[1] + a[1]*b[0]
738 // out[2] = a[0]*b[2] + a[1]*b[1] + a[2]*b[0]
739 // out[3] = a[0]*b[3] + a[1]*b[2] + a[2]*b[1] + a[3]*b[0]
740 // out[4] = a[1]*b[3] + a[2]*b[2] + a[3]*b[1]
741 // out[5] = a[2]*b[3] + a[3]*b[2]
742 // out[6] = a[3]*b[3]
743
744 uint128_t *po = out->data;
745 const uint64_t *pa = a->data;
746 const uint64_t *pb = b->data;
747
748 po[0] = (uint128_t)pa[0] * pb[0];
749 po[1] = (uint128_t)pa[0] * pb[1] + (uint128_t)pa[1] * pb[0];
750 po[2] = (uint128_t)pa[0] * pb[2] + (uint128_t)pa[1] * pb[1] + (uint128_t)pa[2] * pb[0];
751 po[3] = (uint128_t)pa[0] * pb[3] + (uint128_t)pa[1] * pb[2] + (uint128_t)pa[2] * pb[1] + (uint128_t)pa[3] * pb[0];
752 po[4] = (uint128_t)pa[1] * pb[3] + (uint128_t)pa[2] * pb[2] + (uint128_t)pa[3] * pb[1];
753 po[5] = (uint128_t)pa[2] * pb[3] + (uint128_t)pa[3] * pb[2];
754 po[6] = (uint128_t)pa[3] * pb[3];
755 }
756
757 /*
758 * field Square
759 * Input:
760 * a[] < 2^62
761 * output:
762 * out[] < a[] * a[] * 4 < 2^126
763 * - out[0] < 2^124
764 * - out[1] < 2^124 * 2
765 * - out[2] < 2^124 * 3
766 * - out[3] < 2^124 * 4
767 * - out[4] < 2^124 * 3
768 * - out[5] < 2^124 * 2
769 * - out[6] < 2^124
770 */
FelemSqr(LongFelem * out,const Felem * a)771 static void FelemSqr(LongFelem *out, const Felem *a)
772 {
773 // out[0] = a[0]*a[0]
774 // out[1] = a[0]*a[1]*2
775 // out[2] = a[0]*a[2]*2 + a[1]*a[1]
776 // out[3] = a[0]*a[3]*2 + a[1]*a[2]*2
777 // out[4] = a[1]*a[3]*2 + a[2]*a[2]
778 // out[5] = a[2]*a[3]*2
779 // out[6] = a[3]*a[3]
780
781 const uint64_t *pa = a->data;
782
783 uint64_t a1x2 = pa[1] << 1;
784 uint64_t a2x2 = pa[2] << 1;
785
786 out->data[0] = (uint128_t)pa[0] * pa[0];
787 out->data[1] = (uint128_t)pa[0] * a1x2;
788 out->data[2] = (uint128_t)pa[0] * a2x2 + (uint128_t)pa[1] * pa[1];
789 out->data[3] = ((uint128_t)pa[0] * pa[3] + (uint128_t)pa[1] * pa[2]) << 1;
790 out->data[4] = (uint128_t)pa[3] * a1x2 + (uint128_t)pa[2] * pa[2];
791 out->data[5] = (uint128_t)pa[3] * a2x2;
792 out->data[6] = (uint128_t)pa[3] * pa[3];
793 }
794
FelemMulReduce(Felem * out,const Felem * a,const Felem * b)795 static inline void FelemMulReduce(Felem *out, const Felem *a, const Felem *b)
796 {
797 LongFelem ltmp;
798 FelemMul(<mp, a, b);
799 LongFelemReduce(out, <mp);
800 }
801
FelemSqrReduce(Felem * out,const Felem * in)802 static inline void FelemSqrReduce(Felem *out, const Felem *in)
803 {
804 LongFelem ltmp;
805 FelemSqr(<mp, in);
806 LongFelemReduce(out, <mp);
807 }
808
809 /*
810 * field element inversion
811 * From Fermat's little theorem, in^(p - 2) = in^(-1) (mod p)
812 * in^(-1) = in^(2^224 - 2^96 + 1 - 2) (mod p)
813 * Input:
814 * in[i] < 2^63
815 * Output:
816 * reduce(out[i])
817 */
FelemInv(Felem * out,const Felem * in)818 static void FelemInv(Felem *out, const Felem *in)
819 {
820 Felem inE1, inE6, inE24, inE96; // inEx indicates in^(2^(0)+2^(1)+'''+2^(x-1))
821 uint32_t i;
822
823 // Construct inE1
824 FelemReduce(out, in);
825 FelemAssign(&inE1, out);
826
827 // Construct inE2
828 FelemSqrReduce(out, out);
829 FelemMulReduce(out, out, &inE1);
830
831 // Construct inE4, and store it in inE6
832 FelemAssign(&inE6, out);
833 FelemSqrReduce(&inE6, &inE6);
834 FelemSqrReduce(&inE6, &inE6);
835 FelemMulReduce(&inE6, out, &inE6); // inE6 is temporarily stored in inE4
836
837 // Construct in^6
838 FelemSqrReduce(&inE6, &inE6);
839 FelemSqrReduce(&inE6, &inE6);
840 FelemMulReduce(&inE6, out, &inE6);
841
842 // Construct inE12
843 FelemAssign(out, &inE6);
844 for (i = 0; i < 6; ++i) { // Moves the out by 6 digits to the left.
845 FelemSqrReduce(out, out);
846 }
847 FelemMulReduce(out, &inE6, out);
848
849 // Construct inE24
850 FelemAssign(&inE96, out);
851 for (i = 0; i < 12; ++i) { // Moves the out by 12 digits to the left.
852 FelemSqrReduce(out, out);
853 }
854 FelemMulReduce(&inE24, &inE96, out);
855
856 // Construct inE48
857 FelemAssign(out, &inE24);
858 for (i = 0; i < 24; ++i) { // Moves the out by 24 digits to the left.
859 FelemSqrReduce(out, out);
860 }
861 FelemMulReduce(out, &inE24, out);
862
863 // Construct inE96
864 FelemAssign(&inE96, out);
865 for (i = 0; i < 48; ++i) { // Moves the out by 48 digits to the left.
866 FelemSqrReduce(out, out);
867 }
868 FelemMulReduce(&inE96, &inE96, out);
869
870 // Construct inE7, and store it in inE6
871 FelemSqrReduce(&inE6, &inE6);
872 FelemMulReduce(&inE6, &inE6, &inE1);
873
874 // Construct inE31, and store it in inE24
875 for (i = 0; i < 7; ++i) { // Moves the inE24 by 7 digits to the left.
876 FelemSqrReduce(&inE24, &inE24);
877 }
878 FelemMulReduce(&inE24, &inE6, &inE24);
879
880 // Construct inE127, and store it in inE24
881 FelemAssign(out, &inE96);
882 for (i = 0; i < 31; ++i) { // Moves the out by 31 digits to the left.
883 FelemSqrReduce(out, out);
884 }
885 FelemMulReduce(&inE24, &inE24, out);
886
887 // Move inE127 by 97 bits to the left and add inE97 to obtain inE(2^224 - 2^96 + 1 - 2)
888 for (i = 0; i < 97; ++i) { // shifts inE24 to the left by 97 bits, and inE24 stores inE127.
889 FelemSqrReduce(&inE24, &inE24);
890 }
891 FelemMulReduce(out, &inE96, &inE24);
892 }
893
894 /* --------------------------Point group operation-------------------------- */
PtAssign(Point * out,const Point * in)895 static inline void PtAssign(Point *out, const Point *in)
896 {
897 FelemAssign(&out->x, &in->x);
898 FelemAssign(&out->y, &in->y);
899 FelemAssign(&out->z, &in->z);
900 }
901
PtAssignWithMask(Point * out,const Point * in,const uint64_t mask)902 static inline void PtAssignWithMask(Point *out, const Point *in, const uint64_t mask)
903 {
904 FelemAssignWithMask(&out->x, &in->x, mask);
905 FelemAssignWithMask(&out->y, &in->y, mask);
906 FelemAssignWithMask(&out->z, &in->z, mask);
907 }
908
909 /*
910 * double the point
911 * Algorithm reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
912 * Number of field operations: 3M + 5S
913 * delta = Z^2
914 * gamma = Y^2
915 * beta = X * gamma
916 * alpha = 3 * (X - delta) * (X + delta)
917 * X' = alpha^2 - 8 * beta
918 * Z' = (Y + Z)^2 - gamma - delta
919 * Y' = alpha * (4 * beta - X') - 8 * gamma^2
920 * Input:
921 * in->x[] < 2^59, in->y[] < 2^61, in->z[] < 2^61
922 * Output:
923 * reduce(out->x), reduce(out->y), out->z[] < 2^61
924 */
PtDouble(Point * out,const Point * in)925 static void PtDouble(Point *out, const Point *in)
926 {
927 Felem delta, gamma, beta, alpha;
928 Felem tmp, tmp2;
929 LongFelem ltmp, ltmp2;
930
931 // delta = Z^2
932 FelemSqrReduce(&delta, &in->z);
933
934 // gamma = Y^2
935 FelemSqrReduce(&gamma, &in->y);
936
937 // beta = X * gamma
938 FelemMulReduce(&beta, &in->x, &gamma);
939
940 // alpha = 3 * (X - delta) * (X + delta)
941 FelemAdd(&tmp, &in->x, &delta); // tmp[] < 2^59 + 2^57 < 2^60
942 FelemScale(&tmp, &tmp, 3); // tmp[] < 2^60 * 3 < 2^62
943 FelemSub(&tmp2, &in->x, &delta); // tmp2[] < 2^60 + 2^4 + 2^59 < 2^61
944 FelemMulReduce(&alpha, &tmp, &tmp2);
945
946 // X' = alpha^2 - 8 * beta
947 FelemSqrReduce(&tmp, &alpha); // alpha^2, tmp[] < 2^56 + 2^16
948 FelemScale(&tmp2, &beta, 8); // tmp2[] < (2^56 + 2^16) * 8 = 2^59 + 2^19
949 FelemSub(&out->x, &tmp, &tmp2); // xout[] < 2^60 + 2^4 + 2^56 + 2^16 < 2^61
950 FelemReduce(&out->x, &out->x); // xout[] < 2^56 + 2^7
951
952 // Z' = (Y + Z)^2 - gamma - delta
953 FelemAdd(&tmp, &in->y, &in->z); // < 2^61 + 2^61 = 2^62
954 FelemSqrReduce(&tmp, &tmp); // (Y + Z)^2, tmp[] < 2^56 + 2^16
955 FelemAdd(&tmp2, &gamma, &delta); // (gamma + delta), tmp2[] < (2^56 + 2^16) * 2 = 2^57 + 2^17
956 FelemSub(&out->z, &tmp, &tmp2); // zout[] < 2^60 + 2^4 + 2^56 + 2^16 < 2^61
957
958 // Y' = alpha * (4 * beta - X') - 8 * gamma^2
959 FelemScale(&tmp, &beta, 4); // tmp[] < (2^56 + 2^16) * 4 = 2^58 + 2^18
960 FelemSub(&tmp, &tmp, &out->x); // tmp[] < 2^60 + 2^4 + 2^58 + 2^18 < 2^61
961 FelemMul(<mp, &alpha, &tmp); // alpha * (4 * beta - X'), ltmp[] < 2^(61 * 2) * 4 = 2^124
962 FelemSqr(<mp2, &gamma); // gamma < 2^57, ltmp2[] < 2^(57 * 2) * 4 = 2^116
963 LongFelemScale(<mp2, <mp2, 8); // 8 * gamma^2, ltmp2[] < 2^116 * 8 = 2^119
964 LongFelemSub(<mp, <mp, <mp2); // ltmp[] < 2^124 + 2^124 < 2^125
965 LongFelemReduce(&out->y, <mp); // yout[] < 2^56 + 2^16
966 }
967
968 /*
969 * point addition
970 * Algorithm reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
971 * Infinity point calculation is not supported.
972 * Number of field operations: 11M + 5S
973 * Z1Z1 = Z1^2
974 * Z2Z2 = Z2^2
975 * U1 = X1 * Z2Z2
976 * U2 = X2 * Z1Z1
977 * S1 = Y1 * Z2 * Z2Z2
978 * S2 = Y2 * Z1 * Z1Z1
979 * H = U2 - U1
980 * I = (2 * H)^2
981 * J = H * I
982 * r = 2 * (S2 - S1)
983 * V = U1 * I
984 * X3 = r^2 - J - 2 * V
985 * Y3 = r * (V - X3) - 2 * S1 * J
986 * Z3 = ((Z1 + Z2)^2 - Z1Z1 - Z2Z2) * H
987 * Input:
988 * a->x[] < 2^64, a->y[] < 2^64, a->z[] < 2^61
989 * reduce(b->x), b->y[] < 2^58, reduce(b->z)
990 * Output:
991 * out->x[] < max(reduce(out->x), a->x[])
992 * out->y[] < max(reduce(out->y), a->y[], b->y[])
993 * out->z[] < max(reduce(out->z), a->z[])
994 */
PtAdd(Point * out,const Point * a,const Point * b)995 static void PtAdd(Point *out, const Point *a, const Point *b)
996 {
997 Point result;
998 Felem z1sqr, z2sqr, u1, u2, s1, s2, h, i, j, r, v;
999 Felem tmp;
1000 LongFelem ltmp, ltmp2;
1001 uint64_t isZ1Zero, isZ2Zero;
1002
1003 // Z1Z1 = Z1^2
1004 FelemSqrReduce(&z1sqr, &a->z);
1005
1006 // Z2Z2 = Z2^2
1007 FelemSqrReduce(&z2sqr, &b->z);
1008
1009 isZ1Zero = 0 - FelemIsZero(&z1sqr);
1010 isZ2Zero = 0 - FelemIsZero(&z2sqr);
1011
1012 // U1 = X1 * Z2Z2
1013 FelemMulReduce(&u1, &a->x, &z2sqr);
1014
1015 // U2 = X2 * Z1Z1
1016 FelemMulReduce(&u2, &b->x, &z1sqr);
1017
1018 // S1 = Y1 * Z2 * Z2Z2
1019 FelemMulReduce(&s1, &b->z, &z2sqr); // Z2 * Z2Z2
1020 FelemMulReduce(&s1, &a->y, &s1);
1021
1022 // S2 = Y2 * Z1 * Z1Z1
1023 FelemMulReduce(&s2, &a->z, &z1sqr); // Z1 * Z1Z1
1024 FelemMulReduce(&s2, &b->y, &s2);
1025
1026 // H = U2 - U1
1027 FelemSub(&h, &u2, &u1);
1028 FelemReduce(&h, &h);
1029
1030 // r = 2 * (S2 - S1)
1031 FelemSub(&r, &s2, &s1);
1032 FelemScale(&r, &r, 2); // 2 * (S2 - S1)
1033 FelemReduce(&r, &r);
1034
1035 // H and r can determine whether x and y of the affine coordinates of two points are equal.
1036 // If the values are equal, double the values.
1037 if (isZ1Zero == 0 && isZ2Zero == 0 && FelemIsZero(&h) != 0 && FelemIsZero(&r) != 0) {
1038 // Use the smaller b point
1039 PtDouble(out, b);
1040 return;
1041 }
1042
1043 // I = (2 * H)^2
1044 FelemScale(&tmp, &h, 2);
1045 FelemSqrReduce(&i, &tmp);
1046
1047 // J = H * I
1048 FelemMulReduce(&j, &h, &i);
1049
1050 // V = U1 * I
1051 FelemMulReduce(&v, &u1, &i);
1052
1053 // X3 = r^2 - (J + 2 * V)
1054 FelemSqrReduce(&result.x, &r); // r^2
1055 FelemScale(&tmp, &v, 2); // tmp[] < (2^56 + 2^16) * 2 = 2^57 + 2^17
1056 FelemAdd(&tmp, &tmp, &j); // J + 2 * V, tmp[] < (2^57 + 2^17) + (2^56 + 2^16) < 2^58
1057 FelemSub(&result.x, &result.x, &tmp); // result.x[] < (2^60 + 2^4) + (2^56 + 2^16) < 2^61
1058 FelemReduce(&result.x, &result.x); // result.x[] < 2^56 + 2^7
1059
1060 // Y3 = r * (V - X3) - 2 * S1 * J
1061 FelemSub(&tmp, &v, &result.x); // tmp < (2^60 + 2^4) + (2^56 + 2^16) < 2^61
1062 FelemMul(<mp, &r, &tmp); // r * (V - X3), ltmp[] < 2^57 * 2^61 * 4 = 2^120
1063 FelemMul(<mp2, &s1, &j); // ltmp2[] < 2^57 * 2^57 * 4 = 2^116
1064 LongFelemScale(<mp2, <mp2, 2); // 2 * S1 * J, ltmp2[] < 2^117
1065 LongFelemSub(<mp, <mp, <mp2); // ltmp[] < 2^124 + 2^120 < 2^125
1066 LongFelemReduce(&result.y, <mp); // result.y[] < 2^56 + 2^16
1067
1068 // Z3 = ((Z1 + Z2)^2 - Z1Z1 - Z2Z2) * H
1069 FelemAdd(&result.z, &a->z, &b->z); // Z1 + Z2, result.z[] < 2^61 + 2^57 < 2^62
1070 FelemSqrReduce(&result.z, &result.z); // (Z1 + Z2)^2
1071 FelemAdd(&tmp, &z1sqr, &z2sqr); // Z1Z1 + Z2Z2
1072 FelemSub(&result.z, &result.z, &tmp); // ((Z1 + Z2)^2 - Z1Z1 - Z2Z2)
1073 FelemMulReduce(&result.z, &result.z, &h); // result.z[] < 2^56 + 2^16
1074
1075 // Special case processing for infinity points
1076 PtAssignWithMask(&result, a, isZ2Zero);
1077 PtAssignWithMask(&result, b, isZ1Zero);
1078 PtAssign(out, &result);
1079 }
1080
1081 /*
1082 * mixed addition of point
1083 * Algorithm reference: http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-madd-2007-bl
1084 * Infinity point calculation is not supported.
1085 * Number of field operations: 7M + 4S
1086 * Z1Z1 = Z1^2
1087 * U2 = X2 * Z1Z1
1088 * S2 = Y2 * Z1 * Z1Z1
1089 * H = U2 - X1
1090 * HH = H^2
1091 * I = 4 * HH
1092 * J = H * I
1093 * r = 2 * (S2 - Y1)
1094 * V = X1 * I
1095 * X3 = r^2 - J - 2 * V
1096 * Y3 = r * (V - X3) - 2 * Y1 * J
1097 * Z3 = (Z1 + H)^2 - Z1Z1 - HH
1098 * Input:
1099 * a->x[] <= 2^60 - 2^44 - 2^4, a->y[] <= 2^60 - 2^44 - 2^4, a->z[] < 2^61
1100 * b->x < p, b->y < p, b->z = 0 or 1
1101 * Output:
1102 * out->x[] < max(reduce(out->x), a->x[])
1103 * out->y[] < max(reduce(out->y), a->y[])
1104 * out->z[] < 2^61
1105 */
PtAddMixed(Point * out,const Point * a,const Point * b)1106 static void PtAddMixed(Point *out, const Point *a, const Point *b)
1107 {
1108 Point result;
1109 Felem z1sqr, u2, s2, h, hsqr, i, j, r, v;
1110 Felem tmp;
1111 LongFelem ltmp, ltmp2;
1112 uint64_t isZ1Zero, isZ2Zero;
1113
1114 // Z1Z1 = Z1^2
1115 FelemSqrReduce(&z1sqr, &a->z);
1116
1117 isZ1Zero = 0 - FelemIsZero(&z1sqr);
1118 // The Z coordinate of point b can only be 0 or 1, that is, the bit 0 can only be 0 or 1, and the other bits are 0.
1119 isZ2Zero = b->z.data[0] - 1;
1120
1121 // U2 = X2 * Z1Z1
1122 FelemMulReduce(&u2, &b->x, &z1sqr);
1123
1124 // S2 = Y2 * Z1 * Z1Z1
1125 FelemMulReduce(&s2, &a->z, &z1sqr); // Z1 * Z1Z1
1126 FelemMulReduce(&s2, &b->y, &s2);
1127
1128 // H = U2 - X1
1129 FelemSub(&h, &u2, &a->x);
1130 FelemReduce(&h, &h);
1131
1132 // r = 2 * (S2 - Y1)
1133 FelemSub(&r, &s2, &a->y); // r[] < (2^60 + 2^4) + (2^56 + 2^16) < 2^61
1134 FelemScale(&r, &r, 2); // r[] < 2^62
1135 FelemReduce(&r, &r);
1136
1137 // H and r can determine whether x and y of the affine coordinates of two points are equal.
1138 // If they are equal, double the point.
1139 if (isZ1Zero == 0 && isZ2Zero == 0 && FelemIsZero(&h) != 0 && FelemIsZero(&r) != 0) {
1140 // Use the smaller b point
1141 PtDouble(out, b);
1142 return;
1143 }
1144
1145 // HH = H^2
1146 FelemSqrReduce(&hsqr, &h);
1147
1148 // I = 4 * HH
1149 FelemScale(&i, &hsqr, 4); // i[] < (2^56 + 2^16) * 4 = 2^58 + 2^18
1150
1151 // J = H * I
1152 FelemMulReduce(&j, &h, &i);
1153
1154 // V = X1 * I
1155 FelemMulReduce(&v, &a->x, &i);
1156
1157 // X3 = r^2 - J - 2 * V
1158 FelemSqrReduce(&result.x, &r); // r^2
1159 FelemScale(&tmp, &v, 2); // 2 * V, tmp[] < (2^56 + 2^16) * 2 = 2^57 + 2^17
1160 FelemAdd(&tmp, &j, &tmp); // J + 2 * V, tmp[] < (2^56 + 2^16) + (2^57 + 2^17) < 2^58
1161 FelemSub(&result.x, &result.x, &tmp); // result.x[] < (2^60 + 2^4) + (2^56 + 2^16) < 2^61
1162 FelemReduce(&result.x, &result.x); // result.x[] < 2^56 + 2^7
1163
1164 // Y3 = r * (V - X3) - 2 * Y1 * J
1165 FelemSub(&tmp, &v, &result.x); // V - X3, tmp[] < (2^60 + 2^4) + (2^56 + 2^16) < 2^61
1166 FelemMul(<mp, &r, &tmp); // r * (V - X3), ltmp[] < 2^57 * 2^61 * 4 = 2^120
1167 FelemMul(<mp2, &a->y, &j); // Y1 * J, ltmp2[] < 2^61 * 2^57 * 4 = 2^120
1168 LongFelemScale(<mp2, <mp2, 2); // 2 * Y1 * J, ltmp2[] < 2^120 * 2 = 2^121
1169 LongFelemSub(<mp, <mp, <mp2); // ltmp[] < 2^124 + 2^120 < 2^125
1170 LongFelemReduce(&result.y, <mp); // result.y[] < 2^56 + 2^16
1171
1172 // Z3 = (Z1 + H)^2 - Z1Z1 - HH
1173 FelemAdd(&result.z, &a->z, &h); // Z1 + H, result.z[] < 2^61 + (2^56 + 2^7) = 2^62
1174 FelemSqrReduce(&result.z, &result.z);
1175 FelemAdd(&tmp, &z1sqr, &hsqr); // Z1Z1 + HH, tmp[] < (2^56 + 2^16) + (2^56 + 2^16) < 2^58
1176 FelemSub(&result.z, &result.z, &tmp); // result.z[] < (2^60 + 2^4) + 2^58 < 2^61
1177
1178 // Special case processing for infinity points
1179 PtAssignWithMask(&result, a, isZ2Zero);
1180 PtAssignWithMask(&result, b, isZ1Zero);
1181 PtAssign(out, &result);
1182 }
1183
1184 /* Select the point that subscript is index in the table and place it in the Point *point.
1185 The anti-side channel processing exists. */
GetPointFromTable(Point * point,const Point table[],uint32_t pointNum,const uint32_t index)1186 static inline void GetPointFromTable(Point *point, const Point table[], uint32_t pointNum, const uint32_t index)
1187 {
1188 uint64_t mask;
1189 for (uint32_t i = 0; i < pointNum; i++) {
1190 /* If i is equal to index, the last mask is all Fs. Otherwise, the last mask is all 0s. */
1191 mask = (0 - (i ^ index)) >> 31; // shifted rightwards by 31 bits to get the most significant bit
1192 mask--;
1193 /* Conditional value assignment, valid only when i == index */
1194 PtAssignWithMask(point, &table[i], mask);
1195 }
1196 }
1197
1198 /*
1199 * Input:
1200 * k1 < n
1201 * 0 <= i < 28
1202 * Output:
1203 * out->x < p, out->y < p, out->z = 0 OR 1
1204 */
GetUpperPrecomputePtOfG(Point * out,const Felem * k1,int32_t curBit)1205 static inline void GetUpperPrecomputePtOfG(Point *out, const Felem *k1, int32_t curBit)
1206 {
1207 uint32_t bits;
1208 uint32_t i = (uint32_t)curBit;
1209
1210 // The i bit of the upper half of digit 0. (BASE_BITS/2) is half-wide.
1211 bits = (uint32_t)(k1->data[0] >> (i + BASE_BITS / 2)) & 1;
1212 // The i bit of the upper half of digit 1. (BASE_BITS/2) is half-wide.
1213 bits |= ((uint32_t)(k1->data[1] >> (i + BASE_BITS / 2)) & 1) << 1;
1214 // The i bit of the upper half of digit 2. (BASE_BITS/2) is half-wide.
1215 bits |= ((uint32_t)(k1->data[2] >> (i + BASE_BITS / 2)) & 1) << 2;
1216 // The i bit of the upper half of digit 3. (BASE_BITS/2) is half-wide.
1217 bits |= ((uint32_t)(k1->data[3] >> (i + BASE_BITS / 2)) & 1) << 3;
1218
1219 GetPointFromTable(out, PRE_MUL_G2, TABLE_G_SIZE, bits);
1220 }
1221
1222 /*
1223 * Input:
1224 * k1 < n
1225 * 0 <= i < 28
1226 * Output:
1227 * out->x < p, out->y < p, out->z = 0 or 1
1228 */
GetLowerPrecomputePtOfG(Point * out,const Felem * k1,int32_t curBit)1229 static inline void GetLowerPrecomputePtOfG(Point *out, const Felem *k1, int32_t curBit)
1230 {
1231 uint32_t bits;
1232 uint32_t i = (uint32_t)curBit;
1233
1234 bits = (uint32_t)(k1->data[0] >> i) & 1; // The i bit of the lower half of digit 0.
1235 bits |= ((uint32_t)(k1->data[1] >> i) & 1) << 1; // The i bit of the lower half of digit 1.
1236 bits |= ((uint32_t)(k1->data[2] >> i) & 1) << 2; // The i bit of the lower half of digit 2.
1237 bits |= ((uint32_t)(k1->data[3] >> i) & 1) << 3; // The i bit of the lower half of digit 3.
1238
1239 GetPointFromTable(out, PRE_MUL_G, TABLE_G_SIZE, bits);
1240 }
1241
1242 /*
1243 * Input:
1244 * k2 < n
1245 * 0 <= i <= 220
1246 * The coordinates of each point of preMulPt are reduced.
1247 * Output:
1248 * reduce(out->x)
1249 * out->y[] < max(reduce(out->y), negY)
1250 * reduce(out->z)
1251 */
GetPrecomputePtOfP(Point * out,const Felem * k2,int32_t curBit,const Point preMulPt[TABLE_P_SIZE])1252 static inline void GetPrecomputePtOfP(Point *out, const Felem *k2, int32_t curBit, const Point preMulPt[TABLE_P_SIZE])
1253 {
1254 uint32_t bits;
1255 uint32_t sign, value; // the grouping sign and actual value.
1256 Felem negY;
1257 // Obtain the 5-bit signed code and read the sign bits of the next group of numbers
1258 // to determine whether there is a carry. The total length is 6.
1259 bits = (uint32_t)FelemGetBits(k2, curBit - 1, WINDOW_SIZE + 1);
1260 DecodeScalarCode(&sign, &value, bits);
1261
1262 GetPointFromTable(out, preMulPt, TABLE_P_SIZE, value);
1263
1264 // out->y < 2^56 + 2^16
1265 FelemNeg(&negY, &out->y); // negY[] < (2^57 + 2^1) + (2^56 + 2^16) < 2^58
1266 FelemAssignWithMask(&out->y, &negY, (uint64_t)0 - sign);
1267 }
1268
1269 /*
1270 * Calculate k1 * G + k2 * P
1271 * Input:
1272 * k1 < n
1273 * k2 < n
1274 * The coordinates of each point of preMulPt are reduced.
1275 * Output:
1276 * out->x < p, out->y < p, out->z < p
1277 */
PtMul(Point * out,const Felem * k1,const Felem * k2,const Point preMulPt[TABLE_P_SIZE])1278 static void PtMul(Point *out, const Felem *k1, const Felem *k2, const Point preMulPt[TABLE_P_SIZE])
1279 {
1280 Point ptQ = {}; // ptQ stores result
1281 Point ptPre = {}; // ptPre stores the points obtained from the table.
1282 bool isGMul = k1 != NULL;
1283 bool isPtMul = k2 != NULL && preMulPt != NULL;
1284 int32_t curBit;
1285
1286 // Initialize the Q point.
1287 if (isPtMul) {
1288 curBit = 220; // Start from 220th bit.
1289 // From k2's (_, 223, 222, 221, 220, 219) bit coding to select the initial point
1290 GetPrecomputePtOfP(&ptQ, k2, curBit, preMulPt);
1291 } else if (isGMul) {
1292 curBit = 27; // Start from 27th.
1293 // From k1's (195, 139, 83, 27) bit coding to select the initial point
1294 GetLowerPrecomputePtOfG(&ptQ, k1, curBit);
1295 // From k1's (195 + 28, 139 + 28, 83 + 28, 27 + 28) bit to select the pre-calculation point
1296 // and adds it to the point Q.
1297 GetUpperPrecomputePtOfG(&ptPre, k1, curBit);
1298 PtAddMixed(&ptQ, &ptQ, &ptPre);
1299 } else {
1300 // k1 and k2 are all NULL, and the infinite point is output.
1301 (void)memset_s((void *)out, sizeof(Point), 0, sizeof(Point));
1302 return;
1303 }
1304
1305 // Operation chain: point Q output range:
1306 // x[] y[] z[]
1307 // Initialization the value reduced reduced < 2^61
1308 // ↓
1309 // Double ←↑ reduced reduced < 2^61
1310 // ↓ ↑
1311 // mixed add ↑ reduced reduced < 2^61
1312 // ↓ ↑
1313 // mixed add →↑ reduced reduced < 2^61
1314 // ↓ ↑
1315 // negation of Y ↑ reduced < 2^58 < 2^61
1316 // ↓ ↑
1317 // add →↑ reduced < 2^58 < 2^61
1318
1319 while (--curBit >= 0) {
1320 // Start to shift right bit by bit. Due to the initialization of the most significant bit,
1321 // common point multiplication starts from 219th bit and base point multiplication starts from 26th bit.
1322 // Whether G-point multiplication is performed in the current cycle,
1323 // calculated once in each cycle starting from bit 27.
1324 bool isStepGMul = curBit <= 27;
1325 // Whether the current cycle is a common point multiplication, calculated once every 5 cycles
1326 bool isStepPtMul = curBit % WINDOW_SIZE == 0;
1327
1328 PtDouble(&ptQ, &ptQ);
1329
1330 // Generator G multiplication part
1331 // Divide k1 into eight segments, from high bits to low bits,
1332 // select bits from each segment and combine them together, and read the pre-computation table.
1333 // To reduce the precomputation table,
1334 // the divided eight segments are combined according to the upper half and the lower half
1335 if (isGMul && isStepGMul) {
1336 // Add the point multiplication result of the current bit of the eight-segment to the point Q
1337 GetLowerPrecomputePtOfG(&ptPre, k1, curBit);
1338 PtAddMixed(&ptQ, &ptQ, &ptPre);
1339
1340 GetUpperPrecomputePtOfG(&ptPre, k1, curBit);
1341 PtAddMixed(&ptQ, &ptQ, &ptPre);
1342 }
1343
1344 // Common point multiplication part
1345 // Use the sliding window signed encoding method
1346 // to group the most significant bits to the least significant bits every five bits.
1347 // Each group of numbers is regarded as a signed number. 00000 to 01111 are decimal numbers 0 to 15,
1348 // and 10000 to 11111 are decimal numbers (32-16) to (32-1).
1349 // This is equivalent to the set the number in complement form after the number carries 1.
1350 // for example
1351 // 11011(Complement) = 100000 - 00101
1352 if (isPtMul && isStepPtMul) {
1353 // Add the point multiplication result of the current group to the point Q.
1354 GetPrecomputePtOfP(&ptPre, k2, curBit, preMulPt);
1355 PtAdd(&ptQ, &ptQ, &ptPre);
1356 }
1357 }
1358
1359 // Refer to the output range of the operation chain. Reduce the Y and Z coordinates.
1360 FelemReduce(&ptQ.y, &ptQ.y);
1361 FelemReduce(&ptQ.z, &ptQ.z);
1362 // do the modulo operation then output.
1363 FelemContract(&ptQ.x, &ptQ.x);
1364 FelemContract(&ptQ.y, &ptQ.y);
1365 FelemContract(&ptQ.z, &ptQ.z);
1366
1367 PtAssign(out, &ptQ);
1368 }
1369
1370 /*
1371 * Convert Jacobian coordinates to affine coordinates by a given module inverse
1372 */
PtMakeAffineWithInv(Point * out,const Point * in,const Felem * zInv)1373 static void PtMakeAffineWithInv(Point *out, const Point *in, const Felem *zInv)
1374 {
1375 Felem tmp;
1376
1377 // 1/Z^2
1378 FelemSqrReduce(&tmp, zInv);
1379
1380 // X/Z^2
1381 FelemMulReduce(&out->x, &in->x, &tmp);
1382 FelemContract(&out->x, &out->x);
1383
1384 // 1/Z^3
1385 FelemMulReduce(&tmp, &tmp, zInv);
1386
1387 // Y/Z^3
1388 FelemMulReduce(&out->y, &in->y, &tmp);
1389 FelemContract(&out->y, &out->y);
1390
1391 FelemSetLimb(&out->z, 1);
1392 }
1393
1394 /*
1395 * Obtain the pre-multiplication table of the input point pt, including 0pt-16pt.
1396 * The coordinates of all points are reduced.
1397 */
GetPreMulPt(Point preMulPt[TABLE_P_SIZE],const ECC_Point * pt)1398 static int32_t GetPreMulPt(Point preMulPt[TABLE_P_SIZE], const ECC_Point *pt)
1399 {
1400 int32_t ret;
1401
1402 // 0pt
1403 (void)memset_s((void *)&preMulPt[0], sizeof(Point), 0, sizeof(Point));
1404 // 1pt
1405 GOTO_ERR_IF_EX(BN2Felem(&preMulPt[1].x, pt->x), ret);
1406 GOTO_ERR_IF_EX(BN2Felem(&preMulPt[1].y, pt->y), ret);
1407 GOTO_ERR_IF_EX(BN2Felem(&preMulPt[1].z, pt->z), ret);
1408 // 2pt ~ 15pt
1409 for (uint32_t i = 2; i < 15; i += 2) {
1410 PtDouble(&preMulPt[i], &preMulPt[i >> 1]);
1411 // Z coordinate after the doubled point is reduced.
1412 FelemReduce(&preMulPt[i].z, &preMulPt[i].z);
1413
1414 PtAdd(&preMulPt[i + 1], &preMulPt[i], &preMulPt[1]);
1415 }
1416 // 16pt
1417 PtDouble(&preMulPt[16], &preMulPt[16 >> 1]);
1418 // Z coordinate of the 16pt after the doubled point 16pt is reduced.
1419 FelemReduce(&preMulPt[16].z, &preMulPt[16].z);
1420 ERR:
1421 return ret;
1422 }
1423
ECP224_PointMulAdd(ECC_Para * para,ECC_Point * r,const BN_BigNum * k1,const BN_BigNum * k2,const ECC_Point * pt)1424 int32_t ECP224_PointMulAdd(
1425 ECC_Para *para, ECC_Point *r, const BN_BigNum *k1, const BN_BigNum *k2, const ECC_Point *pt)
1426 {
1427 int32_t retVal;
1428 Felem fK1;
1429 Felem fK2;
1430 Point preMulPt[TABLE_P_SIZE];
1431 Point out;
1432
1433 // Check the input parameters.
1434 GOTO_ERR_IF(CheckParaValid(para, CRYPT_ECC_NISTP224), retVal);
1435 GOTO_ERR_IF(CheckPointValid(r, CRYPT_ECC_NISTP224), retVal);
1436 GOTO_ERR_IF(CheckBnValid(k1, FELEM_BITS), retVal);
1437 GOTO_ERR_IF(CheckBnValid(k2, FELEM_BITS), retVal);
1438 GOTO_ERR_IF(CheckPointValid(pt, CRYPT_ECC_NISTP224), retVal);
1439 // Special treatment of infinity points
1440 if (BN_IsZero(pt->z)) {
1441 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_AT_INFINITY);
1442 return CRYPT_ECC_POINT_AT_INFINITY;
1443 }
1444
1445 GOTO_ERR_IF_EX(BN2Felem(&fK1, k1), retVal);
1446 GOTO_ERR_IF_EX(BN2Felem(&fK2, k2), retVal);
1447 GOTO_ERR_IF_EX(GetPreMulPt(preMulPt, pt), retVal);
1448
1449 PtMul(&out, &fK1, &fK2, preMulPt);
1450 GOTO_ERR_IF_EX(Felem2BN(r->x, &out.x), retVal);
1451 GOTO_ERR_IF_EX(Felem2BN(r->y, &out.y), retVal);
1452 GOTO_ERR_IF_EX(Felem2BN(r->z, &out.z), retVal);
1453 ERR:
1454 return retVal;
1455 }
1456
ECP224_PointMul(ECC_Para * para,ECC_Point * r,const BN_BigNum * k,const ECC_Point * pt)1457 int32_t ECP224_PointMul(ECC_Para *para, ECC_Point *r, const BN_BigNum *k, const ECC_Point *pt)
1458 {
1459 int32_t retVal;
1460 Felem felemK;
1461 Point preMulPt[TABLE_P_SIZE];
1462 Point out;
1463
1464 // Check the input parameters.
1465 GOTO_ERR_IF(CheckParaValid(para, CRYPT_ECC_NISTP224), retVal);
1466 GOTO_ERR_IF(CheckPointValid(r, CRYPT_ECC_NISTP224), retVal);
1467 GOTO_ERR_IF(CheckBnValid(k, FELEM_BITS), retVal);
1468 if (pt != NULL) {
1469 GOTO_ERR_IF(CheckPointValid(pt, CRYPT_ECC_NISTP224), retVal);
1470 // Special treatment of infinity points
1471 if (BN_IsZero(pt->z)) {
1472 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_AT_INFINITY);
1473 return CRYPT_ECC_POINT_AT_INFINITY;
1474 }
1475 }
1476
1477 GOTO_ERR_IF_EX(BN2Felem(&felemK, k), retVal);
1478 // When pt is NULL, r = k * G
1479 if (pt == NULL) {
1480 PtMul(&out, &felemK, NULL, NULL);
1481 } else { // If pt is not null, r = k * pt
1482 GOTO_ERR_IF_EX(GetPreMulPt(preMulPt, pt), retVal);
1483 PtMul(&out, NULL, &felemK, preMulPt);
1484 }
1485
1486 GOTO_ERR_IF_EX(Felem2BN(r->x, &out.x), retVal);
1487 GOTO_ERR_IF_EX(Felem2BN(r->y, &out.y), retVal);
1488 GOTO_ERR_IF_EX(Felem2BN(r->z, &out.z), retVal);
1489 ERR:
1490 return retVal;
1491 }
1492
ECP224_Point2Affine(const ECC_Para * para,ECC_Point * r,const ECC_Point * pt)1493 int32_t ECP224_Point2Affine(const ECC_Para *para, ECC_Point *r, const ECC_Point *pt)
1494 {
1495 int32_t retVal;
1496 Point out;
1497 Felem zInv;
1498
1499 // Check the input parameters.
1500 GOTO_ERR_IF(CheckParaValid(para, CRYPT_ECC_NISTP224), retVal);
1501 GOTO_ERR_IF(CheckPointValid(r, CRYPT_ECC_NISTP224), retVal);
1502 GOTO_ERR_IF(CheckPointValid(pt, CRYPT_ECC_NISTP224), retVal);
1503 // Special treatment of infinity points
1504 if (BN_IsZero(pt->z)) {
1505 BSL_ERR_PUSH_ERROR(CRYPT_ECC_POINT_AT_INFINITY);
1506 return CRYPT_ECC_POINT_AT_INFINITY;
1507 }
1508
1509 GOTO_ERR_IF_EX(BN2Felem(&out.x, pt->x), retVal);
1510 GOTO_ERR_IF_EX(BN2Felem(&out.y, pt->y), retVal);
1511 GOTO_ERR_IF_EX(BN2Felem(&out.z, pt->z), retVal);
1512
1513 FelemInv(&zInv, &out.z);
1514 PtMakeAffineWithInv(&out, &out, &zInv);
1515
1516 GOTO_ERR_IF_EX(Felem2BN(r->x, &out.x), retVal);
1517 GOTO_ERR_IF_EX(Felem2BN(r->y, &out.y), retVal);
1518 GOTO_ERR_IF_EX(Felem2BN(r->z, &out.z), retVal);
1519 ERR:
1520 return retVal;
1521 }
1522
1523
1524 #endif /* defined(HITLS_CRYPTO_CURVE_NISTP224) && defined(HITLS_CRYPTO_NIST_USE_ACCEL) */
1525