1 /* Autogenerated: src/ExtractionOCaml/word_by_word_montgomery --static p256 '2^256 - 2^224 + 2^192 + 2^96 - 1' 64 mul square add sub opp from_montgomery nonzero selectznz to_bytes from_bytes */
2 /* curve description: p256 */
3 /* requested operations: mul, square, add, sub, opp, from_montgomery, nonzero, selectznz, to_bytes, from_bytes */
4 /* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */
5 /* machine_wordsize = 64 (from "64") */
6 /* */
7 /* NOTE: In addition to the bounds specified above each function, all */
8 /* functions synthesized for this Montgomery arithmetic require the */
9 /* input to be strictly less than the prime modulus (m), and also */
10 /* require the input to be in the unique saturated representation. */
11 /* All functions also ensure that these two properties are true of */
12 /* return values. */
13
14 #include <stdint.h>
15 typedef unsigned char fiat_p256_uint1;
16 typedef signed char fiat_p256_int1;
17 typedef signed __int128 fiat_p256_int128;
18 typedef unsigned __int128 fiat_p256_uint128;
19
20 #if (-1 & 3) != 3
21 #error "This code only works on a two's complement system"
22 #endif
23
24
25 /*
26 * The function fiat_p256_addcarryx_u64 is an addition with carry.
27 * Postconditions:
28 * out1 = (arg1 + arg2 + arg3) mod 2^64
29 * out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
30 *
31 * Input Bounds:
32 * arg1: [0x0 ~> 0x1]
33 * arg2: [0x0 ~> 0xffffffffffffffff]
34 * arg3: [0x0 ~> 0xffffffffffffffff]
35 * Output Bounds:
36 * out1: [0x0 ~> 0xffffffffffffffff]
37 * out2: [0x0 ~> 0x1]
38 */
fiat_p256_addcarryx_u64(uint64_t * out1,fiat_p256_uint1 * out2,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)39 static void fiat_p256_addcarryx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
40 fiat_p256_uint128 x1 = ((arg1 + (fiat_p256_uint128)arg2) + arg3);
41 uint64_t x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
42 fiat_p256_uint1 x3 = (fiat_p256_uint1)(x1 >> 64);
43 *out1 = x2;
44 *out2 = x3;
45 }
46
47 /*
48 * The function fiat_p256_subborrowx_u64 is a subtraction with borrow.
49 * Postconditions:
50 * out1 = (-arg1 + arg2 + -arg3) mod 2^64
51 * out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
52 *
53 * Input Bounds:
54 * arg1: [0x0 ~> 0x1]
55 * arg2: [0x0 ~> 0xffffffffffffffff]
56 * arg3: [0x0 ~> 0xffffffffffffffff]
57 * Output Bounds:
58 * out1: [0x0 ~> 0xffffffffffffffff]
59 * out2: [0x0 ~> 0x1]
60 */
fiat_p256_subborrowx_u64(uint64_t * out1,fiat_p256_uint1 * out2,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)61 static void fiat_p256_subborrowx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
62 fiat_p256_int128 x1 = ((arg2 - (fiat_p256_int128)arg1) - arg3);
63 fiat_p256_int1 x2 = (fiat_p256_int1)(x1 >> 64);
64 uint64_t x3 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
65 *out1 = x3;
66 *out2 = (fiat_p256_uint1)(0x0 - x2);
67 }
68
69 /*
70 * The function fiat_p256_mulx_u64 is a multiplication, returning the full double-width result.
71 * Postconditions:
72 * out1 = (arg1 * arg2) mod 2^64
73 * out2 = ⌊arg1 * arg2 / 2^64⌋
74 *
75 * Input Bounds:
76 * arg1: [0x0 ~> 0xffffffffffffffff]
77 * arg2: [0x0 ~> 0xffffffffffffffff]
78 * Output Bounds:
79 * out1: [0x0 ~> 0xffffffffffffffff]
80 * out2: [0x0 ~> 0xffffffffffffffff]
81 */
fiat_p256_mulx_u64(uint64_t * out1,uint64_t * out2,uint64_t arg1,uint64_t arg2)82 static void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, uint64_t arg2) {
83 fiat_p256_uint128 x1 = ((fiat_p256_uint128)arg1 * arg2);
84 uint64_t x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
85 uint64_t x3 = (uint64_t)(x1 >> 64);
86 *out1 = x2;
87 *out2 = x3;
88 }
89
90 /*
91 * The function fiat_p256_cmovznz_u64 is a single-word conditional move.
92 * Postconditions:
93 * out1 = (if arg1 = 0 then arg2 else arg3)
94 *
95 * Input Bounds:
96 * arg1: [0x0 ~> 0x1]
97 * arg2: [0x0 ~> 0xffffffffffffffff]
98 * arg3: [0x0 ~> 0xffffffffffffffff]
99 * Output Bounds:
100 * out1: [0x0 ~> 0xffffffffffffffff]
101 */
fiat_p256_cmovznz_u64(uint64_t * out1,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)102 static void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
103 fiat_p256_uint1 x1 = (!(!arg1));
104 uint64_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
105 // Note this line has been patched from the synthesized code to add value
106 // barriers.
107 //
108 // Clang recognizes this pattern as a select. While it usually transforms it
109 // to a cmov, it sometimes further transforms it into a branch, which we do
110 // not want.
111 uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2));
112 *out1 = x3;
113 }
114
115 /*
116 * The function fiat_p256_mul multiplies two field elements in the Montgomery domain.
117 * Preconditions:
118 * 0 ≤ eval arg1 < m
119 * 0 ≤ eval arg2 < m
120 * Postconditions:
121 * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
122 * 0 ≤ eval out1 < m
123 *
124 * Input Bounds:
125 * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
126 * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
127 * Output Bounds:
128 * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
129 */
fiat_p256_mul(uint64_t out1[4],const uint64_t arg1[4],const uint64_t arg2[4])130 static void fiat_p256_mul(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
131 uint64_t x1 = (arg1[1]);
132 uint64_t x2 = (arg1[2]);
133 uint64_t x3 = (arg1[3]);
134 uint64_t x4 = (arg1[0]);
135 uint64_t x5;
136 uint64_t x6;
137 fiat_p256_mulx_u64(&x5, &x6, x4, (arg2[3]));
138 uint64_t x7;
139 uint64_t x8;
140 fiat_p256_mulx_u64(&x7, &x8, x4, (arg2[2]));
141 uint64_t x9;
142 uint64_t x10;
143 fiat_p256_mulx_u64(&x9, &x10, x4, (arg2[1]));
144 uint64_t x11;
145 uint64_t x12;
146 fiat_p256_mulx_u64(&x11, &x12, x4, (arg2[0]));
147 uint64_t x13;
148 fiat_p256_uint1 x14;
149 fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
150 uint64_t x15;
151 fiat_p256_uint1 x16;
152 fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
153 uint64_t x17;
154 fiat_p256_uint1 x18;
155 fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
156 uint64_t x19 = (x18 + x6);
157 uint64_t x20;
158 uint64_t x21;
159 fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
160 uint64_t x22;
161 uint64_t x23;
162 fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
163 uint64_t x24;
164 uint64_t x25;
165 fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
166 uint64_t x26;
167 fiat_p256_uint1 x27;
168 fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
169 uint64_t x28 = (x27 + x23);
170 uint64_t x29;
171 fiat_p256_uint1 x30;
172 fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
173 uint64_t x31;
174 fiat_p256_uint1 x32;
175 fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
176 uint64_t x33;
177 fiat_p256_uint1 x34;
178 fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
179 uint64_t x35;
180 fiat_p256_uint1 x36;
181 fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
182 uint64_t x37;
183 fiat_p256_uint1 x38;
184 fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
185 uint64_t x39;
186 uint64_t x40;
187 fiat_p256_mulx_u64(&x39, &x40, x1, (arg2[3]));
188 uint64_t x41;
189 uint64_t x42;
190 fiat_p256_mulx_u64(&x41, &x42, x1, (arg2[2]));
191 uint64_t x43;
192 uint64_t x44;
193 fiat_p256_mulx_u64(&x43, &x44, x1, (arg2[1]));
194 uint64_t x45;
195 uint64_t x46;
196 fiat_p256_mulx_u64(&x45, &x46, x1, (arg2[0]));
197 uint64_t x47;
198 fiat_p256_uint1 x48;
199 fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
200 uint64_t x49;
201 fiat_p256_uint1 x50;
202 fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
203 uint64_t x51;
204 fiat_p256_uint1 x52;
205 fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
206 uint64_t x53 = (x52 + x40);
207 uint64_t x54;
208 fiat_p256_uint1 x55;
209 fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
210 uint64_t x56;
211 fiat_p256_uint1 x57;
212 fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
213 uint64_t x58;
214 fiat_p256_uint1 x59;
215 fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
216 uint64_t x60;
217 fiat_p256_uint1 x61;
218 fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
219 uint64_t x62;
220 fiat_p256_uint1 x63;
221 fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
222 uint64_t x64;
223 uint64_t x65;
224 fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
225 uint64_t x66;
226 uint64_t x67;
227 fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
228 uint64_t x68;
229 uint64_t x69;
230 fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
231 uint64_t x70;
232 fiat_p256_uint1 x71;
233 fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
234 uint64_t x72 = (x71 + x67);
235 uint64_t x73;
236 fiat_p256_uint1 x74;
237 fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
238 uint64_t x75;
239 fiat_p256_uint1 x76;
240 fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
241 uint64_t x77;
242 fiat_p256_uint1 x78;
243 fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
244 uint64_t x79;
245 fiat_p256_uint1 x80;
246 fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
247 uint64_t x81;
248 fiat_p256_uint1 x82;
249 fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
250 uint64_t x83 = ((uint64_t)x82 + x63);
251 uint64_t x84;
252 uint64_t x85;
253 fiat_p256_mulx_u64(&x84, &x85, x2, (arg2[3]));
254 uint64_t x86;
255 uint64_t x87;
256 fiat_p256_mulx_u64(&x86, &x87, x2, (arg2[2]));
257 uint64_t x88;
258 uint64_t x89;
259 fiat_p256_mulx_u64(&x88, &x89, x2, (arg2[1]));
260 uint64_t x90;
261 uint64_t x91;
262 fiat_p256_mulx_u64(&x90, &x91, x2, (arg2[0]));
263 uint64_t x92;
264 fiat_p256_uint1 x93;
265 fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
266 uint64_t x94;
267 fiat_p256_uint1 x95;
268 fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
269 uint64_t x96;
270 fiat_p256_uint1 x97;
271 fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
272 uint64_t x98 = (x97 + x85);
273 uint64_t x99;
274 fiat_p256_uint1 x100;
275 fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
276 uint64_t x101;
277 fiat_p256_uint1 x102;
278 fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
279 uint64_t x103;
280 fiat_p256_uint1 x104;
281 fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
282 uint64_t x105;
283 fiat_p256_uint1 x106;
284 fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
285 uint64_t x107;
286 fiat_p256_uint1 x108;
287 fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
288 uint64_t x109;
289 uint64_t x110;
290 fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
291 uint64_t x111;
292 uint64_t x112;
293 fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
294 uint64_t x113;
295 uint64_t x114;
296 fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
297 uint64_t x115;
298 fiat_p256_uint1 x116;
299 fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
300 uint64_t x117 = (x116 + x112);
301 uint64_t x118;
302 fiat_p256_uint1 x119;
303 fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
304 uint64_t x120;
305 fiat_p256_uint1 x121;
306 fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
307 uint64_t x122;
308 fiat_p256_uint1 x123;
309 fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
310 uint64_t x124;
311 fiat_p256_uint1 x125;
312 fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
313 uint64_t x126;
314 fiat_p256_uint1 x127;
315 fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
316 uint64_t x128 = ((uint64_t)x127 + x108);
317 uint64_t x129;
318 uint64_t x130;
319 fiat_p256_mulx_u64(&x129, &x130, x3, (arg2[3]));
320 uint64_t x131;
321 uint64_t x132;
322 fiat_p256_mulx_u64(&x131, &x132, x3, (arg2[2]));
323 uint64_t x133;
324 uint64_t x134;
325 fiat_p256_mulx_u64(&x133, &x134, x3, (arg2[1]));
326 uint64_t x135;
327 uint64_t x136;
328 fiat_p256_mulx_u64(&x135, &x136, x3, (arg2[0]));
329 uint64_t x137;
330 fiat_p256_uint1 x138;
331 fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
332 uint64_t x139;
333 fiat_p256_uint1 x140;
334 fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
335 uint64_t x141;
336 fiat_p256_uint1 x142;
337 fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
338 uint64_t x143 = (x142 + x130);
339 uint64_t x144;
340 fiat_p256_uint1 x145;
341 fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
342 uint64_t x146;
343 fiat_p256_uint1 x147;
344 fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
345 uint64_t x148;
346 fiat_p256_uint1 x149;
347 fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
348 uint64_t x150;
349 fiat_p256_uint1 x151;
350 fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
351 uint64_t x152;
352 fiat_p256_uint1 x153;
353 fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
354 uint64_t x154;
355 uint64_t x155;
356 fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
357 uint64_t x156;
358 uint64_t x157;
359 fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
360 uint64_t x158;
361 uint64_t x159;
362 fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
363 uint64_t x160;
364 fiat_p256_uint1 x161;
365 fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
366 uint64_t x162 = (x161 + x157);
367 uint64_t x163;
368 fiat_p256_uint1 x164;
369 fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
370 uint64_t x165;
371 fiat_p256_uint1 x166;
372 fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
373 uint64_t x167;
374 fiat_p256_uint1 x168;
375 fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
376 uint64_t x169;
377 fiat_p256_uint1 x170;
378 fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
379 uint64_t x171;
380 fiat_p256_uint1 x172;
381 fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
382 uint64_t x173 = ((uint64_t)x172 + x153);
383 uint64_t x174;
384 fiat_p256_uint1 x175;
385 fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
386 uint64_t x176;
387 fiat_p256_uint1 x177;
388 fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
389 uint64_t x178;
390 fiat_p256_uint1 x179;
391 fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
392 uint64_t x180;
393 fiat_p256_uint1 x181;
394 fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
395 uint64_t x182;
396 fiat_p256_uint1 x183;
397 fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
398 uint64_t x184;
399 fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
400 uint64_t x185;
401 fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
402 uint64_t x186;
403 fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
404 uint64_t x187;
405 fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
406 out1[0] = x184;
407 out1[1] = x185;
408 out1[2] = x186;
409 out1[3] = x187;
410 }
411
412 /*
413 * The function fiat_p256_square squares a field element in the Montgomery domain.
414 * Preconditions:
415 * 0 ≤ eval arg1 < m
416 * Postconditions:
417 * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
418 * 0 ≤ eval out1 < m
419 *
420 * Input Bounds:
421 * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
422 * Output Bounds:
423 * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
424 */
fiat_p256_square(uint64_t out1[4],const uint64_t arg1[4])425 static void fiat_p256_square(uint64_t out1[4], const uint64_t arg1[4]) {
426 uint64_t x1 = (arg1[1]);
427 uint64_t x2 = (arg1[2]);
428 uint64_t x3 = (arg1[3]);
429 uint64_t x4 = (arg1[0]);
430 uint64_t x5;
431 uint64_t x6;
432 fiat_p256_mulx_u64(&x5, &x6, x4, (arg1[3]));
433 uint64_t x7;
434 uint64_t x8;
435 fiat_p256_mulx_u64(&x7, &x8, x4, (arg1[2]));
436 uint64_t x9;
437 uint64_t x10;
438 fiat_p256_mulx_u64(&x9, &x10, x4, (arg1[1]));
439 uint64_t x11;
440 uint64_t x12;
441 fiat_p256_mulx_u64(&x11, &x12, x4, (arg1[0]));
442 uint64_t x13;
443 fiat_p256_uint1 x14;
444 fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
445 uint64_t x15;
446 fiat_p256_uint1 x16;
447 fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
448 uint64_t x17;
449 fiat_p256_uint1 x18;
450 fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
451 uint64_t x19 = (x18 + x6);
452 uint64_t x20;
453 uint64_t x21;
454 fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
455 uint64_t x22;
456 uint64_t x23;
457 fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
458 uint64_t x24;
459 uint64_t x25;
460 fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
461 uint64_t x26;
462 fiat_p256_uint1 x27;
463 fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
464 uint64_t x28 = (x27 + x23);
465 uint64_t x29;
466 fiat_p256_uint1 x30;
467 fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
468 uint64_t x31;
469 fiat_p256_uint1 x32;
470 fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
471 uint64_t x33;
472 fiat_p256_uint1 x34;
473 fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
474 uint64_t x35;
475 fiat_p256_uint1 x36;
476 fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
477 uint64_t x37;
478 fiat_p256_uint1 x38;
479 fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
480 uint64_t x39;
481 uint64_t x40;
482 fiat_p256_mulx_u64(&x39, &x40, x1, (arg1[3]));
483 uint64_t x41;
484 uint64_t x42;
485 fiat_p256_mulx_u64(&x41, &x42, x1, (arg1[2]));
486 uint64_t x43;
487 uint64_t x44;
488 fiat_p256_mulx_u64(&x43, &x44, x1, (arg1[1]));
489 uint64_t x45;
490 uint64_t x46;
491 fiat_p256_mulx_u64(&x45, &x46, x1, (arg1[0]));
492 uint64_t x47;
493 fiat_p256_uint1 x48;
494 fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
495 uint64_t x49;
496 fiat_p256_uint1 x50;
497 fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
498 uint64_t x51;
499 fiat_p256_uint1 x52;
500 fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
501 uint64_t x53 = (x52 + x40);
502 uint64_t x54;
503 fiat_p256_uint1 x55;
504 fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
505 uint64_t x56;
506 fiat_p256_uint1 x57;
507 fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
508 uint64_t x58;
509 fiat_p256_uint1 x59;
510 fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
511 uint64_t x60;
512 fiat_p256_uint1 x61;
513 fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
514 uint64_t x62;
515 fiat_p256_uint1 x63;
516 fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
517 uint64_t x64;
518 uint64_t x65;
519 fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
520 uint64_t x66;
521 uint64_t x67;
522 fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
523 uint64_t x68;
524 uint64_t x69;
525 fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
526 uint64_t x70;
527 fiat_p256_uint1 x71;
528 fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
529 uint64_t x72 = (x71 + x67);
530 uint64_t x73;
531 fiat_p256_uint1 x74;
532 fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
533 uint64_t x75;
534 fiat_p256_uint1 x76;
535 fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
536 uint64_t x77;
537 fiat_p256_uint1 x78;
538 fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
539 uint64_t x79;
540 fiat_p256_uint1 x80;
541 fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
542 uint64_t x81;
543 fiat_p256_uint1 x82;
544 fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
545 uint64_t x83 = ((uint64_t)x82 + x63);
546 uint64_t x84;
547 uint64_t x85;
548 fiat_p256_mulx_u64(&x84, &x85, x2, (arg1[3]));
549 uint64_t x86;
550 uint64_t x87;
551 fiat_p256_mulx_u64(&x86, &x87, x2, (arg1[2]));
552 uint64_t x88;
553 uint64_t x89;
554 fiat_p256_mulx_u64(&x88, &x89, x2, (arg1[1]));
555 uint64_t x90;
556 uint64_t x91;
557 fiat_p256_mulx_u64(&x90, &x91, x2, (arg1[0]));
558 uint64_t x92;
559 fiat_p256_uint1 x93;
560 fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
561 uint64_t x94;
562 fiat_p256_uint1 x95;
563 fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
564 uint64_t x96;
565 fiat_p256_uint1 x97;
566 fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
567 uint64_t x98 = (x97 + x85);
568 uint64_t x99;
569 fiat_p256_uint1 x100;
570 fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
571 uint64_t x101;
572 fiat_p256_uint1 x102;
573 fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
574 uint64_t x103;
575 fiat_p256_uint1 x104;
576 fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
577 uint64_t x105;
578 fiat_p256_uint1 x106;
579 fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
580 uint64_t x107;
581 fiat_p256_uint1 x108;
582 fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
583 uint64_t x109;
584 uint64_t x110;
585 fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
586 uint64_t x111;
587 uint64_t x112;
588 fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
589 uint64_t x113;
590 uint64_t x114;
591 fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
592 uint64_t x115;
593 fiat_p256_uint1 x116;
594 fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
595 uint64_t x117 = (x116 + x112);
596 uint64_t x118;
597 fiat_p256_uint1 x119;
598 fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
599 uint64_t x120;
600 fiat_p256_uint1 x121;
601 fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
602 uint64_t x122;
603 fiat_p256_uint1 x123;
604 fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
605 uint64_t x124;
606 fiat_p256_uint1 x125;
607 fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
608 uint64_t x126;
609 fiat_p256_uint1 x127;
610 fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
611 uint64_t x128 = ((uint64_t)x127 + x108);
612 uint64_t x129;
613 uint64_t x130;
614 fiat_p256_mulx_u64(&x129, &x130, x3, (arg1[3]));
615 uint64_t x131;
616 uint64_t x132;
617 fiat_p256_mulx_u64(&x131, &x132, x3, (arg1[2]));
618 uint64_t x133;
619 uint64_t x134;
620 fiat_p256_mulx_u64(&x133, &x134, x3, (arg1[1]));
621 uint64_t x135;
622 uint64_t x136;
623 fiat_p256_mulx_u64(&x135, &x136, x3, (arg1[0]));
624 uint64_t x137;
625 fiat_p256_uint1 x138;
626 fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
627 uint64_t x139;
628 fiat_p256_uint1 x140;
629 fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
630 uint64_t x141;
631 fiat_p256_uint1 x142;
632 fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
633 uint64_t x143 = (x142 + x130);
634 uint64_t x144;
635 fiat_p256_uint1 x145;
636 fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
637 uint64_t x146;
638 fiat_p256_uint1 x147;
639 fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
640 uint64_t x148;
641 fiat_p256_uint1 x149;
642 fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
643 uint64_t x150;
644 fiat_p256_uint1 x151;
645 fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
646 uint64_t x152;
647 fiat_p256_uint1 x153;
648 fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
649 uint64_t x154;
650 uint64_t x155;
651 fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
652 uint64_t x156;
653 uint64_t x157;
654 fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
655 uint64_t x158;
656 uint64_t x159;
657 fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
658 uint64_t x160;
659 fiat_p256_uint1 x161;
660 fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
661 uint64_t x162 = (x161 + x157);
662 uint64_t x163;
663 fiat_p256_uint1 x164;
664 fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
665 uint64_t x165;
666 fiat_p256_uint1 x166;
667 fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
668 uint64_t x167;
669 fiat_p256_uint1 x168;
670 fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
671 uint64_t x169;
672 fiat_p256_uint1 x170;
673 fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
674 uint64_t x171;
675 fiat_p256_uint1 x172;
676 fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
677 uint64_t x173 = ((uint64_t)x172 + x153);
678 uint64_t x174;
679 fiat_p256_uint1 x175;
680 fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
681 uint64_t x176;
682 fiat_p256_uint1 x177;
683 fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
684 uint64_t x178;
685 fiat_p256_uint1 x179;
686 fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
687 uint64_t x180;
688 fiat_p256_uint1 x181;
689 fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
690 uint64_t x182;
691 fiat_p256_uint1 x183;
692 fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
693 uint64_t x184;
694 fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
695 uint64_t x185;
696 fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
697 uint64_t x186;
698 fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
699 uint64_t x187;
700 fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
701 out1[0] = x184;
702 out1[1] = x185;
703 out1[2] = x186;
704 out1[3] = x187;
705 }
706
707 /*
708 * The function fiat_p256_add adds two field elements in the Montgomery domain.
709 * Preconditions:
710 * 0 ≤ eval arg1 < m
711 * 0 ≤ eval arg2 < m
712 * Postconditions:
713 * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
714 * 0 ≤ eval out1 < m
715 *
716 * Input Bounds:
717 * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
718 * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
719 * Output Bounds:
720 * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
721 */
fiat_p256_add(uint64_t out1[4],const uint64_t arg1[4],const uint64_t arg2[4])722 static void fiat_p256_add(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
723 uint64_t x1;
724 fiat_p256_uint1 x2;
725 fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
726 uint64_t x3;
727 fiat_p256_uint1 x4;
728 fiat_p256_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
729 uint64_t x5;
730 fiat_p256_uint1 x6;
731 fiat_p256_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
732 uint64_t x7;
733 fiat_p256_uint1 x8;
734 fiat_p256_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
735 uint64_t x9;
736 fiat_p256_uint1 x10;
737 fiat_p256_subborrowx_u64(&x9, &x10, 0x0, x1, UINT64_C(0xffffffffffffffff));
738 uint64_t x11;
739 fiat_p256_uint1 x12;
740 fiat_p256_subborrowx_u64(&x11, &x12, x10, x3, UINT32_C(0xffffffff));
741 uint64_t x13;
742 fiat_p256_uint1 x14;
743 fiat_p256_subborrowx_u64(&x13, &x14, x12, x5, 0x0);
744 uint64_t x15;
745 fiat_p256_uint1 x16;
746 fiat_p256_subborrowx_u64(&x15, &x16, x14, x7, UINT64_C(0xffffffff00000001));
747 uint64_t x17;
748 fiat_p256_uint1 x18;
749 fiat_p256_subborrowx_u64(&x17, &x18, x16, x8, 0x0);
750 uint64_t x19;
751 fiat_p256_cmovznz_u64(&x19, x18, x9, x1);
752 uint64_t x20;
753 fiat_p256_cmovznz_u64(&x20, x18, x11, x3);
754 uint64_t x21;
755 fiat_p256_cmovznz_u64(&x21, x18, x13, x5);
756 uint64_t x22;
757 fiat_p256_cmovznz_u64(&x22, x18, x15, x7);
758 out1[0] = x19;
759 out1[1] = x20;
760 out1[2] = x21;
761 out1[3] = x22;
762 }
763
764 /*
765 * The function fiat_p256_sub subtracts two field elements in the Montgomery domain.
766 * Preconditions:
767 * 0 ≤ eval arg1 < m
768 * 0 ≤ eval arg2 < m
769 * Postconditions:
770 * eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
771 * 0 ≤ eval out1 < m
772 *
773 * Input Bounds:
774 * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
775 * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
776 * Output Bounds:
777 * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
778 */
fiat_p256_sub(uint64_t out1[4],const uint64_t arg1[4],const uint64_t arg2[4])779 static void fiat_p256_sub(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
780 uint64_t x1;
781 fiat_p256_uint1 x2;
782 fiat_p256_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
783 uint64_t x3;
784 fiat_p256_uint1 x4;
785 fiat_p256_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
786 uint64_t x5;
787 fiat_p256_uint1 x6;
788 fiat_p256_subborrowx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
789 uint64_t x7;
790 fiat_p256_uint1 x8;
791 fiat_p256_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
792 uint64_t x9;
793 fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
794 uint64_t x10;
795 fiat_p256_uint1 x11;
796 fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, (x9 & UINT64_C(0xffffffffffffffff)));
797 uint64_t x12;
798 fiat_p256_uint1 x13;
799 fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
800 uint64_t x14;
801 fiat_p256_uint1 x15;
802 fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
803 uint64_t x16;
804 fiat_p256_uint1 x17;
805 fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
806 out1[0] = x10;
807 out1[1] = x12;
808 out1[2] = x14;
809 out1[3] = x16;
810 }
811
812 /*
813 * The function fiat_p256_opp negates a field element in the Montgomery domain.
814 * Preconditions:
815 * 0 ≤ eval arg1 < m
816 * Postconditions:
817 * eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
818 * 0 ≤ eval out1 < m
819 *
820 * Input Bounds:
821 * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
822 * Output Bounds:
823 * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
824 */
fiat_p256_opp(uint64_t out1[4],const uint64_t arg1[4])825 static void fiat_p256_opp(uint64_t out1[4], const uint64_t arg1[4]) {
826 uint64_t x1;
827 fiat_p256_uint1 x2;
828 fiat_p256_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0]));
829 uint64_t x3;
830 fiat_p256_uint1 x4;
831 fiat_p256_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1]));
832 uint64_t x5;
833 fiat_p256_uint1 x6;
834 fiat_p256_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2]));
835 uint64_t x7;
836 fiat_p256_uint1 x8;
837 fiat_p256_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3]));
838 uint64_t x9;
839 fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
840 uint64_t x10;
841 fiat_p256_uint1 x11;
842 fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, (x9 & UINT64_C(0xffffffffffffffff)));
843 uint64_t x12;
844 fiat_p256_uint1 x13;
845 fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
846 uint64_t x14;
847 fiat_p256_uint1 x15;
848 fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
849 uint64_t x16;
850 fiat_p256_uint1 x17;
851 fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
852 out1[0] = x10;
853 out1[1] = x12;
854 out1[2] = x14;
855 out1[3] = x16;
856 }
857
858 /*
859 * The function fiat_p256_from_montgomery translates a field element out of the Montgomery domain.
860 * Preconditions:
861 * 0 ≤ eval arg1 < m
862 * Postconditions:
863 * eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m
864 * 0 ≤ eval out1 < m
865 *
866 * Input Bounds:
867 * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
868 * Output Bounds:
869 * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
870 */
fiat_p256_from_montgomery(uint64_t out1[4],const uint64_t arg1[4])871 static void fiat_p256_from_montgomery(uint64_t out1[4], const uint64_t arg1[4]) {
872 uint64_t x1 = (arg1[0]);
873 uint64_t x2;
874 uint64_t x3;
875 fiat_p256_mulx_u64(&x2, &x3, x1, UINT64_C(0xffffffff00000001));
876 uint64_t x4;
877 uint64_t x5;
878 fiat_p256_mulx_u64(&x4, &x5, x1, UINT32_C(0xffffffff));
879 uint64_t x6;
880 uint64_t x7;
881 fiat_p256_mulx_u64(&x6, &x7, x1, UINT64_C(0xffffffffffffffff));
882 uint64_t x8;
883 fiat_p256_uint1 x9;
884 fiat_p256_addcarryx_u64(&x8, &x9, 0x0, x7, x4);
885 uint64_t x10;
886 fiat_p256_uint1 x11;
887 fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x6);
888 uint64_t x12;
889 fiat_p256_uint1 x13;
890 fiat_p256_addcarryx_u64(&x12, &x13, x11, 0x0, x8);
891 uint64_t x14;
892 fiat_p256_uint1 x15;
893 fiat_p256_addcarryx_u64(&x14, &x15, 0x0, x12, (arg1[1]));
894 uint64_t x16;
895 uint64_t x17;
896 fiat_p256_mulx_u64(&x16, &x17, x14, UINT64_C(0xffffffff00000001));
897 uint64_t x18;
898 uint64_t x19;
899 fiat_p256_mulx_u64(&x18, &x19, x14, UINT32_C(0xffffffff));
900 uint64_t x20;
901 uint64_t x21;
902 fiat_p256_mulx_u64(&x20, &x21, x14, UINT64_C(0xffffffffffffffff));
903 uint64_t x22;
904 fiat_p256_uint1 x23;
905 fiat_p256_addcarryx_u64(&x22, &x23, 0x0, x21, x18);
906 uint64_t x24;
907 fiat_p256_uint1 x25;
908 fiat_p256_addcarryx_u64(&x24, &x25, 0x0, x14, x20);
909 uint64_t x26;
910 fiat_p256_uint1 x27;
911 fiat_p256_addcarryx_u64(&x26, &x27, x25, (x15 + (x13 + (x9 + x5))), x22);
912 uint64_t x28;
913 fiat_p256_uint1 x29;
914 fiat_p256_addcarryx_u64(&x28, &x29, x27, x2, (x23 + x19));
915 uint64_t x30;
916 fiat_p256_uint1 x31;
917 fiat_p256_addcarryx_u64(&x30, &x31, x29, x3, x16);
918 uint64_t x32;
919 fiat_p256_uint1 x33;
920 fiat_p256_addcarryx_u64(&x32, &x33, 0x0, x26, (arg1[2]));
921 uint64_t x34;
922 fiat_p256_uint1 x35;
923 fiat_p256_addcarryx_u64(&x34, &x35, x33, x28, 0x0);
924 uint64_t x36;
925 fiat_p256_uint1 x37;
926 fiat_p256_addcarryx_u64(&x36, &x37, x35, x30, 0x0);
927 uint64_t x38;
928 uint64_t x39;
929 fiat_p256_mulx_u64(&x38, &x39, x32, UINT64_C(0xffffffff00000001));
930 uint64_t x40;
931 uint64_t x41;
932 fiat_p256_mulx_u64(&x40, &x41, x32, UINT32_C(0xffffffff));
933 uint64_t x42;
934 uint64_t x43;
935 fiat_p256_mulx_u64(&x42, &x43, x32, UINT64_C(0xffffffffffffffff));
936 uint64_t x44;
937 fiat_p256_uint1 x45;
938 fiat_p256_addcarryx_u64(&x44, &x45, 0x0, x43, x40);
939 uint64_t x46;
940 fiat_p256_uint1 x47;
941 fiat_p256_addcarryx_u64(&x46, &x47, 0x0, x32, x42);
942 uint64_t x48;
943 fiat_p256_uint1 x49;
944 fiat_p256_addcarryx_u64(&x48, &x49, x47, x34, x44);
945 uint64_t x50;
946 fiat_p256_uint1 x51;
947 fiat_p256_addcarryx_u64(&x50, &x51, x49, x36, (x45 + x41));
948 uint64_t x52;
949 fiat_p256_uint1 x53;
950 fiat_p256_addcarryx_u64(&x52, &x53, x51, (x37 + (x31 + x17)), x38);
951 uint64_t x54;
952 fiat_p256_uint1 x55;
953 fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x48, (arg1[3]));
954 uint64_t x56;
955 fiat_p256_uint1 x57;
956 fiat_p256_addcarryx_u64(&x56, &x57, x55, x50, 0x0);
957 uint64_t x58;
958 fiat_p256_uint1 x59;
959 fiat_p256_addcarryx_u64(&x58, &x59, x57, x52, 0x0);
960 uint64_t x60;
961 uint64_t x61;
962 fiat_p256_mulx_u64(&x60, &x61, x54, UINT64_C(0xffffffff00000001));
963 uint64_t x62;
964 uint64_t x63;
965 fiat_p256_mulx_u64(&x62, &x63, x54, UINT32_C(0xffffffff));
966 uint64_t x64;
967 uint64_t x65;
968 fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffffffffffff));
969 uint64_t x66;
970 fiat_p256_uint1 x67;
971 fiat_p256_addcarryx_u64(&x66, &x67, 0x0, x65, x62);
972 uint64_t x68;
973 fiat_p256_uint1 x69;
974 fiat_p256_addcarryx_u64(&x68, &x69, 0x0, x54, x64);
975 uint64_t x70;
976 fiat_p256_uint1 x71;
977 fiat_p256_addcarryx_u64(&x70, &x71, x69, x56, x66);
978 uint64_t x72;
979 fiat_p256_uint1 x73;
980 fiat_p256_addcarryx_u64(&x72, &x73, x71, x58, (x67 + x63));
981 uint64_t x74;
982 fiat_p256_uint1 x75;
983 fiat_p256_addcarryx_u64(&x74, &x75, x73, (x59 + (x53 + x39)), x60);
984 uint64_t x76 = (x75 + x61);
985 uint64_t x77;
986 fiat_p256_uint1 x78;
987 fiat_p256_subborrowx_u64(&x77, &x78, 0x0, x70, UINT64_C(0xffffffffffffffff));
988 uint64_t x79;
989 fiat_p256_uint1 x80;
990 fiat_p256_subborrowx_u64(&x79, &x80, x78, x72, UINT32_C(0xffffffff));
991 uint64_t x81;
992 fiat_p256_uint1 x82;
993 fiat_p256_subborrowx_u64(&x81, &x82, x80, x74, 0x0);
994 uint64_t x83;
995 fiat_p256_uint1 x84;
996 fiat_p256_subborrowx_u64(&x83, &x84, x82, x76, UINT64_C(0xffffffff00000001));
997 uint64_t x85;
998 fiat_p256_uint1 x86;
999 fiat_p256_subborrowx_u64(&x85, &x86, x84, 0x0, 0x0);
1000 uint64_t x87;
1001 fiat_p256_cmovznz_u64(&x87, x86, x77, x70);
1002 uint64_t x88;
1003 fiat_p256_cmovznz_u64(&x88, x86, x79, x72);
1004 uint64_t x89;
1005 fiat_p256_cmovznz_u64(&x89, x86, x81, x74);
1006 uint64_t x90;
1007 fiat_p256_cmovznz_u64(&x90, x86, x83, x76);
1008 out1[0] = x87;
1009 out1[1] = x88;
1010 out1[2] = x89;
1011 out1[3] = x90;
1012 }
1013
1014 /*
1015 * The function fiat_p256_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
1016 * Preconditions:
1017 * 0 ≤ eval arg1 < m
1018 * Postconditions:
1019 * out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
1020 *
1021 * Input Bounds:
1022 * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1023 * Output Bounds:
1024 * out1: [0x0 ~> 0xffffffffffffffff]
1025 */
fiat_p256_nonzero(uint64_t * out1,const uint64_t arg1[4])1026 static void fiat_p256_nonzero(uint64_t* out1, const uint64_t arg1[4]) {
1027 uint64_t x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | ((arg1[3]) | (uint64_t)0x0))));
1028 *out1 = x1;
1029 }
1030
1031 /*
1032 * The function fiat_p256_selectznz is a multi-limb conditional select.
1033 * Postconditions:
1034 * eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
1035 *
1036 * Input Bounds:
1037 * arg1: [0x0 ~> 0x1]
1038 * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1039 * arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1040 * Output Bounds:
1041 * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1042 */
fiat_p256_selectznz(uint64_t out1[4],fiat_p256_uint1 arg1,const uint64_t arg2[4],const uint64_t arg3[4])1043 static void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const uint64_t arg2[4], const uint64_t arg3[4]) {
1044 uint64_t x1;
1045 fiat_p256_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
1046 uint64_t x2;
1047 fiat_p256_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
1048 uint64_t x3;
1049 fiat_p256_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
1050 uint64_t x4;
1051 fiat_p256_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
1052 out1[0] = x1;
1053 out1[1] = x2;
1054 out1[2] = x3;
1055 out1[3] = x4;
1056 }
1057
1058 /*
1059 * The function fiat_p256_to_bytes serializes a field element in the Montgomery domain to bytes in little-endian order.
1060 * Preconditions:
1061 * 0 ≤ eval arg1 < m
1062 * Postconditions:
1063 * out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
1064 *
1065 * Input Bounds:
1066 * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1067 * Output Bounds:
1068 * out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
1069 */
fiat_p256_to_bytes(uint8_t out1[32],const uint64_t arg1[4])1070 static void fiat_p256_to_bytes(uint8_t out1[32], const uint64_t arg1[4]) {
1071 uint64_t x1 = (arg1[3]);
1072 uint64_t x2 = (arg1[2]);
1073 uint64_t x3 = (arg1[1]);
1074 uint64_t x4 = (arg1[0]);
1075 uint64_t x5 = (x4 >> 8);
1076 uint8_t x6 = (uint8_t)(x4 & UINT8_C(0xff));
1077 uint64_t x7 = (x5 >> 8);
1078 uint8_t x8 = (uint8_t)(x5 & UINT8_C(0xff));
1079 uint64_t x9 = (x7 >> 8);
1080 uint8_t x10 = (uint8_t)(x7 & UINT8_C(0xff));
1081 uint64_t x11 = (x9 >> 8);
1082 uint8_t x12 = (uint8_t)(x9 & UINT8_C(0xff));
1083 uint64_t x13 = (x11 >> 8);
1084 uint8_t x14 = (uint8_t)(x11 & UINT8_C(0xff));
1085 uint64_t x15 = (x13 >> 8);
1086 uint8_t x16 = (uint8_t)(x13 & UINT8_C(0xff));
1087 uint8_t x17 = (uint8_t)(x15 >> 8);
1088 uint8_t x18 = (uint8_t)(x15 & UINT8_C(0xff));
1089 uint8_t x19 = (uint8_t)(x17 & UINT8_C(0xff));
1090 uint64_t x20 = (x3 >> 8);
1091 uint8_t x21 = (uint8_t)(x3 & UINT8_C(0xff));
1092 uint64_t x22 = (x20 >> 8);
1093 uint8_t x23 = (uint8_t)(x20 & UINT8_C(0xff));
1094 uint64_t x24 = (x22 >> 8);
1095 uint8_t x25 = (uint8_t)(x22 & UINT8_C(0xff));
1096 uint64_t x26 = (x24 >> 8);
1097 uint8_t x27 = (uint8_t)(x24 & UINT8_C(0xff));
1098 uint64_t x28 = (x26 >> 8);
1099 uint8_t x29 = (uint8_t)(x26 & UINT8_C(0xff));
1100 uint64_t x30 = (x28 >> 8);
1101 uint8_t x31 = (uint8_t)(x28 & UINT8_C(0xff));
1102 uint8_t x32 = (uint8_t)(x30 >> 8);
1103 uint8_t x33 = (uint8_t)(x30 & UINT8_C(0xff));
1104 uint8_t x34 = (uint8_t)(x32 & UINT8_C(0xff));
1105 uint64_t x35 = (x2 >> 8);
1106 uint8_t x36 = (uint8_t)(x2 & UINT8_C(0xff));
1107 uint64_t x37 = (x35 >> 8);
1108 uint8_t x38 = (uint8_t)(x35 & UINT8_C(0xff));
1109 uint64_t x39 = (x37 >> 8);
1110 uint8_t x40 = (uint8_t)(x37 & UINT8_C(0xff));
1111 uint64_t x41 = (x39 >> 8);
1112 uint8_t x42 = (uint8_t)(x39 & UINT8_C(0xff));
1113 uint64_t x43 = (x41 >> 8);
1114 uint8_t x44 = (uint8_t)(x41 & UINT8_C(0xff));
1115 uint64_t x45 = (x43 >> 8);
1116 uint8_t x46 = (uint8_t)(x43 & UINT8_C(0xff));
1117 uint8_t x47 = (uint8_t)(x45 >> 8);
1118 uint8_t x48 = (uint8_t)(x45 & UINT8_C(0xff));
1119 uint8_t x49 = (uint8_t)(x47 & UINT8_C(0xff));
1120 uint64_t x50 = (x1 >> 8);
1121 uint8_t x51 = (uint8_t)(x1 & UINT8_C(0xff));
1122 uint64_t x52 = (x50 >> 8);
1123 uint8_t x53 = (uint8_t)(x50 & UINT8_C(0xff));
1124 uint64_t x54 = (x52 >> 8);
1125 uint8_t x55 = (uint8_t)(x52 & UINT8_C(0xff));
1126 uint64_t x56 = (x54 >> 8);
1127 uint8_t x57 = (uint8_t)(x54 & UINT8_C(0xff));
1128 uint64_t x58 = (x56 >> 8);
1129 uint8_t x59 = (uint8_t)(x56 & UINT8_C(0xff));
1130 uint64_t x60 = (x58 >> 8);
1131 uint8_t x61 = (uint8_t)(x58 & UINT8_C(0xff));
1132 uint8_t x62 = (uint8_t)(x60 >> 8);
1133 uint8_t x63 = (uint8_t)(x60 & UINT8_C(0xff));
1134 out1[0] = x6;
1135 out1[1] = x8;
1136 out1[2] = x10;
1137 out1[3] = x12;
1138 out1[4] = x14;
1139 out1[5] = x16;
1140 out1[6] = x18;
1141 out1[7] = x19;
1142 out1[8] = x21;
1143 out1[9] = x23;
1144 out1[10] = x25;
1145 out1[11] = x27;
1146 out1[12] = x29;
1147 out1[13] = x31;
1148 out1[14] = x33;
1149 out1[15] = x34;
1150 out1[16] = x36;
1151 out1[17] = x38;
1152 out1[18] = x40;
1153 out1[19] = x42;
1154 out1[20] = x44;
1155 out1[21] = x46;
1156 out1[22] = x48;
1157 out1[23] = x49;
1158 out1[24] = x51;
1159 out1[25] = x53;
1160 out1[26] = x55;
1161 out1[27] = x57;
1162 out1[28] = x59;
1163 out1[29] = x61;
1164 out1[30] = x63;
1165 out1[31] = x62;
1166 }
1167
1168 /*
1169 * The function fiat_p256_from_bytes deserializes a field element in the Montgomery domain from bytes in little-endian order.
1170 * Preconditions:
1171 * 0 ≤ bytes_eval arg1 < m
1172 * Postconditions:
1173 * eval out1 mod m = bytes_eval arg1 mod m
1174 * 0 ≤ eval out1 < m
1175 *
1176 * Input Bounds:
1177 * arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
1178 * Output Bounds:
1179 * out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1180 */
fiat_p256_from_bytes(uint64_t out1[4],const uint8_t arg1[32])1181 static void fiat_p256_from_bytes(uint64_t out1[4], const uint8_t arg1[32]) {
1182 uint64_t x1 = ((uint64_t)(arg1[31]) << 56);
1183 uint64_t x2 = ((uint64_t)(arg1[30]) << 48);
1184 uint64_t x3 = ((uint64_t)(arg1[29]) << 40);
1185 uint64_t x4 = ((uint64_t)(arg1[28]) << 32);
1186 uint64_t x5 = ((uint64_t)(arg1[27]) << 24);
1187 uint64_t x6 = ((uint64_t)(arg1[26]) << 16);
1188 uint64_t x7 = ((uint64_t)(arg1[25]) << 8);
1189 uint8_t x8 = (arg1[24]);
1190 uint64_t x9 = ((uint64_t)(arg1[23]) << 56);
1191 uint64_t x10 = ((uint64_t)(arg1[22]) << 48);
1192 uint64_t x11 = ((uint64_t)(arg1[21]) << 40);
1193 uint64_t x12 = ((uint64_t)(arg1[20]) << 32);
1194 uint64_t x13 = ((uint64_t)(arg1[19]) << 24);
1195 uint64_t x14 = ((uint64_t)(arg1[18]) << 16);
1196 uint64_t x15 = ((uint64_t)(arg1[17]) << 8);
1197 uint8_t x16 = (arg1[16]);
1198 uint64_t x17 = ((uint64_t)(arg1[15]) << 56);
1199 uint64_t x18 = ((uint64_t)(arg1[14]) << 48);
1200 uint64_t x19 = ((uint64_t)(arg1[13]) << 40);
1201 uint64_t x20 = ((uint64_t)(arg1[12]) << 32);
1202 uint64_t x21 = ((uint64_t)(arg1[11]) << 24);
1203 uint64_t x22 = ((uint64_t)(arg1[10]) << 16);
1204 uint64_t x23 = ((uint64_t)(arg1[9]) << 8);
1205 uint8_t x24 = (arg1[8]);
1206 uint64_t x25 = ((uint64_t)(arg1[7]) << 56);
1207 uint64_t x26 = ((uint64_t)(arg1[6]) << 48);
1208 uint64_t x27 = ((uint64_t)(arg1[5]) << 40);
1209 uint64_t x28 = ((uint64_t)(arg1[4]) << 32);
1210 uint64_t x29 = ((uint64_t)(arg1[3]) << 24);
1211 uint64_t x30 = ((uint64_t)(arg1[2]) << 16);
1212 uint64_t x31 = ((uint64_t)(arg1[1]) << 8);
1213 uint8_t x32 = (arg1[0]);
1214 uint64_t x33 = (x32 + (x31 + (x30 + (x29 + (x28 + (x27 + (x26 + x25)))))));
1215 uint64_t x34 = (x33 & UINT64_C(0xffffffffffffffff));
1216 uint64_t x35 = (x8 + (x7 + (x6 + (x5 + (x4 + (x3 + (x2 + x1)))))));
1217 uint64_t x36 = (x16 + (x15 + (x14 + (x13 + (x12 + (x11 + (x10 + x9)))))));
1218 uint64_t x37 = (x24 + (x23 + (x22 + (x21 + (x20 + (x19 + (x18 + x17)))))));
1219 uint64_t x38 = (x37 & UINT64_C(0xffffffffffffffff));
1220 uint64_t x39 = (x36 & UINT64_C(0xffffffffffffffff));
1221 out1[0] = x34;
1222 out1[1] = x38;
1223 out1[2] = x39;
1224 out1[3] = x35;
1225 }
1226
1227