• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Autogenerated: src/ExtractionOCaml/word_by_word_montgomery --static p256 '2^256 - 2^224 + 2^192 + 2^96 - 1' 64 mul square add sub opp from_montgomery nonzero selectznz to_bytes from_bytes */
2 /* curve description: p256 */
3 /* requested operations: mul, square, add, sub, opp, from_montgomery, nonzero, selectznz, to_bytes, from_bytes */
4 /* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */
5 /* machine_wordsize = 64 (from "64") */
6 /*                                                                    */
7 /* NOTE: In addition to the bounds specified above each function, all */
8 /*   functions synthesized for this Montgomery arithmetic require the */
9 /*   input to be strictly less than the prime modulus (m), and also   */
10 /*   require the input to be in the unique saturated representation.  */
11 /*   All functions also ensure that these two properties are true of  */
12 /*   return values.                                                   */
13 
14 #include <stdint.h>
15 typedef unsigned char fiat_p256_uint1;
16 typedef signed char fiat_p256_int1;
17 typedef signed __int128 fiat_p256_int128;
18 typedef unsigned __int128 fiat_p256_uint128;
19 
20 #if (-1 & 3) != 3
21 #error "This code only works on a two's complement system"
22 #endif
23 
24 
25 /*
26  * The function fiat_p256_addcarryx_u64 is an addition with carry.
27  * Postconditions:
28  *   out1 = (arg1 + arg2 + arg3) mod 2^64
29  *   out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
30  *
31  * Input Bounds:
32  *   arg1: [0x0 ~> 0x1]
33  *   arg2: [0x0 ~> 0xffffffffffffffff]
34  *   arg3: [0x0 ~> 0xffffffffffffffff]
35  * Output Bounds:
36  *   out1: [0x0 ~> 0xffffffffffffffff]
37  *   out2: [0x0 ~> 0x1]
38  */
fiat_p256_addcarryx_u64(uint64_t * out1,fiat_p256_uint1 * out2,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)39 static void fiat_p256_addcarryx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
40   fiat_p256_uint128 x1 = ((arg1 + (fiat_p256_uint128)arg2) + arg3);
41   uint64_t x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
42   fiat_p256_uint1 x3 = (fiat_p256_uint1)(x1 >> 64);
43   *out1 = x2;
44   *out2 = x3;
45 }
46 
47 /*
48  * The function fiat_p256_subborrowx_u64 is a subtraction with borrow.
49  * Postconditions:
50  *   out1 = (-arg1 + arg2 + -arg3) mod 2^64
51  *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
52  *
53  * Input Bounds:
54  *   arg1: [0x0 ~> 0x1]
55  *   arg2: [0x0 ~> 0xffffffffffffffff]
56  *   arg3: [0x0 ~> 0xffffffffffffffff]
57  * Output Bounds:
58  *   out1: [0x0 ~> 0xffffffffffffffff]
59  *   out2: [0x0 ~> 0x1]
60  */
fiat_p256_subborrowx_u64(uint64_t * out1,fiat_p256_uint1 * out2,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)61 static void fiat_p256_subborrowx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
62   fiat_p256_int128 x1 = ((arg2 - (fiat_p256_int128)arg1) - arg3);
63   fiat_p256_int1 x2 = (fiat_p256_int1)(x1 >> 64);
64   uint64_t x3 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
65   *out1 = x3;
66   *out2 = (fiat_p256_uint1)(0x0 - x2);
67 }
68 
69 /*
70  * The function fiat_p256_mulx_u64 is a multiplication, returning the full double-width result.
71  * Postconditions:
72  *   out1 = (arg1 * arg2) mod 2^64
73  *   out2 = ⌊arg1 * arg2 / 2^64⌋
74  *
75  * Input Bounds:
76  *   arg1: [0x0 ~> 0xffffffffffffffff]
77  *   arg2: [0x0 ~> 0xffffffffffffffff]
78  * Output Bounds:
79  *   out1: [0x0 ~> 0xffffffffffffffff]
80  *   out2: [0x0 ~> 0xffffffffffffffff]
81  */
fiat_p256_mulx_u64(uint64_t * out1,uint64_t * out2,uint64_t arg1,uint64_t arg2)82 static void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, uint64_t arg2) {
83   fiat_p256_uint128 x1 = ((fiat_p256_uint128)arg1 * arg2);
84   uint64_t x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
85   uint64_t x3 = (uint64_t)(x1 >> 64);
86   *out1 = x2;
87   *out2 = x3;
88 }
89 
90 /*
91  * The function fiat_p256_cmovznz_u64 is a single-word conditional move.
92  * Postconditions:
93  *   out1 = (if arg1 = 0 then arg2 else arg3)
94  *
95  * Input Bounds:
96  *   arg1: [0x0 ~> 0x1]
97  *   arg2: [0x0 ~> 0xffffffffffffffff]
98  *   arg3: [0x0 ~> 0xffffffffffffffff]
99  * Output Bounds:
100  *   out1: [0x0 ~> 0xffffffffffffffff]
101  */
fiat_p256_cmovznz_u64(uint64_t * out1,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)102 static void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
103   fiat_p256_uint1 x1 = (!(!arg1));
104   uint64_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
105   // Note this line has been patched from the synthesized code to add value
106   // barriers.
107   //
108   // Clang recognizes this pattern as a select. While it usually transforms it
109   // to a cmov, it sometimes further transforms it into a branch, which we do
110   // not want.
111   uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2));
112   *out1 = x3;
113 }
114 
115 /*
116  * The function fiat_p256_mul multiplies two field elements in the Montgomery domain.
117  * Preconditions:
118  *   0 ≤ eval arg1 < m
119  *   0 ≤ eval arg2 < m
120  * Postconditions:
121  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
122  *   0 ≤ eval out1 < m
123  *
124  * Input Bounds:
125  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
126  *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
127  * Output Bounds:
128  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
129  */
fiat_p256_mul(uint64_t out1[4],const uint64_t arg1[4],const uint64_t arg2[4])130 static void fiat_p256_mul(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
131   uint64_t x1 = (arg1[1]);
132   uint64_t x2 = (arg1[2]);
133   uint64_t x3 = (arg1[3]);
134   uint64_t x4 = (arg1[0]);
135   uint64_t x5;
136   uint64_t x6;
137   fiat_p256_mulx_u64(&x5, &x6, x4, (arg2[3]));
138   uint64_t x7;
139   uint64_t x8;
140   fiat_p256_mulx_u64(&x7, &x8, x4, (arg2[2]));
141   uint64_t x9;
142   uint64_t x10;
143   fiat_p256_mulx_u64(&x9, &x10, x4, (arg2[1]));
144   uint64_t x11;
145   uint64_t x12;
146   fiat_p256_mulx_u64(&x11, &x12, x4, (arg2[0]));
147   uint64_t x13;
148   fiat_p256_uint1 x14;
149   fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
150   uint64_t x15;
151   fiat_p256_uint1 x16;
152   fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
153   uint64_t x17;
154   fiat_p256_uint1 x18;
155   fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
156   uint64_t x19 = (x18 + x6);
157   uint64_t x20;
158   uint64_t x21;
159   fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
160   uint64_t x22;
161   uint64_t x23;
162   fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
163   uint64_t x24;
164   uint64_t x25;
165   fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
166   uint64_t x26;
167   fiat_p256_uint1 x27;
168   fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
169   uint64_t x28 = (x27 + x23);
170   uint64_t x29;
171   fiat_p256_uint1 x30;
172   fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
173   uint64_t x31;
174   fiat_p256_uint1 x32;
175   fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
176   uint64_t x33;
177   fiat_p256_uint1 x34;
178   fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
179   uint64_t x35;
180   fiat_p256_uint1 x36;
181   fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
182   uint64_t x37;
183   fiat_p256_uint1 x38;
184   fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
185   uint64_t x39;
186   uint64_t x40;
187   fiat_p256_mulx_u64(&x39, &x40, x1, (arg2[3]));
188   uint64_t x41;
189   uint64_t x42;
190   fiat_p256_mulx_u64(&x41, &x42, x1, (arg2[2]));
191   uint64_t x43;
192   uint64_t x44;
193   fiat_p256_mulx_u64(&x43, &x44, x1, (arg2[1]));
194   uint64_t x45;
195   uint64_t x46;
196   fiat_p256_mulx_u64(&x45, &x46, x1, (arg2[0]));
197   uint64_t x47;
198   fiat_p256_uint1 x48;
199   fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
200   uint64_t x49;
201   fiat_p256_uint1 x50;
202   fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
203   uint64_t x51;
204   fiat_p256_uint1 x52;
205   fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
206   uint64_t x53 = (x52 + x40);
207   uint64_t x54;
208   fiat_p256_uint1 x55;
209   fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
210   uint64_t x56;
211   fiat_p256_uint1 x57;
212   fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
213   uint64_t x58;
214   fiat_p256_uint1 x59;
215   fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
216   uint64_t x60;
217   fiat_p256_uint1 x61;
218   fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
219   uint64_t x62;
220   fiat_p256_uint1 x63;
221   fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
222   uint64_t x64;
223   uint64_t x65;
224   fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
225   uint64_t x66;
226   uint64_t x67;
227   fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
228   uint64_t x68;
229   uint64_t x69;
230   fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
231   uint64_t x70;
232   fiat_p256_uint1 x71;
233   fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
234   uint64_t x72 = (x71 + x67);
235   uint64_t x73;
236   fiat_p256_uint1 x74;
237   fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
238   uint64_t x75;
239   fiat_p256_uint1 x76;
240   fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
241   uint64_t x77;
242   fiat_p256_uint1 x78;
243   fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
244   uint64_t x79;
245   fiat_p256_uint1 x80;
246   fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
247   uint64_t x81;
248   fiat_p256_uint1 x82;
249   fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
250   uint64_t x83 = ((uint64_t)x82 + x63);
251   uint64_t x84;
252   uint64_t x85;
253   fiat_p256_mulx_u64(&x84, &x85, x2, (arg2[3]));
254   uint64_t x86;
255   uint64_t x87;
256   fiat_p256_mulx_u64(&x86, &x87, x2, (arg2[2]));
257   uint64_t x88;
258   uint64_t x89;
259   fiat_p256_mulx_u64(&x88, &x89, x2, (arg2[1]));
260   uint64_t x90;
261   uint64_t x91;
262   fiat_p256_mulx_u64(&x90, &x91, x2, (arg2[0]));
263   uint64_t x92;
264   fiat_p256_uint1 x93;
265   fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
266   uint64_t x94;
267   fiat_p256_uint1 x95;
268   fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
269   uint64_t x96;
270   fiat_p256_uint1 x97;
271   fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
272   uint64_t x98 = (x97 + x85);
273   uint64_t x99;
274   fiat_p256_uint1 x100;
275   fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
276   uint64_t x101;
277   fiat_p256_uint1 x102;
278   fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
279   uint64_t x103;
280   fiat_p256_uint1 x104;
281   fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
282   uint64_t x105;
283   fiat_p256_uint1 x106;
284   fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
285   uint64_t x107;
286   fiat_p256_uint1 x108;
287   fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
288   uint64_t x109;
289   uint64_t x110;
290   fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
291   uint64_t x111;
292   uint64_t x112;
293   fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
294   uint64_t x113;
295   uint64_t x114;
296   fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
297   uint64_t x115;
298   fiat_p256_uint1 x116;
299   fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
300   uint64_t x117 = (x116 + x112);
301   uint64_t x118;
302   fiat_p256_uint1 x119;
303   fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
304   uint64_t x120;
305   fiat_p256_uint1 x121;
306   fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
307   uint64_t x122;
308   fiat_p256_uint1 x123;
309   fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
310   uint64_t x124;
311   fiat_p256_uint1 x125;
312   fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
313   uint64_t x126;
314   fiat_p256_uint1 x127;
315   fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
316   uint64_t x128 = ((uint64_t)x127 + x108);
317   uint64_t x129;
318   uint64_t x130;
319   fiat_p256_mulx_u64(&x129, &x130, x3, (arg2[3]));
320   uint64_t x131;
321   uint64_t x132;
322   fiat_p256_mulx_u64(&x131, &x132, x3, (arg2[2]));
323   uint64_t x133;
324   uint64_t x134;
325   fiat_p256_mulx_u64(&x133, &x134, x3, (arg2[1]));
326   uint64_t x135;
327   uint64_t x136;
328   fiat_p256_mulx_u64(&x135, &x136, x3, (arg2[0]));
329   uint64_t x137;
330   fiat_p256_uint1 x138;
331   fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
332   uint64_t x139;
333   fiat_p256_uint1 x140;
334   fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
335   uint64_t x141;
336   fiat_p256_uint1 x142;
337   fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
338   uint64_t x143 = (x142 + x130);
339   uint64_t x144;
340   fiat_p256_uint1 x145;
341   fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
342   uint64_t x146;
343   fiat_p256_uint1 x147;
344   fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
345   uint64_t x148;
346   fiat_p256_uint1 x149;
347   fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
348   uint64_t x150;
349   fiat_p256_uint1 x151;
350   fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
351   uint64_t x152;
352   fiat_p256_uint1 x153;
353   fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
354   uint64_t x154;
355   uint64_t x155;
356   fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
357   uint64_t x156;
358   uint64_t x157;
359   fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
360   uint64_t x158;
361   uint64_t x159;
362   fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
363   uint64_t x160;
364   fiat_p256_uint1 x161;
365   fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
366   uint64_t x162 = (x161 + x157);
367   uint64_t x163;
368   fiat_p256_uint1 x164;
369   fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
370   uint64_t x165;
371   fiat_p256_uint1 x166;
372   fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
373   uint64_t x167;
374   fiat_p256_uint1 x168;
375   fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
376   uint64_t x169;
377   fiat_p256_uint1 x170;
378   fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
379   uint64_t x171;
380   fiat_p256_uint1 x172;
381   fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
382   uint64_t x173 = ((uint64_t)x172 + x153);
383   uint64_t x174;
384   fiat_p256_uint1 x175;
385   fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
386   uint64_t x176;
387   fiat_p256_uint1 x177;
388   fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
389   uint64_t x178;
390   fiat_p256_uint1 x179;
391   fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
392   uint64_t x180;
393   fiat_p256_uint1 x181;
394   fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
395   uint64_t x182;
396   fiat_p256_uint1 x183;
397   fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
398   uint64_t x184;
399   fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
400   uint64_t x185;
401   fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
402   uint64_t x186;
403   fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
404   uint64_t x187;
405   fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
406   out1[0] = x184;
407   out1[1] = x185;
408   out1[2] = x186;
409   out1[3] = x187;
410 }
411 
412 /*
413  * The function fiat_p256_square squares a field element in the Montgomery domain.
414  * Preconditions:
415  *   0 ≤ eval arg1 < m
416  * Postconditions:
417  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
418  *   0 ≤ eval out1 < m
419  *
420  * Input Bounds:
421  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
422  * Output Bounds:
423  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
424  */
fiat_p256_square(uint64_t out1[4],const uint64_t arg1[4])425 static void fiat_p256_square(uint64_t out1[4], const uint64_t arg1[4]) {
426   uint64_t x1 = (arg1[1]);
427   uint64_t x2 = (arg1[2]);
428   uint64_t x3 = (arg1[3]);
429   uint64_t x4 = (arg1[0]);
430   uint64_t x5;
431   uint64_t x6;
432   fiat_p256_mulx_u64(&x5, &x6, x4, (arg1[3]));
433   uint64_t x7;
434   uint64_t x8;
435   fiat_p256_mulx_u64(&x7, &x8, x4, (arg1[2]));
436   uint64_t x9;
437   uint64_t x10;
438   fiat_p256_mulx_u64(&x9, &x10, x4, (arg1[1]));
439   uint64_t x11;
440   uint64_t x12;
441   fiat_p256_mulx_u64(&x11, &x12, x4, (arg1[0]));
442   uint64_t x13;
443   fiat_p256_uint1 x14;
444   fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
445   uint64_t x15;
446   fiat_p256_uint1 x16;
447   fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
448   uint64_t x17;
449   fiat_p256_uint1 x18;
450   fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
451   uint64_t x19 = (x18 + x6);
452   uint64_t x20;
453   uint64_t x21;
454   fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
455   uint64_t x22;
456   uint64_t x23;
457   fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
458   uint64_t x24;
459   uint64_t x25;
460   fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
461   uint64_t x26;
462   fiat_p256_uint1 x27;
463   fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
464   uint64_t x28 = (x27 + x23);
465   uint64_t x29;
466   fiat_p256_uint1 x30;
467   fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
468   uint64_t x31;
469   fiat_p256_uint1 x32;
470   fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
471   uint64_t x33;
472   fiat_p256_uint1 x34;
473   fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
474   uint64_t x35;
475   fiat_p256_uint1 x36;
476   fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
477   uint64_t x37;
478   fiat_p256_uint1 x38;
479   fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
480   uint64_t x39;
481   uint64_t x40;
482   fiat_p256_mulx_u64(&x39, &x40, x1, (arg1[3]));
483   uint64_t x41;
484   uint64_t x42;
485   fiat_p256_mulx_u64(&x41, &x42, x1, (arg1[2]));
486   uint64_t x43;
487   uint64_t x44;
488   fiat_p256_mulx_u64(&x43, &x44, x1, (arg1[1]));
489   uint64_t x45;
490   uint64_t x46;
491   fiat_p256_mulx_u64(&x45, &x46, x1, (arg1[0]));
492   uint64_t x47;
493   fiat_p256_uint1 x48;
494   fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
495   uint64_t x49;
496   fiat_p256_uint1 x50;
497   fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
498   uint64_t x51;
499   fiat_p256_uint1 x52;
500   fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
501   uint64_t x53 = (x52 + x40);
502   uint64_t x54;
503   fiat_p256_uint1 x55;
504   fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
505   uint64_t x56;
506   fiat_p256_uint1 x57;
507   fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
508   uint64_t x58;
509   fiat_p256_uint1 x59;
510   fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
511   uint64_t x60;
512   fiat_p256_uint1 x61;
513   fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
514   uint64_t x62;
515   fiat_p256_uint1 x63;
516   fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
517   uint64_t x64;
518   uint64_t x65;
519   fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
520   uint64_t x66;
521   uint64_t x67;
522   fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
523   uint64_t x68;
524   uint64_t x69;
525   fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
526   uint64_t x70;
527   fiat_p256_uint1 x71;
528   fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
529   uint64_t x72 = (x71 + x67);
530   uint64_t x73;
531   fiat_p256_uint1 x74;
532   fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
533   uint64_t x75;
534   fiat_p256_uint1 x76;
535   fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
536   uint64_t x77;
537   fiat_p256_uint1 x78;
538   fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
539   uint64_t x79;
540   fiat_p256_uint1 x80;
541   fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
542   uint64_t x81;
543   fiat_p256_uint1 x82;
544   fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
545   uint64_t x83 = ((uint64_t)x82 + x63);
546   uint64_t x84;
547   uint64_t x85;
548   fiat_p256_mulx_u64(&x84, &x85, x2, (arg1[3]));
549   uint64_t x86;
550   uint64_t x87;
551   fiat_p256_mulx_u64(&x86, &x87, x2, (arg1[2]));
552   uint64_t x88;
553   uint64_t x89;
554   fiat_p256_mulx_u64(&x88, &x89, x2, (arg1[1]));
555   uint64_t x90;
556   uint64_t x91;
557   fiat_p256_mulx_u64(&x90, &x91, x2, (arg1[0]));
558   uint64_t x92;
559   fiat_p256_uint1 x93;
560   fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
561   uint64_t x94;
562   fiat_p256_uint1 x95;
563   fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
564   uint64_t x96;
565   fiat_p256_uint1 x97;
566   fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
567   uint64_t x98 = (x97 + x85);
568   uint64_t x99;
569   fiat_p256_uint1 x100;
570   fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
571   uint64_t x101;
572   fiat_p256_uint1 x102;
573   fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
574   uint64_t x103;
575   fiat_p256_uint1 x104;
576   fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
577   uint64_t x105;
578   fiat_p256_uint1 x106;
579   fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
580   uint64_t x107;
581   fiat_p256_uint1 x108;
582   fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
583   uint64_t x109;
584   uint64_t x110;
585   fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
586   uint64_t x111;
587   uint64_t x112;
588   fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
589   uint64_t x113;
590   uint64_t x114;
591   fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
592   uint64_t x115;
593   fiat_p256_uint1 x116;
594   fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
595   uint64_t x117 = (x116 + x112);
596   uint64_t x118;
597   fiat_p256_uint1 x119;
598   fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
599   uint64_t x120;
600   fiat_p256_uint1 x121;
601   fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
602   uint64_t x122;
603   fiat_p256_uint1 x123;
604   fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
605   uint64_t x124;
606   fiat_p256_uint1 x125;
607   fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
608   uint64_t x126;
609   fiat_p256_uint1 x127;
610   fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
611   uint64_t x128 = ((uint64_t)x127 + x108);
612   uint64_t x129;
613   uint64_t x130;
614   fiat_p256_mulx_u64(&x129, &x130, x3, (arg1[3]));
615   uint64_t x131;
616   uint64_t x132;
617   fiat_p256_mulx_u64(&x131, &x132, x3, (arg1[2]));
618   uint64_t x133;
619   uint64_t x134;
620   fiat_p256_mulx_u64(&x133, &x134, x3, (arg1[1]));
621   uint64_t x135;
622   uint64_t x136;
623   fiat_p256_mulx_u64(&x135, &x136, x3, (arg1[0]));
624   uint64_t x137;
625   fiat_p256_uint1 x138;
626   fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
627   uint64_t x139;
628   fiat_p256_uint1 x140;
629   fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
630   uint64_t x141;
631   fiat_p256_uint1 x142;
632   fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
633   uint64_t x143 = (x142 + x130);
634   uint64_t x144;
635   fiat_p256_uint1 x145;
636   fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
637   uint64_t x146;
638   fiat_p256_uint1 x147;
639   fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
640   uint64_t x148;
641   fiat_p256_uint1 x149;
642   fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
643   uint64_t x150;
644   fiat_p256_uint1 x151;
645   fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
646   uint64_t x152;
647   fiat_p256_uint1 x153;
648   fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
649   uint64_t x154;
650   uint64_t x155;
651   fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
652   uint64_t x156;
653   uint64_t x157;
654   fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
655   uint64_t x158;
656   uint64_t x159;
657   fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
658   uint64_t x160;
659   fiat_p256_uint1 x161;
660   fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
661   uint64_t x162 = (x161 + x157);
662   uint64_t x163;
663   fiat_p256_uint1 x164;
664   fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
665   uint64_t x165;
666   fiat_p256_uint1 x166;
667   fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
668   uint64_t x167;
669   fiat_p256_uint1 x168;
670   fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
671   uint64_t x169;
672   fiat_p256_uint1 x170;
673   fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
674   uint64_t x171;
675   fiat_p256_uint1 x172;
676   fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
677   uint64_t x173 = ((uint64_t)x172 + x153);
678   uint64_t x174;
679   fiat_p256_uint1 x175;
680   fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
681   uint64_t x176;
682   fiat_p256_uint1 x177;
683   fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
684   uint64_t x178;
685   fiat_p256_uint1 x179;
686   fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
687   uint64_t x180;
688   fiat_p256_uint1 x181;
689   fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
690   uint64_t x182;
691   fiat_p256_uint1 x183;
692   fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
693   uint64_t x184;
694   fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
695   uint64_t x185;
696   fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
697   uint64_t x186;
698   fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
699   uint64_t x187;
700   fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
701   out1[0] = x184;
702   out1[1] = x185;
703   out1[2] = x186;
704   out1[3] = x187;
705 }
706 
707 /*
708  * The function fiat_p256_add adds two field elements in the Montgomery domain.
709  * Preconditions:
710  *   0 ≤ eval arg1 < m
711  *   0 ≤ eval arg2 < m
712  * Postconditions:
713  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
714  *   0 ≤ eval out1 < m
715  *
716  * Input Bounds:
717  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
718  *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
719  * Output Bounds:
720  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
721  */
fiat_p256_add(uint64_t out1[4],const uint64_t arg1[4],const uint64_t arg2[4])722 static void fiat_p256_add(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
723   uint64_t x1;
724   fiat_p256_uint1 x2;
725   fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
726   uint64_t x3;
727   fiat_p256_uint1 x4;
728   fiat_p256_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
729   uint64_t x5;
730   fiat_p256_uint1 x6;
731   fiat_p256_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
732   uint64_t x7;
733   fiat_p256_uint1 x8;
734   fiat_p256_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
735   uint64_t x9;
736   fiat_p256_uint1 x10;
737   fiat_p256_subborrowx_u64(&x9, &x10, 0x0, x1, UINT64_C(0xffffffffffffffff));
738   uint64_t x11;
739   fiat_p256_uint1 x12;
740   fiat_p256_subborrowx_u64(&x11, &x12, x10, x3, UINT32_C(0xffffffff));
741   uint64_t x13;
742   fiat_p256_uint1 x14;
743   fiat_p256_subborrowx_u64(&x13, &x14, x12, x5, 0x0);
744   uint64_t x15;
745   fiat_p256_uint1 x16;
746   fiat_p256_subborrowx_u64(&x15, &x16, x14, x7, UINT64_C(0xffffffff00000001));
747   uint64_t x17;
748   fiat_p256_uint1 x18;
749   fiat_p256_subborrowx_u64(&x17, &x18, x16, x8, 0x0);
750   uint64_t x19;
751   fiat_p256_cmovznz_u64(&x19, x18, x9, x1);
752   uint64_t x20;
753   fiat_p256_cmovznz_u64(&x20, x18, x11, x3);
754   uint64_t x21;
755   fiat_p256_cmovznz_u64(&x21, x18, x13, x5);
756   uint64_t x22;
757   fiat_p256_cmovznz_u64(&x22, x18, x15, x7);
758   out1[0] = x19;
759   out1[1] = x20;
760   out1[2] = x21;
761   out1[3] = x22;
762 }
763 
764 /*
765  * The function fiat_p256_sub subtracts two field elements in the Montgomery domain.
766  * Preconditions:
767  *   0 ≤ eval arg1 < m
768  *   0 ≤ eval arg2 < m
769  * Postconditions:
770  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
771  *   0 ≤ eval out1 < m
772  *
773  * Input Bounds:
774  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
775  *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
776  * Output Bounds:
777  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
778  */
fiat_p256_sub(uint64_t out1[4],const uint64_t arg1[4],const uint64_t arg2[4])779 static void fiat_p256_sub(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
780   uint64_t x1;
781   fiat_p256_uint1 x2;
782   fiat_p256_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
783   uint64_t x3;
784   fiat_p256_uint1 x4;
785   fiat_p256_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
786   uint64_t x5;
787   fiat_p256_uint1 x6;
788   fiat_p256_subborrowx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
789   uint64_t x7;
790   fiat_p256_uint1 x8;
791   fiat_p256_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
792   uint64_t x9;
793   fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
794   uint64_t x10;
795   fiat_p256_uint1 x11;
796   fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, (x9 & UINT64_C(0xffffffffffffffff)));
797   uint64_t x12;
798   fiat_p256_uint1 x13;
799   fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
800   uint64_t x14;
801   fiat_p256_uint1 x15;
802   fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
803   uint64_t x16;
804   fiat_p256_uint1 x17;
805   fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
806   out1[0] = x10;
807   out1[1] = x12;
808   out1[2] = x14;
809   out1[3] = x16;
810 }
811 
812 /*
813  * The function fiat_p256_opp negates a field element in the Montgomery domain.
814  * Preconditions:
815  *   0 ≤ eval arg1 < m
816  * Postconditions:
817  *   eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
818  *   0 ≤ eval out1 < m
819  *
820  * Input Bounds:
821  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
822  * Output Bounds:
823  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
824  */
fiat_p256_opp(uint64_t out1[4],const uint64_t arg1[4])825 static void fiat_p256_opp(uint64_t out1[4], const uint64_t arg1[4]) {
826   uint64_t x1;
827   fiat_p256_uint1 x2;
828   fiat_p256_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0]));
829   uint64_t x3;
830   fiat_p256_uint1 x4;
831   fiat_p256_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1]));
832   uint64_t x5;
833   fiat_p256_uint1 x6;
834   fiat_p256_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2]));
835   uint64_t x7;
836   fiat_p256_uint1 x8;
837   fiat_p256_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3]));
838   uint64_t x9;
839   fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
840   uint64_t x10;
841   fiat_p256_uint1 x11;
842   fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, (x9 & UINT64_C(0xffffffffffffffff)));
843   uint64_t x12;
844   fiat_p256_uint1 x13;
845   fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
846   uint64_t x14;
847   fiat_p256_uint1 x15;
848   fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
849   uint64_t x16;
850   fiat_p256_uint1 x17;
851   fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
852   out1[0] = x10;
853   out1[1] = x12;
854   out1[2] = x14;
855   out1[3] = x16;
856 }
857 
858 /*
859  * The function fiat_p256_from_montgomery translates a field element out of the Montgomery domain.
860  * Preconditions:
861  *   0 ≤ eval arg1 < m
862  * Postconditions:
863  *   eval out1 mod m = (eval arg1 * ((2^64)⁻¹ mod m)^4) mod m
864  *   0 ≤ eval out1 < m
865  *
866  * Input Bounds:
867  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
868  * Output Bounds:
869  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
870  */
fiat_p256_from_montgomery(uint64_t out1[4],const uint64_t arg1[4])871 static void fiat_p256_from_montgomery(uint64_t out1[4], const uint64_t arg1[4]) {
872   uint64_t x1 = (arg1[0]);
873   uint64_t x2;
874   uint64_t x3;
875   fiat_p256_mulx_u64(&x2, &x3, x1, UINT64_C(0xffffffff00000001));
876   uint64_t x4;
877   uint64_t x5;
878   fiat_p256_mulx_u64(&x4, &x5, x1, UINT32_C(0xffffffff));
879   uint64_t x6;
880   uint64_t x7;
881   fiat_p256_mulx_u64(&x6, &x7, x1, UINT64_C(0xffffffffffffffff));
882   uint64_t x8;
883   fiat_p256_uint1 x9;
884   fiat_p256_addcarryx_u64(&x8, &x9, 0x0, x7, x4);
885   uint64_t x10;
886   fiat_p256_uint1 x11;
887   fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, x6);
888   uint64_t x12;
889   fiat_p256_uint1 x13;
890   fiat_p256_addcarryx_u64(&x12, &x13, x11, 0x0, x8);
891   uint64_t x14;
892   fiat_p256_uint1 x15;
893   fiat_p256_addcarryx_u64(&x14, &x15, 0x0, x12, (arg1[1]));
894   uint64_t x16;
895   uint64_t x17;
896   fiat_p256_mulx_u64(&x16, &x17, x14, UINT64_C(0xffffffff00000001));
897   uint64_t x18;
898   uint64_t x19;
899   fiat_p256_mulx_u64(&x18, &x19, x14, UINT32_C(0xffffffff));
900   uint64_t x20;
901   uint64_t x21;
902   fiat_p256_mulx_u64(&x20, &x21, x14, UINT64_C(0xffffffffffffffff));
903   uint64_t x22;
904   fiat_p256_uint1 x23;
905   fiat_p256_addcarryx_u64(&x22, &x23, 0x0, x21, x18);
906   uint64_t x24;
907   fiat_p256_uint1 x25;
908   fiat_p256_addcarryx_u64(&x24, &x25, 0x0, x14, x20);
909   uint64_t x26;
910   fiat_p256_uint1 x27;
911   fiat_p256_addcarryx_u64(&x26, &x27, x25, (x15 + (x13 + (x9 + x5))), x22);
912   uint64_t x28;
913   fiat_p256_uint1 x29;
914   fiat_p256_addcarryx_u64(&x28, &x29, x27, x2, (x23 + x19));
915   uint64_t x30;
916   fiat_p256_uint1 x31;
917   fiat_p256_addcarryx_u64(&x30, &x31, x29, x3, x16);
918   uint64_t x32;
919   fiat_p256_uint1 x33;
920   fiat_p256_addcarryx_u64(&x32, &x33, 0x0, x26, (arg1[2]));
921   uint64_t x34;
922   fiat_p256_uint1 x35;
923   fiat_p256_addcarryx_u64(&x34, &x35, x33, x28, 0x0);
924   uint64_t x36;
925   fiat_p256_uint1 x37;
926   fiat_p256_addcarryx_u64(&x36, &x37, x35, x30, 0x0);
927   uint64_t x38;
928   uint64_t x39;
929   fiat_p256_mulx_u64(&x38, &x39, x32, UINT64_C(0xffffffff00000001));
930   uint64_t x40;
931   uint64_t x41;
932   fiat_p256_mulx_u64(&x40, &x41, x32, UINT32_C(0xffffffff));
933   uint64_t x42;
934   uint64_t x43;
935   fiat_p256_mulx_u64(&x42, &x43, x32, UINT64_C(0xffffffffffffffff));
936   uint64_t x44;
937   fiat_p256_uint1 x45;
938   fiat_p256_addcarryx_u64(&x44, &x45, 0x0, x43, x40);
939   uint64_t x46;
940   fiat_p256_uint1 x47;
941   fiat_p256_addcarryx_u64(&x46, &x47, 0x0, x32, x42);
942   uint64_t x48;
943   fiat_p256_uint1 x49;
944   fiat_p256_addcarryx_u64(&x48, &x49, x47, x34, x44);
945   uint64_t x50;
946   fiat_p256_uint1 x51;
947   fiat_p256_addcarryx_u64(&x50, &x51, x49, x36, (x45 + x41));
948   uint64_t x52;
949   fiat_p256_uint1 x53;
950   fiat_p256_addcarryx_u64(&x52, &x53, x51, (x37 + (x31 + x17)), x38);
951   uint64_t x54;
952   fiat_p256_uint1 x55;
953   fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x48, (arg1[3]));
954   uint64_t x56;
955   fiat_p256_uint1 x57;
956   fiat_p256_addcarryx_u64(&x56, &x57, x55, x50, 0x0);
957   uint64_t x58;
958   fiat_p256_uint1 x59;
959   fiat_p256_addcarryx_u64(&x58, &x59, x57, x52, 0x0);
960   uint64_t x60;
961   uint64_t x61;
962   fiat_p256_mulx_u64(&x60, &x61, x54, UINT64_C(0xffffffff00000001));
963   uint64_t x62;
964   uint64_t x63;
965   fiat_p256_mulx_u64(&x62, &x63, x54, UINT32_C(0xffffffff));
966   uint64_t x64;
967   uint64_t x65;
968   fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffffffffffff));
969   uint64_t x66;
970   fiat_p256_uint1 x67;
971   fiat_p256_addcarryx_u64(&x66, &x67, 0x0, x65, x62);
972   uint64_t x68;
973   fiat_p256_uint1 x69;
974   fiat_p256_addcarryx_u64(&x68, &x69, 0x0, x54, x64);
975   uint64_t x70;
976   fiat_p256_uint1 x71;
977   fiat_p256_addcarryx_u64(&x70, &x71, x69, x56, x66);
978   uint64_t x72;
979   fiat_p256_uint1 x73;
980   fiat_p256_addcarryx_u64(&x72, &x73, x71, x58, (x67 + x63));
981   uint64_t x74;
982   fiat_p256_uint1 x75;
983   fiat_p256_addcarryx_u64(&x74, &x75, x73, (x59 + (x53 + x39)), x60);
984   uint64_t x76 = (x75 + x61);
985   uint64_t x77;
986   fiat_p256_uint1 x78;
987   fiat_p256_subborrowx_u64(&x77, &x78, 0x0, x70, UINT64_C(0xffffffffffffffff));
988   uint64_t x79;
989   fiat_p256_uint1 x80;
990   fiat_p256_subborrowx_u64(&x79, &x80, x78, x72, UINT32_C(0xffffffff));
991   uint64_t x81;
992   fiat_p256_uint1 x82;
993   fiat_p256_subborrowx_u64(&x81, &x82, x80, x74, 0x0);
994   uint64_t x83;
995   fiat_p256_uint1 x84;
996   fiat_p256_subborrowx_u64(&x83, &x84, x82, x76, UINT64_C(0xffffffff00000001));
997   uint64_t x85;
998   fiat_p256_uint1 x86;
999   fiat_p256_subborrowx_u64(&x85, &x86, x84, 0x0, 0x0);
1000   uint64_t x87;
1001   fiat_p256_cmovznz_u64(&x87, x86, x77, x70);
1002   uint64_t x88;
1003   fiat_p256_cmovznz_u64(&x88, x86, x79, x72);
1004   uint64_t x89;
1005   fiat_p256_cmovznz_u64(&x89, x86, x81, x74);
1006   uint64_t x90;
1007   fiat_p256_cmovznz_u64(&x90, x86, x83, x76);
1008   out1[0] = x87;
1009   out1[1] = x88;
1010   out1[2] = x89;
1011   out1[3] = x90;
1012 }
1013 
1014 /*
1015  * The function fiat_p256_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
1016  * Preconditions:
1017  *   0 ≤ eval arg1 < m
1018  * Postconditions:
1019  *   out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
1020  *
1021  * Input Bounds:
1022  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1023  * Output Bounds:
1024  *   out1: [0x0 ~> 0xffffffffffffffff]
1025  */
fiat_p256_nonzero(uint64_t * out1,const uint64_t arg1[4])1026 static void fiat_p256_nonzero(uint64_t* out1, const uint64_t arg1[4]) {
1027   uint64_t x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | ((arg1[3]) | (uint64_t)0x0))));
1028   *out1 = x1;
1029 }
1030 
1031 /*
1032  * The function fiat_p256_selectznz is a multi-limb conditional select.
1033  * Postconditions:
1034  *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
1035  *
1036  * Input Bounds:
1037  *   arg1: [0x0 ~> 0x1]
1038  *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1039  *   arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1040  * Output Bounds:
1041  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1042  */
fiat_p256_selectznz(uint64_t out1[4],fiat_p256_uint1 arg1,const uint64_t arg2[4],const uint64_t arg3[4])1043 static void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const uint64_t arg2[4], const uint64_t arg3[4]) {
1044   uint64_t x1;
1045   fiat_p256_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
1046   uint64_t x2;
1047   fiat_p256_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
1048   uint64_t x3;
1049   fiat_p256_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
1050   uint64_t x4;
1051   fiat_p256_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
1052   out1[0] = x1;
1053   out1[1] = x2;
1054   out1[2] = x3;
1055   out1[3] = x4;
1056 }
1057 
1058 /*
1059  * The function fiat_p256_to_bytes serializes a field element in the Montgomery domain to bytes in little-endian order.
1060  * Preconditions:
1061  *   0 ≤ eval arg1 < m
1062  * Postconditions:
1063  *   out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
1064  *
1065  * Input Bounds:
1066  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1067  * Output Bounds:
1068  *   out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
1069  */
fiat_p256_to_bytes(uint8_t out1[32],const uint64_t arg1[4])1070 static void fiat_p256_to_bytes(uint8_t out1[32], const uint64_t arg1[4]) {
1071   uint64_t x1 = (arg1[3]);
1072   uint64_t x2 = (arg1[2]);
1073   uint64_t x3 = (arg1[1]);
1074   uint64_t x4 = (arg1[0]);
1075   uint64_t x5 = (x4 >> 8);
1076   uint8_t x6 = (uint8_t)(x4 & UINT8_C(0xff));
1077   uint64_t x7 = (x5 >> 8);
1078   uint8_t x8 = (uint8_t)(x5 & UINT8_C(0xff));
1079   uint64_t x9 = (x7 >> 8);
1080   uint8_t x10 = (uint8_t)(x7 & UINT8_C(0xff));
1081   uint64_t x11 = (x9 >> 8);
1082   uint8_t x12 = (uint8_t)(x9 & UINT8_C(0xff));
1083   uint64_t x13 = (x11 >> 8);
1084   uint8_t x14 = (uint8_t)(x11 & UINT8_C(0xff));
1085   uint64_t x15 = (x13 >> 8);
1086   uint8_t x16 = (uint8_t)(x13 & UINT8_C(0xff));
1087   uint8_t x17 = (uint8_t)(x15 >> 8);
1088   uint8_t x18 = (uint8_t)(x15 & UINT8_C(0xff));
1089   uint8_t x19 = (uint8_t)(x17 & UINT8_C(0xff));
1090   uint64_t x20 = (x3 >> 8);
1091   uint8_t x21 = (uint8_t)(x3 & UINT8_C(0xff));
1092   uint64_t x22 = (x20 >> 8);
1093   uint8_t x23 = (uint8_t)(x20 & UINT8_C(0xff));
1094   uint64_t x24 = (x22 >> 8);
1095   uint8_t x25 = (uint8_t)(x22 & UINT8_C(0xff));
1096   uint64_t x26 = (x24 >> 8);
1097   uint8_t x27 = (uint8_t)(x24 & UINT8_C(0xff));
1098   uint64_t x28 = (x26 >> 8);
1099   uint8_t x29 = (uint8_t)(x26 & UINT8_C(0xff));
1100   uint64_t x30 = (x28 >> 8);
1101   uint8_t x31 = (uint8_t)(x28 & UINT8_C(0xff));
1102   uint8_t x32 = (uint8_t)(x30 >> 8);
1103   uint8_t x33 = (uint8_t)(x30 & UINT8_C(0xff));
1104   uint8_t x34 = (uint8_t)(x32 & UINT8_C(0xff));
1105   uint64_t x35 = (x2 >> 8);
1106   uint8_t x36 = (uint8_t)(x2 & UINT8_C(0xff));
1107   uint64_t x37 = (x35 >> 8);
1108   uint8_t x38 = (uint8_t)(x35 & UINT8_C(0xff));
1109   uint64_t x39 = (x37 >> 8);
1110   uint8_t x40 = (uint8_t)(x37 & UINT8_C(0xff));
1111   uint64_t x41 = (x39 >> 8);
1112   uint8_t x42 = (uint8_t)(x39 & UINT8_C(0xff));
1113   uint64_t x43 = (x41 >> 8);
1114   uint8_t x44 = (uint8_t)(x41 & UINT8_C(0xff));
1115   uint64_t x45 = (x43 >> 8);
1116   uint8_t x46 = (uint8_t)(x43 & UINT8_C(0xff));
1117   uint8_t x47 = (uint8_t)(x45 >> 8);
1118   uint8_t x48 = (uint8_t)(x45 & UINT8_C(0xff));
1119   uint8_t x49 = (uint8_t)(x47 & UINT8_C(0xff));
1120   uint64_t x50 = (x1 >> 8);
1121   uint8_t x51 = (uint8_t)(x1 & UINT8_C(0xff));
1122   uint64_t x52 = (x50 >> 8);
1123   uint8_t x53 = (uint8_t)(x50 & UINT8_C(0xff));
1124   uint64_t x54 = (x52 >> 8);
1125   uint8_t x55 = (uint8_t)(x52 & UINT8_C(0xff));
1126   uint64_t x56 = (x54 >> 8);
1127   uint8_t x57 = (uint8_t)(x54 & UINT8_C(0xff));
1128   uint64_t x58 = (x56 >> 8);
1129   uint8_t x59 = (uint8_t)(x56 & UINT8_C(0xff));
1130   uint64_t x60 = (x58 >> 8);
1131   uint8_t x61 = (uint8_t)(x58 & UINT8_C(0xff));
1132   uint8_t x62 = (uint8_t)(x60 >> 8);
1133   uint8_t x63 = (uint8_t)(x60 & UINT8_C(0xff));
1134   out1[0] = x6;
1135   out1[1] = x8;
1136   out1[2] = x10;
1137   out1[3] = x12;
1138   out1[4] = x14;
1139   out1[5] = x16;
1140   out1[6] = x18;
1141   out1[7] = x19;
1142   out1[8] = x21;
1143   out1[9] = x23;
1144   out1[10] = x25;
1145   out1[11] = x27;
1146   out1[12] = x29;
1147   out1[13] = x31;
1148   out1[14] = x33;
1149   out1[15] = x34;
1150   out1[16] = x36;
1151   out1[17] = x38;
1152   out1[18] = x40;
1153   out1[19] = x42;
1154   out1[20] = x44;
1155   out1[21] = x46;
1156   out1[22] = x48;
1157   out1[23] = x49;
1158   out1[24] = x51;
1159   out1[25] = x53;
1160   out1[26] = x55;
1161   out1[27] = x57;
1162   out1[28] = x59;
1163   out1[29] = x61;
1164   out1[30] = x63;
1165   out1[31] = x62;
1166 }
1167 
1168 /*
1169  * The function fiat_p256_from_bytes deserializes a field element in the Montgomery domain from bytes in little-endian order.
1170  * Preconditions:
1171  *   0 ≤ bytes_eval arg1 < m
1172  * Postconditions:
1173  *   eval out1 mod m = bytes_eval arg1 mod m
1174  *   0 ≤ eval out1 < m
1175  *
1176  * Input Bounds:
1177  *   arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff]]
1178  * Output Bounds:
1179  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
1180  */
fiat_p256_from_bytes(uint64_t out1[4],const uint8_t arg1[32])1181 static void fiat_p256_from_bytes(uint64_t out1[4], const uint8_t arg1[32]) {
1182   uint64_t x1 = ((uint64_t)(arg1[31]) << 56);
1183   uint64_t x2 = ((uint64_t)(arg1[30]) << 48);
1184   uint64_t x3 = ((uint64_t)(arg1[29]) << 40);
1185   uint64_t x4 = ((uint64_t)(arg1[28]) << 32);
1186   uint64_t x5 = ((uint64_t)(arg1[27]) << 24);
1187   uint64_t x6 = ((uint64_t)(arg1[26]) << 16);
1188   uint64_t x7 = ((uint64_t)(arg1[25]) << 8);
1189   uint8_t x8 = (arg1[24]);
1190   uint64_t x9 = ((uint64_t)(arg1[23]) << 56);
1191   uint64_t x10 = ((uint64_t)(arg1[22]) << 48);
1192   uint64_t x11 = ((uint64_t)(arg1[21]) << 40);
1193   uint64_t x12 = ((uint64_t)(arg1[20]) << 32);
1194   uint64_t x13 = ((uint64_t)(arg1[19]) << 24);
1195   uint64_t x14 = ((uint64_t)(arg1[18]) << 16);
1196   uint64_t x15 = ((uint64_t)(arg1[17]) << 8);
1197   uint8_t x16 = (arg1[16]);
1198   uint64_t x17 = ((uint64_t)(arg1[15]) << 56);
1199   uint64_t x18 = ((uint64_t)(arg1[14]) << 48);
1200   uint64_t x19 = ((uint64_t)(arg1[13]) << 40);
1201   uint64_t x20 = ((uint64_t)(arg1[12]) << 32);
1202   uint64_t x21 = ((uint64_t)(arg1[11]) << 24);
1203   uint64_t x22 = ((uint64_t)(arg1[10]) << 16);
1204   uint64_t x23 = ((uint64_t)(arg1[9]) << 8);
1205   uint8_t x24 = (arg1[8]);
1206   uint64_t x25 = ((uint64_t)(arg1[7]) << 56);
1207   uint64_t x26 = ((uint64_t)(arg1[6]) << 48);
1208   uint64_t x27 = ((uint64_t)(arg1[5]) << 40);
1209   uint64_t x28 = ((uint64_t)(arg1[4]) << 32);
1210   uint64_t x29 = ((uint64_t)(arg1[3]) << 24);
1211   uint64_t x30 = ((uint64_t)(arg1[2]) << 16);
1212   uint64_t x31 = ((uint64_t)(arg1[1]) << 8);
1213   uint8_t x32 = (arg1[0]);
1214   uint64_t x33 = (x32 + (x31 + (x30 + (x29 + (x28 + (x27 + (x26 + x25)))))));
1215   uint64_t x34 = (x33 & UINT64_C(0xffffffffffffffff));
1216   uint64_t x35 = (x8 + (x7 + (x6 + (x5 + (x4 + (x3 + (x2 + x1)))))));
1217   uint64_t x36 = (x16 + (x15 + (x14 + (x13 + (x12 + (x11 + (x10 + x9)))))));
1218   uint64_t x37 = (x24 + (x23 + (x22 + (x21 + (x20 + (x19 + (x18 + x17)))))));
1219   uint64_t x38 = (x37 & UINT64_C(0xffffffffffffffff));
1220   uint64_t x39 = (x36 & UINT64_C(0xffffffffffffffff));
1221   out1[0] = x34;
1222   out1[1] = x38;
1223   out1[2] = x39;
1224   out1[3] = x35;
1225 }
1226 
1227