• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Autogenerated: src/ExtractionOCaml/word_by_word_montgomery --static p256 '2^256 - 2^224 + 2^192 + 2^96 - 1' 64 mul square add sub opp from_montgomery nonzero selectznz to_bytes from_bytes */
2 /* curve description: p256 */
3 /* requested operations: mul, square, add, sub, opp, from_montgomery, nonzero, selectznz, to_bytes, from_bytes */
4 /* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */
5 /* machine_wordsize = 64 (from "64") */
6 /*                                                                    */
7 /* NOTE: In addition to the bounds specified above each function, all */
8 /*   functions synthesized for this Montgomery arithmetic require the */
9 /*   input to be strictly less than the prime modulus (m), and also   */
10 /*   require the input to be in the unique saturated representation.  */
11 /*   All functions also ensure that these two properties are true of  */
12 /*   return values.                                                   */
13 
14 #include <stdint.h>
15 typedef unsigned char fiat_p256_uint1;
16 typedef signed char fiat_p256_int1;
17 typedef signed __int128 fiat_p256_int128;
18 typedef unsigned __int128 fiat_p256_uint128;
19 
20 #if (-1 & 3) != 3
21 #error "This code only works on a two's complement system"
22 #endif
23 
24 
25 /*
26  * The function fiat_p256_addcarryx_u64 is an addition with carry.
27  * Postconditions:
28  *   out1 = (arg1 + arg2 + arg3) mod 2^64
29  *   out2 = ⌊(arg1 + arg2 + arg3) / 2^64⌋
30  *
31  * Input Bounds:
32  *   arg1: [0x0 ~> 0x1]
33  *   arg2: [0x0 ~> 0xffffffffffffffff]
34  *   arg3: [0x0 ~> 0xffffffffffffffff]
35  * Output Bounds:
36  *   out1: [0x0 ~> 0xffffffffffffffff]
37  *   out2: [0x0 ~> 0x1]
38  */
fiat_p256_addcarryx_u64(uint64_t * out1,fiat_p256_uint1 * out2,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)39 static void fiat_p256_addcarryx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
40   fiat_p256_uint128 x1 = ((arg1 + (fiat_p256_uint128)arg2) + arg3);
41   uint64_t x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
42   fiat_p256_uint1 x3 = (fiat_p256_uint1)(x1 >> 64);
43   *out1 = x2;
44   *out2 = x3;
45 }
46 
47 /*
48  * The function fiat_p256_subborrowx_u64 is a subtraction with borrow.
49  * Postconditions:
50  *   out1 = (-arg1 + arg2 + -arg3) mod 2^64
51  *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^64⌋
52  *
53  * Input Bounds:
54  *   arg1: [0x0 ~> 0x1]
55  *   arg2: [0x0 ~> 0xffffffffffffffff]
56  *   arg3: [0x0 ~> 0xffffffffffffffff]
57  * Output Bounds:
58  *   out1: [0x0 ~> 0xffffffffffffffff]
59  *   out2: [0x0 ~> 0x1]
60  */
fiat_p256_subborrowx_u64(uint64_t * out1,fiat_p256_uint1 * out2,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)61 static void fiat_p256_subborrowx_u64(uint64_t* out1, fiat_p256_uint1* out2, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
62   fiat_p256_int128 x1 = ((arg2 - (fiat_p256_int128)arg1) - arg3);
63   fiat_p256_int1 x2 = (fiat_p256_int1)(x1 >> 64);
64   uint64_t x3 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
65   *out1 = x3;
66   *out2 = (fiat_p256_uint1)(0x0 - x2);
67 }
68 
69 /*
70  * The function fiat_p256_mulx_u64 is a multiplication, returning the full double-width result.
71  * Postconditions:
72  *   out1 = (arg1 * arg2) mod 2^64
73  *   out2 = ⌊arg1 * arg2 / 2^64⌋
74  *
75  * Input Bounds:
76  *   arg1: [0x0 ~> 0xffffffffffffffff]
77  *   arg2: [0x0 ~> 0xffffffffffffffff]
78  * Output Bounds:
79  *   out1: [0x0 ~> 0xffffffffffffffff]
80  *   out2: [0x0 ~> 0xffffffffffffffff]
81  */
fiat_p256_mulx_u64(uint64_t * out1,uint64_t * out2,uint64_t arg1,uint64_t arg2)82 static void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, uint64_t arg2) {
83   fiat_p256_uint128 x1 = ((fiat_p256_uint128)arg1 * arg2);
84   uint64_t x2 = (uint64_t)(x1 & UINT64_C(0xffffffffffffffff));
85   uint64_t x3 = (uint64_t)(x1 >> 64);
86   *out1 = x2;
87   *out2 = x3;
88 }
89 
90 /*
91  * The function fiat_p256_cmovznz_u64 is a single-word conditional move.
92  * Postconditions:
93  *   out1 = (if arg1 = 0 then arg2 else arg3)
94  *
95  * Input Bounds:
96  *   arg1: [0x0 ~> 0x1]
97  *   arg2: [0x0 ~> 0xffffffffffffffff]
98  *   arg3: [0x0 ~> 0xffffffffffffffff]
99  * Output Bounds:
100  *   out1: [0x0 ~> 0xffffffffffffffff]
101  */
fiat_p256_cmovznz_u64(uint64_t * out1,fiat_p256_uint1 arg1,uint64_t arg2,uint64_t arg3)102 static void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) {
103   fiat_p256_uint1 x1 = (!(!arg1));
104   uint64_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff));
105   // Note this line has been patched from the synthesized code to add value
106   // barriers.
107   //
108   // Clang recognizes this pattern as a select. While it usually transforms it
109   // to a cmov, it sometimes further transforms it into a branch, which we do
110   // not want.
111   uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2));
112   *out1 = x3;
113 }
114 
115 /*
116  * The function fiat_p256_mul multiplies two field elements in the Montgomery domain.
117  * Preconditions:
118  *   0 ≤ eval arg1 < m
119  *   0 ≤ eval arg2 < m
120  * Postconditions:
121  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg2)) mod m
122  *   0 ≤ eval out1 < m
123  *
124  * Input Bounds:
125  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
126  *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
127  * Output Bounds:
128  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
129  */
fiat_p256_mul(uint64_t out1[4],const uint64_t arg1[4],const uint64_t arg2[4])130 static void fiat_p256_mul(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
131   uint64_t x1 = (arg1[1]);
132   uint64_t x2 = (arg1[2]);
133   uint64_t x3 = (arg1[3]);
134   uint64_t x4 = (arg1[0]);
135   uint64_t x5;
136   uint64_t x6;
137   fiat_p256_mulx_u64(&x5, &x6, x4, (arg2[3]));
138   uint64_t x7;
139   uint64_t x8;
140   fiat_p256_mulx_u64(&x7, &x8, x4, (arg2[2]));
141   uint64_t x9;
142   uint64_t x10;
143   fiat_p256_mulx_u64(&x9, &x10, x4, (arg2[1]));
144   uint64_t x11;
145   uint64_t x12;
146   fiat_p256_mulx_u64(&x11, &x12, x4, (arg2[0]));
147   uint64_t x13;
148   fiat_p256_uint1 x14;
149   fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
150   uint64_t x15;
151   fiat_p256_uint1 x16;
152   fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
153   uint64_t x17;
154   fiat_p256_uint1 x18;
155   fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
156   uint64_t x19 = (x18 + x6);
157   uint64_t x20;
158   uint64_t x21;
159   fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
160   uint64_t x22;
161   uint64_t x23;
162   fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
163   uint64_t x24;
164   uint64_t x25;
165   fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
166   uint64_t x26;
167   fiat_p256_uint1 x27;
168   fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
169   uint64_t x28 = (x27 + x23);
170   uint64_t x29;
171   fiat_p256_uint1 x30;
172   fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
173   uint64_t x31;
174   fiat_p256_uint1 x32;
175   fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
176   uint64_t x33;
177   fiat_p256_uint1 x34;
178   fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
179   uint64_t x35;
180   fiat_p256_uint1 x36;
181   fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
182   uint64_t x37;
183   fiat_p256_uint1 x38;
184   fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
185   uint64_t x39;
186   uint64_t x40;
187   fiat_p256_mulx_u64(&x39, &x40, x1, (arg2[3]));
188   uint64_t x41;
189   uint64_t x42;
190   fiat_p256_mulx_u64(&x41, &x42, x1, (arg2[2]));
191   uint64_t x43;
192   uint64_t x44;
193   fiat_p256_mulx_u64(&x43, &x44, x1, (arg2[1]));
194   uint64_t x45;
195   uint64_t x46;
196   fiat_p256_mulx_u64(&x45, &x46, x1, (arg2[0]));
197   uint64_t x47;
198   fiat_p256_uint1 x48;
199   fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
200   uint64_t x49;
201   fiat_p256_uint1 x50;
202   fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
203   uint64_t x51;
204   fiat_p256_uint1 x52;
205   fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
206   uint64_t x53 = (x52 + x40);
207   uint64_t x54;
208   fiat_p256_uint1 x55;
209   fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
210   uint64_t x56;
211   fiat_p256_uint1 x57;
212   fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
213   uint64_t x58;
214   fiat_p256_uint1 x59;
215   fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
216   uint64_t x60;
217   fiat_p256_uint1 x61;
218   fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
219   uint64_t x62;
220   fiat_p256_uint1 x63;
221   fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
222   uint64_t x64;
223   uint64_t x65;
224   fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
225   uint64_t x66;
226   uint64_t x67;
227   fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
228   uint64_t x68;
229   uint64_t x69;
230   fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
231   uint64_t x70;
232   fiat_p256_uint1 x71;
233   fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
234   uint64_t x72 = (x71 + x67);
235   uint64_t x73;
236   fiat_p256_uint1 x74;
237   fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
238   uint64_t x75;
239   fiat_p256_uint1 x76;
240   fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
241   uint64_t x77;
242   fiat_p256_uint1 x78;
243   fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
244   uint64_t x79;
245   fiat_p256_uint1 x80;
246   fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
247   uint64_t x81;
248   fiat_p256_uint1 x82;
249   fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
250   uint64_t x83 = ((uint64_t)x82 + x63);
251   uint64_t x84;
252   uint64_t x85;
253   fiat_p256_mulx_u64(&x84, &x85, x2, (arg2[3]));
254   uint64_t x86;
255   uint64_t x87;
256   fiat_p256_mulx_u64(&x86, &x87, x2, (arg2[2]));
257   uint64_t x88;
258   uint64_t x89;
259   fiat_p256_mulx_u64(&x88, &x89, x2, (arg2[1]));
260   uint64_t x90;
261   uint64_t x91;
262   fiat_p256_mulx_u64(&x90, &x91, x2, (arg2[0]));
263   uint64_t x92;
264   fiat_p256_uint1 x93;
265   fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
266   uint64_t x94;
267   fiat_p256_uint1 x95;
268   fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
269   uint64_t x96;
270   fiat_p256_uint1 x97;
271   fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
272   uint64_t x98 = (x97 + x85);
273   uint64_t x99;
274   fiat_p256_uint1 x100;
275   fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
276   uint64_t x101;
277   fiat_p256_uint1 x102;
278   fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
279   uint64_t x103;
280   fiat_p256_uint1 x104;
281   fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
282   uint64_t x105;
283   fiat_p256_uint1 x106;
284   fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
285   uint64_t x107;
286   fiat_p256_uint1 x108;
287   fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
288   uint64_t x109;
289   uint64_t x110;
290   fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
291   uint64_t x111;
292   uint64_t x112;
293   fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
294   uint64_t x113;
295   uint64_t x114;
296   fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
297   uint64_t x115;
298   fiat_p256_uint1 x116;
299   fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
300   uint64_t x117 = (x116 + x112);
301   uint64_t x118;
302   fiat_p256_uint1 x119;
303   fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
304   uint64_t x120;
305   fiat_p256_uint1 x121;
306   fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
307   uint64_t x122;
308   fiat_p256_uint1 x123;
309   fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
310   uint64_t x124;
311   fiat_p256_uint1 x125;
312   fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
313   uint64_t x126;
314   fiat_p256_uint1 x127;
315   fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
316   uint64_t x128 = ((uint64_t)x127 + x108);
317   uint64_t x129;
318   uint64_t x130;
319   fiat_p256_mulx_u64(&x129, &x130, x3, (arg2[3]));
320   uint64_t x131;
321   uint64_t x132;
322   fiat_p256_mulx_u64(&x131, &x132, x3, (arg2[2]));
323   uint64_t x133;
324   uint64_t x134;
325   fiat_p256_mulx_u64(&x133, &x134, x3, (arg2[1]));
326   uint64_t x135;
327   uint64_t x136;
328   fiat_p256_mulx_u64(&x135, &x136, x3, (arg2[0]));
329   uint64_t x137;
330   fiat_p256_uint1 x138;
331   fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
332   uint64_t x139;
333   fiat_p256_uint1 x140;
334   fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
335   uint64_t x141;
336   fiat_p256_uint1 x142;
337   fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
338   uint64_t x143 = (x142 + x130);
339   uint64_t x144;
340   fiat_p256_uint1 x145;
341   fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
342   uint64_t x146;
343   fiat_p256_uint1 x147;
344   fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
345   uint64_t x148;
346   fiat_p256_uint1 x149;
347   fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
348   uint64_t x150;
349   fiat_p256_uint1 x151;
350   fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
351   uint64_t x152;
352   fiat_p256_uint1 x153;
353   fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
354   uint64_t x154;
355   uint64_t x155;
356   fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
357   uint64_t x156;
358   uint64_t x157;
359   fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
360   uint64_t x158;
361   uint64_t x159;
362   fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
363   uint64_t x160;
364   fiat_p256_uint1 x161;
365   fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
366   uint64_t x162 = (x161 + x157);
367   uint64_t x163;
368   fiat_p256_uint1 x164;
369   fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
370   uint64_t x165;
371   fiat_p256_uint1 x166;
372   fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
373   uint64_t x167;
374   fiat_p256_uint1 x168;
375   fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
376   uint64_t x169;
377   fiat_p256_uint1 x170;
378   fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
379   uint64_t x171;
380   fiat_p256_uint1 x172;
381   fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
382   uint64_t x173 = ((uint64_t)x172 + x153);
383   uint64_t x174;
384   fiat_p256_uint1 x175;
385   fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
386   uint64_t x176;
387   fiat_p256_uint1 x177;
388   fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
389   uint64_t x178;
390   fiat_p256_uint1 x179;
391   fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
392   uint64_t x180;
393   fiat_p256_uint1 x181;
394   fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
395   uint64_t x182;
396   fiat_p256_uint1 x183;
397   fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
398   uint64_t x184;
399   fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
400   uint64_t x185;
401   fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
402   uint64_t x186;
403   fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
404   uint64_t x187;
405   fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
406   out1[0] = x184;
407   out1[1] = x185;
408   out1[2] = x186;
409   out1[3] = x187;
410 }
411 
412 /*
413  * The function fiat_p256_square squares a field element in the Montgomery domain.
414  * Preconditions:
415  *   0 ≤ eval arg1 < m
416  * Postconditions:
417  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) * eval (from_montgomery arg1)) mod m
418  *   0 ≤ eval out1 < m
419  *
420  * Input Bounds:
421  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
422  * Output Bounds:
423  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
424  */
fiat_p256_square(uint64_t out1[4],const uint64_t arg1[4])425 static void fiat_p256_square(uint64_t out1[4], const uint64_t arg1[4]) {
426   uint64_t x1 = (arg1[1]);
427   uint64_t x2 = (arg1[2]);
428   uint64_t x3 = (arg1[3]);
429   uint64_t x4 = (arg1[0]);
430   uint64_t x5;
431   uint64_t x6;
432   fiat_p256_mulx_u64(&x5, &x6, x4, (arg1[3]));
433   uint64_t x7;
434   uint64_t x8;
435   fiat_p256_mulx_u64(&x7, &x8, x4, (arg1[2]));
436   uint64_t x9;
437   uint64_t x10;
438   fiat_p256_mulx_u64(&x9, &x10, x4, (arg1[1]));
439   uint64_t x11;
440   uint64_t x12;
441   fiat_p256_mulx_u64(&x11, &x12, x4, (arg1[0]));
442   uint64_t x13;
443   fiat_p256_uint1 x14;
444   fiat_p256_addcarryx_u64(&x13, &x14, 0x0, x12, x9);
445   uint64_t x15;
446   fiat_p256_uint1 x16;
447   fiat_p256_addcarryx_u64(&x15, &x16, x14, x10, x7);
448   uint64_t x17;
449   fiat_p256_uint1 x18;
450   fiat_p256_addcarryx_u64(&x17, &x18, x16, x8, x5);
451   uint64_t x19 = (x18 + x6);
452   uint64_t x20;
453   uint64_t x21;
454   fiat_p256_mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001));
455   uint64_t x22;
456   uint64_t x23;
457   fiat_p256_mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff));
458   uint64_t x24;
459   uint64_t x25;
460   fiat_p256_mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff));
461   uint64_t x26;
462   fiat_p256_uint1 x27;
463   fiat_p256_addcarryx_u64(&x26, &x27, 0x0, x25, x22);
464   uint64_t x28 = (x27 + x23);
465   uint64_t x29;
466   fiat_p256_uint1 x30;
467   fiat_p256_addcarryx_u64(&x29, &x30, 0x0, x11, x24);
468   uint64_t x31;
469   fiat_p256_uint1 x32;
470   fiat_p256_addcarryx_u64(&x31, &x32, x30, x13, x26);
471   uint64_t x33;
472   fiat_p256_uint1 x34;
473   fiat_p256_addcarryx_u64(&x33, &x34, x32, x15, x28);
474   uint64_t x35;
475   fiat_p256_uint1 x36;
476   fiat_p256_addcarryx_u64(&x35, &x36, x34, x17, x20);
477   uint64_t x37;
478   fiat_p256_uint1 x38;
479   fiat_p256_addcarryx_u64(&x37, &x38, x36, x19, x21);
480   uint64_t x39;
481   uint64_t x40;
482   fiat_p256_mulx_u64(&x39, &x40, x1, (arg1[3]));
483   uint64_t x41;
484   uint64_t x42;
485   fiat_p256_mulx_u64(&x41, &x42, x1, (arg1[2]));
486   uint64_t x43;
487   uint64_t x44;
488   fiat_p256_mulx_u64(&x43, &x44, x1, (arg1[1]));
489   uint64_t x45;
490   uint64_t x46;
491   fiat_p256_mulx_u64(&x45, &x46, x1, (arg1[0]));
492   uint64_t x47;
493   fiat_p256_uint1 x48;
494   fiat_p256_addcarryx_u64(&x47, &x48, 0x0, x46, x43);
495   uint64_t x49;
496   fiat_p256_uint1 x50;
497   fiat_p256_addcarryx_u64(&x49, &x50, x48, x44, x41);
498   uint64_t x51;
499   fiat_p256_uint1 x52;
500   fiat_p256_addcarryx_u64(&x51, &x52, x50, x42, x39);
501   uint64_t x53 = (x52 + x40);
502   uint64_t x54;
503   fiat_p256_uint1 x55;
504   fiat_p256_addcarryx_u64(&x54, &x55, 0x0, x31, x45);
505   uint64_t x56;
506   fiat_p256_uint1 x57;
507   fiat_p256_addcarryx_u64(&x56, &x57, x55, x33, x47);
508   uint64_t x58;
509   fiat_p256_uint1 x59;
510   fiat_p256_addcarryx_u64(&x58, &x59, x57, x35, x49);
511   uint64_t x60;
512   fiat_p256_uint1 x61;
513   fiat_p256_addcarryx_u64(&x60, &x61, x59, x37, x51);
514   uint64_t x62;
515   fiat_p256_uint1 x63;
516   fiat_p256_addcarryx_u64(&x62, &x63, x61, x38, x53);
517   uint64_t x64;
518   uint64_t x65;
519   fiat_p256_mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001));
520   uint64_t x66;
521   uint64_t x67;
522   fiat_p256_mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff));
523   uint64_t x68;
524   uint64_t x69;
525   fiat_p256_mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff));
526   uint64_t x70;
527   fiat_p256_uint1 x71;
528   fiat_p256_addcarryx_u64(&x70, &x71, 0x0, x69, x66);
529   uint64_t x72 = (x71 + x67);
530   uint64_t x73;
531   fiat_p256_uint1 x74;
532   fiat_p256_addcarryx_u64(&x73, &x74, 0x0, x54, x68);
533   uint64_t x75;
534   fiat_p256_uint1 x76;
535   fiat_p256_addcarryx_u64(&x75, &x76, x74, x56, x70);
536   uint64_t x77;
537   fiat_p256_uint1 x78;
538   fiat_p256_addcarryx_u64(&x77, &x78, x76, x58, x72);
539   uint64_t x79;
540   fiat_p256_uint1 x80;
541   fiat_p256_addcarryx_u64(&x79, &x80, x78, x60, x64);
542   uint64_t x81;
543   fiat_p256_uint1 x82;
544   fiat_p256_addcarryx_u64(&x81, &x82, x80, x62, x65);
545   uint64_t x83 = ((uint64_t)x82 + x63);
546   uint64_t x84;
547   uint64_t x85;
548   fiat_p256_mulx_u64(&x84, &x85, x2, (arg1[3]));
549   uint64_t x86;
550   uint64_t x87;
551   fiat_p256_mulx_u64(&x86, &x87, x2, (arg1[2]));
552   uint64_t x88;
553   uint64_t x89;
554   fiat_p256_mulx_u64(&x88, &x89, x2, (arg1[1]));
555   uint64_t x90;
556   uint64_t x91;
557   fiat_p256_mulx_u64(&x90, &x91, x2, (arg1[0]));
558   uint64_t x92;
559   fiat_p256_uint1 x93;
560   fiat_p256_addcarryx_u64(&x92, &x93, 0x0, x91, x88);
561   uint64_t x94;
562   fiat_p256_uint1 x95;
563   fiat_p256_addcarryx_u64(&x94, &x95, x93, x89, x86);
564   uint64_t x96;
565   fiat_p256_uint1 x97;
566   fiat_p256_addcarryx_u64(&x96, &x97, x95, x87, x84);
567   uint64_t x98 = (x97 + x85);
568   uint64_t x99;
569   fiat_p256_uint1 x100;
570   fiat_p256_addcarryx_u64(&x99, &x100, 0x0, x75, x90);
571   uint64_t x101;
572   fiat_p256_uint1 x102;
573   fiat_p256_addcarryx_u64(&x101, &x102, x100, x77, x92);
574   uint64_t x103;
575   fiat_p256_uint1 x104;
576   fiat_p256_addcarryx_u64(&x103, &x104, x102, x79, x94);
577   uint64_t x105;
578   fiat_p256_uint1 x106;
579   fiat_p256_addcarryx_u64(&x105, &x106, x104, x81, x96);
580   uint64_t x107;
581   fiat_p256_uint1 x108;
582   fiat_p256_addcarryx_u64(&x107, &x108, x106, x83, x98);
583   uint64_t x109;
584   uint64_t x110;
585   fiat_p256_mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001));
586   uint64_t x111;
587   uint64_t x112;
588   fiat_p256_mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff));
589   uint64_t x113;
590   uint64_t x114;
591   fiat_p256_mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff));
592   uint64_t x115;
593   fiat_p256_uint1 x116;
594   fiat_p256_addcarryx_u64(&x115, &x116, 0x0, x114, x111);
595   uint64_t x117 = (x116 + x112);
596   uint64_t x118;
597   fiat_p256_uint1 x119;
598   fiat_p256_addcarryx_u64(&x118, &x119, 0x0, x99, x113);
599   uint64_t x120;
600   fiat_p256_uint1 x121;
601   fiat_p256_addcarryx_u64(&x120, &x121, x119, x101, x115);
602   uint64_t x122;
603   fiat_p256_uint1 x123;
604   fiat_p256_addcarryx_u64(&x122, &x123, x121, x103, x117);
605   uint64_t x124;
606   fiat_p256_uint1 x125;
607   fiat_p256_addcarryx_u64(&x124, &x125, x123, x105, x109);
608   uint64_t x126;
609   fiat_p256_uint1 x127;
610   fiat_p256_addcarryx_u64(&x126, &x127, x125, x107, x110);
611   uint64_t x128 = ((uint64_t)x127 + x108);
612   uint64_t x129;
613   uint64_t x130;
614   fiat_p256_mulx_u64(&x129, &x130, x3, (arg1[3]));
615   uint64_t x131;
616   uint64_t x132;
617   fiat_p256_mulx_u64(&x131, &x132, x3, (arg1[2]));
618   uint64_t x133;
619   uint64_t x134;
620   fiat_p256_mulx_u64(&x133, &x134, x3, (arg1[1]));
621   uint64_t x135;
622   uint64_t x136;
623   fiat_p256_mulx_u64(&x135, &x136, x3, (arg1[0]));
624   uint64_t x137;
625   fiat_p256_uint1 x138;
626   fiat_p256_addcarryx_u64(&x137, &x138, 0x0, x136, x133);
627   uint64_t x139;
628   fiat_p256_uint1 x140;
629   fiat_p256_addcarryx_u64(&x139, &x140, x138, x134, x131);
630   uint64_t x141;
631   fiat_p256_uint1 x142;
632   fiat_p256_addcarryx_u64(&x141, &x142, x140, x132, x129);
633   uint64_t x143 = (x142 + x130);
634   uint64_t x144;
635   fiat_p256_uint1 x145;
636   fiat_p256_addcarryx_u64(&x144, &x145, 0x0, x120, x135);
637   uint64_t x146;
638   fiat_p256_uint1 x147;
639   fiat_p256_addcarryx_u64(&x146, &x147, x145, x122, x137);
640   uint64_t x148;
641   fiat_p256_uint1 x149;
642   fiat_p256_addcarryx_u64(&x148, &x149, x147, x124, x139);
643   uint64_t x150;
644   fiat_p256_uint1 x151;
645   fiat_p256_addcarryx_u64(&x150, &x151, x149, x126, x141);
646   uint64_t x152;
647   fiat_p256_uint1 x153;
648   fiat_p256_addcarryx_u64(&x152, &x153, x151, x128, x143);
649   uint64_t x154;
650   uint64_t x155;
651   fiat_p256_mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001));
652   uint64_t x156;
653   uint64_t x157;
654   fiat_p256_mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff));
655   uint64_t x158;
656   uint64_t x159;
657   fiat_p256_mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff));
658   uint64_t x160;
659   fiat_p256_uint1 x161;
660   fiat_p256_addcarryx_u64(&x160, &x161, 0x0, x159, x156);
661   uint64_t x162 = (x161 + x157);
662   uint64_t x163;
663   fiat_p256_uint1 x164;
664   fiat_p256_addcarryx_u64(&x163, &x164, 0x0, x144, x158);
665   uint64_t x165;
666   fiat_p256_uint1 x166;
667   fiat_p256_addcarryx_u64(&x165, &x166, x164, x146, x160);
668   uint64_t x167;
669   fiat_p256_uint1 x168;
670   fiat_p256_addcarryx_u64(&x167, &x168, x166, x148, x162);
671   uint64_t x169;
672   fiat_p256_uint1 x170;
673   fiat_p256_addcarryx_u64(&x169, &x170, x168, x150, x154);
674   uint64_t x171;
675   fiat_p256_uint1 x172;
676   fiat_p256_addcarryx_u64(&x171, &x172, x170, x152, x155);
677   uint64_t x173 = ((uint64_t)x172 + x153);
678   uint64_t x174;
679   fiat_p256_uint1 x175;
680   fiat_p256_subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff));
681   uint64_t x176;
682   fiat_p256_uint1 x177;
683   fiat_p256_subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff));
684   uint64_t x178;
685   fiat_p256_uint1 x179;
686   fiat_p256_subborrowx_u64(&x178, &x179, x177, x169, 0x0);
687   uint64_t x180;
688   fiat_p256_uint1 x181;
689   fiat_p256_subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001));
690   uint64_t x182;
691   fiat_p256_uint1 x183;
692   fiat_p256_subborrowx_u64(&x182, &x183, x181, x173, 0x0);
693   uint64_t x184;
694   fiat_p256_cmovznz_u64(&x184, x183, x174, x165);
695   uint64_t x185;
696   fiat_p256_cmovznz_u64(&x185, x183, x176, x167);
697   uint64_t x186;
698   fiat_p256_cmovznz_u64(&x186, x183, x178, x169);
699   uint64_t x187;
700   fiat_p256_cmovznz_u64(&x187, x183, x180, x171);
701   out1[0] = x184;
702   out1[1] = x185;
703   out1[2] = x186;
704   out1[3] = x187;
705 }
706 
707 /*
708  * The function fiat_p256_add adds two field elements in the Montgomery domain.
709  * Preconditions:
710  *   0 ≤ eval arg1 < m
711  *   0 ≤ eval arg2 < m
712  * Postconditions:
713  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) + eval (from_montgomery arg2)) mod m
714  *   0 ≤ eval out1 < m
715  *
716  * Input Bounds:
717  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
718  *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
719  * Output Bounds:
720  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
721  */
fiat_p256_add(uint64_t out1[4],const uint64_t arg1[4],const uint64_t arg2[4])722 static void fiat_p256_add(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
723   uint64_t x1;
724   fiat_p256_uint1 x2;
725   fiat_p256_addcarryx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
726   uint64_t x3;
727   fiat_p256_uint1 x4;
728   fiat_p256_addcarryx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
729   uint64_t x5;
730   fiat_p256_uint1 x6;
731   fiat_p256_addcarryx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
732   uint64_t x7;
733   fiat_p256_uint1 x8;
734   fiat_p256_addcarryx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
735   uint64_t x9;
736   fiat_p256_uint1 x10;
737   fiat_p256_subborrowx_u64(&x9, &x10, 0x0, x1, UINT64_C(0xffffffffffffffff));
738   uint64_t x11;
739   fiat_p256_uint1 x12;
740   fiat_p256_subborrowx_u64(&x11, &x12, x10, x3, UINT32_C(0xffffffff));
741   uint64_t x13;
742   fiat_p256_uint1 x14;
743   fiat_p256_subborrowx_u64(&x13, &x14, x12, x5, 0x0);
744   uint64_t x15;
745   fiat_p256_uint1 x16;
746   fiat_p256_subborrowx_u64(&x15, &x16, x14, x7, UINT64_C(0xffffffff00000001));
747   uint64_t x17;
748   fiat_p256_uint1 x18;
749   fiat_p256_subborrowx_u64(&x17, &x18, x16, x8, 0x0);
750   uint64_t x19;
751   fiat_p256_cmovznz_u64(&x19, x18, x9, x1);
752   uint64_t x20;
753   fiat_p256_cmovznz_u64(&x20, x18, x11, x3);
754   uint64_t x21;
755   fiat_p256_cmovznz_u64(&x21, x18, x13, x5);
756   uint64_t x22;
757   fiat_p256_cmovznz_u64(&x22, x18, x15, x7);
758   out1[0] = x19;
759   out1[1] = x20;
760   out1[2] = x21;
761   out1[3] = x22;
762 }
763 
764 /*
765  * The function fiat_p256_sub subtracts two field elements in the Montgomery domain.
766  * Preconditions:
767  *   0 ≤ eval arg1 < m
768  *   0 ≤ eval arg2 < m
769  * Postconditions:
770  *   eval (from_montgomery out1) mod m = (eval (from_montgomery arg1) - eval (from_montgomery arg2)) mod m
771  *   0 ≤ eval out1 < m
772  *
773  * Input Bounds:
774  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
775  *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
776  * Output Bounds:
777  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
778  */
fiat_p256_sub(uint64_t out1[4],const uint64_t arg1[4],const uint64_t arg2[4])779 static void fiat_p256_sub(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) {
780   uint64_t x1;
781   fiat_p256_uint1 x2;
782   fiat_p256_subborrowx_u64(&x1, &x2, 0x0, (arg1[0]), (arg2[0]));
783   uint64_t x3;
784   fiat_p256_uint1 x4;
785   fiat_p256_subborrowx_u64(&x3, &x4, x2, (arg1[1]), (arg2[1]));
786   uint64_t x5;
787   fiat_p256_uint1 x6;
788   fiat_p256_subborrowx_u64(&x5, &x6, x4, (arg1[2]), (arg2[2]));
789   uint64_t x7;
790   fiat_p256_uint1 x8;
791   fiat_p256_subborrowx_u64(&x7, &x8, x6, (arg1[3]), (arg2[3]));
792   uint64_t x9;
793   fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
794   uint64_t x10;
795   fiat_p256_uint1 x11;
796   fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, (x9 & UINT64_C(0xffffffffffffffff)));
797   uint64_t x12;
798   fiat_p256_uint1 x13;
799   fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
800   uint64_t x14;
801   fiat_p256_uint1 x15;
802   fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
803   uint64_t x16;
804   fiat_p256_uint1 x17;
805   fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
806   out1[0] = x10;
807   out1[1] = x12;
808   out1[2] = x14;
809   out1[3] = x16;
810 }
811 
812 /*
813  * The function fiat_p256_opp negates a field element in the Montgomery domain.
814  * Preconditions:
815  *   0 ≤ eval arg1 < m
816  * Postconditions:
817  *   eval (from_montgomery out1) mod m = -eval (from_montgomery arg1) mod m
818  *   0 ≤ eval out1 < m
819  *
820  * Input Bounds:
821  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
822  * Output Bounds:
823  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
824  */
fiat_p256_opp(uint64_t out1[4],const uint64_t arg1[4])825 static void fiat_p256_opp(uint64_t out1[4], const uint64_t arg1[4]) {
826   uint64_t x1;
827   fiat_p256_uint1 x2;
828   fiat_p256_subborrowx_u64(&x1, &x2, 0x0, 0x0, (arg1[0]));
829   uint64_t x3;
830   fiat_p256_uint1 x4;
831   fiat_p256_subborrowx_u64(&x3, &x4, x2, 0x0, (arg1[1]));
832   uint64_t x5;
833   fiat_p256_uint1 x6;
834   fiat_p256_subborrowx_u64(&x5, &x6, x4, 0x0, (arg1[2]));
835   uint64_t x7;
836   fiat_p256_uint1 x8;
837   fiat_p256_subborrowx_u64(&x7, &x8, x6, 0x0, (arg1[3]));
838   uint64_t x9;
839   fiat_p256_cmovznz_u64(&x9, x8, 0x0, UINT64_C(0xffffffffffffffff));
840   uint64_t x10;
841   fiat_p256_uint1 x11;
842   fiat_p256_addcarryx_u64(&x10, &x11, 0x0, x1, (x9 & UINT64_C(0xffffffffffffffff)));
843   uint64_t x12;
844   fiat_p256_uint1 x13;
845   fiat_p256_addcarryx_u64(&x12, &x13, x11, x3, (x9 & UINT32_C(0xffffffff)));
846   uint64_t x14;
847   fiat_p256_uint1 x15;
848   fiat_p256_addcarryx_u64(&x14, &x15, x13, x5, 0x0);
849   uint64_t x16;
850   fiat_p256_uint1 x17;
851   fiat_p256_addcarryx_u64(&x16, &x17, x15, x7, (x9 & UINT64_C(0xffffffff00000001)));
852   out1[0] = x10;
853   out1[1] = x12;
854   out1[2] = x14;
855   out1[3] = x16;
856 }
857 
858 /*
859  * The function fiat_p256_nonzero outputs a single non-zero word if the input is non-zero and zero otherwise.
860  * Preconditions:
861  *   0 ≤ eval arg1 < m
862  * Postconditions:
863  *   out1 = 0 ↔ eval (from_montgomery arg1) mod m = 0
864  *
865  * Input Bounds:
866  *   arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
867  * Output Bounds:
868  *   out1: [0x0 ~> 0xffffffffffffffff]
869  */
fiat_p256_nonzero(uint64_t * out1,const uint64_t arg1[4])870 static void fiat_p256_nonzero(uint64_t* out1, const uint64_t arg1[4]) {
871   uint64_t x1 = ((arg1[0]) | ((arg1[1]) | ((arg1[2]) | ((arg1[3]) | (uint64_t)0x0))));
872   *out1 = x1;
873 }
874 
875 /*
876  * The function fiat_p256_selectznz is a multi-limb conditional select.
877  * Postconditions:
878  *   eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
879  *
880  * Input Bounds:
881  *   arg1: [0x0 ~> 0x1]
882  *   arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
883  *   arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
884  * Output Bounds:
885  *   out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
886  */
fiat_p256_selectznz(uint64_t out1[4],fiat_p256_uint1 arg1,const uint64_t arg2[4],const uint64_t arg3[4])887 static void fiat_p256_selectznz(uint64_t out1[4], fiat_p256_uint1 arg1, const uint64_t arg2[4], const uint64_t arg3[4]) {
888   uint64_t x1;
889   fiat_p256_cmovznz_u64(&x1, arg1, (arg2[0]), (arg3[0]));
890   uint64_t x2;
891   fiat_p256_cmovznz_u64(&x2, arg1, (arg2[1]), (arg3[1]));
892   uint64_t x3;
893   fiat_p256_cmovznz_u64(&x3, arg1, (arg2[2]), (arg3[2]));
894   uint64_t x4;
895   fiat_p256_cmovznz_u64(&x4, arg1, (arg2[3]), (arg3[3]));
896   out1[0] = x1;
897   out1[1] = x2;
898   out1[2] = x3;
899   out1[3] = x4;
900 }
901 
902