1 /*
2 * This file is part of the openHiTLS project.
3 *
4 * openHiTLS is licensed under the Mulan PSL v2.
5 * You can use this software according to the terms and conditions of the Mulan PSL v2.
6 * You may obtain a copy of Mulan PSL v2 at:
7 *
8 * http://license.coscl.org.cn/MulanPSL2
9 *
10 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13 * See the Mulan PSL v2 for more details.
14 */
15
16
17 #include "hitls_build.h"
18 #ifdef HITLS_CRYPTO_X25519
19
20 #include "x25519_asm.h"
21 #include "securec.h"
22 #include "curve25519_local.h"
23 #ifdef HITLS_CRYPTO_X25519_X8664
24 #include "crypt_utils.h"
25 #endif
// Alternative X25519 implementation: faster, but requires the assembly routines
/* Width, in bits, of one limb of the Fp51 representation. */
#define CURVE25519_51BITS 51
/* Mask selecting the low CURVE25519_51BITS bits of a 64-bit word (0x7ffffffffffff). */
#define CURVE25519_51BITS_MASK ((1ULL << CURVE25519_51BITS) - 1)
29
/*
 * Unpack a 32-byte buffer into the five 51-bit limbs of an Fp51 element.
 *
 * out: receives the limbs in out->data[0..4], least significant limb first.
 * in:  32 input bytes.
 *
 * The CURVE25519_BYTES6_LOAD / CURVE25519_BYTES7_LOAD macros are defined outside
 * this file; they are used here as "load 6 (or 7) bytes from src into *dst".
 * NOTE(review): the limb arithmetic below assumes they assemble the bytes in
 * little-endian order - confirm against the macro definitions.
 */
static void Fp51DataToPoly(Fp51 *out, const uint8_t in[32])
{
    uint64_t h[5];
    uint32_t idx;

    /* Raw, overlapping windows of the input: bytes 0-6, 7-12, 13-19, 20-25, 26-31. */
    CURVE25519_BYTES7_LOAD(h, in);
    CURVE25519_BYTES6_LOAD(h + 1, in + 7);
    CURVE25519_BYTES7_LOAD(h + 2, in + 13);
    CURVE25519_BYTES6_LOAD(h + 3, in + 20);
    CURVE25519_BYTES6_LOAD(h + 4, in + 26);

    /* Keep only the low 47 of the 48 bits loaded into the top window. */
    h[4] &= 0x7fffffffffff;

    /*
     * Make room in each window for the bits that spill over from the limb
     * below it: 5, 2, 7 and 4 bits respectively.
     */
    h[1] <<= 5;
    h[2] <<= 2;
    h[3] <<= 7;
    h[4] <<= 4;

    /* Move everything above bit 50 of a limb into the next limb, then trim it. */
    for (idx = 0; idx < 4; idx++) {
        h[idx + 1] |= h[idx] >> CURVE25519_51BITS;
        h[idx] &= CURVE25519_51BITS_MASK;
    }

    for (idx = 0; idx < 5; idx++) {
        out->data[idx] = h[idx];
    }
}
67
/*
 * Serialize five 51-bit limbs into 32 bytes, least significant byte first.
 *
 * out: receives the 32 output bytes.
 * h:   five limbs, least significant first; limb i contributes bits starting
 *      at position 51 * i of the output.
 *
 * Because 51 is not a multiple of 8, four output bytes (6, 12, 19 and 25)
 * straddle two limbs: the high bits of one limb are OR-ed with the low bits
 * of the next one.
 */
static void Fp51UnloadTo8Bits(uint8_t out[32], uint64_t h[5])
{
    /* Number of low bits of each limb already emitted in the preceding straddling byte. */
    static const uint8_t consumedBits[5] = {0, 5, 2, 7, 4};
    /* Number of output bytes taken entirely from each limb. */
    static const uint8_t wholeBytes[5] = {6, 5, 6, 5, 6};
    uint32_t pos = 0;

    for (uint32_t limb = 0; limb < 5; limb++) {
        uint32_t shift = consumedBits[limb];

        for (uint32_t n = 0; n < wholeBytes[limb]; n++) {
            out[pos] = (uint8_t)(h[limb] >> shift);
            pos++;
            shift += 8;
        }

        if (limb < 4) {
            /* Remaining high bits of this limb joined with the low bits of the next. */
            out[pos] = (uint8_t)((h[limb] >> shift) | (h[limb + 1] << (51 - shift)));
            pos++;
        }
    }
}
108
Fp51PolyToData(const Fp51 * in,uint8_t out[32])109 static void Fp51PolyToData(const Fp51 *in, uint8_t out[32])
110 {
111 uint64_t h[5];
112 h[0] = in->data[0]; // 0
113 h[1] = in->data[1]; // 1
114 h[2] = in->data[2]; // 2
115 h[3] = in->data[3]; // 3
116 h[4] = in->data[4]; // 4
117 uint64_t carry;
118
119 carry = (h[0] + 19) >> CURVE25519_51BITS; // plus 19 then calculate carry
120 carry = (h[1] + carry) >> CURVE25519_51BITS; // carry of h[1]
121 carry = (h[2] + carry) >> CURVE25519_51BITS; // carry of h[2]
122 carry = (h[3] + carry) >> CURVE25519_51BITS; // carry of h[3]
123 carry = (h[4] + carry) >> CURVE25519_51BITS; // carry of h[4]
124
125 h[0] += 19 * carry; // process carry h[4] -> h[0], h[0] += 19 * carry
126 h[1] += h[0] >> CURVE25519_51BITS; // process carry h[0] -> h[1]
127 h[0] &= CURVE25519_51BITS_MASK; // clear h[0]
128 h[2] += h[1] >> CURVE25519_51BITS; // process carry h[1] -> h[2]
129 h[1] &= CURVE25519_51BITS_MASK; // clear h[1]
130 h[3] += h[2] >> CURVE25519_51BITS; // process carry h[2] -> h[3]
131 h[2] &= CURVE25519_51BITS_MASK; // clear h[2]
132 h[4] += h[3] >> CURVE25519_51BITS; // process carry h[3] -> h[4]
133 h[3] &= CURVE25519_51BITS_MASK; // clear h[3]
134 h[4] &= CURVE25519_51BITS_MASK; // clear h[4]
135
136 Fp51UnloadTo8Bits(out, h);
137 }
138
139 /* out = in1 ^ (4 * 2 ^ (2 * times)) * in2 */
Fp51MultiSquare(Fp51 * in1,Fp51 * in2,Fp51 * out,int32_t times)140 static inline void Fp51MultiSquare(Fp51 *in1, Fp51 *in2, Fp51 *out, int32_t times)
141 {
142 int32_t i;
143 Fp51 temp1, temp2;
144 Fp51Square(&temp1, in1);
145 Fp51Square(&temp2, &temp1);
146 for (i = 0; i < times; i++) {
147 Fp51Square(&temp1, &temp2);
148 Fp51Square(&temp2, &temp1);
149 }
150 Fp51Mul(out, in2, &temp2);
151 }
152
153 /* out = a ^ -1 */
Fp51Invert(Fp51 * out,const Fp51 * a)154 static void Fp51Invert(Fp51 *out, const Fp51 *a)
155 {
156 Fp51 a0; /* save a^1 */
157 Fp51 a1; /* save a^2 */
158 Fp51 a2; /* save a^11 */
159 Fp51 a3; /* save a^(2^5-1) */
160 Fp51 a4; /* save a^(2^10-1) */
161 Fp51 a5; /* save a^(2^20-1) */
162 Fp51 a6; /* save a^(2^40-1) */
163 Fp51 a7; /* save a^(2^50-1) */
164 Fp51 a8; /* save a^(2^100-1) */
165 Fp51 a9; /* save a^(2^200-1) */
166 Fp51 a10; /* save a^(2^250-1) */
167 Fp51 temp1, temp2;
168
169 /* We know a×b=1(mod p), then a and b are inverses of mod p, i.e. a=b^(-1), b=a^(-1);
170 * According to Fermat's little theorem a^(p-1)=1(mod p), so a*a^(p-2)=1(mod p);
171 * So the inverse element of a is a^(-1) = a^(p-2)(mod p)
172 * Here it is, p=2^255-19, thus we need to compute a^(2^255-21)(mod(2^255-19))
173 */
174
175 /* a^1 */
176 CURVE25519_FP51_COPY(a0.data, a->data);
177
178 /* a^2 */
179 Fp51Square(&a1, &a0);
180
181 /* a^4 */
182 Fp51Square(&temp1, &a1);
183
184 /* a^8 */
185 Fp51Square(&temp2, &temp1);
186
187 /* a^9 */
188 Fp51Mul(&temp1, &a0, &temp2);
189
190 /* a^11 */
191 Fp51Mul(&a2, &a1, &temp1);
192
193 /* a^22 */
194 Fp51Square(&temp2, &a2);
195
196 /* a^(2^5-1) = a^(9+22) */
197 Fp51Mul(&a3, &temp1, &temp2);
198
199 /* a^(2^10-1) = a^(2^10-2^5) * a^(2^5-1) */
200 Fp51Square(&temp1, &a3);
201 Fp51Square(&temp2, &temp1);
202 Fp51Square(&temp1, &temp2);
203 Fp51Square(&temp2, &temp1);
204 Fp51Square(&temp1, &temp2);
205 Fp51Mul(&a4, &a3, &temp1);
206
207 /* a^(2^20-1) = a^(2^20-2^10) * a^(2^10-1) */
208 Fp51MultiSquare(&a4, &a4, &a5, 4); // (2 * 2) ^ 4
209
210 /* a^(2^40-1) = a^(2^40-2^20) * a^(2^20-1) */
211 Fp51MultiSquare(&a5, &a5, &a6, 9); // (2 * 2) ^ 9
212
213 /* a^(2^50-1) = a^(2^50-2^10) * a^(2^10-1) */
214 Fp51MultiSquare(&a6, &a4, &a7, 4); // (2 * 2) ^ 4
215
216 /* a^(2^100-1) = a^(2^100-2^50) * a^(2^50-1) */
217 Fp51MultiSquare(&a7, &a7, &a8, 24); // (2 * 2) ^ 24
218
219 /* a^(2^200-1) = a^(2^200-2^100) * a^(2^100-1) */
220 Fp51MultiSquare(&a8, &a8, &a9, 49); // (2 * 2) ^ 49
221
222 /* a^(2^250-1) = a^(2^250-2^50) * a^(2^50-1) */
223 Fp51MultiSquare(&a9, &a7, &a10, 24); // (2 * 2) ^ 24
224
225 /* a^(2^5*(2^250-1)) = (a^(2^250-1))^5 */
226 Fp51Square(&temp1, &a10);
227 Fp51Square(&temp2, &temp1);
228 Fp51Square(&temp1, &temp2);
229 Fp51Square(&temp2, &temp1);
230 Fp51Square(&temp1, &temp2);
231
232 /* The output:a^(2^255-21) = a(2^5*(2^250-1)+11) = a^(2^5*(2^250-1)) * a^11 */
233 Fp51Mul(out, &a2, &temp1);
234 }
235
Fp51ScalarMultiPoint(uint8_t out[32],const uint8_t scalar[32],const uint8_t point[32])236 void Fp51ScalarMultiPoint(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32])
237 {
238 uint8_t k[32];
239 const uint8_t *u = point;
240 int32_t t;
241 uint32_t swap;
242 uint32_t kTemp;
243 Fp51 x1, x2, x3;
244 Fp51 z2, z3;
245 Fp51 t1, t2;
246
247 /* Decord the scalar into k */
248 CURVE25519_DECODE_LITTLE_ENDIAN(k, scalar);
249
250 /* Reference RFC 7748 section 5:The constant a24 is (486662 - 2) / 4 = 121665 for curve25519/X25519 */
251 Fp51DataToPoly(&x1, u);
252 CURVE25519_FP51_SET(x2.data, 1);
253 CURVE25519_FP51_SET(z2.data, 0);
254 CURVE25519_FP51_COPY(x3.data, x1.data);
255 CURVE25519_FP51_SET(z3.data, 1);
256 swap = 0;
257
258 /* "bits" parameter set to 255 for x25519 */ /* For t = bits-1(254) down to 0: */
259 for (t = 254; t >= 0; t--) {
260 /* t >> 3: calculation the index of bit; t & 7: Obtains the corresponding bit in the byte */
261 kTemp = (k[(uint32_t)t >> 3] >> ((uint32_t)t & 7)) & 1; /* kTemp = (k >> t) & 1 */
262 swap ^= kTemp; /* swap ^= kTemp */
263 CURVE25519_FP51_CSWAP(swap, x2.data, x3.data); /* (x_2, x_3) = cswap(swap, x_2, x_3) */
264
265 CURVE25519_FP51_CSWAP(swap, z2.data, z3.data); /* (z_2, z_3) = cswap(swap, z_2, z_3) */
266 swap = kTemp; /* swap = kTemp */
267 CURVE25519_FP51_SUB(t1.data, x3.data, z3.data); /* x3 = D */
268 CURVE25519_FP51_SUB(t2.data, x2.data, z2.data); /* t2 = B */
269 CURVE25519_FP51_ADD(x2.data, x2.data, z2.data); /* t1 = A */
270 CURVE25519_FP51_ADD(z2.data, x3.data, z3.data); /* x2 = C */
271
272 Fp51Mul(&z3, &t1, &x2);
273 Fp51Mul(&z2, &z2, &t2);
274 Fp51Square(&t1, &t2);
275 Fp51Square(&t2, &x2);
276
277 CURVE25519_FP51_ADD(x3.data, z3.data, z2.data);
278 CURVE25519_FP51_SUB(z2.data, z3.data, z2.data);
279 Fp51Mul(&x2, &t2, &t1);
280 CURVE25519_FP51_SUB(t2.data, t2.data, t1.data);
281 Fp51Square(&z2, &z2);
282 Fp51MulScalar(&z3, &t2); // z2 *= 121665 + 1 = 121666
283 Fp51Square(&x3, &x3);
284 CURVE25519_FP51_ADD(t1.data, t1.data, z3.data);
285 Fp51Mul(&z3, &x1, &z2);
286 Fp51Mul(&z2, &t2, &t1);
287 }
288
289 CURVE25519_FP51_CSWAP(swap, x2.data, x3.data);
290 CURVE25519_FP51_CSWAP(swap, z2.data, z3.data);
291 /* Return x2 * (z2 ^ (p - 2)) */
292 Fp51Invert(&t1, &z2);
293 Fp51Mul(&t2, &x2, &t1);
294 Fp51PolyToData(&t2, out);
295 (void)memset_s(k, sizeof(k), 0, sizeof(k));
296 }
297
298 #ifdef HITLS_CRYPTO_X25519_X8664
299
/* Mask that keeps the low 63 bits of a 64-bit word. */
#define CURVE25519_63BITS_MASK 0x7fffffffffffffff

/* dst[0..3] = {value, 0, 0, 0}: set a 4-word element to a one-word value. */
#define CURVE25519_FP64_SET(dst, value) \
    do {                                \
        (dst)[0] = (value);             \
        (dst)[1] = 0;                   \
        (dst)[2] = 0;                   \
        (dst)[3] = 0;                   \
    } while (0)

/* dst[0..3] = src[0..3]: copy a 4-word element. */
#define CURVE25519_FP64_COPY(dst, src) \
    do {                               \
        (dst)[0] = (src)[0];           \
        (dst)[1] = (src)[1];           \
        (dst)[2] = (src)[2];           \
        (dst)[3] = (src)[3];           \
    } while (0)

/*
 * dst = the 8 bytes src[0..7] read as a little-endian 64-bit word.
 * dst is an lvalue (not a pointer). Both arguments are expanded several
 * times, so they must be free of side effects.
 */
#define CURVE25519_BYTES8_LOAD(dst, src)       \
    do {                                       \
        (dst) = (uint64_t)(src)[0];            \
        (dst) |= ((uint64_t)(src)[1]) << 8;    \
        (dst) |= ((uint64_t)(src)[2]) << 16;   \
        (dst) |= ((uint64_t)(src)[3]) << 24;   \
        (dst) |= ((uint64_t)(src)[4]) << 32;   \
        (dst) |= ((uint64_t)(src)[5]) << 40;   \
        (dst) |= ((uint64_t)(src)[6]) << 48;   \
        (dst) |= ((uint64_t)(src)[7]) << 56;   \
    } while (0)

/*
 * Branch-free conditional swap of two 4-word elements.
 * s == 1 exchanges a and b, s == 0 leaves both unchanged: 0 - s is then an
 * all-ones or all-zero mask applied to a[i] ^ b[i]. Other values of s are
 * not meaningful.
 */
#define CURVE25519_FP64_CSWAP(s, a, b)                  \
    do {                                                \
        uint64_t tt;                                    \
        const uint64_t tsMacro = 0 - (uint64_t)(s);     \
        for (uint32_t ii = 0; ii < 4; ii++) {           \
            tt = tsMacro & ((a)[ii] ^ (b)[ii]);         \
            (a)[ii] = (a)[ii] ^ tt;                     \
            (b)[ii] = (b)[ii] ^ tt;                     \
        }                                               \
    } while (0)
339
/*
 * Unpack a 32-byte little-endian buffer into four 64-bit words.
 *
 * h:     receives the words, least significant first; bit 63 of h[3]
 *        (bit 255 of the input) is cleared.
 * point: 32 input bytes; only read, so it is accessed through the
 *        const-qualified pointer directly instead of a cast-away-const copy.
 */
static void Fp64DataToPoly(Fp64 h, const uint8_t *point)
{
    /* Word idx is built from input bytes 8 * idx .. 8 * idx + 7. */
    for (uint32_t idx = 0; idx < 4; idx++) {
        CURVE25519_BYTES8_LOAD(h[idx], point + 8 * idx);
    }
    h[3] &= CURVE25519_63BITS_MASK;
}
353
354 /* out = in1 ^ (4 * 2 ^ (2 * times)) * in2 */
Fp64MultiSqr(Fp64 in1,Fp64 in2,Fp64 out,int32_t times)355 static inline void Fp64MultiSqr(Fp64 in1, Fp64 in2, Fp64 out, int32_t times)
356 {
357 int32_t i;
358 Fp64 temp1, temp2;
359 Fp64Sqr(temp1, in1);
360 Fp64Sqr(temp2, temp1);
361 for (i = 0; i < times; i++) {
362 Fp64Sqr(temp1, temp2);
363 Fp64Sqr(temp2, temp1);
364 }
365 Fp64Mul(out, in2, temp2);
366 }
367
/*
 * out = z^(-1) mod p, with p = 2^255 - 19, computed as z^(p - 2) = z^(2^255 - 21)
 * with a fixed addition chain.
 *
 * `out` is written only by the final multiplication, after the last read of
 * `z`, so the caller may pass the same element for both.
 */
static void Fe64Invert(Fp64 out, const Fp64 z)
{
    Fp64 pow2;   /* z^2 */
    Fp64 pow9;   /* z^9 */
    Fp64 pow11;  /* z^11 */
    Fp64 e5;     /* z^(2^5 - 1) */
    Fp64 e10;    /* z^(2^10 - 1) */
    Fp64 e20;    /* z^(2^20 - 1) */
    Fp64 e40;    /* z^(2^40 - 1) */
    Fp64 e50;    /* z^(2^50 - 1) */
    Fp64 e100;   /* z^(2^100 - 1) */
    Fp64 e200;   /* z^(2^200 - 1) */
    Fp64 e250;   /* z^(2^250 - 1) */
    Fp64 ping, pong; /* scratch values for runs of squarings */

    Fp64Sqr(pow2, z);           /* z^2 */
    Fp64Sqr(ping, pow2);        /* z^4 */
    Fp64Sqr(pong, ping);        /* z^8 */
    Fp64Mul(pow9, z, pong);     /* z^9  = z * z^8 */
    Fp64Mul(pow11, pow2, pow9); /* z^11 = z^2 * z^9 */
    Fp64Sqr(ping, pow11);       /* z^22 */
    Fp64Mul(e5, pow9, ping);    /* z^31 = z^9 * z^22 = z^(2^5 - 1) */

    /* z^(2^10 - 1) = (z^(2^5 - 1))^(2^5) * z^(2^5 - 1): five squarings, one multiply */
    Fp64Sqr(ping, e5);
    Fp64Sqr(pong, ping);
    Fp64Sqr(ping, pong);
    Fp64Sqr(pong, ping);
    Fp64Sqr(ping, pong);
    Fp64Mul(e10, ping, e5);

    /* Each call squares its first argument (2 * times + 2) times, then multiplies. */
    Fp64MultiSqr(e10, e10, e20, 4);     /* 10 squarings:  z^(2^20 - 1) */
    Fp64MultiSqr(e20, e20, e40, 9);     /* 20 squarings:  z^(2^40 - 1) */
    Fp64MultiSqr(e40, e10, e50, 4);     /* 10 squarings:  z^(2^50 - 1) */
    Fp64MultiSqr(e50, e50, e100, 24);   /* 50 squarings:  z^(2^100 - 1) */
    Fp64MultiSqr(e100, e100, e200, 49); /* 100 squarings: z^(2^200 - 1) */
    Fp64MultiSqr(e200, e50, e250, 24);  /* 50 squarings:  z^(2^250 - 1) */

    /* (z^(2^250 - 1))^(2^5) = z^(2^255 - 2^5): five squarings */
    Fp64Sqr(ping, e250);
    Fp64Sqr(pong, ping);
    Fp64Sqr(ping, pong);
    Fp64Sqr(pong, ping);
    Fp64Sqr(ping, pong);

    /* z^(2^255 - 21) = z^11 * z^(2^255 - 32) */
    Fp64Mul(out, pow11, ping);
}
420
421
Fp64ScalarMultiPoint(uint8_t out[32],const uint8_t scalar[32],const uint8_t point[32])422 void Fp64ScalarMultiPoint(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32])
423 {
424 uint8_t e[32];
425 uint32_t swap = 0;
426 int32_t t;
427 Fp64 x1, x2, x3;
428 Fp64 z2, z3;
429 Fp64 t1, t2;
430
431 CURVE25519_DECODE_LITTLE_ENDIAN(e, scalar);
432 Fp64DataToPoly(x1, point);
433 CURVE25519_FP64_SET(x2, 1);
434 CURVE25519_FP64_SET(z2, 0);
435 CURVE25519_FP64_COPY(x3, x1);
436 CURVE25519_FP64_SET(z3, 1);
437
438 for (t = 254; t >= 0; --t) { /* For t = bits-1(254) down to 0: */
439 /* t >> 3: calculation the index of bit; t & 7: Obtains the corresponding bit in the byte */
440 uint32_t kTemp = (e[(uint32_t)t >> 3] >> ((uint32_t)t & 7)) & 1;
441
442 swap ^= kTemp;
443 CURVE25519_FP64_CSWAP(swap, x2, x3);
444 CURVE25519_FP64_CSWAP(swap, z2, z3);
445 swap = kTemp;
446 Fp64Sub(t1, x3, z3);
447 Fp64Sub(t2, x2, z2);
448 Fp64Add(x2, x2, z2);
449 Fp64Add(z2, x3, z3);
450 Fp64Mul(z3, x2, t1);
451 Fp64Mul(z2, z2, t2);
452 Fp64Sqr(t1, t2);
453 Fp64Sqr(t2, x2);
454 Fp64Add(x3, z3, z2);
455 Fp64Sub(z2, z3, z2);
456 Fp64Mul(x2, t2, t1);
457 Fp64Sub(t2, t2, t1);
458 Fp64Sqr(z2, z2);
459 Fp64MulScalar(z3, t2);
460 Fp64Sqr(x3, x3);
461 Fp64Add(t1, t1, z3);
462 Fp64Mul(z3, x1, z2);
463 Fp64Mul(z2, t2, t1);
464 }
465
466 Fe64Invert(z2, z2);
467 Fp64Mul(x2, x2, z2);
468 Fp64PolyToData(out, x2);
469 (void)memset_s(e, sizeof(e), 0, sizeof(e));
470 }
471 #endif
472
/*
 * X25519 scalar multiplication entry point: picks the field implementation.
 *
 * out:    receives the 32-byte result.
 * scalar: 32-byte scalar.
 * point:  32-byte u-coordinate of the input point.
 *
 * On x86_64 builds with HITLS_CRYPTO_X25519_X8664 the Fp64 variant is used when
 * both IsSupportBMI2() and IsSupportADX() report true; every other case uses
 * the Fp51 variant.
 * NOTE(review): the two IsSupport* helpers are declared outside this file,
 * presumably as run-time CPU feature checks - confirm.
 */
void ScalarMultiPoint(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32])
{
#if defined(__x86_64__) && defined(HITLS_CRYPTO_X25519_X8664)
    /* IsSupportADX() is only evaluated when IsSupportBMI2() returned true. */
    if (!IsSupportBMI2() || !IsSupportADX()) {
        Fp51ScalarMultiPoint(out, scalar, point);
    } else {
        Fp64ScalarMultiPoint(out, scalar, point);
    }
#else
    Fp51ScalarMultiPoint(out, scalar, point);
#endif
}
484
485 #endif /* HITLS_CRYPTO_X25519 */
486