1 /*******************************************************************************
2 * Copyright 2017-2018 Intel Corporation
3 * All Rights Reserved.
4 *
5 * If this software was obtained under the Intel Simplified Software License,
6 * the following terms apply:
7 *
8 * The source code, information and material ("Material") contained herein is
9 * owned by Intel Corporation or its suppliers or licensors, and title to such
10 * Material remains with Intel Corporation or its suppliers or licensors. The
11 * Material contains proprietary information of Intel or its suppliers and
12 * licensors. The Material is protected by worldwide copyright laws and treaty
13 * provisions. No part of the Material may be used, copied, reproduced,
14 * modified, published, uploaded, posted, transmitted, distributed or disclosed
15 * in any way without Intel's prior express written permission. No license under
16 * any patent, copyright or other intellectual property rights in the Material
17 * is granted to or conferred upon you, either expressly, by implication,
18 * inducement, estoppel or otherwise. Any license under such intellectual
19 * property rights must be express and approved by Intel in writing.
20 *
21 * Unless otherwise agreed by Intel in writing, you may not remove or alter this
22 * notice or any other notice embedded in Materials by Intel or Intel's
23 * suppliers or licensors in any way.
24 *
25 *
26 * If this software was obtained under the Apache License, Version 2.0 (the
27 * "License"), the following terms apply:
28 *
29 * You may not use this file except in compliance with the License. You may
30 * obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
31 *
32 *
33 * Unless required by applicable law or agreed to in writing, software
34 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
35 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
36 *
37 * See the License for the specific language governing permissions and
38 * limitations under the License.
39 *******************************************************************************/
40
41 #if !defined(_GS_MOD_METHOD_STUFF_H)
42 #define _GS_MOD_METHOD_STUFF_H
43
44 #include "owndefs.h"
45 #include "owncp.h"
46 #include "pcpbnumisc.h"
47 #include "pcpbnuarith.h"
48
49 #include "gsmodstuff.h"
50 #include "gsmodmethod.h"
51 #include "pcpmontred.h"
52
53 //tbcd: temporary excluded: #include <assert.h>
54
55 /* r = (a+m) mod m */
56 /*
57 * Requirements:
58 * Length of pr data buffer: modLen
59 * Length of pa data buffer: modLen
60 * Length of pb data buffer: modLen
61 * Memory size from the pool: modLen * sizeof(BNU_CHUNK_T)
62 */
gs_mont_add(BNU_CHUNK_T * pr,const BNU_CHUNK_T * pa,const BNU_CHUNK_T * pb,gsModEngine * pME)63 static BNU_CHUNK_T* gs_mont_add(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, const BNU_CHUNK_T* pb, gsModEngine* pME)
64 {
65 const BNU_CHUNK_T* pm = MOD_MODULUS(pME);
66 int mLen = MOD_LEN(pME);
67
68 const int polLength = 1;
69 BNU_CHUNK_T* pBuffer = gsModPoolAlloc(pME, polLength);
70 //tbcd: temporary excluded: assert(NULL!=pBuffer);
71 {
72 BNU_CHUNK_T extension = cpAdd_BNU(pr, pa, pb, mLen);
73 extension -= cpSub_BNU(pBuffer, pr, pm, mLen);
74 cpMaskMove_gs(pr, pBuffer, mLen, cpIsZero(extension));
75 }
76 gsModPoolFree(pME, polLength);
77 return pr;
78 }
79
80 /* r = (a-b) mod m */
81 /*
82 * Requirements:
83 * Length of pr data buffer: modLen
84 * Length of pa data buffer: modLen
85 * Length of pb data buffer: modLen
86 * Memory size from the pool: modLen * sizeof(BNU_CHUNK_T)
87 */
gs_mont_sub(BNU_CHUNK_T * pr,const BNU_CHUNK_T * pa,const BNU_CHUNK_T * pb,gsModEngine * pME)88 static BNU_CHUNK_T* gs_mont_sub(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, const BNU_CHUNK_T* pb, gsModEngine* pME)
89 {
90 const BNU_CHUNK_T* pm = MOD_MODULUS(pME);
91 int mLen = MOD_LEN(pME);
92
93 const int polLength = 1;
94 BNU_CHUNK_T* pBuffer = gsModPoolAlloc(pME, polLength);
95 //tbcd: temporary excluded: assert(NULL!=pBuffer);
96 {
97 BNU_CHUNK_T extension = cpSub_BNU(pr, pa, pb, mLen);
98 cpAdd_BNU(pBuffer, pr, pm, mLen);
99 cpMaskMove_gs(pr, pBuffer, mLen, cpIsNonZero(extension));
100 }
101 gsModPoolFree(pME, polLength);
102 return pr;
103 }
104
105 /* r = (m-a) mod m */
106 /*
107 * Requirements:
108 * Length of pr data buffer: modLen
109 * Length of pa data buffer: modLen
110 * Memory size from the pool: modLen * sizeof(BNU_CHUNK_T)
111 */
gs_mont_neg(BNU_CHUNK_T * pr,const BNU_CHUNK_T * pa,gsModEngine * pME)112 static BNU_CHUNK_T* gs_mont_neg(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, gsModEngine* pME)
113 {
114 const BNU_CHUNK_T* pm = MOD_MODULUS(pME);
115 int mLen = MOD_LEN(pME);
116
117 const int polLength = 1;
118 BNU_CHUNK_T* pBuffer = gsModPoolAlloc(pME, polLength);
119 //tbcd: temporary excluded: assert(NULL!=pBuffer);
120 {
121 BNU_CHUNK_T extension = cpSub_BNU(pr, pm, pa, mLen);
122 extension -= cpSub_BNU(pBuffer, pr, pm, mLen);
123 cpMaskMove_gs(pr, pBuffer, mLen, cpIsZero(extension));
124 }
125 gsModPoolFree(pME, polLength);
126 return pr;
127 }
128
129 /* r = (a/2) mod m */
130 /*
131 * Requirements:
132 * Length of pr data buffer: modLen
133 * Length of pa data buffer: modLen
134 * Memory size from the pool: modLen * sizeof(BNU_CHUNK_T)
135 */
gs_mont_div2(BNU_CHUNK_T * pr,const BNU_CHUNK_T * pa,gsModEngine * pME)136 static BNU_CHUNK_T* gs_mont_div2(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, gsModEngine* pME)
137 {
138 const BNU_CHUNK_T* pm = MOD_MODULUS(pME);
139 int mLen = MOD_LEN(pME);
140
141 const int polLength = 1;
142 BNU_CHUNK_T* pBuffer = gsModPoolAlloc(pME, polLength);
143 //tbcd: temporary excluded: assert(NULL!=pBuffer);
144 {
145 cpSize i;
146 BNU_CHUNK_T mask = 0 - (pa[0]&1);
147 for(i=0; i<mLen; i++) pBuffer[i] = pm[i] & mask;
148
149 pBuffer[mLen] = cpAdd_BNU(pBuffer, pBuffer, pa, mLen);
150 cpLSR_BNU(pBuffer, pBuffer, mLen+1, 1);
151
152 for(i=0; i<mLen; i++) pr[i] = pBuffer[i];
153 }
154 gsModPoolFree(pME, polLength);
155 return pr;
156 }
157
158 /* r = (a*2) mod m */
159 /*
160 * Requirements:
161 * Length of pr data buffer: modLen
162 * Length of pa data buffer: modLen
163 * Memory size from the pool: modLen * sizeof(BNU_CHUNK_T)
164 */
gs_mont_mul2(BNU_CHUNK_T * pr,const BNU_CHUNK_T * pa,gsModEngine * pME)165 static BNU_CHUNK_T* gs_mont_mul2(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, gsModEngine* pME)
166 {
167 return gs_mont_add(pr, pa, pa, pME);
168 }
169
170 /* r = (a*3) mod m */
171 /*
172 * Requirements:
173 * Length of pr data buffer: modLen
174 * Length of pa data buffer: modLen
175 * Memory size from the pool: modLen * sizeof(BNU_CHUNK_T) * 2
176 */
gs_mont_mul3(BNU_CHUNK_T * pr,const BNU_CHUNK_T * pa,gsModEngine * pME)177 static BNU_CHUNK_T* gs_mont_mul3(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, gsModEngine* pME)
178 {
179 const int polLength = 1;
180 BNU_CHUNK_T* pBuffer = gsModPoolAlloc(pME, polLength);
181 //tbcd: temporary excluded: assert(NULL!=pBuffer);
182
183 gs_mont_add(pBuffer, pa, pa, pME);
184 gs_mont_add(pr, pa, pBuffer, pME);
185
186 gsModPoolFree(pME, polLength);
187 return pr;
188 }
189
190 /* r = prod mod m */
191 /*
192 * Requirements:
193 * Length of pr data buffer: modLen
194 * Length of prod data buffer: modLen * 2
195 * Memory size from the pool: n/a
196 */
197 #if ((_IPP <_IPP_W7) && (_IPP32E <_IPP32E_M7)) || defined(_USE_C_cpMontRedAdc_BNU_)
/*
 * Variant that performs the reduction inline with the MUL_AB / ADD_AB /
 * ADD_ABC macros. prod is modified in place while it is being reduced.
 */
static BNU_CHUNK_T* gs_mont_red(BNU_CHUNK_T* pr, BNU_CHUNK_T* prod, gsModEngine* pME)
{
   const BNU_CHUNK_T* pModulus = MOD_MODULUS(pME);
   BNU_CHUNK_T montFactor = MOD_MNT_FACTOR(pME);
   int len = MOD_LEN(pME);

   /* carry propagated between rows into the chunk at prod[len] */
   BNU_CHUNK_T topCarry = 0;
   int row;

   /* one pass per modulus chunk; prod slides up by one chunk after each pass */
   for(row=0; row<len; row++) {
      BNU_CHUNK_T mqLo, mqHi, ext;
      BNU_CHUNK_T sum;
      int col;

      /* q = prod[0]*montFactor mod B */
      BNU_CHUNK_T q = prod[0] * montFactor;

      /* (ext, sum) = prod[0] + m[0]*q  (sum is expected to be 0) */
      MUL_AB(mqHi, mqLo, pModulus[0], q);
      ADD_AB(ext, sum, prod[0], mqLo);
      ext += mqHi;

      for(col=1; col<len; col++) {
         BNU_CHUNK_T cy;
         MUL_AB(mqHi, mqLo, pModulus[col], q);   /* (Hi,Lo) = m[col]*q */
         ADD_AB(ext, sum, prod[col], ext);       /* carry lands in ext, low part in sum */
         ADD_AB(cy, prod[col], sum, mqLo);       /* carry lands in cy */
         ext += mqHi+cy;                         /* gather both carries and the high half */
      }
      ADD_ABC(topCarry, prod[len], prod[len], ext, topCarry);

      prod++;
   }

   /* prod now addresses the upper half; pr = prod - m, then restore prod
      under the mask when the combined carry/borrow word is non-zero */
   topCarry -= cpSub_BNU(pr, prod, pModulus, len);
   cpMaskMove_gs(pr, prod, len, cpIsNonZero(topCarry));
   return pr;
}
236
237 #else
/* Variant that hands the whole reduction to cpMontRedAdc_BNU. */
static BNU_CHUNK_T* gs_mont_red(BNU_CHUNK_T* pr, BNU_CHUNK_T* prod, gsModEngine* pME)
{
   const BNU_CHUNK_T* pModulus = MOD_MODULUS(pME);
   const BNU_CHUNK_T montFactor = MOD_MNT_FACTOR(pME);
   const int len = MOD_LEN(pME);

   /* pr receives the reduced value of the 2*len-chunk prod */
   cpMontRedAdc_BNU(pr, prod, pModulus, len, montFactor);
   return pr;
}
248
249 #if (_IPP32E>=_IPP32E_L9)
/* Same contract as gs_mont_red, implemented through cpMontRedAdx_BNU. */
static BNU_CHUNK_T* gs_mont_redX(BNU_CHUNK_T* pr, BNU_CHUNK_T* prod, gsModEngine* pME)
{
   const BNU_CHUNK_T* pModulus = MOD_MODULUS(pME);
   const BNU_CHUNK_T montFactor = MOD_MNT_FACTOR(pME);
   const int len = MOD_LEN(pME);

   /* pr receives the reduced value of the 2*len-chunk prod */
   cpMontRedAdx_BNU(pr, prod, pModulus, len, montFactor);
   return pr;
}
260 #endif
261
262 #endif
263
264
265 /* r = (a*b) mod m */
266 /*
267 * Requirements:
268 * Length of pr data buffer: modLen
269 * Length of pa data buffer: modLen
270 * Length of pb data buffer: modLen
271 * Memory size from the pool: modLen * sizeof(BNU_CHUNK_T) * 2
272 */
273 #if ((_IPP <_IPP_W7) && (_IPP32E <_IPP32E_M7))
/*
 * Variant that interleaves multiplication and reduction inline with the
 * MUL_AB / ADD_ABC macros; it takes a single element from the pool.
 */
static BNU_CHUNK_T* gs_mont_mul(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, const BNU_CHUNK_T* pb, gsModEngine* pME)
{
   const BNU_CHUNK_T* pModulus = MOD_MODULUS(pME);
   BNU_CHUNK_T montFactor = MOD_MNT_FACTOR(pME);
   int len = MOD_LEN(pME);

   const int poolChunks = 1;
   BNU_CHUNK_T* pAcc = gsModPoolAlloc(pME, poolChunks);
   //tbcd: temporary excluded: assert(NULL!=pAcc);

   /* carry propagated between rows into the top accumulator chunk */
   BNU_CHUNK_T topCarry = 0;
   int row, col;

   /* start from a zero accumulator */
   for(col=0; col<len; col++)
      pAcc[col] = 0;

   /* one row per chunk of b */
   for(row=0; row<len; row++) {
      BNU_CHUNK_T bWord = pb[row];

      BNU_CHUNK_T extAB = 0;
      BNU_CHUNK_T extMQ = 0;

      BNU_CHUNK_T q;
      BNU_CHUNK_T abLo, abHi;
      BNU_CHUNK_T mqLo, mqHi;

      /* (extAB, abLo) = T = acc[0] + a[0]*bWord */
      MUL_AB(abHi, abLo, pa[0], bWord);
      ADD_ABC(extAB, abLo, pAcc[0], abLo, extAB);
      extAB += abHi;

      /* q = T*montFactor mod B */
      q = abLo * montFactor;

      /* (extMQ, abLo) = T + m[0]*q  (abLo is expected to be 0) */
      MUL_AB(mqHi, mqLo, pModulus[0], q);
      ADD_ABC(extMQ, abLo, abLo, mqLo, extMQ);
      extMQ += mqHi;

      for(col=1; col<len; col++) {
         MUL_AB(abHi, abLo, pa[col], bWord);
         ADD_ABC(extAB, abLo, pAcc[col], abLo, extAB);
         extAB += abHi;

         MUL_AB(mqHi, mqLo, pModulus[col], q);
         ADD_ABC(extMQ, abLo, abLo, mqLo, extMQ);
         extMQ += mqHi;

         /* store one position lower: the accumulator shifts down by a chunk */
         pAcc[col-1] = abLo;
      }
      ADD_ABC(topCarry, pAcc[len-1], extAB, extMQ, topCarry);
   }

   /* pr = acc - m, then restore acc under the mask when the combined
      carry/borrow word is non-zero */
   topCarry -= cpSub_BNU(pr, pAcc, pModulus, len);
   cpMaskMove_gs(pr, pAcc, len, cpIsNonZero(topCarry));

   gsModPoolFree(pME, poolChunks);
   return pr;
}
336
337 #else
338
/*
 * Variant that forms the full product with cpMulAdc_BNU_school and then
 * reduces it with cpMontRedAdc_BNU; it takes two elements from the pool.
 */
static BNU_CHUNK_T* gs_mont_mul(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, const BNU_CHUNK_T* pb, gsModEngine* pME)
{
   const BNU_CHUNK_T* pModulus = MOD_MODULUS(pME);
   const BNU_CHUNK_T montFactor = MOD_MNT_FACTOR(pME);
   const int len = MOD_LEN(pME);

   const int poolChunks = 2;
   BNU_CHUNK_T* pWide = gsModPoolAlloc(pME, poolChunks);
   //tbcd: temporary excluded: assert(NULL!=pWide);

   /* pWide = a * b */
   cpMulAdc_BNU_school(pWide, pa, len, pb, len);
   /* pr = reduction of pWide */
   cpMontRedAdc_BNU(pr, pWide, pModulus, len, montFactor);

   gsModPoolFree(pME, poolChunks);
   return pr;
}
355
356 #if (_IPP32E>=_IPP32E_L9)
/*
 * Same contract as gs_mont_mul, implemented through cpMulAdx_BNU_school and
 * cpMontRedAdx_BNU; it takes two elements from the pool.
 */
static BNU_CHUNK_T* gs_mont_mulX(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, const BNU_CHUNK_T* pb, gsModEngine* pME)
{
   const BNU_CHUNK_T* pModulus = MOD_MODULUS(pME);
   const BNU_CHUNK_T montFactor = MOD_MNT_FACTOR(pME);
   const int len = MOD_LEN(pME);

   const int poolChunks = 2;
   BNU_CHUNK_T* pWide = gsModPoolAlloc(pME, poolChunks);
   //tbcd: temporary excluded: assert(NULL!=pWide);

   /* pWide = a * b */
   cpMulAdx_BNU_school(pWide, pa, len, pb, len);
   /* pr = reduction of pWide */
   cpMontRedAdx_BNU(pr, pWide, pModulus, len, montFactor);

   gsModPoolFree(pME, poolChunks);
   return pr;
}
373 #endif
374 #endif
375
376 /* r = (a^2) mod m */
377 /*
378 * Requirements:
379 * Length of pr data buffer: modLen
380 * Length of pa data buffer: modLen
381 * Memory size from the pool: modLen * sizeof(BNU_CHUNK_T)
382 */
gs_mont_sqr(BNU_CHUNK_T * pr,const BNU_CHUNK_T * pa,gsModEngine * pME)383 static BNU_CHUNK_T* gs_mont_sqr(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, gsModEngine* pME)
384 {
385 //return gs_mont_mul(pr, pa, pa, pME);
386 const BNU_CHUNK_T* pm = MOD_MODULUS(pME);
387 BNU_CHUNK_T m0 = MOD_MNT_FACTOR(pME);
388 int mLen = MOD_LEN(pME);
389
390 const int polLength = 2;
391 BNU_CHUNK_T* pProduct = gsModPoolAlloc(pME, polLength);
392 //tbcd: temporary excluded: assert(NULL!=pProduct);
393
394 cpSqrAdc_BNU_school(pProduct, pa,mLen);
395 cpMontRedAdc_BNU(pr, pProduct, pm, mLen, m0);
396
397 gsModPoolFree(pME, polLength);
398 return pr;
399 }
400
401 #if (_IPP32E>=_IPP32E_L9)
/*
 * Same contract as gs_mont_sqr, implemented through cpSqrAdx_BNU_school and
 * cpMontRedAdx_BNU; it takes two elements from the pool.
 */
static BNU_CHUNK_T* gs_mont_sqrX(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, gsModEngine* pME)
{
   const BNU_CHUNK_T* pModulus = MOD_MODULUS(pME);
   const BNU_CHUNK_T montFactor = MOD_MNT_FACTOR(pME);
   const int len = MOD_LEN(pME);

   const int poolChunks = 2;
   BNU_CHUNK_T* pWide = gsModPoolAlloc(pME, poolChunks);
   //tbcd: temporary excluded: assert(NULL!=pWide);

   /* pWide = a * a */
   cpSqrAdx_BNU_school(pWide, pa, len);
   /* pr = reduction of pWide */
   cpMontRedAdx_BNU(pr, pWide, pModulus, len, montFactor);

   gsModPoolFree(pME, poolChunks);
   return pr;
}
418 #endif
419
420 /* r = to_mont(a) */
421 /*
422 * Requirements:
423 * Length of pr data buffer: modLen
424 * Length of pa data buffer: modLen
425 * Memory size from the pool: modLen * sizeof(BNU_CHUNK_T)
426 */
gs_mont_encode(BNU_CHUNK_T * pr,const BNU_CHUNK_T * pa,gsModEngine * pME)427 static BNU_CHUNK_T* gs_mont_encode(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, gsModEngine* pME)
428 {
429 //return gs_mont_mul(pr, pa, MOD_MNT_R2(pME), pME);
430 const BNU_CHUNK_T* pm = MOD_MODULUS(pME);
431 BNU_CHUNK_T m0 = MOD_MNT_FACTOR(pME);
432 int mLen = MOD_LEN(pME);
433
434 const int polLength = 2;
435 BNU_CHUNK_T* pProduct = gsModPoolAlloc(pME, polLength);
436 //tbcd: temporary excluded: assert(NULL!=pProduct);
437
438 cpMulAdc_BNU_school(pProduct, pa,mLen, MOD_MNT_R2(pME),mLen);
439 cpMontRedAdc_BNU(pr, pProduct, pm, mLen, m0);
440
441 gsModPoolFree(pME, polLength);
442 return pr;
443 }
444
445 #if (_IPP32E>=_IPP32E_L9)
/*
 * Same contract as gs_mont_encode, implemented through cpMulAdx_BNU_school
 * and cpMontRedAdx_BNU; it takes two elements from the pool.
 */
static BNU_CHUNK_T* gs_mont_encodeX(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, gsModEngine* pME)
{
   const BNU_CHUNK_T* pModulus = MOD_MODULUS(pME);
   const BNU_CHUNK_T montFactor = MOD_MNT_FACTOR(pME);
   const int len = MOD_LEN(pME);

   const int poolChunks = 2;
   BNU_CHUNK_T* pWide = gsModPoolAlloc(pME, poolChunks);
   //tbcd: temporary excluded: assert(NULL!=pWide);

   /* pWide = a * MOD_MNT_R2 */
   cpMulAdx_BNU_school(pWide, pa, len, MOD_MNT_R2(pME), len);
   /* pr = reduction of pWide */
   cpMontRedAdx_BNU(pr, pWide, pModulus, len, montFactor);

   gsModPoolFree(pME, poolChunks);
   return pr;
}
463 #endif
464
465 /* r = from_momt(a) */
466 /*
467 * Requirements:
468 * Length of pr data buffer: modLen
469 * Length of pa data buffer: modLen
470 * Memory size from the pool: modLen * sizeof(BNU_CHUNK_T)
471 */
gs_mont_decode(BNU_CHUNK_T * pr,const BNU_CHUNK_T * pa,gsModEngine * pME)472 static BNU_CHUNK_T* gs_mont_decode(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, gsModEngine* pME)
473 {
474 int mLen = MOD_LEN(pME);
475
476 const int polLength = 2;
477 BNU_CHUNK_T* pProduct = gsModPoolAlloc(pME, polLength);
478 //tbcd: temporary excluded: assert(NULL!=pProduct);
479
480 ZEXPAND_COPY_BNU(pProduct, 2*mLen, pa, mLen);
481 cpMontRedAdc_BNU(pr, pProduct, MOD_MODULUS(pME), mLen, MOD_MNT_FACTOR(pME));
482
483 gsModPoolFree(pME, polLength);
484 return pr;
485 }
486
487 #if (_IPP32E>=_IPP32E_L9)
/*
 * Same contract as gs_mont_decode, implemented through cpMontRedAdx_BNU;
 * it takes two elements from the pool.
 */
static BNU_CHUNK_T* gs_mont_decodeX(BNU_CHUNK_T* pr, const BNU_CHUNK_T* pa, gsModEngine* pME)
{
   const int len = MOD_LEN(pME);

   const int poolChunks = 2;
   BNU_CHUNK_T* pWide = gsModPoolAlloc(pME, poolChunks);
   //tbcd: temporary excluded: assert(NULL!=pWide);

   /* pWide = a, padded with zero chunks up to 2*len */
   ZEXPAND_COPY_BNU(pWide, 2*len, pa, len);
   /* pr = reduction of pWide */
   cpMontRedAdx_BNU(pr, pWide, MOD_MODULUS(pME), len, MOD_MNT_FACTOR(pME));

   gsModPoolFree(pME, poolChunks);
   return pr;
}
502 #endif
503
504 #endif /* _GS_MOD_METHOD_STUFF_H */
505
506