diff --git a/crypto/bn/bn_blind.c b/crypto/bn/bn_blind.c
index 76fc7ebcff..6e9d239321 100644
--- a/crypto/bn/bn_blind.c
+++ b/crypto/bn/bn_blind.c
@@ -13,20 +13,6 @@
 
 #define BN_BLINDING_COUNTER     32
 
-struct bn_blinding_st {
-    BIGNUM *A;
-    BIGNUM *Ai;
-    BIGNUM *e;
-    BIGNUM *mod;                /* just a reference */
-    CRYPTO_THREAD_ID tid;
-    int counter;
-    unsigned long flags;
-    BN_MONT_CTX *m_ctx;
-    int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
-                       const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
-    CRYPTO_RWLOCK *lock;
-};
-
 BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
 {
     BN_BLINDING *ret = NULL;
diff --git a/crypto/bn/bn_err.c b/crypto/bn/bn_err.c
index dd87c152cf..3dd8d9a568 100644
--- a/crypto/bn/bn_err.c
+++ b/crypto/bn/bn_err.c
@@ -73,6 +73,8 @@ static const ERR_STRING_DATA BN_str_functs[] = {
     {ERR_PACK(ERR_LIB_BN, BN_F_BN_SET_WORDS, 0), "bn_set_words"},
     {ERR_PACK(ERR_LIB_BN, BN_F_BN_STACK_PUSH, 0), "BN_STACK_push"},
     {ERR_PACK(ERR_LIB_BN, BN_F_BN_USUB, 0), "BN_usub"},
+    {ERR_PACK(ERR_LIB_BN, BN_F_OSSL_BN_RSA_DO_UNBLIND, 0),
+     "ossl_bn_rsa_do_unblind"},
     {0, NULL}
 };
 
diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h
index 62a969b134..4d8cb64675 100644
--- a/crypto/bn/bn_local.h
+++ b/crypto/bn/bn_local.h
@@ -283,6 +283,20 @@ struct bn_gencb_st {
     } cb;
 };
 
+struct bn_blinding_st {
+    BIGNUM *A;
+    BIGNUM *Ai;
+    BIGNUM *e;
+    BIGNUM *mod;                /* just a reference */
+    CRYPTO_THREAD_ID tid;
+    int counter;
+    unsigned long flags;
+    BN_MONT_CTX *m_ctx;
+    int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
+                       const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
+    CRYPTO_RWLOCK *lock;
+};
+
 /*-
  * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
  *
diff --git a/crypto/bn/build.info b/crypto/bn/build.info
index b9ed5322fa..c9fe2fdada 100644
--- a/crypto/bn/build.info
+++ b/crypto/bn/build.info
@@ -5,7 +5,8 @@ SOURCE[../../libcrypto]=\
         bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c \
         {- $target{bn_asm_src} -} \
         bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
-        bn_depr.c bn_const.c bn_x931p.c bn_intern.c bn_dh.c bn_srp.c
+        bn_depr.c bn_const.c bn_x931p.c bn_intern.c bn_dh.c bn_srp.c \
+        rsa_sup_mul.c
 
 INCLUDE[bn_exp.o]=..
 
diff --git a/crypto/bn/rsa_sup_mul.c b/crypto/bn/rsa_sup_mul.c
new file mode 100644
index 0000000000..acafefd5fe
--- /dev/null
+++ b/crypto/bn/rsa_sup_mul.c
@@ -0,0 +1,644 @@
+#include <openssl/e_os2.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <string.h>
+#include <openssl/bn.h>
+#include <openssl/err.h>
+#include <openssl/rsaerr.h>
+#include "internal/numbers.h"
+#include "internal/constant_time.h"
+#include "bn_local.h"
+
+# if BN_BYTES == 8
+typedef uint64_t limb_t;
+#  if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16
+/* nonstandard; implemented by gcc on 64-bit platforms */
+typedef __uint128_t limb2_t;
+#   define HAVE_LIMB2_T
+#  endif
+#  define LIMB_BIT_SIZE 64
+#  define LIMB_BYTE_SIZE 8
+# elif BN_BYTES == 4
+typedef uint32_t limb_t;
+typedef uint64_t limb2_t;
+#  define LIMB_BIT_SIZE 32
+#  define LIMB_BYTE_SIZE 4
+#  define HAVE_LIMB2_T
+# else
+#  error "Not supported"
+# endif
+
+/*
+ * For multiplication we're using schoolbook multiplication,
+ * so if we have two numbers, each with 6 "digits" (words)
+ * the multiplication is calculated as follows:
+ *                        A B C D E F
+ *                     x  I J K L M N
+ *                     --------------
+ *                                N*F
+ *                              N*E
+ *                            N*D
+ *                          N*C
+ *                        N*B
+ *                      N*A
+ *                              M*F
+ *                            M*E
+ *                          M*D
+ *                        M*C
+ *                      M*B
+ *                    M*A
+ *                            L*F
+ *                          L*E
+ *                        L*D
+ *                      L*C
+ *                    L*B
+ *                  L*A
+ *                          K*F
+ *                        K*E
+ *                      K*D
+ *                    K*C
+ *                  K*B
+ *                K*A
+ *                        J*F
+ *                      J*E
+ *                    J*D
+ *                  J*C
+ *                J*B
+ *              J*A
+ *                      I*F
+ *                    I*E
+ *                  I*D
+ *                I*C
+ *              I*B
+ *         +  I*A
+ *         ==========================
+ *                        N*B N*D N*F
+ *                    + N*A N*C N*E
+ *                    + M*B M*D M*F
+ *                  + M*A M*C M*E
+ *                  + L*B L*D L*F
+ *                + L*A L*C L*E
+ *                + K*B K*D K*F
+ *              + K*A K*C K*E
+ *              + J*B J*D J*F
+ *            + J*A J*C J*E
+ *            + I*B I*D I*F
+ *          + I*A I*C I*E
+ *
+ *                1+1 1+3 1+5
+ *              1+0 1+2 1+4
+ *              0+1 0+3 0+5
+ *            0+0 0+2 0+4
+ *
+ *            0 1 2 3 4 5 6
+ * which requires n^2 multiplications and 2n full length additions
+ * as we can keep every other result of limb multiplication in two separate
+ * limbs
+ */
+
+#if defined HAVE_LIMB2_T
+static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
+{
+    limb2_t t;
+    /*
+     * this is idiomatic code to tell compiler to use the native mul
+     * those three lines will actually compile to single instruction
+     */
+
+    t = (limb2_t)a * b;
+    *hi = t >> LIMB_BIT_SIZE;
+    *lo = (limb_t)t;
+}
+#elif (BN_BYTES == 8) && (defined _MSC_VER)
+/* https://learn.microsoft.com/en-us/cpp/intrinsics/umul128?view=msvc-170 */
+#pragma intrinsic(_umul128)
+static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
+{
+    *lo = _umul128(a, b, hi);
+}
+#else
+/*
+ * if the compiler has neither a 128bit data type nor a "return
+ * high 64 bits of multiplication" intrinsic
+ */
+static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
+{
+    limb_t a_low = (limb_t)(uint32_t)a;
+    limb_t a_hi = a >> 32;
+    limb_t b_low = (limb_t)(uint32_t)b;
+    limb_t b_hi = b >> 32;
+
+    limb_t p0 = a_low * b_low;
+    limb_t p1 = a_low * b_hi;
+    limb_t p2 = a_hi * b_low;
+    limb_t p3 = a_hi * b_hi;
+
+    uint32_t cy = (uint32_t)(((p0 >> 32) + (uint32_t)p1 + (uint32_t)p2) >> 32);
+
+    *lo = p0 + (p1 << 32) + (p2 << 32);
+    *hi = p3 + (p1 >> 32) + (p2 >> 32) + cy;
+}
+#endif
+
+/* add two limbs with carry in, return carry out */
+static ossl_inline limb_t _add_limb(limb_t *ret, limb_t a, limb_t b, limb_t carry)
+{
+    limb_t carry1, carry2, t;
+    /*
+     * `c = a + b; if (c < a)` is idiomatic code that makes compilers
+     * use add with carry on assembly level
+     */
+
+    *ret = a + carry;
+    if (*ret < a)
+        carry1 = 1;
+    else
+        carry1 = 0;
+
+    t = *ret;
+    *ret = t + b;
+    if (*ret < t)
+        carry2 = 1;
+    else
+        carry2 = 0;
+
+    return carry1 + carry2;
+}
+
+/*
+ * add two numbers of the same size, return overflow
+ *
+ * add a to b, place result in ret; all arrays need to be n limbs long
+ * return overflow from addition (0 or 1)
+ */
+static ossl_inline limb_t add(limb_t *ret, limb_t *a, limb_t *b, size_t n)
+{
+    limb_t c = 0;
+    ossl_ssize_t i;
+
+    for (i = n - 1; i > -1; i--)
+        c = _add_limb(&ret[i], a[i], b[i], c);
+
+    return c;
+}
+
+/*
+ * return number of limbs necessary for temporary values
+ * when multiplying numbers n limbs large
+ */
+static ossl_inline size_t mul_limb_numb(size_t n)
+{
+    return 2 * n * 2;
+}
+
+/*
+ * multiply two numbers of the same size
+ *
+ * multiply a by b, place result in ret; a and b need to be n limbs long
+ * ret needs to be 2*n limbs long, tmp needs to be mul_limb_numb(n) limbs
+ * long
+ */
+static void limb_mul(limb_t *ret, limb_t *a, limb_t *b, size_t n, limb_t *tmp)
+{
+    limb_t *r_odd, *r_even;
+    size_t i, j, k;
+
+    r_odd = tmp;
+    r_even = &tmp[2 * n];
+
+    memset(ret, 0, 2 * n * sizeof(limb_t));
+
+    for (i = 0; i < n; i++) {
+        for (k = 0; k < i + n + 1; k++) {
+            r_even[k] = 0;
+            r_odd[k] = 0;
+        }
+        for (j = 0; j < n; j++) {
+            /*
+             * place results from even and odd limbs in separate arrays so that
+             * we don't have to calculate overflow every time we get individual
+             * limb multiplication result
+             */
+            if (j % 2 == 0)
+                _mul_limb(&r_even[i + j], &r_even[i + j + 1], a[i], b[j]);
+            else
+                _mul_limb(&r_odd[i + j], &r_odd[i + j + 1], a[i], b[j]);
+        }
+        /*
+         * skip the least significant limbs when adding multiples of
+         * more significant limbs (they're zero anyway)
+         */
+        add(ret, ret, r_even, n + i + 1);
+        add(ret, ret, r_odd, n + i + 1);
+    }
+}
+
+/* modifies the value in place by performing a right shift by one bit */
+static ossl_inline void rshift1(limb_t *val, size_t n)
+{
+    limb_t shift_in = 0, shift_out = 0;
+    size_t i;
+
+    for (i = 0; i < n; i++) {
+        shift_out = val[i] & 1;
+        val[i] = shift_in << (LIMB_BIT_SIZE - 1) | (val[i] >> 1);
+        shift_in = shift_out;
+    }
+}
+
+/* extend the LSB of flag to all bits of limb */
+static ossl_inline limb_t mk_mask(limb_t flag)
+{
+    flag |= flag << 1;
+    flag |= flag << 2;
+    flag |= flag << 4;
+    flag |= flag << 8;
+    flag |= flag << 16;
+#if (LIMB_BYTE_SIZE == 8)
+    flag |= flag << 32;
+#endif
+    return flag;
+}
+
+/*
+ * copy from either a or b to ret based on flag
+ * when flag == 0, then copies from b
+ * when flag == 1, then copies from a
+ */
+static ossl_inline void cselect(limb_t flag, limb_t *ret, limb_t *a, limb_t *b, size_t n)
+{
+    /*
+     * would be more efficient with non volatile mask, but then gcc
+     * generates code with jumps
+     */
+    volatile limb_t mask;
+    size_t i;
+
+    mask = mk_mask(flag);
+    for (i = 0; i < n; i++) {
+#if (LIMB_BYTE_SIZE == 8)
+        ret[i] = constant_time_select_64(mask, a[i], b[i]);
+#else
+        ret[i] = constant_time_select_32(mask, a[i], b[i]);
+#endif
+    }
+}
+
+/* subtract b and borrow in from a, place result in ret, return borrow out */
+static limb_t _sub_limb(limb_t *ret, limb_t a, limb_t b, limb_t borrow)
+{
+    limb_t borrow1, borrow2, t;
+    /*
+     * while it doesn't look constant-time, this is idiomatic code
+     * to tell compilers to use the carry bit from subtraction
+     */
+
+    *ret = a - borrow;
+    if (*ret > a)
+        borrow1 = 1;
+    else
+        borrow1 = 0;
+
+    t = *ret;
+    *ret = t - b;
+    if (*ret > t)
+        borrow2 = 1;
+    else
+        borrow2 = 0;
+
+    return borrow1 + borrow2;
+}
+
+/*
+ * place the result of a - b into ret, return the borrow bit.
+ * All arrays need to be n limbs long
+ */
+static limb_t sub(limb_t *ret, limb_t *a, limb_t *b, size_t n)
+{
+    limb_t borrow = 0;
+    ossl_ssize_t i;
+
+    for (i = n - 1; i > -1; i--)
+        borrow = _sub_limb(&ret[i], a[i], b[i], borrow);
+
+    return borrow;
+}
+
+/* return the number of limbs necessary to allocate for the mod() tmp operand */
+static ossl_inline size_t mod_limb_numb(size_t anum, size_t modnum)
+{
+    return (anum + modnum) * 3;
+}
+
+/*
+ * calculate a % mod, place the result in ret
+ * size of a is defined by anum, size of ret and mod is modnum,
+ * size of tmp is returned by mod_limb_numb()
+ */
+static void mod(limb_t *ret, limb_t *a, size_t anum, limb_t *mod,
+                size_t modnum, limb_t *tmp)
+{
+    limb_t *atmp, *modtmp, *rettmp;
+    limb_t res;
+    size_t i;
+
+    memset(tmp, 0, mod_limb_numb(anum, modnum) * LIMB_BYTE_SIZE);
+
+    atmp = tmp;
+    modtmp = &tmp[anum + modnum];
+    rettmp = &tmp[(anum + modnum) * 2];
+
+    for (i = modnum; i < modnum + anum; i++)
+        atmp[i] = a[i - modnum];
+
+    for (i = 0; i < modnum; i++)
+        modtmp[i] = mod[i];
+
+    for (i = 0; i < anum * LIMB_BIT_SIZE; i++) {
+        rshift1(modtmp, anum + modnum);
+        res = sub(rettmp, atmp, modtmp, anum + modnum);
+        cselect(res, atmp, atmp, rettmp, anum + modnum);
+    }
+
+    memcpy(ret, &atmp[anum], sizeof(limb_t) * modnum);
+}
+
+/* necessary size of tmp for a _mul_add_limb() call with provided anum */
+static ossl_inline size_t _mul_add_limb_numb(size_t anum)
+{
+    return 2 * (anum + 1);
+}
+
+/* multiply a by m, add to ret, return carry */
+static limb_t _mul_add_limb(limb_t *ret, limb_t *a, size_t anum,
+                            limb_t m, limb_t *tmp)
+{
+    limb_t carry = 0;
+    limb_t *r_odd, *r_even;
+    size_t i;
+
+    memset(tmp, 0, sizeof(limb_t) * (anum + 1) * 2);
+
+    r_odd = tmp;
+    r_even = &tmp[anum + 1];
+
+    for (i = 0; i < anum; i++) {
+        /*
+         * place the results from even and odd limbs in separate arrays
+         * so that we have to worry about carry just once
+         */
+        if (i % 2 == 0)
+            _mul_limb(&r_even[i], &r_even[i + 1], a[i], m);
+        else
+            _mul_limb(&r_odd[i], &r_odd[i + 1], a[i], m);
+    }
+    /* assert: add() carry here will be equal zero */
+    add(r_even, r_even, r_odd, anum + 1);
+    /*
+     * while here it will not overflow as the max value from multiplication
+     * is -2 while max overflow from addition is 1, so the max value of
+     * carry is -1 (i.e. max int)
+     */
+    carry = add(ret, ret, &r_even[1], anum) + r_even[0];
+
+    return carry;
+}
+
+/* necessary size of tmp for a mod_montgomery() call with provided modnum */
+static ossl_inline size_t mod_montgomery_limb_numb(size_t modnum)
+{
+    return modnum * 2 + _mul_add_limb_numb(modnum);
+}
+
+/*
+ * calculate a % mod, place result in ret
+ * assumes that a is in Montgomery form with the R (Montgomery modulus) being
+ * the smallest power of two big enough to fit mod that is also a power
+ * of B, where B is two to the power of the number of bits in limb_t.
+ * For calculation, we also need n', such that mod * n' == -1 mod B.
+ * anum must be <= 2 * modnum
+ * ret needs to be modnum words long
+ * tmp needs to be mod_montgomery_limb_numb(modnum) limbs long
+ */
+static void mod_montgomery(limb_t *ret, limb_t *a, size_t anum, limb_t *mod,
+                           size_t modnum, limb_t ni0, limb_t *tmp)
+{
+    limb_t carry, v;
+    limb_t *res, *rp, *tmp2;
+    ossl_ssize_t i;
+
+    res = tmp;
+    /*
+     * for intermediate result we need an integer twice as long as modulus
+     * but keep the input in the least significant limbs
+     */
+    memset(res, 0, sizeof(limb_t) * (modnum * 2));
+    memcpy(&res[modnum * 2 - anum], a, sizeof(limb_t) * anum);
+    rp = &res[modnum];
+    tmp2 = &res[modnum * 2];
+
+    carry = 0;
+
+    /* add multiples of the modulus to the value until R divides it cleanly */
+    for (i = modnum; i > 0; i--, rp--) {
+        v = _mul_add_limb(rp, mod, modnum, rp[modnum - 1] * ni0, tmp2);
+        v = v + carry + rp[-1];
+        carry |= (v != rp[-1]);
+        carry &= (v <= rp[-1]);
+        rp[-1] = v;
+    }
+
+    /* perform the final reduction by mod... */
+    carry -= sub(ret, rp, mod, modnum);
+
+    /* ...conditionally */
+    cselect(carry, ret, rp, ret, modnum);
+}
+
+/* store bn as limbs, most significant first, right-aligned in zeroed buf */
+static void BN_to_limb(const BIGNUM *bn, limb_t *buf, size_t limbs)
+{
+    int i;
+    int real_limbs = (BN_num_bytes(bn) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
+    limb_t *ptr = buf + (limbs - real_limbs);
+
+    for (i = 0; i < real_limbs; i++)
+         ptr[i] = bn->d[real_limbs - i - 1];
+}
+
+#if LIMB_BYTE_SIZE == 8
+static ossl_inline uint64_t be64(uint64_t host)
+{
+    const union {
+        long one;
+        char little;
+    } is_endian = { 1 };
+
+    if (is_endian.little) {
+        uint64_t big = 0;
+
+        big |= (host & 0xff00000000000000) >> 56;
+        big |= (host & 0x00ff000000000000) >> 40;
+        big |= (host & 0x0000ff0000000000) >> 24;
+        big |= (host & 0x000000ff00000000) >>  8;
+        big |= (host & 0x00000000ff000000) <<  8;
+        big |= (host & 0x0000000000ff0000) << 24;
+        big |= (host & 0x000000000000ff00) << 40;
+        big |= (host & 0x00000000000000ff) << 56;
+        return big;
+    } else {
+        return host;
+    }
+}
+
+#else
+/* Not all platforms have htobe32(). */
+static ossl_inline uint32_t be32(uint32_t host)
+{
+    const union {
+        long one;
+        char little;
+    } is_endian = { 1 };
+
+    if (is_endian.little) {
+        uint32_t big = 0;
+
+        big |= (host & 0xff000000) >> 24;
+        big |= (host & 0x00ff0000) >> 8;
+        big |= (host & 0x0000ff00) << 8;
+        big |= (host & 0x000000ff) << 24;
+        return big;
+    } else {
+        return host;
+    }
+}
+#endif
+
+/*
+ * Multiply intermediate by the blinding inverse (blinding->Ai, or
+ * possible_arg2 when it is not NULL) modulo to_mod and serialize the product.
+ *
+ * We assume that intermediate, possible_arg2, blinding, and ctx are used
+ * similar to BN_BLINDING_invert_ex() arguments.
+ * to_mod is RSA modulus.
+ * buf and num is the serialization buffer and its length.
+ *
+ * Here we use classic/Montgomery multiplication and modulo. After the
+ * calculation finished we serialize the new structure instead of BIGNUMs
+ * taking endianness into account.
+ *
+ * Returns num on success, with the result stored big-endian in buf and
+ * left-padded with zeros to num bytes, or 0 on error.
+ */
+int ossl_bn_rsa_do_unblind(const BIGNUM *intermediate,
+                           const BN_BLINDING *blinding,
+                           const BIGNUM *possible_arg2,
+                           const BIGNUM *to_mod, BN_CTX *ctx,
+                           unsigned char *buf, int num)
+{
+    limb_t *l_im = NULL, *l_mul = NULL, *l_mod = NULL;
+    limb_t *l_ret = NULL, *l_tmp = NULL, l_buf;
+    size_t l_im_count = 0, l_mul_count = 0, l_size = 0, l_mod_count = 0;
+    size_t l_tmp_count = 0, l_ret_count = 0, l_need = 0;
+    int ret = 0, mod_bytes, delta;
+    size_t i;
+    unsigned char *tmp;
+    const BIGNUM *arg1 = intermediate;
+    const BIGNUM *arg2 = (possible_arg2 == NULL) ? blinding->Ai : possible_arg2;
+
+    /*
+     * modulus size in bytes can be equal to num but after limbs conversion it
+     * becomes bigger; reject unusable arguments before touching any secrets
+     */
+    mod_bytes = BN_num_bytes(to_mod);
+    if (mod_bytes == 0 || num < mod_bytes) {
+        BNerr(BN_F_OSSL_BN_RSA_DO_UNBLIND, ERR_R_PASSED_INVALID_ARGUMENT);
+        goto err;
+    }
+
+    l_im_count  = (BN_num_bytes(arg1) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
+    l_mul_count = (BN_num_bytes(arg2) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
+    l_mod_count = (mod_bytes + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
+
+    l_size = l_im_count > l_mul_count ? l_im_count : l_mul_count;
+    /* mod_montgomery() takes at most 2 * l_mod_count limbs of input */
+    if (blinding->m_ctx != NULL && l_size > l_mod_count) {
+        BNerr(BN_F_OSSL_BN_RSA_DO_UNBLIND, ERR_R_PASSED_INVALID_ARGUMENT);
+        goto err;
+    }
+
+    l_im  = OPENSSL_zalloc(l_size * LIMB_BYTE_SIZE);
+    l_mul = OPENSSL_zalloc(l_size * LIMB_BYTE_SIZE);
+    l_mod = OPENSSL_zalloc(l_mod_count * LIMB_BYTE_SIZE);
+
+    if ((l_im == NULL) || (l_mul == NULL) || (l_mod == NULL)) {
+        BNerr(BN_F_OSSL_BN_RSA_DO_UNBLIND, ERR_R_MALLOC_FAILURE);
+        goto err;
+    }
+
+    BN_to_limb(arg1, l_im, l_size);
+    BN_to_limb(arg2, l_mul, l_size);
+    BN_to_limb(to_mod, l_mod, l_mod_count);
+
+    /*
+     * l_ret first holds the 2 * l_size limbs product and then the l_mod_count
+     * limbs remainder, so it has to be big enough for either of them
+     */
+    l_ret_count = 2 * l_size > l_mod_count ? 2 * l_size : l_mod_count;
+    l_ret = OPENSSL_malloc(l_ret_count * LIMB_BYTE_SIZE);
+
+    if (blinding->m_ctx != NULL)
+        l_need = mod_montgomery_limb_numb(l_mod_count);
+    else
+        l_need = mod_limb_numb(2 * l_size, l_mod_count);
+    l_tmp_count = mul_limb_numb(l_size);
+    if (l_tmp_count < l_need)
+        l_tmp_count = l_need;
+    l_tmp = OPENSSL_malloc(l_tmp_count * LIMB_BYTE_SIZE);
+
+    if ((l_ret == NULL) || (l_tmp == NULL)) {
+        BNerr(BN_F_OSSL_BN_RSA_DO_UNBLIND, ERR_R_MALLOC_FAILURE);
+        goto err;
+    }
+
+    limb_mul(l_ret, l_im, l_mul, l_size, l_tmp);
+    if (blinding->m_ctx != NULL)
+        mod_montgomery(l_ret, l_ret, 2 * l_size, l_mod, l_mod_count,
+                       blinding->m_ctx->n0[0], l_tmp);
+    else
+        mod(l_ret, l_ret, 2 * l_size, l_mod, l_mod_count, l_tmp);
+
+    memset(buf, 0, num);
+    tmp = buf + num - mod_bytes;
+    /*
+     * number of bytes of the most significant limb that are part of the
+     * modulus; it depends on the modulus size only, as num may be bigger
+     */
+    delta = LIMB_BYTE_SIZE - (int)(l_mod_count * LIMB_BYTE_SIZE - mod_bytes);
+    for (i = 0; i < l_mod_count; i++) {
+#if LIMB_BYTE_SIZE == 8
+        l_buf = be64(l_ret[i]);
+#else
+        l_buf = be32(l_ret[i]);
+#endif
+        if (i == 0) {
+            memcpy(tmp, ((char *)&l_buf) + LIMB_BYTE_SIZE - delta, delta);
+            tmp += delta;
+        } else {
+            memcpy(tmp, &l_buf, LIMB_BYTE_SIZE);
+            tmp += LIMB_BYTE_SIZE;
+        }
+    }
+    ret = num;
+
+ err:
+    /* the limb buffers hold the unblinded result and the blinding factor */
+    OPENSSL_cleanse(&l_buf, sizeof(l_buf));
+    OPENSSL_clear_free(l_im, l_size * LIMB_BYTE_SIZE);
+    OPENSSL_clear_free(l_mul, l_size * LIMB_BYTE_SIZE);
+    OPENSSL_free(l_mod);
+    OPENSSL_clear_free(l_tmp, l_tmp_count * LIMB_BYTE_SIZE);
+    OPENSSL_clear_free(l_ret, l_ret_count * LIMB_BYTE_SIZE);
+
+    return ret;
+}
diff --git a/crypto/err/openssl.txt b/crypto/err/openssl.txt
index 9f91a4a811..ba3a46d5b9 100644
--- a/crypto/err/openssl.txt
+++ b/crypto/err/openssl.txt
@@ -1,4 +1,4 @@
-# Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
 # this file except in compliance with the License.  You can obtain a copy
@@ -232,6 +232,7 @@ BN_F_BN_RSHIFT:146:BN_rshift
 BN_F_BN_SET_WORDS:144:bn_set_words
 BN_F_BN_STACK_PUSH:148:BN_STACK_push
 BN_F_BN_USUB:115:BN_usub
+BN_F_OSSL_BN_RSA_DO_UNBLIND:151:ossl_bn_rsa_do_unblind
 BUF_F_BUF_MEM_GROW:100:BUF_MEM_grow
 BUF_F_BUF_MEM_GROW_CLEAN:105:BUF_MEM_grow_clean
 BUF_F_BUF_MEM_NEW:101:BUF_MEM_new
diff --git a/crypto/rsa/rsa_ossl.c b/crypto/rsa/rsa_ossl.c
index b52a66f6a6..6c3c0cf78d 100644
--- a/crypto/rsa/rsa_ossl.c
+++ b/crypto/rsa/rsa_ossl.c
@@ -465,11 +465,20 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from,
         BN_free(d);
     }
 
-    if (blinding)
-        if (!rsa_blinding_invert(blinding, ret, unblind, ctx))
+    if (blinding) {
+        /*
+         * ossl_bn_rsa_do_unblind() combines blinding inversion and
+         * 0-padded BN BE serialization
+         */
+        j = ossl_bn_rsa_do_unblind(ret, blinding, unblind, rsa->n, ctx,
+                                   buf, num);
+        if (j == 0)
             goto err;
-
-    j = BN_bn2binpad(ret, buf, num);
+    } else {
+        j = BN_bn2binpad(ret, buf, num);
+        if (j < 0)
+            goto err;
+    }
 
     switch (padding) {
     case RSA_PKCS1_PADDING:
diff --git a/include/crypto/bn.h b/include/crypto/bn.h
index 60afda1dad..b5f36fb25a 100644
--- a/include/crypto/bn.h
+++ b/include/crypto/bn.h
@@ -86,5 +86,10 @@ int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n);
 int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n);
 int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
                      const BIGNUM *d, BN_CTX *ctx);
+int ossl_bn_rsa_do_unblind(const BIGNUM *intermediate,
+                           const BN_BLINDING *blinding,
+                           const BIGNUM *possible_arg2,
+                           const BIGNUM *to_mod, BN_CTX *ctx,
+                           unsigned char *buf, int num);
 
 #endif
diff --git a/include/openssl/bnerr.h b/include/openssl/bnerr.h
index 9f3c7cfaab..a0752cea52 100644
--- a/include/openssl/bnerr.h
+++ b/include/openssl/bnerr.h
@@ -72,6 +72,7 @@ int ERR_load_BN_strings(void);
 # define BN_F_BN_SET_WORDS                                144
 # define BN_F_BN_STACK_PUSH                               148
 # define BN_F_BN_USUB                                     115
+# define BN_F_OSSL_BN_RSA_DO_UNBLIND                      151
 
 /*
  * BN reason codes.