• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1diff --git a/crypto/bn/bn_blind.c b/crypto/bn/bn_blind.c
2index 76fc7ebcff..6e9d239321 100644
3--- a/crypto/bn/bn_blind.c
4+++ b/crypto/bn/bn_blind.c
5@@ -13,20 +13,6 @@
6
7 #define BN_BLINDING_COUNTER     32
8
9-struct bn_blinding_st {
10-    BIGNUM *A;
11-    BIGNUM *Ai;
12-    BIGNUM *e;
13-    BIGNUM *mod;                /* just a reference */
14-    CRYPTO_THREAD_ID tid;
15-    int counter;
16-    unsigned long flags;
17-    BN_MONT_CTX *m_ctx;
18-    int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
19-                       const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
20-    CRYPTO_RWLOCK *lock;
21-};
22-
23 BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
24 {
25     BN_BLINDING *ret = NULL;
26diff --git a/crypto/bn/bn_err.c b/crypto/bn/bn_err.c
27index dd87c152cf..3dd8d9a568 100644
28--- a/crypto/bn/bn_err.c
29+++ b/crypto/bn/bn_err.c
30@@ -73,6 +73,8 @@ static const ERR_STRING_DATA BN_str_functs[] = {
31     {ERR_PACK(ERR_LIB_BN, BN_F_BN_SET_WORDS, 0), "bn_set_words"},
32     {ERR_PACK(ERR_LIB_BN, BN_F_BN_STACK_PUSH, 0), "BN_STACK_push"},
33     {ERR_PACK(ERR_LIB_BN, BN_F_BN_USUB, 0), "BN_usub"},
34+    {ERR_PACK(ERR_LIB_BN, BN_F_OSSL_BN_RSA_DO_UNBLIND, 0),
35+    "ossl_bn_rsa_do_unblind"},
36     {0, NULL}
37 };
38
39diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h
40index 62a969b134..4d8cb64675 100644
41--- a/crypto/bn/bn_local.h
42+++ b/crypto/bn/bn_local.h
43@@ -283,6 +283,20 @@ struct bn_gencb_st {
44     } cb;
45 };
46
struct bn_blinding_st {
    BIGNUM *A;                  /* blinding value (presumably r; see BN_BLINDING docs — confirm) */
    BIGNUM *Ai;                 /* unblinding value (presumably the inverse of A — confirm) */
    BIGNUM *e;                  /* exponent used when refreshing the blinding pair */
    BIGNUM *mod;                /* just a reference */
    CRYPTO_THREAD_ID tid;       /* owning thread when the blinding is not shared */
    int counter;                /* use count; NOTE(review): likely tied to BN_BLINDING_COUNTER renewal — verify in bn_blind.c */
    unsigned long flags;        /* BN_BLINDING_* behaviour flags */
    BN_MONT_CTX *m_ctx;         /* optional Montgomery context for mod */
    /* modular exponentiation callback used when (re)computing blinding values */
    int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
                       const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
    CRYPTO_RWLOCK *lock;        /* guards concurrent use of a shared blinding */
};
60+
61 /*-
62  * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
63  *
64diff --git a/crypto/bn/build.info b/crypto/bn/build.info
65index b9ed5322fa..c9fe2fdada 100644
66--- a/crypto/bn/build.info
67+++ b/crypto/bn/build.info
68@@ -5,7 +5,8 @@ SOURCE[../../libcrypto]=\
69         bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c \
70         {- $target{bn_asm_src} -} \
71         bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
72-        bn_depr.c bn_const.c bn_x931p.c bn_intern.c bn_dh.c bn_srp.c
73+        bn_depr.c bn_const.c bn_x931p.c bn_intern.c bn_dh.c bn_srp.c \
74+        rsa_sup_mul.c
75
76 INCLUDE[bn_exp.o]=..
77
78diff --git a/crypto/bn/rsa_sup_mul.c b/crypto/bn/rsa_sup_mul.c
79new file mode 100644
80index 0000000000..acafefd5fe
81--- /dev/null
82+++ b/crypto/bn/rsa_sup_mul.c
83@@ -0,0 +1,614 @@
84+#include <openssl/e_os2.h>
85+#include <stddef.h>
86+#include <sys/types.h>
87+#include <string.h>
88+#include <openssl/bn.h>
89+#include <openssl/err.h>
90+#include <openssl/rsaerr.h>
91+#include "internal/numbers.h"
92+#include "internal/constant_time.h"
93+#include "bn_local.h"
94+
95+# if BN_BYTES == 8
96+typedef uint64_t limb_t;
97+#  if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16
98+/* nonstandard; implemented by gcc on 64-bit platforms */
99+typedef __uint128_t limb2_t;
100+#   define HAVE_LIMB2_T
101+#  endif
102+#  define LIMB_BIT_SIZE 64
103+#  define LIMB_BYTE_SIZE 8
104+# elif BN_BYTES == 4
105+typedef uint32_t limb_t;
106+typedef uint64_t limb2_t;
107+#  define LIMB_BIT_SIZE 32
108+#  define LIMB_BYTE_SIZE 4
109+#  define HAVE_LIMB2_T
110+# else
111+#  error "Not supported"
112+# endif
113+
114+/*
115+ * For multiplication we're using schoolbook multiplication,
116+ * so if we have two numbers, each with 6 "digits" (words)
117+ * the multiplication is calculated as follows:
118+ *                        A B C D E F
119+ *                     x  I J K L M N
120+ *                     --------------
121+ *                                N*F
122+ *                              N*E
123+ *                            N*D
124+ *                          N*C
125+ *                        N*B
126+ *                      N*A
127+ *                              M*F
128+ *                            M*E
129+ *                          M*D
130+ *                        M*C
131+ *                      M*B
132+ *                    M*A
133+ *                            L*F
134+ *                          L*E
135+ *                        L*D
136+ *                      L*C
137+ *                    L*B
138+ *                  L*A
139+ *                          K*F
140+ *                        K*E
141+ *                      K*D
142+ *                    K*C
143+ *                  K*B
144+ *                K*A
145+ *                        J*F
146+ *                      J*E
147+ *                    J*D
148+ *                  J*C
149+ *                J*B
150+ *              J*A
151+ *                      I*F
152+ *                    I*E
153+ *                  I*D
154+ *                I*C
155+ *              I*B
156+ *         +  I*A
157+ *         ==========================
158+ *                        N*B N*D N*F
159+ *                    + N*A N*C N*E
160+ *                    + M*B M*D M*F
161+ *                  + M*A M*C M*E
162+ *                  + L*B L*D L*F
163+ *                + L*A L*C L*E
164+ *                + K*B K*D K*F
165+ *              + K*A K*C K*E
166+ *              + J*B J*D J*F
167+ *            + J*A J*C J*E
168+ *            + I*B I*D I*F
169+ *          + I*A I*C I*E
170+ *
171+ *                1+1 1+3 1+5
172+ *              1+0 1+2 1+4
173+ *              0+1 0+3 0+5
174+ *            0+0 0+2 0+4
175+ *
176+ *            0 1 2 3 4 5 6
177+ * which requires n^2 multiplications and 2n full length additions
178+ * as we can keep every other result of limb multiplication in two separate
179+ * limbs
180+ */
181+
#if defined HAVE_LIMB2_T
/*
 * Full (double-width) multiplication: sets *hi:*lo = a * b.
 * Uses the double-width integer type so the compiler emits the
 * native widening multiply.
 */
static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
{
    limb2_t t;
    /*
     * this is idiomatic code to tell compiler to use the native mul
     * those three lines will actually compile to single instruction
     */

    t = (limb2_t)a * b;
    *hi = t >> LIMB_BIT_SIZE;
    *lo = (limb_t)t;
}
#elif (BN_BYTES == 8) && (defined _MSC_VER)
/* https://learn.microsoft.com/en-us/cpp/intrinsics/umul128?view=msvc-170 */
#pragma intrinsic(_umul128)
static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
{
    *lo = _umul128(a, b, hi);
}
#else
/*
 * if the compiler doesn't have either a 128bit data type nor a "return
 * high 64 bits of multiplication"
 */
static ossl_inline void _mul_limb(limb_t *hi, limb_t *lo, limb_t a, limb_t b)
{
    /* 64x64 -> 128 multiply built from four 32x32 -> 64 partial products */
    limb_t a_low = (limb_t)(uint32_t)a;
    limb_t a_hi = a >> 32;
    limb_t b_low = (limb_t)(uint32_t)b;
    limb_t b_hi = b >> 32;

    limb_t p0 = a_low * b_low;
    limb_t p1 = a_low * b_hi;
    limb_t p2 = a_hi * b_low;
    limb_t p3 = a_hi * b_hi;

    /* carry out of the middle (bits 32..95) column into the high limb */
    uint32_t cy = (uint32_t)(((p0 >> 32) + (uint32_t)p1 + (uint32_t)p2) >> 32);

    *lo = p0 + (p1 << 32) + (p2 << 32);
    *hi = p3 + (p1 >> 32) + (p2 >> 32) + cy;
}
#endif
225+
226+/* add two limbs with carry in, return carry out */
227+static ossl_inline limb_t _add_limb(limb_t *ret, limb_t a, limb_t b, limb_t carry)
228+{
229+    limb_t carry1, carry2, t;
230+    /*
231+     * `c = a + b; if (c < a)` is idiomatic code that makes compilers
232+     * use add with carry on assembly level
233+     */
234+
235+    *ret = a + carry;
236+    if (*ret < a)
237+        carry1 = 1;
238+    else
239+        carry1 = 0;
240+
241+    t = *ret;
242+    *ret = t + b;
243+    if (*ret < t)
244+        carry2 = 1;
245+    else
246+        carry2 = 0;
247+
248+    return carry1 + carry2;
249+}
250+
251+/*
252+ * add two numbers of the same size, return overflow
253+ *
254+ * add a to b, place result in ret; all arrays need to be n limbs long
255+ * return overflow from addition (0 or 1)
256+ */
257+static ossl_inline limb_t add(limb_t *ret, limb_t *a, limb_t *b, size_t n)
258+{
259+    limb_t c = 0;
260+    ossl_ssize_t i;
261+
262+    for(i = n - 1; i > -1; i--)
263+        c = _add_limb(&ret[i], a[i], b[i], c);
264+
265+    return c;
266+}
267+
268+/*
269+ * return number of limbs necessary for temporary values
270+ * when multiplying numbers n limbs large
271+ */
272+static ossl_inline size_t mul_limb_numb(size_t n)
273+{
274+    return  2 * n * 2;
275+}
276+
/*
 * multiply two numbers of the same size
 *
 * multiply a by b, place result in ret; a and b need to be n limbs long
 * ret needs to be 2*n limbs long, tmp needs to be mul_limb_numb(n) limbs
 * long
 *
 * Schoolbook multiplication (see the diagram above): for each limb of a,
 * all partial products against b are accumulated into ret.
 */
static void limb_mul(limb_t *ret, limb_t *a, limb_t *b, size_t n, limb_t *tmp)
{
    limb_t *r_odd, *r_even;
    size_t i, j, k;

    /* carve the scratch space into two 2*n-limb accumulators */
    r_odd = tmp;
    r_even = &tmp[2 * n];

    memset(ret, 0, 2 * n * sizeof(limb_t));

    for (i = 0; i < n; i++) {
        /* clear only the portion of the accumulators this row writes */
        for (k = 0; k < i + n + 1; k++) {
            r_even[k] = 0;
            r_odd[k] = 0;
        }
        for (j = 0; j < n; j++) {
            /*
             * place results from even and odd limbs in separate arrays so that
             * we don't have to calculate overflow every time we get individual
             * limb multiplication result
             */
            if (j % 2 == 0)
                _mul_limb(&r_even[i + j], &r_even[i + j + 1], a[i], b[j]);
            else
                _mul_limb(&r_odd[i + j], &r_odd[i + j + 1], a[i], b[j]);
        }
        /*
         * skip the least significant limbs when adding multiples of
         * more significant limbs (they're zero anyway)
         */
        add(ret, ret, r_even, n + i + 1);
        add(ret, ret, r_odd, n + i + 1);
    }
}
318+
319+/* modifies the value in place by performing a right shift by one bit */
320+static ossl_inline void rshift1(limb_t *val, size_t n)
321+{
322+    limb_t shift_in = 0, shift_out = 0;
323+    size_t i;
324+
325+    for (i = 0; i < n; i++) {
326+        shift_out = val[i] & 1;
327+        val[i] = shift_in << (LIMB_BIT_SIZE - 1) | (val[i] >> 1);
328+        shift_in = shift_out;
329+    }
330+}
331+
332+/* extend the LSB of flag to all bits of limb */
333+static ossl_inline limb_t mk_mask(limb_t flag)
334+{
335+    flag |= flag << 1;
336+    flag |= flag << 2;
337+    flag |= flag << 4;
338+    flag |= flag << 8;
339+    flag |= flag << 16;
340+#if (LIMB_BYTE_SIZE == 8)
341+    flag |= flag << 32;
342+#endif
343+    return flag;
344+}
345+
/*
 * copy from either a or b to ret based on flag
 * when flag == 0, then copies from b
 * when flag == 1, then copies from a
 * The selection is performed without data-dependent branches.
 */
static ossl_inline void cselect(limb_t flag, limb_t *ret, limb_t *a, limb_t *b, size_t n)
{
    /*
     * would be more efficient with non volatile mask, but then gcc
     * generates code with jumps
     */
    volatile limb_t mask;
    size_t i;

    mask = mk_mask(flag);
    for (i = 0; i < n; i++) {
#if (LIMB_BYTE_SIZE == 8)
        ret[i] = constant_time_select_64(mask, a[i], b[i]);
#else
        ret[i] = constant_time_select_32(mask, a[i], b[i]);
#endif
    }
}
369+
370+static limb_t _sub_limb(limb_t *ret, limb_t a, limb_t b, limb_t borrow)
371+{
372+    limb_t borrow1, borrow2, t;
373+    /*
374+     * while it doesn't look constant-time, this is idiomatic code
375+     * to tell compilers to use the carry bit from subtraction
376+     */
377+
378+    *ret = a - borrow;
379+    if (*ret > a)
380+        borrow1 = 1;
381+    else
382+        borrow1 = 0;
383+
384+    t = *ret;
385+    *ret = t - b;
386+    if (*ret > t)
387+        borrow2 = 1;
388+    else
389+        borrow2 = 0;
390+
391+    return borrow1 + borrow2;
392+}
393+
394+/*
395+ * place the result of a - b into ret, return the borrow bit.
396+ * All arrays need to be n limbs long
397+ */
398+static limb_t sub(limb_t *ret, limb_t *a, limb_t *b, size_t n)
399+{
400+    limb_t borrow = 0;
401+    ossl_ssize_t i;
402+
403+    for (i = n - 1; i > -1; i--)
404+        borrow = _sub_limb(&ret[i], a[i], b[i], borrow);
405+
406+    return borrow;
407+}
408+
409+/* return the number of limbs necessary to allocate for the mod() tmp operand */
410+static ossl_inline size_t mod_limb_numb(size_t anum, size_t modnum)
411+{
412+    return (anum + modnum) * 3;
413+}
414+
/*
 * calculate a % mod, place the result in ret
 * size of a is defined by anum, size of ret and mod is modnum,
 * size of tmp is returned by mod_limb_numb()
 *
 * Binary (shift-and-conditionally-subtract) reduction: the sequence of
 * operations and memory accesses is independent of the values involved.
 */
static void mod(limb_t *ret, limb_t *a, size_t anum, limb_t *mod,
               size_t modnum, limb_t *tmp)
{
    limb_t *atmp, *modtmp, *rettmp;
    limb_t res;
    size_t i;

    memset(tmp, 0, mod_limb_numb(anum, modnum) * LIMB_BYTE_SIZE);

    /* tmp is carved into three (anum + modnum)-limb working areas */
    atmp = tmp;
    modtmp = &tmp[anum + modnum];
    rettmp = &tmp[(anum + modnum) * 2];

    /* copy a into the least significant limbs (arrays are MSB-first) */
    for (i = modnum; i <modnum + anum; i++)
        atmp[i] = a[i-modnum];

    /* modtmp starts as mod shifted left by anum * LIMB_BIT_SIZE bits */
    for (i = 0; i < modnum; i++)
        modtmp[i] = mod[i];

    /* one conditional subtraction per bit of the extra shift range */
    for (i = 0; i < anum * LIMB_BIT_SIZE; i++) {
        rshift1(modtmp, anum + modnum);
        res = sub(rettmp, atmp, modtmp, anum+modnum);
        /* keep the old value when the subtraction borrowed (res == 1) */
        cselect(res, atmp, atmp, rettmp, anum+modnum);
    }

    /* the reduced value now sits in the modnum least significant limbs */
    memcpy(ret, &atmp[anum], sizeof(limb_t) * modnum);
}
447+
448+/* necessary size of tmp for a _mul_add_limb() call with provided anum */
449+static ossl_inline size_t _mul_add_limb_numb(size_t anum)
450+{
451+    return 2 * (anum + 1);
452+}
453+
/*
 * multiply a by m, add to ret, return carry
 * a is anum limbs long, ret at least anum limbs, tmp must hold
 * _mul_add_limb_numb(anum) limbs.
 */
static limb_t _mul_add_limb(limb_t *ret, limb_t *a, size_t anum,
                           limb_t m, limb_t *tmp)
{
    limb_t carry = 0;
    limb_t *r_odd, *r_even;
    size_t i;

    memset(tmp, 0, sizeof(limb_t) * (anum + 1) * 2);

    /* two (anum + 1)-limb accumulators for the partial products */
    r_odd = tmp;
    r_even = &tmp[anum + 1];

    for (i = 0; i < anum; i++) {
        /*
         * place the results from even and odd limbs in separate arrays
         * so that we have to worry about carry just once
         */
        if (i % 2 == 0)
            _mul_limb(&r_even[i], &r_even[i + 1], a[i], m);
        else
            _mul_limb(&r_odd[i], &r_odd[i + 1], a[i], m);
    }
    /* assert: add() carry here will be equal zero */
    add(r_even, r_even, r_odd, anum + 1);
    /*
     * while here it will not overflow as the max value from multiplication
     * is -2 while max overflow from addition is 1, so the max value of
     * carry is -1 (i.e. max int)
     */
    carry = add(ret, ret, &r_even[1], anum) + r_even[0];

    return carry;
}
488+
489+static ossl_inline size_t mod_montgomery_limb_numb(size_t modnum)
490+{
491+    return modnum * 2 + _mul_add_limb_numb(modnum);
492+}
493+
/*
 * calculate a % mod, place result in ret
 * assumes that a is in Montgomery form with the R (Montgomery modulus) being
 * smallest power of two big enough to fit mod and that's also a power
 * of the count of number of bits in limb_t (B).
 * For calculation, we also need n', such that mod * n' == -1 mod B.
 * anum must be <= 2 * modnum
 * ret needs to be modnum words long
 * tmp needs to be mod_montgomery_limb_numb(modnum) limbs long
 */
static void mod_montgomery(limb_t *ret, limb_t *a, size_t anum, limb_t *mod,
                          size_t modnum, limb_t ni0, limb_t *tmp)
{
    limb_t carry, v;
    limb_t *res, *rp, *tmp2;
    ossl_ssize_t i;

    res = tmp;
    /*
     * for intermediate result we need an integer twice as long as modulus
     * but keep the input in the least significant limbs
     */
    memset(res, 0, sizeof(limb_t) * (modnum * 2));
    memcpy(&res[modnum * 2 - anum], a, sizeof(limb_t) * anum);
    rp = &res[modnum];
    tmp2 = &res[modnum * 2];

    carry = 0;

    /* add multiples of the modulus to the value until R divides it cleanly */
    for (i = modnum; i > 0; i--, rp--) {
        /*
         * rp[modnum - 1] is the limb to cancel this round; multiplying mod
         * by (that limb * ni0) makes the sum divisible by B in that position
         */
        v = _mul_add_limb(rp, mod, modnum, rp[modnum - 1] * ni0, tmp2);
        /* fold the carry out of the window into the next limb up */
        v = v + carry + rp[-1];
        /* track overflow of that fold without a data-dependent branch */
        carry |= (v != rp[-1]);
        carry &= (v <= rp[-1]);
        rp[-1] = v;
    }

    /* perform the final reduction by mod... */
    carry -= sub(ret, rp, mod, modnum);

    /* ...conditionally */
    cselect(carry, ret, rp, ret, modnum);
}
538+
/*
 * Serialize a BIGNUM into an MSB-first limb array supplied by the caller.
 * The value is right-aligned into the least significant limbs of buf;
 * the caller owns buf (and must have zeroed it, e.g. via OPENSSL_zalloc).
 * NOTE(review): assumes limbs >= the number of limbs in bn, otherwise ptr
 * points before buf — verify that callers size buf accordingly.
 */
static void BN_to_limb(const BIGNUM *bn, limb_t *buf, size_t limbs)
{
    int i;
    int real_limbs = (BN_num_bytes(bn) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
    limb_t *ptr = buf + (limbs - real_limbs);

    /* bn->d holds the least significant word first; our arrays are MSB-first */
    for (i = 0; i < real_limbs; i++)
         ptr[i] = bn->d[real_limbs - i - 1];
}
549+
#if LIMB_BYTE_SIZE == 8
/*
 * Convert a 64-bit value from host byte order to big-endian.
 * Endianness is detected at run time via the union trick so this
 * stays portable to platforms without htobe64().
 */
static ossl_inline uint64_t be64(uint64_t host)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    if (is_endian.little) {
        uint64_t big = 0;

        big |= (host & 0xff00000000000000) >> 56;
        big |= (host & 0x00ff000000000000) >> 40;
        big |= (host & 0x0000ff0000000000) >> 24;
        big |= (host & 0x000000ff00000000) >>  8;
        big |= (host & 0x00000000ff000000) <<  8;
        big |= (host & 0x0000000000ff0000) << 24;
        big |= (host & 0x000000000000ff00) << 40;
        big |= (host & 0x00000000000000ff) << 56;
        return big;
    } else {
        /* already big-endian */
        return host;
    }
}

#else
/* Not all platforms have htobe32(). */
static ossl_inline uint32_t be32(uint32_t host)
{
    const union {
        long one;
        char little;
    } is_endian = { 1 };

    if (is_endian.little) {
        uint32_t big = 0;

        big |= (host & 0xff000000) >> 24;
        big |= (host & 0x00ff0000) >> 8;
        big |= (host & 0x0000ff00) << 8;
        big |= (host & 0x000000ff) << 24;
        return big;
    } else {
        /* already big-endian */
        return host;
    }
}
#endif
597+
598+/*
599+ * We assume that intermediate, possible_arg2, blinding, and ctx are used
600+ * similar to BN_BLINDING_invert_ex() arguments.
601+ * to_mod is RSA modulus.
602+ * buf and num is the serialization buffer and its length.
603+ *
604+ * Here we use classic/Montgomery multiplication and modulo. After the calculation finished
605+ * we serialize the new structure instead of BIGNUMs taking endianness into account.
606+ */
607+int ossl_bn_rsa_do_unblind(const BIGNUM *intermediate,
608+                           const BN_BLINDING *blinding,
609+                           const BIGNUM *possible_arg2,
610+                           const BIGNUM *to_mod, BN_CTX *ctx,
611+                           unsigned char *buf, int num)
612+{
613+    limb_t *l_im = NULL, *l_mul = NULL, *l_mod = NULL;
614+    limb_t *l_ret = NULL, *l_tmp = NULL, l_buf;
615+    size_t l_im_count = 0, l_mul_count = 0, l_size = 0, l_mod_count = 0;
616+    size_t l_tmp_count = 0;
617+    int ret = 0;
618+    size_t i;
619+    unsigned char *tmp;
620+    const BIGNUM *arg1 = intermediate;
621+    const BIGNUM *arg2 = (possible_arg2 == NULL) ? blinding->Ai : possible_arg2;
622+
623+    l_im_count  = (BN_num_bytes(arg1)   + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
624+    l_mul_count = (BN_num_bytes(arg2)   + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
625+    l_mod_count = (BN_num_bytes(to_mod) + LIMB_BYTE_SIZE - 1) / LIMB_BYTE_SIZE;
626+
627+    l_size = l_im_count > l_mul_count ? l_im_count : l_mul_count;
628+    l_im  = OPENSSL_zalloc(l_size * LIMB_BYTE_SIZE);
629+    l_mul = OPENSSL_zalloc(l_size * LIMB_BYTE_SIZE);
630+    l_mod = OPENSSL_zalloc(l_mod_count * LIMB_BYTE_SIZE);
631+
632+    if ((l_im == NULL) || (l_mul == NULL) || (l_mod == NULL))
633+        goto err;
634+
635+    BN_to_limb(arg1,   l_im,  l_size);
636+    BN_to_limb(arg2,   l_mul, l_size);
637+    BN_to_limb(to_mod, l_mod, l_mod_count);
638+
639+    l_ret = OPENSSL_malloc(2 * l_size * LIMB_BYTE_SIZE);
640+
641+    if (blinding->m_ctx != NULL) {
642+        l_tmp_count = mul_limb_numb(l_size) > mod_montgomery_limb_numb(l_mod_count) ?
643+                      mul_limb_numb(l_size) : mod_montgomery_limb_numb(l_mod_count);
644+        l_tmp = OPENSSL_malloc(l_tmp_count * LIMB_BYTE_SIZE);
645+    } else {
646+        l_tmp_count = mul_limb_numb(l_size) > mod_limb_numb(2 * l_size, l_mod_count) ?
647+                      mul_limb_numb(l_size) : mod_limb_numb(2 * l_size, l_mod_count);
648+        l_tmp = OPENSSL_malloc(l_tmp_count * LIMB_BYTE_SIZE);
649+    }
650+
651+    if ((l_ret == NULL) || (l_tmp == NULL))
652+        goto err;
653+
654+    if (blinding->m_ctx != NULL) {
655+        limb_mul(l_ret, l_im, l_mul, l_size, l_tmp);
656+        mod_montgomery(l_ret, l_ret, 2 * l_size, l_mod, l_mod_count,
657+                       blinding->m_ctx->n0[0], l_tmp);
658+    } else {
659+        limb_mul(l_ret, l_im, l_mul, l_size, l_tmp);
660+        mod(l_ret, l_ret, 2 * l_size, l_mod, l_mod_count, l_tmp);
661+    }
662+
663+    /* modulus size in bytes can be equal to num but after limbs conversion it becomes bigger */
664+    if (num < BN_num_bytes(to_mod)) {
665+        BNerr(BN_F_OSSL_BN_RSA_DO_UNBLIND, ERR_R_PASSED_INVALID_ARGUMENT);
666+        goto err;
667+    }
668+
669+    memset(buf, 0, num);
670+    tmp = buf + num - BN_num_bytes(to_mod);
671+    for (i = 0; i < l_mod_count; i++) {
672+#if LIMB_BYTE_SIZE == 8
673+        l_buf = be64(l_ret[i]);
674+#else
675+        l_buf = be32(l_ret[i]);
676+#endif
677+        if (i == 0) {
678+            int delta = LIMB_BYTE_SIZE - ((l_mod_count * LIMB_BYTE_SIZE) - num);
679+
680+            memcpy(tmp, ((char *)&l_buf) + LIMB_BYTE_SIZE - delta, delta);
681+            tmp += delta;
682+        } else {
683+            memcpy(tmp, &l_buf, LIMB_BYTE_SIZE);
684+            tmp += LIMB_BYTE_SIZE;
685+        }
686+    }
687+    ret = num;
688+
689+ err:
690+    OPENSSL_free(l_im);
691+    OPENSSL_free(l_mul);
692+    OPENSSL_free(l_mod);
693+    OPENSSL_free(l_tmp);
694+    OPENSSL_free(l_ret);
695+
696+    return ret;
697+}
698diff --git a/crypto/err/openssl.txt b/crypto/err/openssl.txt
699index 9f91a4a811..ba3a46d5b9 100644
700--- a/crypto/err/openssl.txt
701+++ b/crypto/err/openssl.txt
702@@ -1,4 +1,4 @@
703-# Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
704+# Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved.
705 #
706 # Licensed under the OpenSSL license (the "License").  You may not use
707 # this file except in compliance with the License.  You can obtain a copy
708@@ -232,6 +232,7 @@ BN_F_BN_RSHIFT:146:BN_rshift
709 BN_F_BN_SET_WORDS:144:bn_set_words
710 BN_F_BN_STACK_PUSH:148:BN_STACK_push
711 BN_F_BN_USUB:115:BN_usub
712+BN_F_OSSL_BN_RSA_DO_UNBLIND:151:ossl_bn_rsa_do_unblind
713 BUF_F_BUF_MEM_GROW:100:BUF_MEM_grow
714 BUF_F_BUF_MEM_GROW_CLEAN:105:BUF_MEM_grow_clean
715 BUF_F_BUF_MEM_NEW:101:BUF_MEM_new
716diff --git a/crypto/rsa/rsa_ossl.c b/crypto/rsa/rsa_ossl.c
717index b52a66f6a6..6c3c0cf78d 100644
718--- a/crypto/rsa/rsa_ossl.c
719+++ b/crypto/rsa/rsa_ossl.c
720@@ -465,11 +465,20 @@ static int rsa_ossl_private_decrypt(int flen, const unsigned char *from,
721         BN_free(d);
722     }
723
724-    if (blinding)
725-        if (!rsa_blinding_invert(blinding, ret, unblind, ctx))
726+    if (blinding) {
727+        /*
728+         * ossl_bn_rsa_do_unblind() combines blinding inversion and
729+         * 0-padded BN BE serialization
730+         */
731+        j = ossl_bn_rsa_do_unblind(ret, blinding, unblind, rsa->n, ctx,
732+                                   buf, num);
733+        if (j == 0)
734             goto err;
735-
736-    j = BN_bn2binpad(ret, buf, num);
737+    } else {
738+        j = BN_bn2binpad(ret, buf, num);
739+        if (j < 0)
740+            goto err;
741+    }
742
743     switch (padding) {
744     case RSA_PKCS1_PADDING:
745diff --git a/include/crypto/bn.h b/include/crypto/bn.h
746index 60afda1dad..b5f36fb25a 100644
747--- a/include/crypto/bn.h
748+++ b/include/crypto/bn.h
749@@ -86,5 +86,10 @@ int bn_lshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n);
750 int bn_rshift_fixed_top(BIGNUM *r, const BIGNUM *a, int n);
751 int bn_div_fixed_top(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
752                      const BIGNUM *d, BN_CTX *ctx);
753+int ossl_bn_rsa_do_unblind(const BIGNUM *intermediate,
754+                           const BN_BLINDING *blinding,
755+                           const BIGNUM *possible_arg2,
756+                           const BIGNUM *to_mod, BN_CTX *ctx,
757+                           unsigned char *buf, int num);
758
759 #endif
760diff --git a/include/openssl/bnerr.h b/include/openssl/bnerr.h
761index 9f3c7cfaab..a0752cea52 100644
762--- a/include/openssl/bnerr.h
763+++ b/include/openssl/bnerr.h
764@@ -72,6 +72,7 @@ int ERR_load_BN_strings(void);
765 # define BN_F_BN_SET_WORDS                                144
766 # define BN_F_BN_STACK_PUSH                               148
767 # define BN_F_BN_USUB                                     115
768+# define BN_F_OSSL_BN_RSA_DO_UNBLIND                      151
769
770 /*
771  * BN reason codes.