• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Math module -- standard C math library functions, pi and e */
2 
3 /* Here are some comments from Tim Peters, extracted from the
4    discussion attached to http://bugs.python.org/issue1640.  They
5    describe the general aims of the math module with respect to
6    special values, IEEE-754 floating-point exceptions, and Python
7    exceptions.
8 
9 These are the "spirit of 754" rules:
10 
11 1. If the mathematical result is a real number, but of magnitude too
12 large to approximate by a machine float, overflow is signaled and the
13 result is an infinity (with the appropriate sign).
14 
15 2. If the mathematical result is a real number, but of magnitude too
16 small to approximate by a machine float, underflow is signaled and the
17 result is a zero (with the appropriate sign).
18 
19 3. At a singularity (a value x such that the limit of f(y) as y
20 approaches x exists and is an infinity), "divide by zero" is signaled
21 and the result is an infinity (with the appropriate sign).  This is
22 complicated a little by that the left-side and right-side limits may
23 not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
24 from the positive or negative directions.  In that specific case, the
25 sign of the zero determines the result of 1/0.
26 
27 4. At a point where a function has no defined result in the extended
28 reals (i.e., the reals plus an infinity or two), invalid operation is
29 signaled and a NaN is returned.
30 
31 And these are what Python has historically /tried/ to do (but not
32 always successfully, as platform libm behavior varies a lot):
33 
34 For #1, raise OverflowError.
35 
36 For #2, return a zero (with the appropriate sign if that happens by
37 accident ;-)).
38 
39 For #3 and #4, raise ValueError.  It may have made sense to raise
40 Python's ZeroDivisionError in #3, but historically that's only been
41 raised for division by zero and mod by zero.
42 
43 */
44 
45 /*
46    In general, on an IEEE-754 platform the aim is to follow the C99
47    standard, including Annex 'F', whenever possible.  Where the
48    standard recommends raising the 'divide-by-zero' or 'invalid'
49    floating-point exceptions, Python should raise a ValueError.  Where
50    the standard recommends raising 'overflow', Python should raise an
51    OverflowError.  In all other circumstances a value should be
52    returned.
53  */
54 
55 #ifndef Py_BUILD_CORE_BUILTIN
56 #  define Py_BUILD_CORE_MODULE 1
57 #endif
58 
59 #include "Python.h"
60 #include "pycore_abstract.h"      // _PyNumber_Index()
61 #include "pycore_bitutils.h"      // _Py_bit_length()
62 #include "pycore_call.h"          // _PyObject_CallNoArgs()
63 #include "pycore_long.h"          // _PyLong_GetZero()
64 #include "pycore_moduleobject.h"  // _PyModule_GetState()
65 #include "pycore_object.h"        // _PyObject_LookupSpecial()
66 #include "pycore_pymath.h"        // _PY_SHORT_FLOAT_REPR
67 /* For DBL_EPSILON in _math.h */
68 #include <float.h>
69 /* For _Py_log1p with workarounds for buggy handling of zeros. */
70 #include "_math.h"
71 #include <stdbool.h>
72 
73 #include "clinic/mathmodule.c.h"
74 
75 /*[clinic input]
76 module math
77 [clinic start generated code]*/
78 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
79 
80 
81 typedef struct {
82     PyObject *str___ceil__;
83     PyObject *str___floor__;
84     PyObject *str___trunc__;
85 } math_module_state;
86 
87 static inline math_module_state*
get_math_module_state(PyObject * module)88 get_math_module_state(PyObject *module)
89 {
90     void *state = _PyModule_GetState(module);
91     assert(state != NULL);
92     return (math_module_state *)state;
93 }
94 
95 /*
96 Double and triple length extended precision algorithms from:
97 
98   Accurate Sum and Dot Product
99   by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
100   https://doi.org/10.1137/030601818
101   https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
102 
103 */
104 
105 typedef struct{ double hi; double lo; } DoubleLength;
106 
107 static DoubleLength
dl_fast_sum(double a,double b)108 dl_fast_sum(double a, double b)
109 {
110     /* Algorithm 1.1. Compensated summation of two floating-point numbers. */
111     assert(fabs(a) >= fabs(b));
112     double x = a + b;
113     double y = (a - x) + b;
114     return (DoubleLength) {x, y};
115 }
116 
117 static DoubleLength
dl_sum(double a,double b)118 dl_sum(double a, double b)
119 {
120     /* Algorithm 3.1 Error-free transformation of the sum */
121     double x = a + b;
122     double z = x - a;
123     double y = (a - (x - z)) + (b - z);
124     return (DoubleLength) {x, y};
125 }
126 
127 #ifndef UNRELIABLE_FMA
128 
129 static DoubleLength
dl_mul(double x,double y)130 dl_mul(double x, double y)
131 {
132     /* Algorithm 3.5. Error-free transformation of a product */
133     double z = x * y;
134     double zz = fma(x, y, -z);
135     return (DoubleLength) {z, zz};
136 }
137 
138 #else
139 
140 /*
141    The default implementation of dl_mul() depends on the C math library
142    having an accurate fma() function as required by § 7.12.13.1 of the
143    C99 standard.
144 
145    The UNRELIABLE_FMA option is provided as a slower but accurate
146    alternative for builds where the fma() function is found wanting.
147    The speed penalty may be modest (17% slower on an Apple M1 Max),
148    so don't hesitate to enable this build option.
149 
150    The algorithms are from the T. J. Dekker paper:
151    A Floating-Point Technique for Extending the Available Precision
152    https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
153 */
154 
155 static DoubleLength
dl_split(double x)156 dl_split(double x) {
157     // Dekker (5.5) and (5.6).
158     double t = x * 134217729.0;  // Veltkamp constant = 2.0 ** 27 + 1
159     double hi = t - (t - x);
160     double lo = x - hi;
161     return (DoubleLength) {hi, lo};
162 }
163 
164 static DoubleLength
dl_mul(double x,double y)165 dl_mul(double x, double y)
166 {
167     // Dekker (5.12) and mul12()
168     DoubleLength xx = dl_split(x);
169     DoubleLength yy = dl_split(y);
170     double p = xx.hi * yy.hi;
171     double q = xx.hi * yy.lo + xx.lo * yy.hi;
172     double z = p + q;
173     double zz = p - z + q + xx.lo * yy.lo;
174     return (DoubleLength) {z, zz};
175 }
176 
177 #endif
178 
179 typedef struct { double hi; double lo; double tiny; } TripleLength;
180 
181 static const TripleLength tl_zero = {0.0, 0.0, 0.0};
182 
183 static TripleLength
tl_fma(double x,double y,TripleLength total)184 tl_fma(double x, double y, TripleLength total)
185 {
186     /* Algorithm 5.10 with SumKVert for K=3 */
187     DoubleLength pr = dl_mul(x, y);
188     DoubleLength sm = dl_sum(total.hi, pr.hi);
189     DoubleLength r1 = dl_sum(total.lo, pr.lo);
190     DoubleLength r2 = dl_sum(r1.hi, sm.lo);
191     return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
192 }
193 
194 static double
tl_to_d(TripleLength total)195 tl_to_d(TripleLength total)
196 {
197     DoubleLength last = dl_sum(total.lo, total.hi);
198     return total.tiny + last.lo + last.hi;
199 }
200 
201 
202 /*
203    sin(pi*x), giving accurate results for all finite x (especially x
204    integral or close to an integer).  This is here for use in the
205    reflection formula for the gamma function.  It conforms to IEEE
206    754-2008 for finite arguments, but not for infinities or nans.
207 */
208 
209 static const double pi = 3.141592653589793238462643383279502884197;
210 static const double logpi = 1.144729885849400174143427351353058711647;
211 
212 /* Version of PyFloat_AsDouble() with in-line fast paths
213    for exact floats and integers.  Gives a substantial
214    speed improvement for extracting float arguments.
215 */
216 
217 #define ASSIGN_DOUBLE(target_var, obj, error_label)        \
218     if (PyFloat_CheckExact(obj)) {                         \
219         target_var = PyFloat_AS_DOUBLE(obj);               \
220     }                                                      \
221     else if (PyLong_CheckExact(obj)) {                     \
222         target_var = PyLong_AsDouble(obj);                 \
223         if (target_var == -1.0 && PyErr_Occurred()) {      \
224             goto error_label;                              \
225         }                                                  \
226     }                                                      \
227     else {                                                 \
228         target_var = PyFloat_AsDouble(obj);                \
229         if (target_var == -1.0 && PyErr_Occurred()) {      \
230             goto error_label;                              \
231         }                                                  \
232     }
233 
234 static double
m_sinpi(double x)235 m_sinpi(double x)
236 {
237     double y, r;
238     int n;
239     /* this function should only ever be called for finite arguments */
240     assert(Py_IS_FINITE(x));
241     y = fmod(fabs(x), 2.0);
242     n = (int)round(2.0*y);
243     assert(0 <= n && n <= 4);
244     switch (n) {
245     case 0:
246         r = sin(pi*y);
247         break;
248     case 1:
249         r = cos(pi*(y-0.5));
250         break;
251     case 2:
252         /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
253            -0.0 instead of 0.0 when y == 1.0. */
254         r = sin(pi*(1.0-y));
255         break;
256     case 3:
257         r = -cos(pi*(y-1.5));
258         break;
259     case 4:
260         r = sin(pi*(y-2.0));
261         break;
262     default:
263         Py_UNREACHABLE();
264     }
265     return copysign(1.0, x)*r;
266 }
267 
268 /* Implementation of the real gamma function.  Kept here to work around
269    issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations
270    on various platforms (Windows, MacOS).  In extensive but non-exhaustive
271    random tests, this function proved accurate to within <= 10 ulps across the
272    entire float domain.  Note that accuracy may depend on the quality of the
273    system math functions, the pow function in particular.  Special cases
274    follow C99 annex F.  The parameters and method are tailored to platforms
275    whose double format is the IEEE 754 binary64 format.
276 
277    Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
278    and g=6.024680040776729583740234375; these parameters are amongst those
279    used by the Boost library.  Following Boost (again), we re-express the
280    Lanczos sum as a rational function, and compute it that way.  The
281    coefficients below were computed independently using MPFR, and have been
282    double-checked against the coefficients in the Boost source code.
283 
284    For x < 0.0 we use the reflection formula.
285 
286    There's one minor tweak that deserves explanation: Lanczos' formula for
287    Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5).  For many x
288    values, x+g-0.5 can be represented exactly.  However, in cases where it
289    can't be represented exactly the small error in x+g-0.5 can be magnified
290    significantly by the pow and exp calls, especially for large x.  A cheap
291    correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
292    involved in the computation of x+g-0.5 (that is, e = computed value of
293    x+g-0.5 - exact value of x+g-0.5).  Here's the proof:
294 
295    Correction factor
296    -----------------
297    Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
298    double, and e is tiny.  Then:
299 
300      pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
301      = pow(y, x-0.5)/exp(y) * C,
302 
303    where the correction_factor C is given by
304 
305      C = pow(1-e/y, x-0.5) * exp(e)
306 
307    Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
308 
309      C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
310 
311    But y-(x-0.5) = g+e, and g+e ~ g.  So we get C ~ 1 + e*g/y, and
312 
313      pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
314 
315    Note that for accuracy, when computing r*C it's better to do
316 
317      r + e*g/y*r;
318 
319    than
320 
321      r * (1 + e*g/y);
322 
323    since the addition in the latter throws away most of the bits of
324    information in e*g/y.
325 */
326 
327 #define LANCZOS_N 13
328 static const double lanczos_g = 6.024680040776729583740234375;
329 static const double lanczos_g_minus_half = 5.524680040776729583740234375;
330 static const double lanczos_num_coeffs[LANCZOS_N] = {
331     23531376880.410759688572007674451636754734846804940,
332     42919803642.649098768957899047001988850926355848959,
333     35711959237.355668049440185451547166705960488635843,
334     17921034426.037209699919755754458931112671403265390,
335     6039542586.3520280050642916443072979210699388420708,
336     1439720407.3117216736632230727949123939715485786772,
337     248874557.86205415651146038641322942321632125127801,
338     31426415.585400194380614231628318205362874684987640,
339     2876370.6289353724412254090516208496135991145378768,
340     186056.26539522349504029498971604569928220784236328,
341     8071.6720023658162106380029022722506138218516325024,
342     210.82427775157934587250973392071336271166969580291,
343     2.5066282746310002701649081771338373386264310793408
344 };
345 
346 /* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
347 static const double lanczos_den_coeffs[LANCZOS_N] = {
348     0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
349     13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
350 
351 /* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
352 #define NGAMMA_INTEGRAL 23
353 static const double gamma_integral[NGAMMA_INTEGRAL] = {
354     1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
355     3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
356     1307674368000.0, 20922789888000.0, 355687428096000.0,
357     6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
358     51090942171709440000.0, 1124000727777607680000.0,
359 };
360 
361 /* Lanczos' sum L_g(x), for positive x */
362 
363 static double
lanczos_sum(double x)364 lanczos_sum(double x)
365 {
366     double num = 0.0, den = 0.0;
367     int i;
368     assert(x > 0.0);
369     /* evaluate the rational function lanczos_sum(x).  For large
370        x, the obvious algorithm risks overflow, so we instead
371        rescale the denominator and numerator of the rational
372        function by x**(1-LANCZOS_N) and treat this as a
373        rational function in 1/x.  This also reduces the error for
374        larger x values.  The choice of cutoff point (5.0 below) is
375        somewhat arbitrary; in tests, smaller cutoff values than
376        this resulted in lower accuracy. */
377     if (x < 5.0) {
378         for (i = LANCZOS_N; --i >= 0; ) {
379             num = num * x + lanczos_num_coeffs[i];
380             den = den * x + lanczos_den_coeffs[i];
381         }
382     }
383     else {
384         for (i = 0; i < LANCZOS_N; i++) {
385             num = num / x + lanczos_num_coeffs[i];
386             den = den / x + lanczos_den_coeffs[i];
387         }
388     }
389     return num/den;
390 }
391 
392 
393 static double
m_tgamma(double x)394 m_tgamma(double x)
395 {
396     double absx, r, y, z, sqrtpow;
397 
398     /* special cases */
399     if (!Py_IS_FINITE(x)) {
400         if (Py_IS_NAN(x) || x > 0.0)
401             return x;  /* tgamma(nan) = nan, tgamma(inf) = inf */
402         else {
403             errno = EDOM;
404             return Py_NAN;  /* tgamma(-inf) = nan, invalid */
405         }
406     }
407     if (x == 0.0) {
408         errno = EDOM;
409         /* tgamma(+-0.0) = +-inf, divide-by-zero */
410         return copysign(Py_INFINITY, x);
411     }
412 
413     /* integer arguments */
414     if (x == floor(x)) {
415         if (x < 0.0) {
416             errno = EDOM;  /* tgamma(n) = nan, invalid for */
417             return Py_NAN; /* negative integers n */
418         }
419         if (x <= NGAMMA_INTEGRAL)
420             return gamma_integral[(int)x - 1];
421     }
422     absx = fabs(x);
423 
424     /* tiny arguments:  tgamma(x) ~ 1/x for x near 0 */
425     if (absx < 1e-20) {
426         r = 1.0/x;
427         if (Py_IS_INFINITY(r))
428             errno = ERANGE;
429         return r;
430     }
431 
432     /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
433        x > 200, and underflows to +-0.0 for x < -200, not a negative
434        integer. */
435     if (absx > 200.0) {
436         if (x < 0.0) {
437             return 0.0/m_sinpi(x);
438         }
439         else {
440             errno = ERANGE;
441             return Py_HUGE_VAL;
442         }
443     }
444 
445     y = absx + lanczos_g_minus_half;
446     /* compute error in sum */
447     if (absx > lanczos_g_minus_half) {
448         /* note: the correction can be foiled by an optimizing
449            compiler that (incorrectly) thinks that an expression like
450            a + b - a - b can be optimized to 0.0.  This shouldn't
451            happen in a standards-conforming compiler. */
452         double q = y - absx;
453         z = q - lanczos_g_minus_half;
454     }
455     else {
456         double q = y - lanczos_g_minus_half;
457         z = q - absx;
458     }
459     z = z * lanczos_g / y;
460     if (x < 0.0) {
461         r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
462         r -= z * r;
463         if (absx < 140.0) {
464             r /= pow(y, absx - 0.5);
465         }
466         else {
467             sqrtpow = pow(y, absx / 2.0 - 0.25);
468             r /= sqrtpow;
469             r /= sqrtpow;
470         }
471     }
472     else {
473         r = lanczos_sum(absx) / exp(y);
474         r += z * r;
475         if (absx < 140.0) {
476             r *= pow(y, absx - 0.5);
477         }
478         else {
479             sqrtpow = pow(y, absx / 2.0 - 0.25);
480             r *= sqrtpow;
481             r *= sqrtpow;
482         }
483     }
484     if (Py_IS_INFINITY(r))
485         errno = ERANGE;
486     return r;
487 }
488 
489 /*
490    lgamma:  natural log of the absolute value of the Gamma function.
491    For large arguments, Lanczos' formula works extremely well here.
492 */
493 
494 static double
m_lgamma(double x)495 m_lgamma(double x)
496 {
497     double r;
498     double absx;
499 
500     /* special cases */
501     if (!Py_IS_FINITE(x)) {
502         if (Py_IS_NAN(x))
503             return x;  /* lgamma(nan) = nan */
504         else
505             return Py_HUGE_VAL; /* lgamma(+-inf) = +inf */
506     }
507 
508     /* integer arguments */
509     if (x == floor(x) && x <= 2.0) {
510         if (x <= 0.0) {
511             errno = EDOM;  /* lgamma(n) = inf, divide-by-zero for */
512             return Py_HUGE_VAL; /* integers n <= 0 */
513         }
514         else {
515             return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
516         }
517     }
518 
519     absx = fabs(x);
520     /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
521     if (absx < 1e-20)
522         return -log(absx);
523 
524     /* Lanczos' formula.  We could save a fraction of a ulp in accuracy by
525        having a second set of numerator coefficients for lanczos_sum that
526        absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
527        subtraction below; it's probably not worth it. */
528     r = log(lanczos_sum(absx)) - lanczos_g;
529     r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
530     if (x < 0.0)
531         /* Use reflection formula to get value for negative x. */
532         r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
533     if (Py_IS_INFINITY(r))
534         errno = ERANGE;
535     return r;
536 }
537 
538 /*
539    wrapper for atan2 that deals directly with special cases before
540    delegating to the platform libm for the remaining cases.  This
541    is necessary to get consistent behaviour across platforms.
542    Windows, FreeBSD and alpha Tru64 are amongst platforms that don't
543    always follow C99.
544 */
545 
546 static double
m_atan2(double y,double x)547 m_atan2(double y, double x)
548 {
549     if (Py_IS_NAN(x) || Py_IS_NAN(y))
550         return Py_NAN;
551     if (Py_IS_INFINITY(y)) {
552         if (Py_IS_INFINITY(x)) {
553             if (copysign(1., x) == 1.)
554                 /* atan2(+-inf, +inf) == +-pi/4 */
555                 return copysign(0.25*Py_MATH_PI, y);
556             else
557                 /* atan2(+-inf, -inf) == +-pi*3/4 */
558                 return copysign(0.75*Py_MATH_PI, y);
559         }
560         /* atan2(+-inf, x) == +-pi/2 for finite x */
561         return copysign(0.5*Py_MATH_PI, y);
562     }
563     if (Py_IS_INFINITY(x) || y == 0.) {
564         if (copysign(1., x) == 1.)
565             /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */
566             return copysign(0., y);
567         else
568             /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */
569             return copysign(Py_MATH_PI, y);
570     }
571     return atan2(y, x);
572 }
573 
574 
575 /* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
576    multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
577    binary floating-point format, the result is always exact. */
578 
579 static double
m_remainder(double x,double y)580 m_remainder(double x, double y)
581 {
582     /* Deal with most common case first. */
583     if (Py_IS_FINITE(x) && Py_IS_FINITE(y)) {
584         double absx, absy, c, m, r;
585 
586         if (y == 0.0) {
587             return Py_NAN;
588         }
589 
590         absx = fabs(x);
591         absy = fabs(y);
592         m = fmod(absx, absy);
593 
594         /*
595            Warning: some subtlety here. What we *want* to know at this point is
596            whether the remainder m is less than, equal to, or greater than half
597            of absy. However, we can't do that comparison directly because we
598            can't be sure that 0.5*absy is representable (the multiplication
599            might incur precision loss due to underflow). So instead we compare
600            m with the complement c = absy - m: m < 0.5*absy if and only if m <
601            c, and so on. The catch is that absy - m might also not be
602            representable, but it turns out that it doesn't matter:
603 
604            - if m > 0.5*absy then absy - m is exactly representable, by
605              Sterbenz's lemma, so m > c
606            - if m == 0.5*absy then again absy - m is exactly representable
607              and m == c
608            - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
609              in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
610              c, or (ii) absy is tiny, either subnormal or in the lowest normal
611              binade. Then absy - m is exactly representable and again m < c.
612         */
613 
614         c = absy - m;
615         if (m < c) {
616             r = m;
617         }
618         else if (m > c) {
619             r = -c;
620         }
621         else {
622             /*
623                Here absx is exactly halfway between two multiples of absy,
624                and we need to choose the even multiple. x now has the form
625 
626                    absx = n * absy + m
627 
628                for some integer n (recalling that m = 0.5*absy at this point).
629                If n is even we want to return m; if n is odd, we need to
630                return -m.
631 
632                So
633 
634                    0.5 * (absx - m) = (n/2) * absy
635 
636                and now reducing modulo absy gives us:
637 
638                                                   | m, if n is odd
639                    fmod(0.5 * (absx - m), absy) = |
640                                                   | 0, if n is even
641 
642                Now m - 2.0 * fmod(...) gives the desired result: m
643                if n is even, -m if m is odd.
644 
645                Note that all steps in fmod(0.5 * (absx - m), absy)
646                will be computed exactly, with no rounding error
647                introduced.
648             */
649             assert(m == c);
650             r = m - 2.0 * fmod(0.5 * (absx - m), absy);
651         }
652         return copysign(1.0, x) * r;
653     }
654 
655     /* Special values. */
656     if (Py_IS_NAN(x)) {
657         return x;
658     }
659     if (Py_IS_NAN(y)) {
660         return y;
661     }
662     if (Py_IS_INFINITY(x)) {
663         return Py_NAN;
664     }
665     assert(Py_IS_INFINITY(y));
666     return x;
667 }
668 
669 
670 /*
671     Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
672     log(-ve), log(NaN).  Here are wrappers for log and log10 that deal with
673     special values directly, passing positive non-special values through to
674     the system log/log10.
675  */
676 
677 static double
m_log(double x)678 m_log(double x)
679 {
680     if (Py_IS_FINITE(x)) {
681         if (x > 0.0)
682             return log(x);
683         errno = EDOM;
684         if (x == 0.0)
685             return -Py_HUGE_VAL; /* log(0) = -inf */
686         else
687             return Py_NAN; /* log(-ve) = nan */
688     }
689     else if (Py_IS_NAN(x))
690         return x; /* log(nan) = nan */
691     else if (x > 0.0)
692         return x; /* log(inf) = inf */
693     else {
694         errno = EDOM;
695         return Py_NAN; /* log(-inf) = nan */
696     }
697 }
698 
699 /*
700    log2: log to base 2.
701 
702    Uses an algorithm that should:
703 
704      (a) produce exact results for powers of 2, and
705      (b) give a monotonic log2 (for positive finite floats),
706          assuming that the system log is monotonic.
707 */
708 
709 static double
m_log2(double x)710 m_log2(double x)
711 {
712     if (!Py_IS_FINITE(x)) {
713         if (Py_IS_NAN(x))
714             return x; /* log2(nan) = nan */
715         else if (x > 0.0)
716             return x; /* log2(+inf) = +inf */
717         else {
718             errno = EDOM;
719             return Py_NAN; /* log2(-inf) = nan, invalid-operation */
720         }
721     }
722 
723     if (x > 0.0) {
724         return log2(x);
725     }
726     else if (x == 0.0) {
727         errno = EDOM;
728         return -Py_HUGE_VAL; /* log2(0) = -inf, divide-by-zero */
729     }
730     else {
731         errno = EDOM;
732         return Py_NAN; /* log2(-inf) = nan, invalid-operation */
733     }
734 }
735 
736 static double
m_log10(double x)737 m_log10(double x)
738 {
739     if (Py_IS_FINITE(x)) {
740         if (x > 0.0)
741             return log10(x);
742         errno = EDOM;
743         if (x == 0.0)
744             return -Py_HUGE_VAL; /* log10(0) = -inf */
745         else
746             return Py_NAN; /* log10(-ve) = nan */
747     }
748     else if (Py_IS_NAN(x))
749         return x; /* log10(nan) = nan */
750     else if (x > 0.0)
751         return x; /* log10(inf) = inf */
752     else {
753         errno = EDOM;
754         return Py_NAN; /* log10(-inf) = nan */
755     }
756 }
757 
758 
759 static PyObject *
math_gcd(PyObject * module,PyObject * const * args,Py_ssize_t nargs)760 math_gcd(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
761 {
762     // Fast-path for the common case: gcd(int, int)
763     if (nargs == 2 && PyLong_CheckExact(args[0]) && PyLong_CheckExact(args[1]))
764     {
765         return _PyLong_GCD(args[0], args[1]);
766     }
767 
768     if (nargs == 0) {
769         return PyLong_FromLong(0);
770     }
771 
772     PyObject *res = PyNumber_Index(args[0]);
773     if (res == NULL) {
774         return NULL;
775     }
776     if (nargs == 1) {
777         Py_SETREF(res, PyNumber_Absolute(res));
778         return res;
779     }
780 
781     PyObject *one = _PyLong_GetOne();  // borrowed ref
782     for (Py_ssize_t i = 1; i < nargs; i++) {
783         PyObject *x = _PyNumber_Index(args[i]);
784         if (x == NULL) {
785             Py_DECREF(res);
786             return NULL;
787         }
788         if (res == one) {
789             /* Fast path: just check arguments.
790                It is okay to use identity comparison here. */
791             Py_DECREF(x);
792             continue;
793         }
794         Py_SETREF(res, _PyLong_GCD(res, x));
795         Py_DECREF(x);
796         if (res == NULL) {
797             return NULL;
798         }
799     }
800     return res;
801 }
802 
803 PyDoc_STRVAR(math_gcd_doc,
804 "gcd($module, *integers)\n"
805 "--\n"
806 "\n"
807 "Greatest Common Divisor.");
808 
809 
810 static PyObject *
long_lcm(PyObject * a,PyObject * b)811 long_lcm(PyObject *a, PyObject *b)
812 {
813     PyObject *g, *m, *f, *ab;
814 
815     if (_PyLong_IsZero((PyLongObject *)a) || _PyLong_IsZero((PyLongObject *)b)) {
816         return PyLong_FromLong(0);
817     }
818     g = _PyLong_GCD(a, b);
819     if (g == NULL) {
820         return NULL;
821     }
822     f = PyNumber_FloorDivide(a, g);
823     Py_DECREF(g);
824     if (f == NULL) {
825         return NULL;
826     }
827     m = PyNumber_Multiply(f, b);
828     Py_DECREF(f);
829     if (m == NULL) {
830         return NULL;
831     }
832     ab = PyNumber_Absolute(m);
833     Py_DECREF(m);
834     return ab;
835 }
836 
837 
838 static PyObject *
math_lcm(PyObject * module,PyObject * const * args,Py_ssize_t nargs)839 math_lcm(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
840 {
841     PyObject *res, *x;
842     Py_ssize_t i;
843 
844     if (nargs == 0) {
845         return PyLong_FromLong(1);
846     }
847     res = PyNumber_Index(args[0]);
848     if (res == NULL) {
849         return NULL;
850     }
851     if (nargs == 1) {
852         Py_SETREF(res, PyNumber_Absolute(res));
853         return res;
854     }
855 
856     PyObject *zero = _PyLong_GetZero();  // borrowed ref
857     for (i = 1; i < nargs; i++) {
858         x = PyNumber_Index(args[i]);
859         if (x == NULL) {
860             Py_DECREF(res);
861             return NULL;
862         }
863         if (res == zero) {
864             /* Fast path: just check arguments.
865                It is okay to use identity comparison here. */
866             Py_DECREF(x);
867             continue;
868         }
869         Py_SETREF(res, long_lcm(res, x));
870         Py_DECREF(x);
871         if (res == NULL) {
872             return NULL;
873         }
874     }
875     return res;
876 }
877 
878 
879 PyDoc_STRVAR(math_lcm_doc,
880 "lcm($module, *integers)\n"
881 "--\n"
882 "\n"
883 "Least Common Multiple.");
884 
885 
886 /* Call is_error when errno != 0, and where x is the result libm
887  * returned.  is_error will usually set up an exception and return
888  * true (1), but may return false (0) without setting up an exception.
889  */
890 static int
is_error(double x)891 is_error(double x)
892 {
893     int result = 1;     /* presumption of guilt */
894     assert(errno);      /* non-zero errno is a precondition for calling */
895     if (errno == EDOM)
896         PyErr_SetString(PyExc_ValueError, "math domain error");
897 
898     else if (errno == ERANGE) {
899         /* ANSI C generally requires libm functions to set ERANGE
900          * on overflow, but also generally *allows* them to set
901          * ERANGE on underflow too.  There's no consistency about
902          * the latter across platforms.
903          * Alas, C99 never requires that errno be set.
904          * Here we suppress the underflow errors (libm functions
905          * should return a zero on underflow, and +- HUGE_VAL on
906          * overflow, so testing the result for zero suffices to
907          * distinguish the cases).
908          *
909          * On some platforms (Ubuntu/ia64) it seems that errno can be
910          * set to ERANGE for subnormal results that do *not* underflow
911          * to zero.  So to be safe, we'll ignore ERANGE whenever the
912          * function result is less than 1.5 in absolute value.
913          *
914          * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
915          * are correctly detected, since the function may underflow
916          * toward -1.0 rather than 0.0.
917          */
918         if (fabs(x) < 1.5)
919             result = 0;
920         else
921             PyErr_SetString(PyExc_OverflowError,
922                             "math range error");
923     }
924     else
925         /* Unexpected math error */
926         PyErr_SetFromErrno(PyExc_ValueError);
927     return result;
928 }
929 
930 /*
931    math_1 is used to wrap a libm function f that takes a double
932    argument and returns a double.
933 
934    The error reporting follows these rules, which are designed to do
935    the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
936    platforms.
937 
938    - a NaN result from non-NaN inputs causes ValueError to be raised
939    - an infinite result from finite inputs causes OverflowError to be
940      raised if can_overflow is 1, or raises ValueError if can_overflow
941      is 0.
942    - if the result is finite and errno == EDOM then ValueError is
943      raised
944    - if the result is finite and nonzero and errno == ERANGE then
945      OverflowError is raised
946 
947    The last rule is used to catch overflow on platforms which follow
948    C89 but for which HUGE_VAL is not an infinity.
949 
950    For the majority of one-argument functions these rules are enough
951    to ensure that Python's functions behave as specified in 'Annex F'
952    of the C99 standard, with the 'invalid' and 'divide-by-zero'
953    floating-point exceptions mapping to Python's ValueError and the
954    'overflow' floating-point exception mapping to OverflowError.
955    math_1 only works for functions that don't have singularities *and*
956    the possibility of overflow; fortunately, that covers everything we
957    care about right now.
958 */
959 
960 static PyObject *
math_1(PyObject * arg,double (* func)(double),int can_overflow)961 math_1(PyObject *arg, double (*func) (double), int can_overflow)
962 {
963     double x, r;
964     x = PyFloat_AsDouble(arg);
965     if (x == -1.0 && PyErr_Occurred())
966         return NULL;
967     errno = 0;
968     r = (*func)(x);
969     if (Py_IS_NAN(r) && !Py_IS_NAN(x)) {
970         PyErr_SetString(PyExc_ValueError,
971                         "math domain error"); /* invalid arg */
972         return NULL;
973     }
974     if (Py_IS_INFINITY(r) && Py_IS_FINITE(x)) {
975         if (can_overflow)
976             PyErr_SetString(PyExc_OverflowError,
977                             "math range error"); /* overflow */
978         else
979             PyErr_SetString(PyExc_ValueError,
980                             "math domain error"); /* singularity */
981         return NULL;
982     }
983     if (Py_IS_FINITE(r) && errno && is_error(r))
984         /* this branch unnecessary on most platforms */
985         return NULL;
986 
987     return PyFloat_FromDouble(r);
988 }
989 
990 /* variant of math_1, to be used when the function being wrapped is known to
991    set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
992    errno = ERANGE for overflow). */
993 
994 static PyObject *
math_1a(PyObject * arg,double (* func)(double))995 math_1a(PyObject *arg, double (*func) (double))
996 {
997     double x, r;
998     x = PyFloat_AsDouble(arg);
999     if (x == -1.0 && PyErr_Occurred())
1000         return NULL;
1001     errno = 0;
1002     r = (*func)(x);
1003     if (errno && is_error(r))
1004         return NULL;
1005     return PyFloat_FromDouble(r);
1006 }
1007 
1008 /*
1009    math_2 is used to wrap a libm function f that takes two double
1010    arguments and returns a double.
1011 
1012    The error reporting follows these rules, which are designed to do
1013    the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1014    platforms.
1015 
1016    - a NaN result from non-NaN inputs causes ValueError to be raised
1017    - an infinite result from finite inputs causes OverflowError to be
1018      raised.
1019    - if the result is finite and errno == EDOM then ValueError is
1020      raised
1021    - if the result is finite and nonzero and errno == ERANGE then
1022      OverflowError is raised
1023 
1024    The last rule is used to catch overflow on platforms which follow
1025    C89 but for which HUGE_VAL is not an infinity.
1026 
1027    For most two-argument functions (copysign, fmod, hypot, atan2)
1028    these rules are enough to ensure that Python's functions behave as
1029    specified in 'Annex F' of the C99 standard, with the 'invalid' and
1030    'divide-by-zero' floating-point exceptions mapping to Python's
1031    ValueError and the 'overflow' floating-point exception mapping to
1032    OverflowError.
1033 */
1034 
1035 static PyObject *
math_2(PyObject * const * args,Py_ssize_t nargs,double (* func)(double,double),const char * funcname)1036 math_2(PyObject *const *args, Py_ssize_t nargs,
1037        double (*func) (double, double), const char *funcname)
1038 {
1039     double x, y, r;
1040     if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
1041         return NULL;
1042     x = PyFloat_AsDouble(args[0]);
1043     if (x == -1.0 && PyErr_Occurred()) {
1044         return NULL;
1045     }
1046     y = PyFloat_AsDouble(args[1]);
1047     if (y == -1.0 && PyErr_Occurred()) {
1048         return NULL;
1049     }
1050     errno = 0;
1051     r = (*func)(x, y);
1052     if (Py_IS_NAN(r)) {
1053         if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
1054             errno = EDOM;
1055         else
1056             errno = 0;
1057     }
1058     else if (Py_IS_INFINITY(r)) {
1059         if (Py_IS_FINITE(x) && Py_IS_FINITE(y))
1060             errno = ERANGE;
1061         else
1062             errno = 0;
1063     }
1064     if (errno && is_error(r))
1065         return NULL;
1066     else
1067         return PyFloat_FromDouble(r);
1068 }
1069 
1070 #define FUNC1(funcname, func, can_overflow, docstring)                  \
1071     static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1072         return math_1(args, func, can_overflow);                            \
1073     }\
1074     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1075 
1076 #define FUNC1A(funcname, func, docstring)                               \
1077     static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1078         return math_1a(args, func);                                     \
1079     }\
1080     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1081 
1082 #define FUNC2(funcname, func, docstring) \
1083     static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
1084         return math_2(args, nargs, func, #funcname); \
1085     }\
1086     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1087 
1088 FUNC1(acos, acos, 0,
1089       "acos($module, x, /)\n--\n\n"
1090       "Return the arc cosine (measured in radians) of x.\n\n"
1091       "The result is between 0 and pi.")
1092 FUNC1(acosh, acosh, 0,
1093       "acosh($module, x, /)\n--\n\n"
1094       "Return the inverse hyperbolic cosine of x.")
1095 FUNC1(asin, asin, 0,
1096       "asin($module, x, /)\n--\n\n"
1097       "Return the arc sine (measured in radians) of x.\n\n"
1098       "The result is between -pi/2 and pi/2.")
1099 FUNC1(asinh, asinh, 0,
1100       "asinh($module, x, /)\n--\n\n"
1101       "Return the inverse hyperbolic sine of x.")
1102 FUNC1(atan, atan, 0,
1103       "atan($module, x, /)\n--\n\n"
1104       "Return the arc tangent (measured in radians) of x.\n\n"
1105       "The result is between -pi/2 and pi/2.")
1106 FUNC2(atan2, m_atan2,
1107       "atan2($module, y, x, /)\n--\n\n"
1108       "Return the arc tangent (measured in radians) of y/x.\n\n"
1109       "Unlike atan(y/x), the signs of both x and y are considered.")
1110 FUNC1(atanh, atanh, 0,
1111       "atanh($module, x, /)\n--\n\n"
1112       "Return the inverse hyperbolic tangent of x.")
1113 FUNC1(cbrt, cbrt, 0,
1114       "cbrt($module, x, /)\n--\n\n"
1115       "Return the cube root of x.")
1116 
1117 /*[clinic input]
1118 math.ceil
1119 
1120     x as number: object
1121     /
1122 
1123 Return the ceiling of x as an Integral.
1124 
1125 This is the smallest integer >= x.
1126 [clinic start generated code]*/
1127 
1128 static PyObject *
math_ceil(PyObject * module,PyObject * number)1129 math_ceil(PyObject *module, PyObject *number)
1130 /*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
1131 {
1132     double x;
1133 
1134     if (PyFloat_CheckExact(number)) {
1135         x = PyFloat_AS_DOUBLE(number);
1136     }
1137     else {
1138         math_module_state *state = get_math_module_state(module);
1139         PyObject *method = _PyObject_LookupSpecial(number, state->str___ceil__);
1140         if (method != NULL) {
1141             PyObject *result = _PyObject_CallNoArgs(method);
1142             Py_DECREF(method);
1143             return result;
1144         }
1145         if (PyErr_Occurred())
1146             return NULL;
1147         x = PyFloat_AsDouble(number);
1148         if (x == -1.0 && PyErr_Occurred())
1149             return NULL;
1150     }
1151     return PyLong_FromDouble(ceil(x));
1152 }
1153 
1154 FUNC2(copysign, copysign,
1155       "copysign($module, x, y, /)\n--\n\n"
1156        "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
1157       "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
1158       "returns -1.0.\n")
1159 FUNC1(cos, cos, 0,
1160       "cos($module, x, /)\n--\n\n"
1161       "Return the cosine of x (measured in radians).")
1162 FUNC1(cosh, cosh, 1,
1163       "cosh($module, x, /)\n--\n\n"
1164       "Return the hyperbolic cosine of x.")
1165 FUNC1A(erf, erf,
1166        "erf($module, x, /)\n--\n\n"
1167        "Error function at x.")
1168 FUNC1A(erfc, erfc,
1169        "erfc($module, x, /)\n--\n\n"
1170        "Complementary error function at x.")
1171 FUNC1(exp, exp, 1,
1172       "exp($module, x, /)\n--\n\n"
1173       "Return e raised to the power of x.")
1174 FUNC1(exp2, exp2, 1,
1175       "exp2($module, x, /)\n--\n\n"
1176       "Return 2 raised to the power of x.")
1177 FUNC1(expm1, expm1, 1,
1178       "expm1($module, x, /)\n--\n\n"
1179       "Return exp(x)-1.\n\n"
1180       "This function avoids the loss of precision involved in the direct "
1181       "evaluation of exp(x)-1 for small x.")
1182 FUNC1(fabs, fabs, 0,
1183       "fabs($module, x, /)\n--\n\n"
1184       "Return the absolute value of the float x.")
1185 
1186 /*[clinic input]
1187 math.floor
1188 
1189     x as number: object
1190     /
1191 
1192 Return the floor of x as an Integral.
1193 
1194 This is the largest integer <= x.
1195 [clinic start generated code]*/
1196 
1197 static PyObject *
math_floor(PyObject * module,PyObject * number)1198 math_floor(PyObject *module, PyObject *number)
1199 /*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
1200 {
1201     double x;
1202 
1203     if (PyFloat_CheckExact(number)) {
1204         x = PyFloat_AS_DOUBLE(number);
1205     }
1206     else {
1207         math_module_state *state = get_math_module_state(module);
1208         PyObject *method = _PyObject_LookupSpecial(number, state->str___floor__);
1209         if (method != NULL) {
1210             PyObject *result = _PyObject_CallNoArgs(method);
1211             Py_DECREF(method);
1212             return result;
1213         }
1214         if (PyErr_Occurred())
1215             return NULL;
1216         x = PyFloat_AsDouble(number);
1217         if (x == -1.0 && PyErr_Occurred())
1218             return NULL;
1219     }
1220     return PyLong_FromDouble(floor(x));
1221 }
1222 
1223 FUNC1A(gamma, m_tgamma,
1224       "gamma($module, x, /)\n--\n\n"
1225       "Gamma function at x.")
1226 FUNC1A(lgamma, m_lgamma,
1227       "lgamma($module, x, /)\n--\n\n"
1228       "Natural logarithm of absolute value of Gamma function at x.")
1229 FUNC1(log1p, m_log1p, 0,
1230       "log1p($module, x, /)\n--\n\n"
1231       "Return the natural logarithm of 1+x (base e).\n\n"
1232       "The result is computed in a way which is accurate for x near zero.")
1233 FUNC2(remainder, m_remainder,
1234       "remainder($module, x, y, /)\n--\n\n"
1235       "Difference between x and the closest integer multiple of y.\n\n"
1236       "Return x - n*y where n*y is the closest integer multiple of y.\n"
1237       "In the case where x is exactly halfway between two multiples of\n"
1238       "y, the nearest even value of n is used. The result is always exact.")
1239 FUNC1(sin, sin, 0,
1240       "sin($module, x, /)\n--\n\n"
1241       "Return the sine of x (measured in radians).")
1242 FUNC1(sinh, sinh, 1,
1243       "sinh($module, x, /)\n--\n\n"
1244       "Return the hyperbolic sine of x.")
1245 FUNC1(sqrt, sqrt, 0,
1246       "sqrt($module, x, /)\n--\n\n"
1247       "Return the square root of x.")
1248 FUNC1(tan, tan, 0,
1249       "tan($module, x, /)\n--\n\n"
1250       "Return the tangent of x (measured in radians).")
1251 FUNC1(tanh, tanh, 0,
1252       "tanh($module, x, /)\n--\n\n"
1253       "Return the hyperbolic tangent of x.")
1254 
1255 /* Precision summation function as msum() by Raymond Hettinger in
1256    <https://code.activestate.com/recipes/393090-binary-floating-point-summation-accurate-to-full-p/>,
1257    enhanced with the exact partials sum and roundoff from Mark
1258    Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
1259    See those links for more details, proofs and other references.
1260 
1261    Note 1: IEEE 754 floating-point semantics with a rounding mode of
1262    roundTiesToEven are assumed.
1263 
1264    Note 2: No provision is made for intermediate overflow handling;
1265    therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while
1266    fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the
1267    overflow of the first partial sum.
1268 
1269    Note 3: The algorithm has two potential sources of fragility. First, C
1270    permits arithmetic operations on `double`s to be performed in an
1271    intermediate format whose range and precision may be greater than those of
1272    `double` (see for example C99 §5.2.4.2.2, paragraph 8). This can happen for
1273    example on machines using the now largely historical x87 FPUs. In this case,
1274    `fsum` can produce incorrect results. If `FLT_EVAL_METHOD` is `0` or `1`, or
1275    `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we
1276    should be safe from this source of errors. Second, an aggressively
1277    optimizing compiler can re-associate operations so that (for example) the
1278    statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then
1279    re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`. That
1280    re-association would be in violation of the C standard, and should not occur
1281    except possibly in the presence of unsafe optimizations (e.g., -ffast-math,
1282    -fassociative-math). Such optimizations should be avoided for this module.
1283 
1284    Note 4: The signature of math.fsum() differs from builtins.sum()
1285    because the start argument doesn't make sense in the context of
1286    accurate summation.  Since the partials table is collapsed before
1287    returning a result, sum(seq2, start=sum(seq1)) may not equal the
1288    accurate result returned by sum(itertools.chain(seq1, seq2)).
1289 */
1290 
1291 #define NUM_PARTIALS  32  /* initial partials array size, on stack */
1292 
1293 /* Extend the partials array p[] by doubling its size. */
1294 static int                          /* non-zero on error */
_fsum_realloc(double ** p_ptr,Py_ssize_t n,double * ps,Py_ssize_t * m_ptr)1295 _fsum_realloc(double **p_ptr, Py_ssize_t  n,
1296              double  *ps,    Py_ssize_t *m_ptr)
1297 {
1298     void *v = NULL;
1299     Py_ssize_t m = *m_ptr;
1300 
1301     m += m;  /* double */
1302     if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
1303         double *p = *p_ptr;
1304         if (p == ps) {
1305             v = PyMem_Malloc(sizeof(double) * m);
1306             if (v != NULL)
1307                 memcpy(v, ps, sizeof(double) * n);
1308         }
1309         else
1310             v = PyMem_Realloc(p, sizeof(double) * m);
1311     }
1312     if (v == NULL) {        /* size overflow or no memory */
1313         PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
1314         return 1;
1315     }
1316     *p_ptr = (double*) v;
1317     *m_ptr = m;
1318     return 0;
1319 }
1320 
1321 /* Full precision summation of a sequence of floats.
1322 
1323    def msum(iterable):
1324        partials = []  # sorted, non-overlapping partial sums
1325        for x in iterable:
1326            i = 0
1327            for y in partials:
1328                if abs(x) < abs(y):
1329                    x, y = y, x
1330                hi = x + y
1331                lo = y - (hi - x)
1332                if lo:
1333                    partials[i] = lo
1334                    i += 1
1335                x = hi
1336            partials[i:] = [x]
1337        return sum_exact(partials)
1338 
1339    Rounded x+y stored in hi with the roundoff stored in lo.  Together hi+lo
1340    are exactly equal to x+y.  The inner loop applies hi/lo summation to each
1341    partial so that the list of partial sums remains exact.
1342 
1343    Sum_exact() adds the partial sums exactly and correctly rounds the final
1344    result (using the round-half-to-even rule).  The items in partials remain
1345    non-zero, non-special, non-overlapping and strictly increasing in
1346    magnitude, but possibly not all having the same sign.
1347 
1348    Depends on IEEE 754 arithmetic guarantees and half-even rounding.
1349 */
1350 
1351 /*[clinic input]
1352 math.fsum
1353 
1354     seq: object
1355     /
1356 
1357 Return an accurate floating-point sum of values in the iterable seq.
1358 
1359 Assumes IEEE-754 floating-point arithmetic.
1360 [clinic start generated code]*/
1361 
1362 static PyObject *
math_fsum(PyObject * module,PyObject * seq)1363 math_fsum(PyObject *module, PyObject *seq)
1364 /*[clinic end generated code: output=ba5c672b87fe34fc input=4506244ded6057dc]*/
1365 {
1366     PyObject *item, *iter, *sum = NULL;
1367     Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
1368     double x, y, t, ps[NUM_PARTIALS], *p = ps;
1369     double xsave, special_sum = 0.0, inf_sum = 0.0;
1370     double hi, yr, lo = 0.0;
1371 
1372     iter = PyObject_GetIter(seq);
1373     if (iter == NULL)
1374         return NULL;
1375 
1376     for(;;) {           /* for x in iterable */
1377         assert(0 <= n && n <= m);
1378         assert((m == NUM_PARTIALS && p == ps) ||
1379                (m >  NUM_PARTIALS && p != NULL));
1380 
1381         item = PyIter_Next(iter);
1382         if (item == NULL) {
1383             if (PyErr_Occurred())
1384                 goto _fsum_error;
1385             break;
1386         }
1387         ASSIGN_DOUBLE(x, item, error_with_item);
1388         Py_DECREF(item);
1389 
1390         xsave = x;
1391         for (i = j = 0; j < n; j++) {       /* for y in partials */
1392             y = p[j];
1393             if (fabs(x) < fabs(y)) {
1394                 t = x; x = y; y = t;
1395             }
1396             hi = x + y;
1397             yr = hi - x;
1398             lo = y - yr;
1399             if (lo != 0.0)
1400                 p[i++] = lo;
1401             x = hi;
1402         }
1403 
1404         n = i;                              /* ps[i:] = [x] */
1405         if (x != 0.0) {
1406             if (! Py_IS_FINITE(x)) {
1407                 /* a nonfinite x could arise either as
1408                    a result of intermediate overflow, or
1409                    as a result of a nan or inf in the
1410                    summands */
1411                 if (Py_IS_FINITE(xsave)) {
1412                     PyErr_SetString(PyExc_OverflowError,
1413                           "intermediate overflow in fsum");
1414                     goto _fsum_error;
1415                 }
1416                 if (Py_IS_INFINITY(xsave))
1417                     inf_sum += xsave;
1418                 special_sum += xsave;
1419                 /* reset partials */
1420                 n = 0;
1421             }
1422             else if (n >= m && _fsum_realloc(&p, n, ps, &m))
1423                 goto _fsum_error;
1424             else
1425                 p[n++] = x;
1426         }
1427     }
1428 
1429     if (special_sum != 0.0) {
1430         if (Py_IS_NAN(inf_sum))
1431             PyErr_SetString(PyExc_ValueError,
1432                             "-inf + inf in fsum");
1433         else
1434             sum = PyFloat_FromDouble(special_sum);
1435         goto _fsum_error;
1436     }
1437 
1438     hi = 0.0;
1439     if (n > 0) {
1440         hi = p[--n];
1441         /* sum_exact(ps, hi) from the top, stop when the sum becomes
1442            inexact. */
1443         while (n > 0) {
1444             x = hi;
1445             y = p[--n];
1446             assert(fabs(y) < fabs(x));
1447             hi = x + y;
1448             yr = hi - x;
1449             lo = y - yr;
1450             if (lo != 0.0)
1451                 break;
1452         }
1453         /* Make half-even rounding work across multiple partials.
1454            Needed so that sum([1e-16, 1, 1e16]) will round-up the last
1455            digit to two instead of down to zero (the 1e-16 makes the 1
1456            slightly closer to two).  With a potential 1 ULP rounding
1457            error fixed-up, math.fsum() can guarantee commutativity. */
1458         if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
1459                       (lo > 0.0 && p[n-1] > 0.0))) {
1460             y = lo * 2.0;
1461             x = hi + y;
1462             yr = x - hi;
1463             if (y == yr)
1464                 hi = x;
1465         }
1466     }
1467     sum = PyFloat_FromDouble(hi);
1468 
1469   _fsum_error:
1470     Py_DECREF(iter);
1471     if (p != ps)
1472         PyMem_Free(p);
1473     return sum;
1474 
1475   error_with_item:
1476     Py_DECREF(item);
1477     goto _fsum_error;
1478 }
1479 
1480 #undef NUM_PARTIALS
1481 
1482 
1483 static unsigned long
count_set_bits(unsigned long n)1484 count_set_bits(unsigned long n)
1485 {
1486     unsigned long count = 0;
1487     while (n != 0) {
1488         ++count;
1489         n &= n - 1; /* clear least significant bit */
1490     }
1491     return count;
1492 }
1493 
1494 /* Integer square root
1495 
1496 Given a nonnegative integer `n`, we want to compute the largest integer
1497 `a` for which `a * a <= n`, or equivalently the integer part of the exact
1498 square root of `n`.
1499 
1500 We use an adaptive-precision pure-integer version of Newton's iteration. Given
1501 a positive integer `n`, the algorithm produces at each iteration an integer
1502 approximation `a` to the square root of `n >> s` for some even integer `s`,
1503 with `s` decreasing as the iterations progress. On the final iteration, `s` is
1504 zero and we have an approximation to the square root of `n` itself.
1505 
1506 At every step, the approximation `a` is strictly within 1.0 of the true square
1507 root, so we have
1508 
1509     (a - 1)**2 < (n >> s) < (a + 1)**2
1510 
1511 After the final iteration, a check-and-correct step is needed to determine
1512 whether `a` or `a - 1` gives the desired integer square root of `n`.
1513 
1514 The algorithm is remarkable in its simplicity. There's no need for a
1515 per-iteration check-and-correct step, and termination is straightforward: the
1516 number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
1517 for `n > 1`). The only tricky part of the correctness proof is in establishing
1518 that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
1519 iteration to the next. A sketch of the proof of this is given below.
1520 
1521 In addition to the proof sketch, a formal, computer-verified proof
1522 of correctness (using Lean) of an equivalent recursive algorithm can be found
1523 here:
1524 
1525     https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
1526 
1527 
1528 Here's Python code equivalent to the C implementation below:
1529 
1530     def isqrt(n):
1531         """
1532         Return the integer part of the square root of the input.
1533         """
1534         n = operator.index(n)
1535 
1536         if n < 0:
1537             raise ValueError("isqrt() argument must be nonnegative")
1538         if n == 0:
1539             return 0
1540 
1541         c = (n.bit_length() - 1) // 2
1542         a = 1
1543         d = 0
1544         for s in reversed(range(c.bit_length())):
1545             # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
1546             e = d
1547             d = c >> s
1548             a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1549 
1550         return a - (a*a > n)
1551 
1552 
1553 Sketch of proof of correctness
1554 ------------------------------
1555 
1556 The delicate part of the correctness proof is showing that the loop invariant
1557 is preserved from one iteration to the next. That is, just before the line
1558 
1559     a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1560 
1561 is executed in the above code, we know that
1562 
1563     (1)  (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
1564 
1565 (since `e` is always the value of `d` from the previous iteration). We must
1566 prove that after that line is executed, we have
1567 
1568     (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
1569 
1570 To facilitate the proof, we make some changes of notation. Write `m` for
1571 `n >> 2*(c-d)`, and write `b` for the new value of `a`, so
1572 
1573     b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1574 
1575 or equivalently:
1576 
1577     (2)  b = (a << d - e - 1) + (m >> d - e + 1) // a
1578 
1579 Then we can rewrite (1) as:
1580 
1581     (3)  (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
1582 
1583 and we must show that (b - 1)**2 < m < (b + 1)**2.
1584 
1585 From this point on, we switch to mathematical notation, so `/` means exact
1586 division rather than integer division and `^` is used for exponentiation. We
1587 use the `√` symbol for the exact square root. In (3), we can remove the
1588 implicit floor operation to give:
1589 
1590     (4)  (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
1591 
1592 Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
1593 
1594     (5)  0 <= | 2^(d-e)a - √m | < 2^(d-e)
1595 
1596 Squaring and dividing through by `2^(d-e+1) a` gives
1597 
1598     (6)  0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
1599 
1600 We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
1601 right-hand side of (6) with `1`, and now replacing the central
1602 term `m / (2^(d-e+1) a)` with its floor in (6) gives
1603 
1604     (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
1605 
1606 Or equivalently, from (2):
1607 
1608     (7) -1 < b - √m < 1
1609 
1610 and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
1611 to prove.
1612 
1613 We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
1614 a` that was used to get line (7) above. From the definition of `c`, we have
1615 `4^c <= n`, which implies
1616 
1617     (8)  4^d <= m
1618 
1619 also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
1620 that `2d - 2e - 1 <= d` and hence that
1621 
1622     (9)  4^(2d - 2e - 1) <= m
1623 
1624 Dividing both sides by `4^(d - e)` gives
1625 
1626     (10)  4^(d - e - 1) <= m / 4^(d - e)
1627 
1628 But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
1629 
1630     (11)  4^(d - e - 1) < (a + 1)^2
1631 
1632 Now taking square roots of both sides and observing that both `2^(d-e-1)` and
1633 `a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
1634 completes the proof sketch.
1635 
1636 */
1637 
1638 /*
1639     The _approximate_isqrt_tab table provides approximate square roots for
1640     16-bit integers. For any n in the range 2**14 <= n < 2**16, the value
1641 
1642         a = _approximate_isqrt_tab[(n >> 8) - 64]
1643 
1644     is an approximate square root of n, satisfying (a - 1)**2 < n < (a + 1)**2.
1645 
1646     The table was computed in Python using the expression:
1647 
1648         [min(round(sqrt(256*n + 128)), 255) for n in range(64, 256)]
1649 */
1650 
1651 static const uint8_t _approximate_isqrt_tab[192] = {
1652     128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
1653     140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, 150,
1654     151, 151, 152, 153, 154, 155, 156, 156, 157, 158, 159, 160,
1655     160, 161, 162, 163, 164, 164, 165, 166, 167, 167, 168, 169,
1656     170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
1657     179, 179, 180, 181, 181, 182, 183, 183, 184, 185, 186, 186,
1658     187, 188, 188, 189, 190, 190, 191, 192, 192, 193, 194, 194,
1659     195, 196, 196, 197, 198, 198, 199, 200, 200, 201, 201, 202,
1660     203, 203, 204, 205, 205, 206, 206, 207, 208, 208, 209, 210,
1661     210, 211, 211, 212, 213, 213, 214, 214, 215, 216, 216, 217,
1662     217, 218, 219, 219, 220, 220, 221, 221, 222, 223, 223, 224,
1663     224, 225, 225, 226, 227, 227, 228, 228, 229, 229, 230, 230,
1664     231, 232, 232, 233, 233, 234, 234, 235, 235, 236, 237, 237,
1665     238, 238, 239, 239, 240, 240, 241, 241, 242, 242, 243, 243,
1666     244, 244, 245, 246, 246, 247, 247, 248, 248, 249, 249, 250,
1667     250, 251, 251, 252, 252, 253, 253, 254, 254, 255, 255, 255,
1668 };
1669 
1670 /* Approximate square root of a large 64-bit integer.
1671 
1672    Given `n` satisfying `2**62 <= n < 2**64`, return `a`
1673    satisfying `(a - 1)**2 < n < (a + 1)**2`. */
1674 
1675 static inline uint32_t
_approximate_isqrt(uint64_t n)1676 _approximate_isqrt(uint64_t n)
1677 {
1678     uint32_t u = _approximate_isqrt_tab[(n >> 56) - 64];
1679     u = (u << 7) + (uint32_t)(n >> 41) / u;
1680     return (u << 15) + (uint32_t)((n >> 17) / u);
1681 }
1682 
1683 /*[clinic input]
1684 math.isqrt
1685 
1686     n: object
1687     /
1688 
1689 Return the integer part of the square root of the input.
1690 [clinic start generated code]*/
1691 
1692 static PyObject *
math_isqrt(PyObject * module,PyObject * n)1693 math_isqrt(PyObject *module, PyObject *n)
1694 /*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
1695 {
1696     int a_too_large, c_bit_length;
1697     size_t c, d;
1698     uint64_t m;
1699     uint32_t u;
1700     PyObject *a = NULL, *b;
1701 
1702     n = _PyNumber_Index(n);
1703     if (n == NULL) {
1704         return NULL;
1705     }
1706 
1707     if (_PyLong_IsNegative((PyLongObject *)n)) {
1708         PyErr_SetString(
1709             PyExc_ValueError,
1710             "isqrt() argument must be nonnegative");
1711         goto error;
1712     }
1713     if (_PyLong_IsZero((PyLongObject *)n)) {
1714         Py_DECREF(n);
1715         return PyLong_FromLong(0);
1716     }
1717 
1718     /* c = (n.bit_length() - 1) // 2 */
1719     c = _PyLong_NumBits(n);
1720     if (c == (size_t)(-1)) {
1721         goto error;
1722     }
1723     c = (c - 1U) / 2U;
1724 
1725     /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
1726        fast, almost branch-free algorithm. */
1727     if (c <= 31U) {
1728         int shift = 31 - (int)c;
1729         m = (uint64_t)PyLong_AsUnsignedLongLong(n);
1730         Py_DECREF(n);
1731         if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1732             return NULL;
1733         }
1734         u = _approximate_isqrt(m << 2*shift) >> shift;
1735         u -= (uint64_t)u * u > m;
1736         return PyLong_FromUnsignedLong(u);
1737     }
1738 
1739     /* Slow path: n >= 2**64. We perform the first five iterations in C integer
1740        arithmetic, then switch to using Python long integers. */
1741 
1742     /* From n >= 2**64 it follows that c.bit_length() >= 6. */
1743     c_bit_length = 6;
1744     while ((c >> c_bit_length) > 0U) {
1745         ++c_bit_length;
1746     }
1747 
1748     /* Initialise d and a. */
1749     d = c >> (c_bit_length - 5);
1750     b = _PyLong_Rshift(n, 2U*c - 62U);
1751     if (b == NULL) {
1752         goto error;
1753     }
1754     m = (uint64_t)PyLong_AsUnsignedLongLong(b);
1755     Py_DECREF(b);
1756     if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1757         goto error;
1758     }
1759     u = _approximate_isqrt(m) >> (31U - d);
1760     a = PyLong_FromUnsignedLong(u);
1761     if (a == NULL) {
1762         goto error;
1763     }
1764 
1765     for (int s = c_bit_length - 6; s >= 0; --s) {
1766         PyObject *q;
1767         size_t e = d;
1768 
1769         d = c >> s;
1770 
1771         /* q = (n >> 2*c - e - d + 1) // a */
1772         q = _PyLong_Rshift(n, 2U*c - d - e + 1U);
1773         if (q == NULL) {
1774             goto error;
1775         }
1776         Py_SETREF(q, PyNumber_FloorDivide(q, a));
1777         if (q == NULL) {
1778             goto error;
1779         }
1780 
1781         /* a = (a << d - 1 - e) + q */
1782         Py_SETREF(a, _PyLong_Lshift(a, d - 1U - e));
1783         if (a == NULL) {
1784             Py_DECREF(q);
1785             goto error;
1786         }
1787         Py_SETREF(a, PyNumber_Add(a, q));
1788         Py_DECREF(q);
1789         if (a == NULL) {
1790             goto error;
1791         }
1792     }
1793 
1794     /* The correct result is either a or a - 1. Figure out which, and
1795        decrement a if necessary. */
1796 
1797     /* a_too_large = n < a * a */
1798     b = PyNumber_Multiply(a, a);
1799     if (b == NULL) {
1800         goto error;
1801     }
1802     a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
1803     Py_DECREF(b);
1804     if (a_too_large == -1) {
1805         goto error;
1806     }
1807 
1808     if (a_too_large) {
1809         Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
1810     }
1811     Py_DECREF(n);
1812     return a;
1813 
1814   error:
1815     Py_XDECREF(a);
1816     Py_DECREF(n);
1817     return NULL;
1818 }
1819 
1820 /* Divide-and-conquer factorial algorithm
1821  *
1822  * Based on the formula and pseudo-code provided at:
1823  * http://www.luschny.de/math/factorial/binarysplitfact.html
1824  *
1825  * Faster algorithms exist, but they're more complicated and depend on
1826  * a fast prime factorization algorithm.
1827  *
1828  * Notes on the algorithm
1829  * ----------------------
1830  *
1831  * factorial(n) is written in the form 2**k * m, with m odd.  k and m are
1832  * computed separately, and then combined using a left shift.
1833  *
1834  * The function factorial_odd_part computes the odd part m (i.e., the greatest
1835  * odd divisor) of factorial(n), using the formula:
1836  *
1837  *   factorial_odd_part(n) =
1838  *
1839  *        product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
1840  *
1841  * Example: factorial_odd_part(20) =
1842  *
1843  *        (1) *
1844  *        (1) *
1845  *        (1 * 3 * 5) *
1846  *        (1 * 3 * 5 * 7 * 9) *
1847  *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1848  *
1849  * Here i goes from large to small: the first term corresponds to i=4 (any
1850  * larger i gives an empty product), and the last term corresponds to i=0.
1851  * Each term can be computed from the last by multiplying by the extra odd
1852  * numbers required: e.g., to get from the penultimate term to the last one,
1853  * we multiply by (11 * 13 * 15 * 17 * 19).
1854  *
1855  * To see a hint of why this formula works, here are the same numbers as above
1856  * but with the even parts (i.e., the appropriate powers of 2) included.  For
1857  * each subterm in the product for i, we multiply that subterm by 2**i:
1858  *
1859  *   factorial(20) =
1860  *
1861  *        (16) *
1862  *        (8) *
1863  *        (4 * 12 * 20) *
1864  *        (2 * 6 * 10 * 14 * 18) *
1865  *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1866  *
1867  * The factorial_partial_product function computes the product of all odd j in
1868  * range(start, stop) for given start and stop.  It's used to compute the
1869  * partial products like (11 * 13 * 15 * 17 * 19) in the example above.  It
1870  * operates recursively, repeatedly splitting the range into two roughly equal
1871  * pieces until the subranges are small enough to be computed using only C
1872  * integer arithmetic.
1873  *
1874  * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
1875  * the factorial) is computed independently in the main math_factorial
1876  * function.  By standard results, its value is:
1877  *
1878  *    two_valuation = n//2 + n//4 + n//8 + ....
1879  *
1880  * It can be shown (e.g., by complete induction on n) that two_valuation is
1881  * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
1882  * '1'-bits in the binary expansion of n.
1883  */
1884 
1885 /* factorial_partial_product: Compute product(range(start, stop, 2)) using
1886  * divide and conquer.  Assumes start and stop are odd and stop > start.
1887  * max_bits must be >= bit_length(stop - 2). */
1888 
1889 static PyObject *
factorial_partial_product(unsigned long start,unsigned long stop,unsigned long max_bits)1890 factorial_partial_product(unsigned long start, unsigned long stop,
1891                           unsigned long max_bits)
1892 {
1893     unsigned long midpoint, num_operands;
1894     PyObject *left = NULL, *right = NULL, *result = NULL;
1895 
1896     /* If the return value will fit an unsigned long, then we can
1897      * multiply in a tight, fast loop where each multiply is O(1).
1898      * Compute an upper bound on the number of bits required to store
1899      * the answer.
1900      *
1901      * Storing some integer z requires floor(lg(z))+1 bits, which is
1902      * conveniently the value returned by bit_length(z).  The
1903      * product x*y will require at most
1904      * bit_length(x) + bit_length(y) bits to store, based
1905      * on the idea that lg product = lg x + lg y.
1906      *
1907      * We know that stop - 2 is the largest number to be multiplied.  From
1908      * there, we have: bit_length(answer) <= num_operands *
1909      * bit_length(stop - 2)
1910      */
1911 
1912     num_operands = (stop - start) / 2;
1913     /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
1914      * unlikely case of an overflow in num_operands * max_bits. */
1915     if (num_operands <= 8 * SIZEOF_LONG &&
1916         num_operands * max_bits <= 8 * SIZEOF_LONG) {
1917         unsigned long j, total;
1918         for (total = start, j = start + 2; j < stop; j += 2)
1919             total *= j;
1920         return PyLong_FromUnsignedLong(total);
1921     }
1922 
1923     /* find midpoint of range(start, stop), rounded up to next odd number. */
1924     midpoint = (start + num_operands) | 1;
1925     left = factorial_partial_product(start, midpoint,
1926                                      _Py_bit_length(midpoint - 2));
1927     if (left == NULL)
1928         goto error;
1929     right = factorial_partial_product(midpoint, stop, max_bits);
1930     if (right == NULL)
1931         goto error;
1932     result = PyNumber_Multiply(left, right);
1933 
1934   error:
1935     Py_XDECREF(left);
1936     Py_XDECREF(right);
1937     return result;
1938 }
1939 
1940 /* factorial_odd_part:  compute the odd part of factorial(n). */
1941 
1942 static PyObject *
factorial_odd_part(unsigned long n)1943 factorial_odd_part(unsigned long n)
1944 {
1945     long i;
1946     unsigned long v, lower, upper;
1947     PyObject *partial, *tmp, *inner, *outer;
1948 
1949     inner = PyLong_FromLong(1);
1950     if (inner == NULL)
1951         return NULL;
1952     outer = Py_NewRef(inner);
1953 
1954     upper = 3;
1955     for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
1956         v = n >> i;
1957         if (v <= 2)
1958             continue;
1959         lower = upper;
1960         /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
1961         upper = (v + 1) | 1;
1962         /* Here inner is the product of all odd integers j in the range (0,
1963            n/2**(i+1)].  The factorial_partial_product call below gives the
1964            product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
1965         partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
1966         /* inner *= partial */
1967         if (partial == NULL)
1968             goto error;
1969         tmp = PyNumber_Multiply(inner, partial);
1970         Py_DECREF(partial);
1971         if (tmp == NULL)
1972             goto error;
1973         Py_SETREF(inner, tmp);
1974         /* Now inner is the product of all odd integers j in the range (0,
1975            n/2**i], giving the inner product in the formula above. */
1976 
1977         /* outer *= inner; */
1978         tmp = PyNumber_Multiply(outer, inner);
1979         if (tmp == NULL)
1980             goto error;
1981         Py_SETREF(outer, tmp);
1982     }
1983     Py_DECREF(inner);
1984     return outer;
1985 
1986   error:
1987     Py_DECREF(outer);
1988     Py_DECREF(inner);
1989     return NULL;
1990 }
1991 
1992 
1993 /* Lookup table for small factorial values */
1994 
1995 static const unsigned long SmallFactorials[] = {
1996     1, 1, 2, 6, 24, 120, 720, 5040, 40320,
1997     362880, 3628800, 39916800, 479001600,
1998 #if SIZEOF_LONG >= 8
1999     6227020800, 87178291200, 1307674368000,
2000     20922789888000, 355687428096000, 6402373705728000,
2001     121645100408832000, 2432902008176640000
2002 #endif
2003 };
2004 
2005 /*[clinic input]
2006 math.factorial
2007 
2008     n as arg: object
2009     /
2010 
2011 Find n!.
2012 
2013 Raise a ValueError if x is negative or non-integral.
2014 [clinic start generated code]*/
2015 
2016 static PyObject *
math_factorial(PyObject * module,PyObject * arg)2017 math_factorial(PyObject *module, PyObject *arg)
2018 /*[clinic end generated code: output=6686f26fae00e9ca input=713fb771677e8c31]*/
2019 {
2020     long x, two_valuation;
2021     int overflow;
2022     PyObject *result, *odd_part;
2023 
2024     x = PyLong_AsLongAndOverflow(arg, &overflow);
2025     if (x == -1 && PyErr_Occurred()) {
2026         return NULL;
2027     }
2028     else if (overflow == 1) {
2029         PyErr_Format(PyExc_OverflowError,
2030                      "factorial() argument should not exceed %ld",
2031                      LONG_MAX);
2032         return NULL;
2033     }
2034     else if (overflow == -1 || x < 0) {
2035         PyErr_SetString(PyExc_ValueError,
2036                         "factorial() not defined for negative values");
2037         return NULL;
2038     }
2039 
2040     /* use lookup table if x is small */
2041     if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
2042         return PyLong_FromUnsignedLong(SmallFactorials[x]);
2043 
2044     /* else express in the form odd_part * 2**two_valuation, and compute as
2045        odd_part << two_valuation. */
2046     odd_part = factorial_odd_part(x);
2047     if (odd_part == NULL)
2048         return NULL;
2049     two_valuation = x - count_set_bits(x);
2050     result = _PyLong_Lshift(odd_part, two_valuation);
2051     Py_DECREF(odd_part);
2052     return result;
2053 }
2054 
2055 
2056 /*[clinic input]
2057 math.trunc
2058 
2059     x: object
2060     /
2061 
2062 Truncates the Real x to the nearest Integral toward 0.
2063 
2064 Uses the __trunc__ magic method.
2065 [clinic start generated code]*/
2066 
2067 static PyObject *
math_trunc(PyObject * module,PyObject * x)2068 math_trunc(PyObject *module, PyObject *x)
2069 /*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
2070 {
2071     PyObject *trunc, *result;
2072 
2073     if (PyFloat_CheckExact(x)) {
2074         return PyFloat_Type.tp_as_number->nb_int(x);
2075     }
2076 
2077     math_module_state *state = get_math_module_state(module);
2078     trunc = _PyObject_LookupSpecial(x, state->str___trunc__);
2079     if (trunc == NULL) {
2080         if (!PyErr_Occurred())
2081             PyErr_Format(PyExc_TypeError,
2082                          "type %.100s doesn't define __trunc__ method",
2083                          Py_TYPE(x)->tp_name);
2084         return NULL;
2085     }
2086     result = _PyObject_CallNoArgs(trunc);
2087     Py_DECREF(trunc);
2088     return result;
2089 }
2090 
2091 
2092 /*[clinic input]
2093 math.frexp
2094 
2095     x: double
2096     /
2097 
2098 Return the mantissa and exponent of x, as pair (m, e).
2099 
2100 m is a float and e is an int, such that x = m * 2.**e.
2101 If x is 0, m and e are both 0.  Else 0.5 <= abs(m) < 1.0.
2102 [clinic start generated code]*/
2103 
2104 static PyObject *
math_frexp_impl(PyObject * module,double x)2105 math_frexp_impl(PyObject *module, double x)
2106 /*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
2107 {
2108     int i;
2109     /* deal with special cases directly, to sidestep platform
2110        differences */
2111     if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) {
2112         i = 0;
2113     }
2114     else {
2115         x = frexp(x, &i);
2116     }
2117     return Py_BuildValue("(di)", x, i);
2118 }
2119 
2120 
2121 /*[clinic input]
2122 math.ldexp
2123 
2124     x: double
2125     i: object
2126     /
2127 
2128 Return x * (2**i).
2129 
2130 This is essentially the inverse of frexp().
2131 [clinic start generated code]*/
2132 
2133 static PyObject *
math_ldexp_impl(PyObject * module,double x,PyObject * i)2134 math_ldexp_impl(PyObject *module, double x, PyObject *i)
2135 /*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
2136 {
2137     double r;
2138     long exp;
2139     int overflow;
2140 
2141     if (PyLong_Check(i)) {
2142         /* on overflow, replace exponent with either LONG_MAX
2143            or LONG_MIN, depending on the sign. */
2144         exp = PyLong_AsLongAndOverflow(i, &overflow);
2145         if (exp == -1 && PyErr_Occurred())
2146             return NULL;
2147         if (overflow)
2148             exp = overflow < 0 ? LONG_MIN : LONG_MAX;
2149     }
2150     else {
2151         PyErr_SetString(PyExc_TypeError,
2152                         "Expected an int as second argument to ldexp.");
2153         return NULL;
2154     }
2155 
2156     if (x == 0. || !Py_IS_FINITE(x)) {
2157         /* NaNs, zeros and infinities are returned unchanged */
2158         r = x;
2159         errno = 0;
2160     } else if (exp > INT_MAX) {
2161         /* overflow */
2162         r = copysign(Py_HUGE_VAL, x);
2163         errno = ERANGE;
2164     } else if (exp < INT_MIN) {
2165         /* underflow to +-0 */
2166         r = copysign(0., x);
2167         errno = 0;
2168     } else {
2169         errno = 0;
2170         r = ldexp(x, (int)exp);
2171         if (Py_IS_INFINITY(r))
2172             errno = ERANGE;
2173     }
2174 
2175     if (errno && is_error(r))
2176         return NULL;
2177     return PyFloat_FromDouble(r);
2178 }
2179 
2180 
2181 /*[clinic input]
2182 math.modf
2183 
2184     x: double
2185     /
2186 
2187 Return the fractional and integer parts of x.
2188 
2189 Both results carry the sign of x and are floats.
2190 [clinic start generated code]*/
2191 
2192 static PyObject *
math_modf_impl(PyObject * module,double x)2193 math_modf_impl(PyObject *module, double x)
2194 /*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
2195 {
2196     double y;
2197     /* some platforms don't do the right thing for NaNs and
2198        infinities, so we take care of special cases directly. */
2199     if (Py_IS_INFINITY(x))
2200         return Py_BuildValue("(dd)", copysign(0., x), x);
2201     else if (Py_IS_NAN(x))
2202         return Py_BuildValue("(dd)", x, x);
2203 
2204     errno = 0;
2205     x = modf(x, &y);
2206     return Py_BuildValue("(dd)", x, y);
2207 }
2208 
2209 
2210 /* A decent logarithm is easy to compute even for huge ints, but libm can't
2211    do that by itself -- loghelper can.  func is log or log10, and name is
2212    "log" or "log10".  Note that overflow of the result isn't possible: an int
2213    can contain no more than INT_MAX * SHIFT bits, so has value certainly less
2214    than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
2215    small enough to fit in an IEEE single.  log and log10 are even smaller.
2216    However, intermediate overflow is possible for an int if the number of bits
2217    in that int is larger than PY_SSIZE_T_MAX. */
2218 
2219 static PyObject*
loghelper(PyObject * arg,double (* func)(double))2220 loghelper(PyObject* arg, double (*func)(double))
2221 {
2222     /* If it is int, do it ourselves. */
2223     if (PyLong_Check(arg)) {
2224         double x, result;
2225         Py_ssize_t e;
2226 
2227         /* Negative or zero inputs give a ValueError. */
2228         if (!_PyLong_IsPositive((PyLongObject *)arg)) {
2229             PyErr_SetString(PyExc_ValueError,
2230                             "math domain error");
2231             return NULL;
2232         }
2233 
2234         x = PyLong_AsDouble(arg);
2235         if (x == -1.0 && PyErr_Occurred()) {
2236             if (!PyErr_ExceptionMatches(PyExc_OverflowError))
2237                 return NULL;
2238             /* Here the conversion to double overflowed, but it's possible
2239                to compute the log anyway.  Clear the exception and continue. */
2240             PyErr_Clear();
2241             x = _PyLong_Frexp((PyLongObject *)arg, &e);
2242             if (x == -1.0 && PyErr_Occurred())
2243                 return NULL;
2244             /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2245             result = func(x) + func(2.0) * e;
2246         }
2247         else
2248             /* Successfully converted x to a double. */
2249             result = func(x);
2250         return PyFloat_FromDouble(result);
2251     }
2252 
2253     /* Else let libm handle it by itself. */
2254     return math_1(arg, func, 0);
2255 }
2256 
2257 
2258 /* AC: cannot convert yet, see gh-102839 and gh-89381, waiting
2259    for support of multiple signatures */
2260 static PyObject *
math_log(PyObject * module,PyObject * const * args,Py_ssize_t nargs)2261 math_log(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
2262 {
2263     PyObject *num, *den;
2264     PyObject *ans;
2265 
2266     if (!_PyArg_CheckPositional("log", nargs, 1, 2))
2267         return NULL;
2268 
2269     num = loghelper(args[0], m_log);
2270     if (num == NULL || nargs == 1)
2271         return num;
2272 
2273     den = loghelper(args[1], m_log);
2274     if (den == NULL) {
2275         Py_DECREF(num);
2276         return NULL;
2277     }
2278 
2279     ans = PyNumber_TrueDivide(num, den);
2280     Py_DECREF(num);
2281     Py_DECREF(den);
2282     return ans;
2283 }
2284 
2285 PyDoc_STRVAR(math_log_doc,
2286 "log(x, [base=math.e])\n\
2287 Return the logarithm of x to the given base.\n\n\
2288 If the base is not specified, returns the natural logarithm (base e) of x.");
2289 
2290 /*[clinic input]
2291 math.log2
2292 
2293     x: object
2294     /
2295 
2296 Return the base 2 logarithm of x.
2297 [clinic start generated code]*/
2298 
2299 static PyObject *
math_log2(PyObject * module,PyObject * x)2300 math_log2(PyObject *module, PyObject *x)
2301 /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
2302 {
2303     return loghelper(x, m_log2);
2304 }
2305 
2306 
2307 /*[clinic input]
2308 math.log10
2309 
2310     x: object
2311     /
2312 
2313 Return the base 10 logarithm of x.
2314 [clinic start generated code]*/
2315 
2316 static PyObject *
math_log10(PyObject * module,PyObject * x)2317 math_log10(PyObject *module, PyObject *x)
2318 /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
2319 {
2320     return loghelper(x, m_log10);
2321 }
2322 
2323 
2324 /*[clinic input]
2325 math.fma
2326 
2327     x: double
2328     y: double
2329     z: double
2330     /
2331 
2332 Fused multiply-add operation.
2333 
2334 Compute (x * y) + z with a single round.
2335 [clinic start generated code]*/
2336 
2337 static PyObject *
math_fma_impl(PyObject * module,double x,double y,double z)2338 math_fma_impl(PyObject *module, double x, double y, double z)
2339 /*[clinic end generated code: output=4fc8626dbc278d17 input=e3ad1f4a4c89626e]*/
2340 {
2341     double r = fma(x, y, z);
2342 
2343     /* Fast path: if we got a finite result, we're done. */
2344     if (Py_IS_FINITE(r)) {
2345         return PyFloat_FromDouble(r);
2346     }
2347 
2348     /* Non-finite result. Raise an exception if appropriate, else return r. */
2349     if (Py_IS_NAN(r)) {
2350         if (!Py_IS_NAN(x) && !Py_IS_NAN(y) && !Py_IS_NAN(z)) {
2351             /* NaN result from non-NaN inputs. */
2352             PyErr_SetString(PyExc_ValueError, "invalid operation in fma");
2353             return NULL;
2354         }
2355     }
2356     else if (Py_IS_FINITE(x) && Py_IS_FINITE(y) && Py_IS_FINITE(z)) {
2357         /* Infinite result from finite inputs. */
2358         PyErr_SetString(PyExc_OverflowError, "overflow in fma");
2359         return NULL;
2360     }
2361 
2362     return PyFloat_FromDouble(r);
2363 }
2364 
2365 
2366 /*[clinic input]
2367 math.fmod
2368 
2369     x: double
2370     y: double
2371     /
2372 
2373 Return fmod(x, y), according to platform C.
2374 
2375 x % y may differ.
2376 [clinic start generated code]*/
2377 
2378 static PyObject *
math_fmod_impl(PyObject * module,double x,double y)2379 math_fmod_impl(PyObject *module, double x, double y)
2380 /*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
2381 {
2382     double r;
2383     /* fmod(x, +/-Inf) returns x for finite x. */
2384     if (Py_IS_INFINITY(y) && Py_IS_FINITE(x))
2385         return PyFloat_FromDouble(x);
2386     errno = 0;
2387     r = fmod(x, y);
2388 #ifdef _MSC_VER
2389     /* Windows (e.g. Windows 10 with MSC v.1916) loose sign
2390        for zero result.  But C99+ says: "if y is nonzero, the result
2391        has the same sign as x".
2392      */
2393     if (r == 0.0 && y != 0.0) {
2394         r = copysign(r, x);
2395     }
2396 #endif
2397     if (Py_IS_NAN(r)) {
2398         if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
2399             errno = EDOM;
2400         else
2401             errno = 0;
2402     }
2403     if (errno && is_error(r))
2404         return NULL;
2405     else
2406         return PyFloat_FromDouble(r);
2407 }
2408 
2409 /*
2410 Given a *vec* of values, compute the vector norm:
2411 
2412     sqrt(sum(x ** 2 for x in vec))
2413 
2414 The *max* variable should be equal to the largest fabs(x).
2415 The *n* variable is the length of *vec*.
2416 If n==0, then *max* should be 0.0.
2417 If an infinity is present in the vec, *max* should be INF.
2418 The *found_nan* variable indicates whether some member of
2419 the *vec* is a NaN.
2420 
2421 To avoid overflow/underflow and to achieve high accuracy giving results
2422 that are almost always correctly rounded, four techniques are used:
2423 
2424 * lossless scaling using a power-of-two scaling factor
2425 * accurate squaring using Veltkamp-Dekker splitting [1]
2426   or an equivalent with an fma() call
2427 * compensated summation using a variant of the Neumaier algorithm [2]
2428 * differential correction of the square root [3]
2429 
2430 The usual presentation of the Neumaier summation algorithm has an
2431 expensive branch depending on which operand has the larger
2432 magnitude.  We avoid this cost by arranging the calculation so that
2433 fabs(csum) is always as large as fabs(x).
2434 
2435 To establish the invariant, *csum* is initialized to 1.0 which is
2436 always larger than x**2 after scaling or after division by *max*.
2437 After the loop is finished, the initial 1.0 is subtracted out for a
2438 net zero effect on the final sum.  Since *csum* will be greater than
2439 1.0, the subtraction of 1.0 will not cause fractional digits to be
2440 dropped from *csum*.
2441 
2442 To get the full benefit from compensated summation, the largest
2443 addend should be in the range: 0.5 <= |x| <= 1.0.  Accordingly,
2444 scaling or division by *max* should not be skipped even if not
2445 otherwise needed to prevent overflow or loss of precision.
2446 
2447 The assertion that hi*hi <= 1.0 is a bit subtle.  Each vector element
2448 gets scaled to a magnitude below 1.0.  The Veltkamp-Dekker splitting
2449 algorithm gives a *hi* value that is correctly rounded to half
2450 precision.  When a value at or below 1.0 is correctly rounded, it
2451 never goes above 1.0.  And when values at or below 1.0 are squared,
2452 they remain at or below 1.0, thus preserving the summation invariant.
2453 
2454 Another interesting assertion is that csum+lo*lo == csum. In the loop,
2455 each scaled vector element has a magnitude less than 1.0.  After the
2456 Veltkamp split, *lo* has a maximum value of 2**-27.  So the maximum
2457 value of *lo* squared is 2**-54.  The value of ulp(1.0)/2.0 is 2**-53.
2458 Given that csum >= 1.0, we have:
2459     lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2460 Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2461 
2462 To minimize loss of information during the accumulation of fractional
2463 values, each term has a separate accumulator.  This also breaks up
2464 sequential dependencies in the inner loop so the CPU can maximize
2465 floating-point throughput. [4]  On an Apple M1 Max, hypot(*vec)
2466 takes only 3.33 µsec when len(vec) == 1000.
2467 
2468 The square root differential correction is needed because a
2469 correctly rounded square root of a correctly rounded sum of
2470 squares can still be off by as much as one ulp.
2471 
2472 The differential correction starts with a value *x* that is
2473 the difference between the square of *h*, the possibly inaccurately
2474 rounded square root, and the accurately computed sum of squares.
2475 The correction is the first order term of the Maclaurin series
2476 expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
2477 
2478 Essentially, this differential correction is equivalent to one
2479 refinement step in Newton's divide-and-average square root
2480 algorithm, effectively doubling the number of accurate bits.
2481 This technique is used in Dekker's SQRT2 algorithm and again in
2482 Borges' ALGORITHM 4 and 5.
2483 
2484 The hypot() function is faithfully rounded (less than 1 ulp error)
2485 and usually correctly rounded (within 1/2 ulp).  The squaring
2486 step is exact.  The Neumaier summation computes as if in doubled
2487 precision (106 bits) and has the advantage that its input squares
2488 are non-negative so that the condition number of the sum is one.
2489 The square root with a differential correction is likewise computed
2490 as if in doubled precision.
2491 
2492 For n <= 1000, prior to the final addition that rounds the overall
2493 result, the internal accuracy of "h" together with its correction of
2494 "x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
2495 against a Decimal implementation with prec=300.  After 100 million
2496 trials, no incorrectly rounded examples were found.  In addition,
2497 perfect commutativity (all permutations are exactly equal) was
2498 verified for 1 billion random inputs with n=5. [7]
2499 
2500 References:
2501 
2502 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2503 2. Compensated summation:  http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
2504 3. Square root differential correction:  https://arxiv.org/pdf/1904.09481.pdf
2505 4. Data dependency graph:  https://bugs.python.org/file49439/hypot.png
2506 5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
2507 6. Analysis of internal accuracy:  https://bugs.python.org/file49484/best_frac.py
2508 7. Commutativity test:  https://bugs.python.org/file49448/test_hypot_commutativity.py
2509 
2510 */
2511 
2512 static inline double
vector_norm(Py_ssize_t n,double * vec,double max,int found_nan)2513 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2514 {
2515     double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
2516     DoubleLength pr, sm;
2517     int max_e;
2518     Py_ssize_t i;
2519 
2520     if (Py_IS_INFINITY(max)) {
2521         return max;
2522     }
2523     if (found_nan) {
2524         return Py_NAN;
2525     }
2526     if (max == 0.0 || n <= 1) {
2527         return max;
2528     }
2529     frexp(max, &max_e);
2530     if (max_e < -1023) {
2531         /* When max_e < -1023, ldexp(1.0, -max_e) would overflow. */
2532         for (i=0 ; i < n ; i++) {
2533             vec[i] /= DBL_MIN;          // convert subnormals to normals
2534         }
2535         return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
2536     }
2537     scale = ldexp(1.0, -max_e);
2538     assert(max * scale >= 0.5);
2539     assert(max * scale < 1.0);
2540     for (i=0 ; i < n ; i++) {
2541         x = vec[i];
2542         assert(Py_IS_FINITE(x) && fabs(x) <= max);
2543         x *= scale;                     // lossless scaling
2544         assert(fabs(x) < 1.0);
2545         pr = dl_mul(x, x);              // lossless squaring
2546         assert(pr.hi <= 1.0);
2547         sm = dl_fast_sum(csum, pr.hi);  // lossless addition
2548         csum = sm.hi;
2549         frac1 += pr.lo;                 // lossy addition
2550         frac2 += sm.lo;                 // lossy addition
2551     }
2552     h = sqrt(csum - 1.0 + (frac1 + frac2));
2553     pr = dl_mul(-h, h);
2554     sm = dl_fast_sum(csum, pr.hi);
2555     csum = sm.hi;
2556     frac1 += pr.lo;
2557     frac2 += sm.lo;
2558     x = csum - 1.0 + (frac1 + frac2);
2559     h +=  x / (2.0 * h);                 // differential correction
2560     return h / scale;
2561 }
2562 
2563 #define NUM_STACK_ELEMS 16
2564 
2565 /*[clinic input]
2566 math.dist
2567 
2568     p: object
2569     q: object
2570     /
2571 
2572 Return the Euclidean distance between two points p and q.
2573 
2574 The points should be specified as sequences (or iterables) of
2575 coordinates.  Both inputs must have the same dimension.
2576 
2577 Roughly equivalent to:
2578     sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2579 [clinic start generated code]*/
2580 
2581 static PyObject *
math_dist_impl(PyObject * module,PyObject * p,PyObject * q)2582 math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2583 /*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
2584 {
2585     PyObject *item;
2586     double max = 0.0;
2587     double x, px, qx, result;
2588     Py_ssize_t i, m, n;
2589     int found_nan = 0, p_allocated = 0, q_allocated = 0;
2590     double diffs_on_stack[NUM_STACK_ELEMS];
2591     double *diffs = diffs_on_stack;
2592 
2593     if (!PyTuple_Check(p)) {
2594         p = PySequence_Tuple(p);
2595         if (p == NULL) {
2596             return NULL;
2597         }
2598         p_allocated = 1;
2599     }
2600     if (!PyTuple_Check(q)) {
2601         q = PySequence_Tuple(q);
2602         if (q == NULL) {
2603             if (p_allocated) {
2604                 Py_DECREF(p);
2605             }
2606             return NULL;
2607         }
2608         q_allocated = 1;
2609     }
2610 
2611     m = PyTuple_GET_SIZE(p);
2612     n = PyTuple_GET_SIZE(q);
2613     if (m != n) {
2614         PyErr_SetString(PyExc_ValueError,
2615                         "both points must have the same number of dimensions");
2616         goto error_exit;
2617     }
2618     if (n > NUM_STACK_ELEMS) {
2619         diffs = (double *) PyMem_Malloc(n * sizeof(double));
2620         if (diffs == NULL) {
2621             PyErr_NoMemory();
2622             goto error_exit;
2623         }
2624     }
2625     for (i=0 ; i<n ; i++) {
2626         item = PyTuple_GET_ITEM(p, i);
2627         ASSIGN_DOUBLE(px, item, error_exit);
2628         item = PyTuple_GET_ITEM(q, i);
2629         ASSIGN_DOUBLE(qx, item, error_exit);
2630         x = fabs(px - qx);
2631         diffs[i] = x;
2632         found_nan |= Py_IS_NAN(x);
2633         if (x > max) {
2634             max = x;
2635         }
2636     }
2637     result = vector_norm(n, diffs, max, found_nan);
2638     if (diffs != diffs_on_stack) {
2639         PyMem_Free(diffs);
2640     }
2641     if (p_allocated) {
2642         Py_DECREF(p);
2643     }
2644     if (q_allocated) {
2645         Py_DECREF(q);
2646     }
2647     return PyFloat_FromDouble(result);
2648 
2649   error_exit:
2650     if (diffs != diffs_on_stack) {
2651         PyMem_Free(diffs);
2652     }
2653     if (p_allocated) {
2654         Py_DECREF(p);
2655     }
2656     if (q_allocated) {
2657         Py_DECREF(q);
2658     }
2659     return NULL;
2660 }
2661 
2662 /* AC: cannot convert yet, waiting for *args support */
2663 static PyObject *
math_hypot(PyObject * self,PyObject * const * args,Py_ssize_t nargs)2664 math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
2665 {
2666     Py_ssize_t i;
2667     PyObject *item;
2668     double max = 0.0;
2669     double x, result;
2670     int found_nan = 0;
2671     double coord_on_stack[NUM_STACK_ELEMS];
2672     double *coordinates = coord_on_stack;
2673 
2674     if (nargs > NUM_STACK_ELEMS) {
2675         coordinates = (double *) PyMem_Malloc(nargs * sizeof(double));
2676         if (coordinates == NULL) {
2677             return PyErr_NoMemory();
2678         }
2679     }
2680     for (i = 0; i < nargs; i++) {
2681         item = args[i];
2682         ASSIGN_DOUBLE(x, item, error_exit);
2683         x = fabs(x);
2684         coordinates[i] = x;
2685         found_nan |= Py_IS_NAN(x);
2686         if (x > max) {
2687             max = x;
2688         }
2689     }
2690     result = vector_norm(nargs, coordinates, max, found_nan);
2691     if (coordinates != coord_on_stack) {
2692         PyMem_Free(coordinates);
2693     }
2694     return PyFloat_FromDouble(result);
2695 
2696   error_exit:
2697     if (coordinates != coord_on_stack) {
2698         PyMem_Free(coordinates);
2699     }
2700     return NULL;
2701 }
2702 
2703 #undef NUM_STACK_ELEMS
2704 
2705 PyDoc_STRVAR(math_hypot_doc,
2706              "hypot(*coordinates) -> value\n\n\
2707 Multidimensional Euclidean distance from the origin to a point.\n\
2708 \n\
2709 Roughly equivalent to:\n\
2710     sqrt(sum(x**2 for x in coordinates))\n\
2711 \n\
2712 For a two dimensional point (x, y), gives the hypotenuse\n\
2713 using the Pythagorean theorem:  sqrt(x*x + y*y).\n\
2714 \n\
2715 For example, the hypotenuse of a 3/4/5 right triangle is:\n\
2716 \n\
2717     >>> hypot(3.0, 4.0)\n\
2718     5.0\n\
2719 ");
2720 
2721 /** sumprod() ***************************************************************/
2722 
2723 /* Forward declaration */
2724 static inline int _check_long_mult_overflow(long a, long b);
2725 
2726 static inline bool
long_add_would_overflow(long a,long b)2727 long_add_would_overflow(long a, long b)
2728 {
2729     return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
2730 }
2731 
2732 /*[clinic input]
2733 math.sumprod
2734 
2735     p: object
2736     q: object
2737     /
2738 
2739 Return the sum of products of values from two iterables p and q.
2740 
2741 Roughly equivalent to:
2742 
2743     sum(itertools.starmap(operator.mul, zip(p, q, strict=True)))
2744 
2745 For float and mixed int/float inputs, the intermediate products
2746 and sums are computed with extended precision.
2747 [clinic start generated code]*/
2748 
2749 static PyObject *
math_sumprod_impl(PyObject * module,PyObject * p,PyObject * q)2750 math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
2751 /*[clinic end generated code: output=6722dbfe60664554 input=82be54fe26f87e30]*/
2752 {
2753     PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL;
2754     PyObject *p_it, *q_it, *total;
2755     iternextfunc p_next, q_next;
2756     bool p_stopped = false, q_stopped = false;
2757     bool int_path_enabled = true, int_total_in_use = false;
2758     bool flt_path_enabled = true, flt_total_in_use = false;
2759     long int_total = 0;
2760     TripleLength flt_total = tl_zero;
2761 
2762     p_it = PyObject_GetIter(p);
2763     if (p_it == NULL) {
2764         return NULL;
2765     }
2766     q_it = PyObject_GetIter(q);
2767     if (q_it == NULL) {
2768         Py_DECREF(p_it);
2769         return NULL;
2770     }
2771     total = PyLong_FromLong(0);
2772     if (total == NULL) {
2773         Py_DECREF(p_it);
2774         Py_DECREF(q_it);
2775         return NULL;
2776     }
2777     p_next = *Py_TYPE(p_it)->tp_iternext;
2778     q_next = *Py_TYPE(q_it)->tp_iternext;
2779     while (1) {
2780         bool finished;
2781 
2782         assert (p_i == NULL);
2783         assert (q_i == NULL);
2784         assert (term_i == NULL);
2785         assert (new_total == NULL);
2786 
2787         assert (p_it != NULL);
2788         assert (q_it != NULL);
2789         assert (total != NULL);
2790 
2791         p_i = p_next(p_it);
2792         if (p_i == NULL) {
2793             if (PyErr_Occurred()) {
2794                 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2795                     goto err_exit;
2796                 }
2797                 PyErr_Clear();
2798             }
2799             p_stopped = true;
2800         }
2801         q_i = q_next(q_it);
2802         if (q_i == NULL) {
2803             if (PyErr_Occurred()) {
2804                 if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
2805                     goto err_exit;
2806                 }
2807                 PyErr_Clear();
2808             }
2809             q_stopped = true;
2810         }
2811         if (p_stopped != q_stopped) {
2812             PyErr_Format(PyExc_ValueError, "Inputs are not the same length");
2813             goto err_exit;
2814         }
2815         finished = p_stopped & q_stopped;
2816 
2817         if (int_path_enabled) {
2818 
2819             if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) {
2820                 int overflow;
2821                 long int_p, int_q, int_prod;
2822 
2823                 int_p = PyLong_AsLongAndOverflow(p_i, &overflow);
2824                 if (overflow) {
2825                     goto finalize_int_path;
2826                 }
2827                 int_q = PyLong_AsLongAndOverflow(q_i, &overflow);
2828                 if (overflow) {
2829                     goto finalize_int_path;
2830                 }
2831                 if (_check_long_mult_overflow(int_p, int_q)) {
2832                     goto finalize_int_path;
2833                 }
2834                 int_prod = int_p * int_q;
2835                 if (long_add_would_overflow(int_total, int_prod)) {
2836                     goto finalize_int_path;
2837                 }
2838                 int_total += int_prod;
2839                 int_total_in_use = true;
2840                 Py_CLEAR(p_i);
2841                 Py_CLEAR(q_i);
2842                 continue;
2843             }
2844 
2845           finalize_int_path:
2846             // We're finished, overflowed, or have a non-int
2847             int_path_enabled = false;
2848             if (int_total_in_use) {
2849                 term_i = PyLong_FromLong(int_total);
2850                 if (term_i == NULL) {
2851                     goto err_exit;
2852                 }
2853                 new_total = PyNumber_Add(total, term_i);
2854                 if (new_total == NULL) {
2855                     goto err_exit;
2856                 }
2857                 Py_SETREF(total, new_total);
2858                 new_total = NULL;
2859                 Py_CLEAR(term_i);
2860                 int_total = 0;   // An ounce of prevention, ...
2861                 int_total_in_use = false;
2862             }
2863         }
2864 
2865         if (flt_path_enabled) {
2866 
2867             if (!finished) {
2868                 double flt_p, flt_q;
2869                 bool p_type_float = PyFloat_CheckExact(p_i);
2870                 bool q_type_float = PyFloat_CheckExact(q_i);
2871                 if (p_type_float && q_type_float) {
2872                     flt_p = PyFloat_AS_DOUBLE(p_i);
2873                     flt_q = PyFloat_AS_DOUBLE(q_i);
2874                 } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) {
2875                     /* We care about float/int pairs and int/float pairs because
2876                        they arise naturally in several use cases such as price
2877                        times quantity, measurements with integer weights, or
2878                        data selected by a vector of bools. */
2879                     flt_p = PyFloat_AS_DOUBLE(p_i);
2880                     flt_q = PyLong_AsDouble(q_i);
2881                     if (flt_q == -1.0 && PyErr_Occurred()) {
2882                         PyErr_Clear();
2883                         goto finalize_flt_path;
2884                     }
2885                 } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(p_i))) {
2886                     flt_q = PyFloat_AS_DOUBLE(q_i);
2887                     flt_p = PyLong_AsDouble(p_i);
2888                     if (flt_p == -1.0 && PyErr_Occurred()) {
2889                         PyErr_Clear();
2890                         goto finalize_flt_path;
2891                     }
2892                 } else {
2893                     goto finalize_flt_path;
2894                 }
2895                 TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
2896                 if (isfinite(new_flt_total.hi)) {
2897                     flt_total = new_flt_total;
2898                     flt_total_in_use = true;
2899                     Py_CLEAR(p_i);
2900                     Py_CLEAR(q_i);
2901                     continue;
2902                 }
2903             }
2904 
2905           finalize_flt_path:
2906             // We're finished, overflowed, have a non-float, or got a non-finite value
2907             flt_path_enabled = false;
2908             if (flt_total_in_use) {
2909                 term_i = PyFloat_FromDouble(tl_to_d(flt_total));
2910                 if (term_i == NULL) {
2911                     goto err_exit;
2912                 }
2913                 new_total = PyNumber_Add(total, term_i);
2914                 if (new_total == NULL) {
2915                     goto err_exit;
2916                 }
2917                 Py_SETREF(total, new_total);
2918                 new_total = NULL;
2919                 Py_CLEAR(term_i);
2920                 flt_total = tl_zero;
2921                 flt_total_in_use = false;
2922             }
2923         }
2924 
2925         assert(!int_total_in_use);
2926         assert(!flt_total_in_use);
2927         if (finished) {
2928             goto normal_exit;
2929         }
2930         term_i = PyNumber_Multiply(p_i, q_i);
2931         if (term_i == NULL) {
2932             goto err_exit;
2933         }
2934         new_total = PyNumber_Add(total, term_i);
2935         if (new_total == NULL) {
2936             goto err_exit;
2937         }
2938         Py_SETREF(total, new_total);
2939         new_total = NULL;
2940         Py_CLEAR(p_i);
2941         Py_CLEAR(q_i);
2942         Py_CLEAR(term_i);
2943     }
2944 
2945  normal_exit:
2946     Py_DECREF(p_it);
2947     Py_DECREF(q_it);
2948     return total;
2949 
2950  err_exit:
2951     Py_DECREF(p_it);
2952     Py_DECREF(q_it);
2953     Py_DECREF(total);
2954     Py_XDECREF(p_i);
2955     Py_XDECREF(q_i);
2956     Py_XDECREF(term_i);
2957     Py_XDECREF(new_total);
2958     return NULL;
2959 }
2960 
2961 
2962 /* pow can't use math_2, but needs its own wrapper: the problem is
2963    that an infinite result can arise either as a result of overflow
2964    (in which case OverflowError should be raised) or as a result of
2965    e.g. 0.**-5. (for which ValueError needs to be raised.)
2966 */
2967 
2968 /*[clinic input]
2969 math.pow
2970 
2971     x: double
2972     y: double
2973     /
2974 
2975 Return x**y (x to the power of y).
2976 [clinic start generated code]*/
2977 
2978 static PyObject *
math_pow_impl(PyObject * module,double x,double y)2979 math_pow_impl(PyObject *module, double x, double y)
2980 /*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
2981 {
2982     double r;
2983     int odd_y;
2984 
2985     /* deal directly with IEEE specials, to cope with problems on various
2986        platforms whose semantics don't exactly match C99 */
2987     r = 0.; /* silence compiler warning */
2988     if (!Py_IS_FINITE(x) || !Py_IS_FINITE(y)) {
2989         errno = 0;
2990         if (Py_IS_NAN(x))
2991             r = y == 0. ? 1. : x; /* NaN**0 = 1 */
2992         else if (Py_IS_NAN(y))
2993             r = x == 1. ? 1. : y; /* 1**NaN = 1 */
2994         else if (Py_IS_INFINITY(x)) {
2995             odd_y = Py_IS_FINITE(y) && fmod(fabs(y), 2.0) == 1.0;
2996             if (y > 0.)
2997                 r = odd_y ? x : fabs(x);
2998             else if (y == 0.)
2999                 r = 1.;
3000             else /* y < 0. */
3001                 r = odd_y ? copysign(0., x) : 0.;
3002         }
3003         else {
3004             assert(Py_IS_INFINITY(y));
3005             if (fabs(x) == 1.0)
3006                 r = 1.;
3007             else if (y > 0. && fabs(x) > 1.0)
3008                 r = y;
3009             else if (y < 0. && fabs(x) < 1.0) {
3010                 r = -y; /* result is +inf */
3011             }
3012             else
3013                 r = 0.;
3014         }
3015     }
3016     else {
3017         /* let libm handle finite**finite */
3018         errno = 0;
3019         r = pow(x, y);
3020         /* a NaN result should arise only from (-ve)**(finite
3021            non-integer); in this case we want to raise ValueError. */
3022         if (!Py_IS_FINITE(r)) {
3023             if (Py_IS_NAN(r)) {
3024                 errno = EDOM;
3025             }
3026             /*
3027                an infinite result here arises either from:
3028                (A) (+/-0.)**negative (-> divide-by-zero)
3029                (B) overflow of x**y with x and y finite
3030             */
3031             else if (Py_IS_INFINITY(r)) {
3032                 if (x == 0.)
3033                     errno = EDOM;
3034                 else
3035                     errno = ERANGE;
3036             }
3037         }
3038     }
3039 
3040     if (errno && is_error(r))
3041         return NULL;
3042     else
3043         return PyFloat_FromDouble(r);
3044 }
3045 
3046 
3047 static const double degToRad = Py_MATH_PI / 180.0;
3048 static const double radToDeg = 180.0 / Py_MATH_PI;
3049 
3050 /*[clinic input]
3051 math.degrees
3052 
3053     x: double
3054     /
3055 
3056 Convert angle x from radians to degrees.
3057 [clinic start generated code]*/
3058 
3059 static PyObject *
math_degrees_impl(PyObject * module,double x)3060 math_degrees_impl(PyObject *module, double x)
3061 /*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
3062 {
3063     return PyFloat_FromDouble(x * radToDeg);
3064 }
3065 
3066 
3067 /*[clinic input]
3068 math.radians
3069 
3070     x: double
3071     /
3072 
3073 Convert angle x from degrees to radians.
3074 [clinic start generated code]*/
3075 
3076 static PyObject *
math_radians_impl(PyObject * module,double x)3077 math_radians_impl(PyObject *module, double x)
3078 /*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
3079 {
3080     return PyFloat_FromDouble(x * degToRad);
3081 }
3082 
3083 
3084 /*[clinic input]
3085 math.isfinite
3086 
3087     x: double
3088     /
3089 
3090 Return True if x is neither an infinity nor a NaN, and False otherwise.
3091 [clinic start generated code]*/
3092 
3093 static PyObject *
math_isfinite_impl(PyObject * module,double x)3094 math_isfinite_impl(PyObject *module, double x)
3095 /*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
3096 {
3097     return PyBool_FromLong((long)Py_IS_FINITE(x));
3098 }
3099 
3100 
3101 /*[clinic input]
3102 math.isnan
3103 
3104     x: double
3105     /
3106 
3107 Return True if x is a NaN (not a number), and False otherwise.
3108 [clinic start generated code]*/
3109 
3110 static PyObject *
math_isnan_impl(PyObject * module,double x)3111 math_isnan_impl(PyObject *module, double x)
3112 /*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
3113 {
3114     return PyBool_FromLong((long)Py_IS_NAN(x));
3115 }
3116 
3117 
3118 /*[clinic input]
3119 math.isinf
3120 
3121     x: double
3122     /
3123 
3124 Return True if x is a positive or negative infinity, and False otherwise.
3125 [clinic start generated code]*/
3126 
3127 static PyObject *
math_isinf_impl(PyObject * module,double x)3128 math_isinf_impl(PyObject *module, double x)
3129 /*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
3130 {
3131     return PyBool_FromLong((long)Py_IS_INFINITY(x));
3132 }
3133 
3134 
3135 /*[clinic input]
3136 math.isclose -> bool
3137 
3138     a: double
3139     b: double
3140     *
3141     rel_tol: double = 1e-09
3142         maximum difference for being considered "close", relative to the
3143         magnitude of the input values
3144     abs_tol: double = 0.0
3145         maximum difference for being considered "close", regardless of the
3146         magnitude of the input values
3147 
3148 Determine whether two floating-point numbers are close in value.
3149 
3150 Return True if a is close in value to b, and False otherwise.
3151 
3152 For the values to be considered close, the difference between them
3153 must be smaller than at least one of the tolerances.
3154 
3155 -inf, inf and NaN behave similarly to the IEEE 754 Standard.  That
3156 is, NaN is not close to anything, even itself.  inf and -inf are
3157 only close to themselves.
3158 [clinic start generated code]*/
3159 
3160 static int
math_isclose_impl(PyObject * module,double a,double b,double rel_tol,double abs_tol)3161 math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
3162                   double abs_tol)
3163 /*[clinic end generated code: output=b73070207511952d input=12d41764468bfdb8]*/
3164 {
3165     double diff = 0.0;
3166 
3167     /* sanity check on the inputs */
3168     if (rel_tol < 0.0 || abs_tol < 0.0 ) {
3169         PyErr_SetString(PyExc_ValueError,
3170                         "tolerances must be non-negative");
3171         return -1;
3172     }
3173 
3174     if ( a == b ) {
3175         /* short circuit exact equality -- needed to catch two infinities of
3176            the same sign. And perhaps speeds things up a bit sometimes.
3177         */
3178         return 1;
3179     }
3180 
3181     /* This catches the case of two infinities of opposite sign, or
3182        one infinity and one finite number. Two infinities of opposite
3183        sign would otherwise have an infinite relative tolerance.
3184        Two infinities of the same sign are caught by the equality check
3185        above.
3186     */
3187 
3188     if (Py_IS_INFINITY(a) || Py_IS_INFINITY(b)) {
3189         return 0;
3190     }
3191 
3192     /* now do the regular computation
3193        this is essentially the "weak" test from the Boost library
3194     */
3195 
3196     diff = fabs(b - a);
3197 
3198     return (((diff <= fabs(rel_tol * b)) ||
3199              (diff <= fabs(rel_tol * a))) ||
3200             (diff <= abs_tol));
3201 }
3202 
3203 static inline int
_check_long_mult_overflow(long a,long b)3204 _check_long_mult_overflow(long a, long b) {
3205 
3206     /* From Python2's int_mul code:
3207 
3208     Integer overflow checking for * is painful:  Python tried a couple ways, but
3209     they didn't work on all platforms, or failed in endcases (a product of
3210     -sys.maxint-1 has been a particular pain).
3211 
3212     Here's another way:
3213 
3214     The native long product x*y is either exactly right or *way* off, being
3215     just the last n bits of the true product, where n is the number of bits
3216     in a long (the delivered product is the true product plus i*2**n for
3217     some integer i).
3218 
3219     The native double product (double)x * (double)y is subject to three
3220     rounding errors:  on a sizeof(long)==8 box, each cast to double can lose
3221     info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3222     But, unlike the native long product, it's not in *range* trouble:  even
3223     if sizeof(long)==32 (256-bit longs), the product easily fits in the
3224     dynamic range of a double.  So the leading 50 (or so) bits of the double
3225     product are correct.
3226 
3227     We check these two ways against each other, and declare victory if they're
3228     approximately the same.  Else, because the native long product is the only
3229     one that can lose catastrophic amounts of information, it's the native long
3230     product that must have overflowed.
3231 
3232     */
3233 
3234     long longprod = (long)((unsigned long)a * b);
3235     double doubleprod = (double)a * (double)b;
3236     double doubled_longprod = (double)longprod;
3237 
3238     if (doubled_longprod == doubleprod) {
3239         return 0;
3240     }
3241 
3242     const double diff = doubled_longprod - doubleprod;
3243     const double absdiff = diff >= 0.0 ? diff : -diff;
3244     const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3245 
3246     if (32.0 * absdiff <= absprod) {
3247         return 0;
3248     }
3249 
3250     return 1;
3251 }
3252 
3253 /*[clinic input]
3254 math.prod
3255 
3256     iterable: object
3257     /
3258     *
3259     start: object(c_default="NULL") = 1
3260 
3261 Calculate the product of all the elements in the input iterable.
3262 
3263 The default start value for the product is 1.
3264 
3265 When the iterable is empty, return the start value.  This function is
3266 intended specifically for use with numeric values and may reject
3267 non-numeric types.
3268 [clinic start generated code]*/
3269 
3270 static PyObject *
math_prod_impl(PyObject * module,PyObject * iterable,PyObject * start)3271 math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3272 /*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3273 {
3274     PyObject *result = start;
3275     PyObject *temp, *item, *iter;
3276 
3277     iter = PyObject_GetIter(iterable);
3278     if (iter == NULL) {
3279         return NULL;
3280     }
3281 
3282     if (result == NULL) {
3283         result = _PyLong_GetOne();
3284     }
3285     Py_INCREF(result);
3286 #ifndef SLOW_PROD
3287     /* Fast paths for integers keeping temporary products in C.
3288      * Assumes all inputs are the same type.
3289      * If the assumption fails, default to use PyObjects instead.
3290     */
3291     if (PyLong_CheckExact(result)) {
3292         int overflow;
3293         long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3294         /* If this already overflowed, don't even enter the loop. */
3295         if (overflow == 0) {
3296             Py_SETREF(result, NULL);
3297         }
3298         /* Loop over all the items in the iterable until we finish, we overflow
3299          * or we found a non integer element */
3300         while (result == NULL) {
3301             item = PyIter_Next(iter);
3302             if (item == NULL) {
3303                 Py_DECREF(iter);
3304                 if (PyErr_Occurred()) {
3305                     return NULL;
3306                 }
3307                 return PyLong_FromLong(i_result);
3308             }
3309             if (PyLong_CheckExact(item)) {
3310                 long b = PyLong_AsLongAndOverflow(item, &overflow);
3311                 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3312                     long x = i_result * b;
3313                     i_result = x;
3314                     Py_DECREF(item);
3315                     continue;
3316                 }
3317             }
3318             /* Either overflowed or is not an int.
3319              * Restore real objects and process normally */
3320             result = PyLong_FromLong(i_result);
3321             if (result == NULL) {
3322                 Py_DECREF(item);
3323                 Py_DECREF(iter);
3324                 return NULL;
3325             }
3326             temp = PyNumber_Multiply(result, item);
3327             Py_DECREF(result);
3328             Py_DECREF(item);
3329             result = temp;
3330             if (result == NULL) {
3331                 Py_DECREF(iter);
3332                 return NULL;
3333             }
3334         }
3335     }
3336 
3337     /* Fast paths for floats keeping temporary products in C.
3338      * Assumes all inputs are the same type.
3339      * If the assumption fails, default to use PyObjects instead.
3340     */
3341     if (PyFloat_CheckExact(result)) {
3342         double f_result = PyFloat_AS_DOUBLE(result);
3343         Py_SETREF(result, NULL);
3344         while(result == NULL) {
3345             item = PyIter_Next(iter);
3346             if (item == NULL) {
3347                 Py_DECREF(iter);
3348                 if (PyErr_Occurred()) {
3349                     return NULL;
3350                 }
3351                 return PyFloat_FromDouble(f_result);
3352             }
3353             if (PyFloat_CheckExact(item)) {
3354                 f_result *= PyFloat_AS_DOUBLE(item);
3355                 Py_DECREF(item);
3356                 continue;
3357             }
3358             if (PyLong_CheckExact(item)) {
3359                 long value;
3360                 int overflow;
3361                 value = PyLong_AsLongAndOverflow(item, &overflow);
3362                 if (!overflow) {
3363                     f_result *= (double)value;
3364                     Py_DECREF(item);
3365                     continue;
3366                 }
3367             }
3368             result = PyFloat_FromDouble(f_result);
3369             if (result == NULL) {
3370                 Py_DECREF(item);
3371                 Py_DECREF(iter);
3372                 return NULL;
3373             }
3374             temp = PyNumber_Multiply(result, item);
3375             Py_DECREF(result);
3376             Py_DECREF(item);
3377             result = temp;
3378             if (result == NULL) {
3379                 Py_DECREF(iter);
3380                 return NULL;
3381             }
3382         }
3383     }
3384 #endif
3385     /* Consume rest of the iterable (if any) that could not be handled
3386      * by specialized functions above.*/
3387     for(;;) {
3388         item = PyIter_Next(iter);
3389         if (item == NULL) {
3390             /* error, or end-of-sequence */
3391             if (PyErr_Occurred()) {
3392                 Py_SETREF(result, NULL);
3393             }
3394             break;
3395         }
3396         temp = PyNumber_Multiply(result, item);
3397         Py_DECREF(result);
3398         Py_DECREF(item);
3399         result = temp;
3400         if (result == NULL)
3401             break;
3402     }
3403     Py_DECREF(iter);
3404     return result;
3405 }
3406 
3407 
3408 /* least significant 64 bits of the odd part of factorial(n), for n in range(128).
3409 
3410 Python code to generate the values:
3411 
3412     import math
3413 
3414     for n in range(128):
3415         fac = math.factorial(n)
3416         fac_odd_part = fac // (fac & -fac)
3417         reduced_fac_odd_part = fac_odd_part % (2**64)
3418         print(f"{reduced_fac_odd_part:#018x}u")
3419 */
3420 static const uint64_t reduced_factorial_odd_part[] = {
3421     0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
3422     0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
3423     0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
3424     0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
3425     0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
3426     0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
3427     0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
3428     0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
3429     0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
3430     0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
3431     0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
3432     0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
3433     0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
3434     0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
3435     0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
3436     0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
3437     0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
3438     0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
3439     0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
3440     0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
3441     0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
3442     0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
3443     0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
3444     0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
3445     0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
3446     0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
3447     0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
3448     0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
3449     0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
3450     0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
3451     0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
3452     0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
3453 };
3454 
3455 /* inverses of reduced_factorial_odd_part values modulo 2**64.
3456 
3457 Python code to generate the values:
3458 
3459     import math
3460 
3461     for n in range(128):
3462         fac = math.factorial(n)
3463         fac_odd_part = fac // (fac & -fac)
3464         inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
3465         print(f"{inverted_fac_odd_part:#018x}u")
3466 */
3467 static const uint64_t inverted_factorial_odd_part[] = {
3468     0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
3469     0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
3470     0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
3471     0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
3472     0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
3473     0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
3474     0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
3475     0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
3476     0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
3477     0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
3478     0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
3479     0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
3480     0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
3481     0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
3482     0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
3483     0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
3484     0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
3485     0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
3486     0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
3487     0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
3488     0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
3489     0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
3490     0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
3491     0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
3492     0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
3493     0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
3494     0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
3495     0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
3496     0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
3497     0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
3498     0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
3499     0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
3500 };
3501 
3502 /* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
3503 
3504 Python code to generate the values:
3505 
3506 import math
3507 
3508 for n in range(128):
3509     fac = math.factorial(n)
3510     fac_trailing_zeros = (fac & -fac).bit_length() - 1
3511     print(fac_trailing_zeros)
3512 */
3513 
3514 static const uint8_t factorial_trailing_zeros[] = {
3515      0,  0,  1,  1,  3,  3,  4,  4,  7,  7,  8,  8, 10, 10, 11, 11,  //  0-15
3516     15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26,  // 16-31
3517     31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42,  // 32-47
3518     46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57,  // 48-63
3519     63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74,  // 64-79
3520     78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89,  // 80-95
3521     94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105,  // 96-111
3522     109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120,  // 112-127
3523 };
3524 
3525 /* Number of permutations and combinations.
3526  * P(n, k) = n! / (n-k)!
3527  * C(n, k) = P(n, k) / k!
3528  */
3529 
3530 /* Calculate C(n, k) for n in the 63-bit range. */
3531 static PyObject *
perm_comb_small(unsigned long long n,unsigned long long k,int iscomb)3532 perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
3533 {
3534     assert(k != 0);
3535 
3536     /* For small enough n and k the result fits in the 64-bit range and can
3537      * be calculated without allocating intermediate PyLong objects. */
3538     if (iscomb) {
3539         /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
3540          * fits into a uint64_t.  Exclude k = 1, because the second fast
3541          * path is faster for this case.*/
3542         static const unsigned char fast_comb_limits1[] = {
3543             0, 0, 127, 127, 127, 127, 127, 127,  // 0-7
3544             127, 127, 127, 127, 127, 127, 127, 127,  // 8-15
3545             116, 105, 97, 91, 86, 82, 78, 76,  // 16-23
3546             74, 72, 71, 70, 69, 68, 68, 67,  // 24-31
3547             67, 67, 67,  // 32-34
3548         };
3549         if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
3550             /*
3551                 comb(n, k) fits into a uint64_t. We compute it as
3552 
3553                     comb_odd_part << shift
3554 
3555                 where 2**shift is the largest power of two dividing comb(n, k)
3556                 and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
3557                 calculated efficiently via arithmetic modulo 2**64, using three
3558                 lookups and two uint64_t multiplications.
3559             */
3560             uint64_t comb_odd_part = reduced_factorial_odd_part[n]
3561                                    * inverted_factorial_odd_part[k]
3562                                    * inverted_factorial_odd_part[n - k];
3563             int shift = factorial_trailing_zeros[n]
3564                       - factorial_trailing_zeros[k]
3565                       - factorial_trailing_zeros[n - k];
3566             return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
3567         }
3568 
3569         /* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
3570          * fits into a long long (which is at least 64 bit).  Only contains
3571          * items larger than in fast_comb_limits1. */
3572         static const unsigned long long fast_comb_limits2[] = {
3573             0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449,  // 0-7
3574             746, 453, 308, 227, 178, 147,  // 8-13
3575         };
3576         if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
3577             /* C(n, k) = C(n, k-1) * (n-k+1) / k */
3578             unsigned long long result = n;
3579             for (unsigned long long i = 1; i < k;) {
3580                 result *= --n;
3581                 result /= ++i;
3582             }
3583             return PyLong_FromUnsignedLongLong(result);
3584         }
3585     }
3586     else {
3587         /* Maps k to the maximal n so that k <= n and P(n, k)
3588          * fits into a long long (which is at least 64 bit). */
3589         static const unsigned long long fast_perm_limits[] = {
3590             0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568,  // 0-7
3591             259, 142, 88, 61, 45, 36, 30, 26,  // 8-15
3592             24, 22, 21, 20, 20,  // 16-20
3593         };
3594         if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
3595             if (n <= 127) {
3596                 /* P(n, k) fits into a uint64_t. */
3597                 uint64_t perm_odd_part = reduced_factorial_odd_part[n]
3598                                        * inverted_factorial_odd_part[n - k];
3599                 int shift = factorial_trailing_zeros[n]
3600                           - factorial_trailing_zeros[n - k];
3601                 return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
3602             }
3603 
3604             /* P(n, k) = P(n, k-1) * (n-k+1) */
3605             unsigned long long result = n;
3606             for (unsigned long long i = 1; i < k;) {
3607                 result *= --n;
3608                 ++i;
3609             }
3610             return PyLong_FromUnsignedLongLong(result);
3611         }
3612     }
3613 
3614     /* For larger n use recursive formulas:
3615      *
3616      *   P(n, k) = P(n, j) * P(n-j, k-j)
3617      *   C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
3618      */
3619     unsigned long long j = k / 2;
3620     PyObject *a, *b;
3621     a = perm_comb_small(n, j, iscomb);
3622     if (a == NULL) {
3623         return NULL;
3624     }
3625     b = perm_comb_small(n - j, k - j, iscomb);
3626     if (b == NULL) {
3627         goto error;
3628     }
3629     Py_SETREF(a, PyNumber_Multiply(a, b));
3630     Py_DECREF(b);
3631     if (iscomb && a != NULL) {
3632         b = perm_comb_small(k, j, 1);
3633         if (b == NULL) {
3634             goto error;
3635         }
3636         Py_SETREF(a, PyNumber_FloorDivide(a, b));
3637         Py_DECREF(b);
3638     }
3639     return a;
3640 
3641 error:
3642     Py_DECREF(a);
3643     return NULL;
3644 }
3645 
3646 /* Calculate P(n, k) or C(n, k) using recursive formulas.
3647  * It is more efficient than sequential multiplication thanks to
3648  * Karatsuba multiplication.
3649  */
3650 static PyObject *
perm_comb(PyObject * n,unsigned long long k,int iscomb)3651 perm_comb(PyObject *n, unsigned long long k, int iscomb)
3652 {
3653     if (k == 0) {
3654         return PyLong_FromLong(1);
3655     }
3656     if (k == 1) {
3657         return Py_NewRef(n);
3658     }
3659 
3660     /* P(n, k) = P(n, j) * P(n-j, k-j) */
3661     /* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
3662     unsigned long long j = k / 2;
3663     PyObject *a, *b;
3664     a = perm_comb(n, j, iscomb);
3665     if (a == NULL) {
3666         return NULL;
3667     }
3668     PyObject *t = PyLong_FromUnsignedLongLong(j);
3669     if (t == NULL) {
3670         goto error;
3671     }
3672     n = PyNumber_Subtract(n, t);
3673     Py_DECREF(t);
3674     if (n == NULL) {
3675         goto error;
3676     }
3677     b = perm_comb(n, k - j, iscomb);
3678     Py_DECREF(n);
3679     if (b == NULL) {
3680         goto error;
3681     }
3682     Py_SETREF(a, PyNumber_Multiply(a, b));
3683     Py_DECREF(b);
3684     if (iscomb && a != NULL) {
3685         b = perm_comb_small(k, j, 1);
3686         if (b == NULL) {
3687             goto error;
3688         }
3689         Py_SETREF(a, PyNumber_FloorDivide(a, b));
3690         Py_DECREF(b);
3691     }
3692     return a;
3693 
3694 error:
3695     Py_DECREF(a);
3696     return NULL;
3697 }
3698 
3699 /*[clinic input]
3700 math.perm
3701 
3702     n: object
3703     k: object = None
3704     /
3705 
3706 Number of ways to choose k items from n items without repetition and with order.
3707 
3708 Evaluates to n! / (n - k)! when k <= n and evaluates
3709 to zero when k > n.
3710 
3711 If k is not specified or is None, then k defaults to n
3712 and the function returns n!.
3713 
3714 Raises TypeError if either of the arguments are not integers.
3715 Raises ValueError if either of the arguments are negative.
3716 [clinic start generated code]*/
3717 
3718 static PyObject *
math_perm_impl(PyObject * module,PyObject * n,PyObject * k)3719 math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
3720 /*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
3721 {
3722     PyObject *result = NULL;
3723     int overflow, cmp;
3724     long long ki, ni;
3725 
3726     if (k == Py_None) {
3727         return math_factorial(module, n);
3728     }
3729     n = PyNumber_Index(n);
3730     if (n == NULL) {
3731         return NULL;
3732     }
3733     k = PyNumber_Index(k);
3734     if (k == NULL) {
3735         Py_DECREF(n);
3736         return NULL;
3737     }
3738     assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3739 
3740     if (_PyLong_IsNegative((PyLongObject *)n)) {
3741         PyErr_SetString(PyExc_ValueError,
3742                         "n must be a non-negative integer");
3743         goto error;
3744     }
3745     if (_PyLong_IsNegative((PyLongObject *)k)) {
3746         PyErr_SetString(PyExc_ValueError,
3747                         "k must be a non-negative integer");
3748         goto error;
3749     }
3750 
3751     cmp = PyObject_RichCompareBool(n, k, Py_LT);
3752     if (cmp != 0) {
3753         if (cmp > 0) {
3754             result = PyLong_FromLong(0);
3755             goto done;
3756         }
3757         goto error;
3758     }
3759 
3760     ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3761     assert(overflow >= 0 && !PyErr_Occurred());
3762     if (overflow > 0) {
3763         PyErr_Format(PyExc_OverflowError,
3764                      "k must not exceed %lld",
3765                      LLONG_MAX);
3766         goto error;
3767     }
3768     assert(ki >= 0);
3769 
3770     ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3771     assert(overflow >= 0 && !PyErr_Occurred());
3772     if (!overflow && ki > 1) {
3773         assert(ni >= 0);
3774         result = perm_comb_small((unsigned long long)ni,
3775                                  (unsigned long long)ki, 0);
3776     }
3777     else {
3778         result = perm_comb(n, (unsigned long long)ki, 0);
3779     }
3780 
3781 done:
3782     Py_DECREF(n);
3783     Py_DECREF(k);
3784     return result;
3785 
3786 error:
3787     Py_DECREF(n);
3788     Py_DECREF(k);
3789     return NULL;
3790 }
3791 
3792 /*[clinic input]
3793 math.comb
3794 
3795     n: object
3796     k: object
3797     /
3798 
3799 Number of ways to choose k items from n items without repetition and without order.
3800 
3801 Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
3802 to zero when k > n.
3803 
3804 Also called the binomial coefficient because it is equivalent
3805 to the coefficient of k-th term in polynomial expansion of the
3806 expression (1 + x)**n.
3807 
3808 Raises TypeError if either of the arguments are not integers.
3809 Raises ValueError if either of the arguments are negative.
3810 
3811 [clinic start generated code]*/
3812 
3813 static PyObject *
math_comb_impl(PyObject * module,PyObject * n,PyObject * k)3814 math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
3815 /*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
3816 {
3817     PyObject *result = NULL, *temp;
3818     int overflow, cmp;
3819     long long ki, ni;
3820 
3821     n = PyNumber_Index(n);
3822     if (n == NULL) {
3823         return NULL;
3824     }
3825     k = PyNumber_Index(k);
3826     if (k == NULL) {
3827         Py_DECREF(n);
3828         return NULL;
3829     }
3830     assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
3831 
3832     if (_PyLong_IsNegative((PyLongObject *)n)) {
3833         PyErr_SetString(PyExc_ValueError,
3834                         "n must be a non-negative integer");
3835         goto error;
3836     }
3837     if (_PyLong_IsNegative((PyLongObject *)k)) {
3838         PyErr_SetString(PyExc_ValueError,
3839                         "k must be a non-negative integer");
3840         goto error;
3841     }
3842 
3843     ni = PyLong_AsLongLongAndOverflow(n, &overflow);
3844     assert(overflow >= 0 && !PyErr_Occurred());
3845     if (!overflow) {
3846         assert(ni >= 0);
3847         ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3848         assert(overflow >= 0 && !PyErr_Occurred());
3849         if (overflow || ki > ni) {
3850             result = PyLong_FromLong(0);
3851             goto done;
3852         }
3853         assert(ki >= 0);
3854 
3855         ki = Py_MIN(ki, ni - ki);
3856         if (ki > 1) {
3857             result = perm_comb_small((unsigned long long)ni,
3858                                      (unsigned long long)ki, 1);
3859             goto done;
3860         }
3861         /* For k == 1 just return the original n in perm_comb(). */
3862     }
3863     else {
3864         /* k = min(k, n - k) */
3865         temp = PyNumber_Subtract(n, k);
3866         if (temp == NULL) {
3867             goto error;
3868         }
3869         assert(PyLong_Check(temp));
3870         if (_PyLong_IsNegative((PyLongObject *)temp)) {
3871             Py_DECREF(temp);
3872             result = PyLong_FromLong(0);
3873             goto done;
3874         }
3875         cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3876         if (cmp > 0) {
3877             Py_SETREF(k, temp);
3878         }
3879         else {
3880             Py_DECREF(temp);
3881             if (cmp < 0) {
3882                 goto error;
3883             }
3884         }
3885 
3886         ki = PyLong_AsLongLongAndOverflow(k, &overflow);
3887         assert(overflow >= 0 && !PyErr_Occurred());
3888         if (overflow) {
3889             PyErr_Format(PyExc_OverflowError,
3890                          "min(n - k, k) must not exceed %lld",
3891                          LLONG_MAX);
3892             goto error;
3893         }
3894         assert(ki >= 0);
3895     }
3896 
3897     result = perm_comb(n, (unsigned long long)ki, 1);
3898 
3899 done:
3900     Py_DECREF(n);
3901     Py_DECREF(k);
3902     return result;
3903 
3904 error:
3905     Py_DECREF(n);
3906     Py_DECREF(k);
3907     return NULL;
3908 }
3909 
3910 
3911 /*[clinic input]
3912 math.nextafter
3913 
3914     x: double
3915     y: double
3916     /
3917     *
3918     steps: object = None
3919 
3920 Return the floating-point value the given number of steps after x towards y.
3921 
3922 If steps is not specified or is None, it defaults to 1.
3923 
3924 Raises a TypeError, if x or y is not a double, or if steps is not an integer.
3925 Raises ValueError if steps is negative.
3926 [clinic start generated code]*/
3927 
3928 static PyObject *
math_nextafter_impl(PyObject * module,double x,double y,PyObject * steps)3929 math_nextafter_impl(PyObject *module, double x, double y, PyObject *steps)
3930 /*[clinic end generated code: output=cc6511f02afc099e input=7f2a5842112af2b4]*/
3931 {
3932 #if defined(_AIX)
3933     if (x == y) {
3934         /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
3935            Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
3936         return PyFloat_FromDouble(y);
3937     }
3938     if (Py_IS_NAN(x)) {
3939         return PyFloat_FromDouble(x);
3940     }
3941     if (Py_IS_NAN(y)) {
3942         return PyFloat_FromDouble(y);
3943     }
3944 #endif
3945     if (steps == Py_None) {
3946         // fast path: we default to one step.
3947         return PyFloat_FromDouble(nextafter(x, y));
3948     }
3949     steps = PyNumber_Index(steps);
3950     if (steps == NULL) {
3951         return NULL;
3952     }
3953     assert(PyLong_CheckExact(steps));
3954     if (_PyLong_IsNegative((PyLongObject *)steps)) {
3955         PyErr_SetString(PyExc_ValueError,
3956                         "steps must be a non-negative integer");
3957         Py_DECREF(steps);
3958         return NULL;
3959     }
3960 
3961     unsigned long long usteps_ull = PyLong_AsUnsignedLongLong(steps);
3962     // Conveniently, uint64_t and double have the same number of bits
3963     // on all the platforms we care about.
3964     // So if an overflow occurs, we can just use UINT64_MAX.
3965     Py_DECREF(steps);
3966     if (usteps_ull >= UINT64_MAX) {
3967         // This branch includes the case where an error occurred, since
3968         // (unsigned long long)(-1) = ULLONG_MAX >= UINT64_MAX. Note that
3969         // usteps_ull can be strictly larger than UINT64_MAX on a machine
3970         // where unsigned long long has width > 64 bits.
3971         if (PyErr_Occurred()) {
3972             if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
3973                 PyErr_Clear();
3974             }
3975             else {
3976                 return NULL;
3977             }
3978         }
3979         usteps_ull = UINT64_MAX;
3980     }
3981     assert(usteps_ull <= UINT64_MAX);
3982     uint64_t usteps = (uint64_t)usteps_ull;
3983 
3984     if (usteps == 0) {
3985         return PyFloat_FromDouble(x);
3986     }
3987     if (Py_IS_NAN(x)) {
3988         return PyFloat_FromDouble(x);
3989     }
3990     if (Py_IS_NAN(y)) {
3991         return PyFloat_FromDouble(y);
3992     }
3993 
3994     // We assume that double and uint64_t have the same endianness.
3995     // This is not guaranteed by the C-standard, but it is true for
3996     // all platforms we care about. (The most likely form of violation
3997     // would be a "mixed-endian" double.)
3998     union pun {double f; uint64_t i;};
3999     union pun ux = {x}, uy = {y};
4000     if (ux.i == uy.i) {
4001         return PyFloat_FromDouble(x);
4002     }
4003 
4004     const uint64_t sign_bit = 1ULL<<63;
4005 
4006     uint64_t ax = ux.i & ~sign_bit;
4007     uint64_t ay = uy.i & ~sign_bit;
4008 
4009     // opposite signs
4010     if (((ux.i ^ uy.i) & sign_bit)) {
4011         // NOTE: ax + ay can never overflow, because their most significant bit
4012         // ain't set.
4013         if (ax + ay <= usteps) {
4014             return PyFloat_FromDouble(uy.f);
4015         // This comparison has to use <, because <= would get +0.0 vs -0.0
4016         // wrong.
4017         } else if (ax < usteps) {
4018             union pun result = {.i = (uy.i & sign_bit) | (usteps - ax)};
4019             return PyFloat_FromDouble(result.f);
4020         } else {
4021             ux.i -= usteps;
4022             return PyFloat_FromDouble(ux.f);
4023         }
4024     // same sign
4025     } else if (ax > ay) {
4026         if (ax - ay >= usteps) {
4027             ux.i -= usteps;
4028             return PyFloat_FromDouble(ux.f);
4029         } else {
4030             return PyFloat_FromDouble(uy.f);
4031         }
4032     } else {
4033         if (ay - ax >= usteps) {
4034             ux.i += usteps;
4035             return PyFloat_FromDouble(ux.f);
4036         } else {
4037             return PyFloat_FromDouble(uy.f);
4038         }
4039     }
4040 }
4041 
4042 
4043 /*[clinic input]
4044 math.ulp -> double
4045 
4046     x: double
4047     /
4048 
4049 Return the value of the least significant bit of the float x.
4050 [clinic start generated code]*/
4051 
4052 static double
math_ulp_impl(PyObject * module,double x)4053 math_ulp_impl(PyObject *module, double x)
4054 /*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
4055 {
4056     if (Py_IS_NAN(x)) {
4057         return x;
4058     }
4059     x = fabs(x);
4060     if (Py_IS_INFINITY(x)) {
4061         return x;
4062     }
4063     double inf = Py_INFINITY;
4064     double x2 = nextafter(x, inf);
4065     if (Py_IS_INFINITY(x2)) {
4066         /* special case: x is the largest positive representable float */
4067         x2 = nextafter(x, -inf);
4068         return x - x2;
4069     }
4070     return x2 - x;
4071 }
4072 
4073 static int
math_exec(PyObject * module)4074 math_exec(PyObject *module)
4075 {
4076 
4077     math_module_state *state = get_math_module_state(module);
4078     state->str___ceil__ = PyUnicode_InternFromString("__ceil__");
4079     if (state->str___ceil__ == NULL) {
4080         return -1;
4081     }
4082     state->str___floor__ = PyUnicode_InternFromString("__floor__");
4083     if (state->str___floor__ == NULL) {
4084         return -1;
4085     }
4086     state->str___trunc__ = PyUnicode_InternFromString("__trunc__");
4087     if (state->str___trunc__ == NULL) {
4088         return -1;
4089     }
4090     if (PyModule_Add(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
4091         return -1;
4092     }
4093     if (PyModule_Add(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
4094         return -1;
4095     }
4096     // 2pi
4097     if (PyModule_Add(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
4098         return -1;
4099     }
4100     if (PyModule_Add(module, "inf", PyFloat_FromDouble(Py_INFINITY)) < 0) {
4101         return -1;
4102     }
4103     if (PyModule_Add(module, "nan", PyFloat_FromDouble(fabs(Py_NAN))) < 0) {
4104         return -1;
4105     }
4106     return 0;
4107 }
4108 
4109 static int
math_clear(PyObject * module)4110 math_clear(PyObject *module)
4111 {
4112     math_module_state *state = get_math_module_state(module);
4113     Py_CLEAR(state->str___ceil__);
4114     Py_CLEAR(state->str___floor__);
4115     Py_CLEAR(state->str___trunc__);
4116     return 0;
4117 }
4118 
4119 static void
math_free(void * module)4120 math_free(void *module)
4121 {
4122     math_clear((PyObject *)module);
4123 }
4124 
4125 static PyMethodDef math_methods[] = {
4126     {"acos",            math_acos,      METH_O,         math_acos_doc},
4127     {"acosh",           math_acosh,     METH_O,         math_acosh_doc},
4128     {"asin",            math_asin,      METH_O,         math_asin_doc},
4129     {"asinh",           math_asinh,     METH_O,         math_asinh_doc},
4130     {"atan",            math_atan,      METH_O,         math_atan_doc},
4131     {"atan2",           _PyCFunction_CAST(math_atan2),     METH_FASTCALL,  math_atan2_doc},
4132     {"atanh",           math_atanh,     METH_O,         math_atanh_doc},
4133     {"cbrt",            math_cbrt,      METH_O,         math_cbrt_doc},
4134     MATH_CEIL_METHODDEF
4135     {"copysign",        _PyCFunction_CAST(math_copysign),  METH_FASTCALL,  math_copysign_doc},
4136     {"cos",             math_cos,       METH_O,         math_cos_doc},
4137     {"cosh",            math_cosh,      METH_O,         math_cosh_doc},
4138     MATH_DEGREES_METHODDEF
4139     MATH_DIST_METHODDEF
4140     {"erf",             math_erf,       METH_O,         math_erf_doc},
4141     {"erfc",            math_erfc,      METH_O,         math_erfc_doc},
4142     {"exp",             math_exp,       METH_O,         math_exp_doc},
4143     {"exp2",            math_exp2,      METH_O,         math_exp2_doc},
4144     {"expm1",           math_expm1,     METH_O,         math_expm1_doc},
4145     {"fabs",            math_fabs,      METH_O,         math_fabs_doc},
4146     MATH_FACTORIAL_METHODDEF
4147     MATH_FLOOR_METHODDEF
4148     MATH_FMA_METHODDEF
4149     MATH_FMOD_METHODDEF
4150     MATH_FREXP_METHODDEF
4151     MATH_FSUM_METHODDEF
4152     {"gamma",           math_gamma,     METH_O,         math_gamma_doc},
4153     {"gcd",             _PyCFunction_CAST(math_gcd),       METH_FASTCALL,  math_gcd_doc},
4154     {"hypot",           _PyCFunction_CAST(math_hypot),     METH_FASTCALL,  math_hypot_doc},
4155     MATH_ISCLOSE_METHODDEF
4156     MATH_ISFINITE_METHODDEF
4157     MATH_ISINF_METHODDEF
4158     MATH_ISNAN_METHODDEF
4159     MATH_ISQRT_METHODDEF
4160     {"lcm",             _PyCFunction_CAST(math_lcm),       METH_FASTCALL,  math_lcm_doc},
4161     MATH_LDEXP_METHODDEF
4162     {"lgamma",          math_lgamma,    METH_O,         math_lgamma_doc},
4163     {"log",             _PyCFunction_CAST(math_log),       METH_FASTCALL,  math_log_doc},
4164     {"log1p",           math_log1p,     METH_O,         math_log1p_doc},
4165     MATH_LOG10_METHODDEF
4166     MATH_LOG2_METHODDEF
4167     MATH_MODF_METHODDEF
4168     MATH_POW_METHODDEF
4169     MATH_RADIANS_METHODDEF
4170     {"remainder",       _PyCFunction_CAST(math_remainder), METH_FASTCALL,  math_remainder_doc},
4171     {"sin",             math_sin,       METH_O,         math_sin_doc},
4172     {"sinh",            math_sinh,      METH_O,         math_sinh_doc},
4173     {"sqrt",            math_sqrt,      METH_O,         math_sqrt_doc},
4174     {"tan",             math_tan,       METH_O,         math_tan_doc},
4175     {"tanh",            math_tanh,      METH_O,         math_tanh_doc},
4176     MATH_SUMPROD_METHODDEF
4177     MATH_TRUNC_METHODDEF
4178     MATH_PROD_METHODDEF
4179     MATH_PERM_METHODDEF
4180     MATH_COMB_METHODDEF
4181     MATH_NEXTAFTER_METHODDEF
4182     MATH_ULP_METHODDEF
4183     {NULL,              NULL}           /* sentinel */
4184 };
4185 
4186 static PyModuleDef_Slot math_slots[] = {
4187     {Py_mod_exec, math_exec},
4188     {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
4189     {Py_mod_gil, Py_MOD_GIL_NOT_USED},
4190     {0, NULL}
4191 };
4192 
4193 PyDoc_STRVAR(module_doc,
4194 "This module provides access to the mathematical functions\n"
4195 "defined by the C standard.");
4196 
4197 static struct PyModuleDef mathmodule = {
4198     PyModuleDef_HEAD_INIT,
4199     .m_name = "math",
4200     .m_doc = module_doc,
4201     .m_size = sizeof(math_module_state),
4202     .m_methods = math_methods,
4203     .m_slots = math_slots,
4204     .m_clear = math_clear,
4205     .m_free = math_free,
4206 };
4207 
4208 PyMODINIT_FUNC
PyInit_math(void)4209 PyInit_math(void)
4210 {
4211     return PyModuleDef_Init(&mathmodule);
4212 }
4213