• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Math module -- standard C math library functions, pi and e */
2 
3 /* Here are some comments from Tim Peters, extracted from the
4    discussion attached to http://bugs.python.org/issue1640.  They
5    describe the general aims of the math module with respect to
6    special values, IEEE-754 floating-point exceptions, and Python
7    exceptions.
8 
9 These are the "spirit of 754" rules:
10 
11 1. If the mathematical result is a real number, but of magnitude too
12 large to approximate by a machine float, overflow is signaled and the
13 result is an infinity (with the appropriate sign).
14 
15 2. If the mathematical result is a real number, but of magnitude too
16 small to approximate by a machine float, underflow is signaled and the
17 result is a zero (with the appropriate sign).
18 
19 3. At a singularity (a value x such that the limit of f(y) as y
20 approaches x exists and is an infinity), "divide by zero" is signaled
21 and the result is an infinity (with the appropriate sign).  This is
22 complicated a little by that the left-side and right-side limits may
23 not be the same; e.g., 1/x approaches +inf or -inf as x approaches 0
24 from the positive or negative directions.  In that specific case, the
25 sign of the zero determines the result of 1/0.
26 
27 4. At a point where a function has no defined result in the extended
28 reals (i.e., the reals plus an infinity or two), invalid operation is
29 signaled and a NaN is returned.
30 
31 And these are what Python has historically /tried/ to do (but not
32 always successfully, as platform libm behavior varies a lot):
33 
34 For #1, raise OverflowError.
35 
36 For #2, return a zero (with the appropriate sign if that happens by
37 accident ;-)).
38 
39 For #3 and #4, raise ValueError.  It may have made sense to raise
40 Python's ZeroDivisionError in #3, but historically that's only been
41 raised for division by zero and mod by zero.
42 
43 */
44 
45 /*
46    In general, on an IEEE-754 platform the aim is to follow the C99
47    standard, including Annex 'F', whenever possible.  Where the
48    standard recommends raising the 'divide-by-zero' or 'invalid'
49    floating-point exceptions, Python should raise a ValueError.  Where
50    the standard recommends raising 'overflow', Python should raise an
51    OverflowError.  In all other circumstances a value should be
52    returned.
53  */
54 
55 #include "Python.h"
56 #include "pycore_bitutils.h"      // _Py_bit_length()
57 #include "pycore_dtoa.h"
58 #include "pycore_long.h"          // _PyLong_GetZero()
59 #include "_math.h"
60 
61 #include "clinic/mathmodule.c.h"
62 
63 /*[clinic input]
64 module math
65 [clinic start generated code]*/
66 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
67 
68 
69 /*
70    sin(pi*x), giving accurate results for all finite x (especially x
71    integral or close to an integer).  This is here for use in the
72    reflection formula for the gamma function.  It conforms to IEEE
73    754-2008 for finite arguments, but not for infinities or nans.
74 */
75 
76 static const double pi = 3.141592653589793238462643383279502884197;
77 static const double logpi = 1.144729885849400174143427351353058711647;
78 #if !defined(HAVE_ERF) || !defined(HAVE_ERFC)
79 static const double sqrtpi = 1.772453850905516027298167483341145182798;
80 #endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */
81 
82 
83 /* Version of PyFloat_AsDouble() with in-line fast paths
84    for exact floats and integers.  Gives a substantial
85    speed improvement for extracting float arguments.
86 */
87 
88 #define ASSIGN_DOUBLE(target_var, obj, error_label)        \
89     if (PyFloat_CheckExact(obj)) {                         \
90         target_var = PyFloat_AS_DOUBLE(obj);               \
91     }                                                      \
92     else if (PyLong_CheckExact(obj)) {                     \
93         target_var = PyLong_AsDouble(obj);                 \
94         if (target_var == -1.0 && PyErr_Occurred()) {      \
95             goto error_label;                              \
96         }                                                  \
97     }                                                      \
98     else {                                                 \
99         target_var = PyFloat_AsDouble(obj);                \
100         if (target_var == -1.0 && PyErr_Occurred()) {      \
101             goto error_label;                              \
102         }                                                  \
103     }
104 
105 static double
m_sinpi(double x)106 m_sinpi(double x)
107 {
108     double y, r;
109     int n;
110     /* this function should only ever be called for finite arguments */
111     assert(Py_IS_FINITE(x));
112     y = fmod(fabs(x), 2.0);
113     n = (int)round(2.0*y);
114     assert(0 <= n && n <= 4);
115     switch (n) {
116     case 0:
117         r = sin(pi*y);
118         break;
119     case 1:
120         r = cos(pi*(y-0.5));
121         break;
122     case 2:
123         /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
124            -0.0 instead of 0.0 when y == 1.0. */
125         r = sin(pi*(1.0-y));
126         break;
127     case 3:
128         r = -cos(pi*(y-1.5));
129         break;
130     case 4:
131         r = sin(pi*(y-2.0));
132         break;
133     default:
134         Py_UNREACHABLE();
135     }
136     return copysign(1.0, x)*r;
137 }
138 
139 /* Implementation of the real gamma function.  In extensive but non-exhaustive
140    random tests, this function proved accurate to within <= 10 ulps across the
141    entire float domain.  Note that accuracy may depend on the quality of the
142    system math functions, the pow function in particular.  Special cases
143    follow C99 annex F.  The parameters and method are tailored to platforms
144    whose double format is the IEEE 754 binary64 format.
145 
146    Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
147    and g=6.024680040776729583740234375; these parameters are amongst those
148    used by the Boost library.  Following Boost (again), we re-express the
149    Lanczos sum as a rational function, and compute it that way.  The
150    coefficients below were computed independently using MPFR, and have been
151    double-checked against the coefficients in the Boost source code.
152 
153    For x < 0.0 we use the reflection formula.
154 
155    There's one minor tweak that deserves explanation: Lanczos' formula for
156    Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5).  For many x
157    values, x+g-0.5 can be represented exactly.  However, in cases where it
158    can't be represented exactly the small error in x+g-0.5 can be magnified
159    significantly by the pow and exp calls, especially for large x.  A cheap
160    correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
161    involved in the computation of x+g-0.5 (that is, e = computed value of
162    x+g-0.5 - exact value of x+g-0.5).  Here's the proof:
163 
164    Correction factor
165    -----------------
166    Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
167    double, and e is tiny.  Then:
168 
169      pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
170      = pow(y, x-0.5)/exp(y) * C,
171 
172    where the correction_factor C is given by
173 
174      C = pow(1-e/y, x-0.5) * exp(e)
175 
176    Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
177 
178      C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
179 
180    But y-(x-0.5) = g+e, and g+e ~ g.  So we get C ~ 1 + e*g/y, and
181 
182      pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
183 
184    Note that for accuracy, when computing r*C it's better to do
185 
186      r + e*g/y*r;
187 
188    than
189 
190      r * (1 + e*g/y);
191 
192    since the addition in the latter throws away most of the bits of
193    information in e*g/y.
194 */
195 
196 #define LANCZOS_N 13
197 static const double lanczos_g = 6.024680040776729583740234375;
198 static const double lanczos_g_minus_half = 5.524680040776729583740234375;
199 static const double lanczos_num_coeffs[LANCZOS_N] = {
200     23531376880.410759688572007674451636754734846804940,
201     42919803642.649098768957899047001988850926355848959,
202     35711959237.355668049440185451547166705960488635843,
203     17921034426.037209699919755754458931112671403265390,
204     6039542586.3520280050642916443072979210699388420708,
205     1439720407.3117216736632230727949123939715485786772,
206     248874557.86205415651146038641322942321632125127801,
207     31426415.585400194380614231628318205362874684987640,
208     2876370.6289353724412254090516208496135991145378768,
209     186056.26539522349504029498971604569928220784236328,
210     8071.6720023658162106380029022722506138218516325024,
211     210.82427775157934587250973392071336271166969580291,
212     2.5066282746310002701649081771338373386264310793408
213 };
214 
215 /* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
216 static const double lanczos_den_coeffs[LANCZOS_N] = {
217     0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0,
218     13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0};
219 
220 /* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
221 #define NGAMMA_INTEGRAL 23
222 static const double gamma_integral[NGAMMA_INTEGRAL] = {
223     1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0,
224     3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0,
225     1307674368000.0, 20922789888000.0, 355687428096000.0,
226     6402373705728000.0, 121645100408832000.0, 2432902008176640000.0,
227     51090942171709440000.0, 1124000727777607680000.0,
228 };
229 
230 /* Lanczos' sum L_g(x), for positive x */
231 
232 static double
lanczos_sum(double x)233 lanczos_sum(double x)
234 {
235     double num = 0.0, den = 0.0;
236     int i;
237     assert(x > 0.0);
238     /* evaluate the rational function lanczos_sum(x).  For large
239        x, the obvious algorithm risks overflow, so we instead
240        rescale the denominator and numerator of the rational
241        function by x**(1-LANCZOS_N) and treat this as a
242        rational function in 1/x.  This also reduces the error for
243        larger x values.  The choice of cutoff point (5.0 below) is
244        somewhat arbitrary; in tests, smaller cutoff values than
245        this resulted in lower accuracy. */
246     if (x < 5.0) {
247         for (i = LANCZOS_N; --i >= 0; ) {
248             num = num * x + lanczos_num_coeffs[i];
249             den = den * x + lanczos_den_coeffs[i];
250         }
251     }
252     else {
253         for (i = 0; i < LANCZOS_N; i++) {
254             num = num / x + lanczos_num_coeffs[i];
255             den = den / x + lanczos_den_coeffs[i];
256         }
257     }
258     return num/den;
259 }
260 
261 /* Constant for +infinity, generated in the same way as float('inf'). */
262 
263 static double
m_inf(void)264 m_inf(void)
265 {
266 #ifndef PY_NO_SHORT_FLOAT_REPR
267     return _Py_dg_infinity(0);
268 #else
269     return Py_HUGE_VAL;
270 #endif
271 }
272 
273 /* Constant nan value, generated in the same way as float('nan'). */
274 /* We don't currently assume that Py_NAN is defined everywhere. */
275 
276 #if !defined(PY_NO_SHORT_FLOAT_REPR) || defined(Py_NAN)
277 
278 static double
m_nan(void)279 m_nan(void)
280 {
281 #ifndef PY_NO_SHORT_FLOAT_REPR
282     return _Py_dg_stdnan(0);
283 #else
284     return Py_NAN;
285 #endif
286 }
287 
288 #endif
289 
290 static double
m_tgamma(double x)291 m_tgamma(double x)
292 {
293     double absx, r, y, z, sqrtpow;
294 
295     /* special cases */
296     if (!Py_IS_FINITE(x)) {
297         if (Py_IS_NAN(x) || x > 0.0)
298             return x;  /* tgamma(nan) = nan, tgamma(inf) = inf */
299         else {
300             errno = EDOM;
301             return Py_NAN;  /* tgamma(-inf) = nan, invalid */
302         }
303     }
304     if (x == 0.0) {
305         errno = EDOM;
306         /* tgamma(+-0.0) = +-inf, divide-by-zero */
307         return copysign(Py_HUGE_VAL, x);
308     }
309 
310     /* integer arguments */
311     if (x == floor(x)) {
312         if (x < 0.0) {
313             errno = EDOM;  /* tgamma(n) = nan, invalid for */
314             return Py_NAN; /* negative integers n */
315         }
316         if (x <= NGAMMA_INTEGRAL)
317             return gamma_integral[(int)x - 1];
318     }
319     absx = fabs(x);
320 
321     /* tiny arguments:  tgamma(x) ~ 1/x for x near 0 */
322     if (absx < 1e-20) {
323         r = 1.0/x;
324         if (Py_IS_INFINITY(r))
325             errno = ERANGE;
326         return r;
327     }
328 
329     /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
330        x > 200, and underflows to +-0.0 for x < -200, not a negative
331        integer. */
332     if (absx > 200.0) {
333         if (x < 0.0) {
334             return 0.0/m_sinpi(x);
335         }
336         else {
337             errno = ERANGE;
338             return Py_HUGE_VAL;
339         }
340     }
341 
342     y = absx + lanczos_g_minus_half;
343     /* compute error in sum */
344     if (absx > lanczos_g_minus_half) {
345         /* note: the correction can be foiled by an optimizing
346            compiler that (incorrectly) thinks that an expression like
347            a + b - a - b can be optimized to 0.0.  This shouldn't
348            happen in a standards-conforming compiler. */
349         double q = y - absx;
350         z = q - lanczos_g_minus_half;
351     }
352     else {
353         double q = y - lanczos_g_minus_half;
354         z = q - absx;
355     }
356     z = z * lanczos_g / y;
357     if (x < 0.0) {
358         r = -pi / m_sinpi(absx) / absx * exp(y) / lanczos_sum(absx);
359         r -= z * r;
360         if (absx < 140.0) {
361             r /= pow(y, absx - 0.5);
362         }
363         else {
364             sqrtpow = pow(y, absx / 2.0 - 0.25);
365             r /= sqrtpow;
366             r /= sqrtpow;
367         }
368     }
369     else {
370         r = lanczos_sum(absx) / exp(y);
371         r += z * r;
372         if (absx < 140.0) {
373             r *= pow(y, absx - 0.5);
374         }
375         else {
376             sqrtpow = pow(y, absx / 2.0 - 0.25);
377             r *= sqrtpow;
378             r *= sqrtpow;
379         }
380     }
381     if (Py_IS_INFINITY(r))
382         errno = ERANGE;
383     return r;
384 }
385 
386 /*
387    lgamma:  natural log of the absolute value of the Gamma function.
388    For large arguments, Lanczos' formula works extremely well here.
389 */
390 
391 static double
m_lgamma(double x)392 m_lgamma(double x)
393 {
394     double r;
395     double absx;
396 
397     /* special cases */
398     if (!Py_IS_FINITE(x)) {
399         if (Py_IS_NAN(x))
400             return x;  /* lgamma(nan) = nan */
401         else
402             return Py_HUGE_VAL; /* lgamma(+-inf) = +inf */
403     }
404 
405     /* integer arguments */
406     if (x == floor(x) && x <= 2.0) {
407         if (x <= 0.0) {
408             errno = EDOM;  /* lgamma(n) = inf, divide-by-zero for */
409             return Py_HUGE_VAL; /* integers n <= 0 */
410         }
411         else {
412             return 0.0; /* lgamma(1) = lgamma(2) = 0.0 */
413         }
414     }
415 
416     absx = fabs(x);
417     /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
418     if (absx < 1e-20)
419         return -log(absx);
420 
421     /* Lanczos' formula.  We could save a fraction of a ulp in accuracy by
422        having a second set of numerator coefficients for lanczos_sum that
423        absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
424        subtraction below; it's probably not worth it. */
425     r = log(lanczos_sum(absx)) - lanczos_g;
426     r += (absx - 0.5) * (log(absx + lanczos_g - 0.5) - 1);
427     if (x < 0.0)
428         /* Use reflection formula to get value for negative x. */
429         r = logpi - log(fabs(m_sinpi(absx))) - log(absx) - r;
430     if (Py_IS_INFINITY(r))
431         errno = ERANGE;
432     return r;
433 }
434 
435 #if !defined(HAVE_ERF) || !defined(HAVE_ERFC)
436 
437 /*
438    Implementations of the error function erf(x) and the complementary error
439    function erfc(x).
440 
441    Method: we use a series approximation for erf for small x, and a continued
442    fraction approximation for erfc(x) for larger x;
443    combined with the relations erf(-x) = -erf(x) and erfc(x) = 1.0 - erf(x),
444    this gives us erf(x) and erfc(x) for all x.
445 
446    The series expansion used is:
447 
448       erf(x) = x*exp(-x*x)/sqrt(pi) * [
449                      2/1 + 4/3 x**2 + 8/15 x**4 + 16/105 x**6 + ...]
450 
451    The coefficient of x**(2k-2) here is 4**k*factorial(k)/factorial(2*k).
452    This series converges well for smallish x, but slowly for larger x.
453 
454    The continued fraction expansion used is:
455 
456       erfc(x) = x*exp(-x*x)/sqrt(pi) * [1/(0.5 + x**2 -) 0.5/(2.5 + x**2 - )
457                               3.0/(4.5 + x**2 - ) 7.5/(6.5 + x**2 - ) ...]
458 
459    after the first term, the general term has the form:
460 
461       k*(k-0.5)/(2*k+0.5 + x**2 - ...).
462 
463    This expansion converges fast for larger x, but convergence becomes
464    infinitely slow as x approaches 0.0.  The (somewhat naive) continued
465    fraction evaluation algorithm used below also risks overflow for large x;
466    but for large x, erfc(x) == 0.0 to within machine precision.  (For
467    example, erfc(30.0) is approximately 2.56e-393).
468 
469    Parameters: use series expansion for abs(x) < ERF_SERIES_CUTOFF and
470    continued fraction expansion for ERF_SERIES_CUTOFF <= abs(x) <
471    ERFC_CONTFRAC_CUTOFF.  ERFC_SERIES_TERMS and ERFC_CONTFRAC_TERMS are the
472    numbers of terms to use for the relevant expansions.  */
473 
474 #define ERF_SERIES_CUTOFF 1.5
475 #define ERF_SERIES_TERMS 25
476 #define ERFC_CONTFRAC_CUTOFF 30.0
477 #define ERFC_CONTFRAC_TERMS 50
478 
479 /*
480    Error function, via power series.
481 
482    Given a finite float x, return an approximation to erf(x).
483    Converges reasonably fast for small x.
484 */
485 
486 static double
m_erf_series(double x)487 m_erf_series(double x)
488 {
489     double x2, acc, fk, result;
490     int i, saved_errno;
491 
492     x2 = x * x;
493     acc = 0.0;
494     fk = (double)ERF_SERIES_TERMS + 0.5;
495     for (i = 0; i < ERF_SERIES_TERMS; i++) {
496         acc = 2.0 + x2 * acc / fk;
497         fk -= 1.0;
498     }
499     /* Make sure the exp call doesn't affect errno;
500        see m_erfc_contfrac for more. */
501     saved_errno = errno;
502     result = acc * x * exp(-x2) / sqrtpi;
503     errno = saved_errno;
504     return result;
505 }
506 
507 /*
508    Complementary error function, via continued fraction expansion.
509 
510    Given a positive float x, return an approximation to erfc(x).  Converges
511    reasonably fast for x large (say, x > 2.0), and should be safe from
512    overflow if x and nterms are not too large.  On an IEEE 754 machine, with x
513    <= 30.0, we're safe up to nterms = 100.  For x >= 30.0, erfc(x) is smaller
514    than the smallest representable nonzero float.  */
515 
516 static double
m_erfc_contfrac(double x)517 m_erfc_contfrac(double x)
518 {
519     double x2, a, da, p, p_last, q, q_last, b, result;
520     int i, saved_errno;
521 
522     if (x >= ERFC_CONTFRAC_CUTOFF)
523         return 0.0;
524 
525     x2 = x*x;
526     a = 0.0;
527     da = 0.5;
528     p = 1.0; p_last = 0.0;
529     q = da + x2; q_last = 1.0;
530     for (i = 0; i < ERFC_CONTFRAC_TERMS; i++) {
531         double temp;
532         a += da;
533         da += 2.0;
534         b = da + x2;
535         temp = p; p = b*p - a*p_last; p_last = temp;
536         temp = q; q = b*q - a*q_last; q_last = temp;
537     }
538     /* Issue #8986: On some platforms, exp sets errno on underflow to zero;
539        save the current errno value so that we can restore it later. */
540     saved_errno = errno;
541     result = p / q * x * exp(-x2) / sqrtpi;
542     errno = saved_errno;
543     return result;
544 }
545 
546 #endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */
547 
548 /* Error function erf(x), for general x */
549 
550 static double
m_erf(double x)551 m_erf(double x)
552 {
553 #ifdef HAVE_ERF
554     return erf(x);
555 #else
556     double absx, cf;
557 
558     if (Py_IS_NAN(x))
559         return x;
560     absx = fabs(x);
561     if (absx < ERF_SERIES_CUTOFF)
562         return m_erf_series(x);
563     else {
564         cf = m_erfc_contfrac(absx);
565         return x > 0.0 ? 1.0 - cf : cf - 1.0;
566     }
567 #endif
568 }
569 
570 /* Complementary error function erfc(x), for general x. */
571 
572 static double
m_erfc(double x)573 m_erfc(double x)
574 {
575 #ifdef HAVE_ERFC
576     return erfc(x);
577 #else
578     double absx, cf;
579 
580     if (Py_IS_NAN(x))
581         return x;
582     absx = fabs(x);
583     if (absx < ERF_SERIES_CUTOFF)
584         return 1.0 - m_erf_series(x);
585     else {
586         cf = m_erfc_contfrac(absx);
587         return x > 0.0 ? cf : 2.0 - cf;
588     }
589 #endif
590 }
591 
592 /*
593    wrapper for atan2 that deals directly with special cases before
594    delegating to the platform libm for the remaining cases.  This
595    is necessary to get consistent behaviour across platforms.
596    Windows, FreeBSD and alpha Tru64 are amongst platforms that don't
597    always follow C99.
598 */
599 
600 static double
m_atan2(double y,double x)601 m_atan2(double y, double x)
602 {
603     if (Py_IS_NAN(x) || Py_IS_NAN(y))
604         return Py_NAN;
605     if (Py_IS_INFINITY(y)) {
606         if (Py_IS_INFINITY(x)) {
607             if (copysign(1., x) == 1.)
608                 /* atan2(+-inf, +inf) == +-pi/4 */
609                 return copysign(0.25*Py_MATH_PI, y);
610             else
611                 /* atan2(+-inf, -inf) == +-pi*3/4 */
612                 return copysign(0.75*Py_MATH_PI, y);
613         }
614         /* atan2(+-inf, x) == +-pi/2 for finite x */
615         return copysign(0.5*Py_MATH_PI, y);
616     }
617     if (Py_IS_INFINITY(x) || y == 0.) {
618         if (copysign(1., x) == 1.)
619             /* atan2(+-y, +inf) = atan2(+-0, +x) = +-0. */
620             return copysign(0., y);
621         else
622             /* atan2(+-y, -inf) = atan2(+-0., -x) = +-pi. */
623             return copysign(Py_MATH_PI, y);
624     }
625     return atan2(y, x);
626 }
627 
628 
629 /* IEEE 754-style remainder operation: x - n*y where n*y is the nearest
630    multiple of y to x, taking n even in the case of a tie. Assuming an IEEE 754
631    binary floating-point format, the result is always exact. */
632 
633 static double
m_remainder(double x,double y)634 m_remainder(double x, double y)
635 {
636     /* Deal with most common case first. */
637     if (Py_IS_FINITE(x) && Py_IS_FINITE(y)) {
638         double absx, absy, c, m, r;
639 
640         if (y == 0.0) {
641             return Py_NAN;
642         }
643 
644         absx = fabs(x);
645         absy = fabs(y);
646         m = fmod(absx, absy);
647 
648         /*
649            Warning: some subtlety here. What we *want* to know at this point is
650            whether the remainder m is less than, equal to, or greater than half
651            of absy. However, we can't do that comparison directly because we
652            can't be sure that 0.5*absy is representable (the multiplication
653            might incur precision loss due to underflow). So instead we compare
654            m with the complement c = absy - m: m < 0.5*absy if and only if m <
655            c, and so on. The catch is that absy - m might also not be
656            representable, but it turns out that it doesn't matter:
657 
658            - if m > 0.5*absy then absy - m is exactly representable, by
659              Sterbenz's lemma, so m > c
660            - if m == 0.5*absy then again absy - m is exactly representable
661              and m == c
662            - if m < 0.5*absy then either (i) 0.5*absy is exactly representable,
663              in which case 0.5*absy < absy - m, so 0.5*absy <= c and hence m <
664              c, or (ii) absy is tiny, either subnormal or in the lowest normal
665              binade. Then absy - m is exactly representable and again m < c.
666         */
667 
668         c = absy - m;
669         if (m < c) {
670             r = m;
671         }
672         else if (m > c) {
673             r = -c;
674         }
675         else {
676             /*
677                Here absx is exactly halfway between two multiples of absy,
678                and we need to choose the even multiple. x now has the form
679 
680                    absx = n * absy + m
681 
682                for some integer n (recalling that m = 0.5*absy at this point).
683                If n is even we want to return m; if n is odd, we need to
684                return -m.
685 
686                So
687 
688                    0.5 * (absx - m) = (n/2) * absy
689 
690                and now reducing modulo absy gives us:
691 
692                                                   | m, if n is odd
693                    fmod(0.5 * (absx - m), absy) = |
694                                                   | 0, if n is even
695 
696                Now m - 2.0 * fmod(...) gives the desired result: m
697                if n is even, -m if m is odd.
698 
699                Note that all steps in fmod(0.5 * (absx - m), absy)
700                will be computed exactly, with no rounding error
701                introduced.
702             */
703             assert(m == c);
704             r = m - 2.0 * fmod(0.5 * (absx - m), absy);
705         }
706         return copysign(1.0, x) * r;
707     }
708 
709     /* Special values. */
710     if (Py_IS_NAN(x)) {
711         return x;
712     }
713     if (Py_IS_NAN(y)) {
714         return y;
715     }
716     if (Py_IS_INFINITY(x)) {
717         return Py_NAN;
718     }
719     assert(Py_IS_INFINITY(y));
720     return x;
721 }
722 
723 
724 /*
725     Various platforms (Solaris, OpenBSD) do nonstandard things for log(0),
726     log(-ve), log(NaN).  Here are wrappers for log and log10 that deal with
727     special values directly, passing positive non-special values through to
728     the system log/log10.
729  */
730 
731 static double
m_log(double x)732 m_log(double x)
733 {
734     if (Py_IS_FINITE(x)) {
735         if (x > 0.0)
736             return log(x);
737         errno = EDOM;
738         if (x == 0.0)
739             return -Py_HUGE_VAL; /* log(0) = -inf */
740         else
741             return Py_NAN; /* log(-ve) = nan */
742     }
743     else if (Py_IS_NAN(x))
744         return x; /* log(nan) = nan */
745     else if (x > 0.0)
746         return x; /* log(inf) = inf */
747     else {
748         errno = EDOM;
749         return Py_NAN; /* log(-inf) = nan */
750     }
751 }
752 
753 /*
754    log2: log to base 2.
755 
756    Uses an algorithm that should:
757 
758      (a) produce exact results for powers of 2, and
759      (b) give a monotonic log2 (for positive finite floats),
760          assuming that the system log is monotonic.
761 */
762 
763 static double
m_log2(double x)764 m_log2(double x)
765 {
766     if (!Py_IS_FINITE(x)) {
767         if (Py_IS_NAN(x))
768             return x; /* log2(nan) = nan */
769         else if (x > 0.0)
770             return x; /* log2(+inf) = +inf */
771         else {
772             errno = EDOM;
773             return Py_NAN; /* log2(-inf) = nan, invalid-operation */
774         }
775     }
776 
777     if (x > 0.0) {
778 #ifdef HAVE_LOG2
779         return log2(x);
780 #else
781         double m;
782         int e;
783         m = frexp(x, &e);
784         /* We want log2(m * 2**e) == log(m) / log(2) + e.  Care is needed when
785          * x is just greater than 1.0: in that case e is 1, log(m) is negative,
786          * and we get significant cancellation error from the addition of
787          * log(m) / log(2) to e.  The slight rewrite of the expression below
788          * avoids this problem.
789          */
790         if (x >= 1.0) {
791             return log(2.0 * m) / log(2.0) + (e - 1);
792         }
793         else {
794             return log(m) / log(2.0) + e;
795         }
796 #endif
797     }
798     else if (x == 0.0) {
799         errno = EDOM;
800         return -Py_HUGE_VAL; /* log2(0) = -inf, divide-by-zero */
801     }
802     else {
803         errno = EDOM;
804         return Py_NAN; /* log2(-inf) = nan, invalid-operation */
805     }
806 }
807 
808 static double
m_log10(double x)809 m_log10(double x)
810 {
811     if (Py_IS_FINITE(x)) {
812         if (x > 0.0)
813             return log10(x);
814         errno = EDOM;
815         if (x == 0.0)
816             return -Py_HUGE_VAL; /* log10(0) = -inf */
817         else
818             return Py_NAN; /* log10(-ve) = nan */
819     }
820     else if (Py_IS_NAN(x))
821         return x; /* log10(nan) = nan */
822     else if (x > 0.0)
823         return x; /* log10(inf) = inf */
824     else {
825         errno = EDOM;
826         return Py_NAN; /* log10(-inf) = nan */
827     }
828 }
829 
830 
831 static PyObject *
math_gcd(PyObject * module,PyObject * const * args,Py_ssize_t nargs)832 math_gcd(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
833 {
834     PyObject *res, *x;
835     Py_ssize_t i;
836 
837     if (nargs == 0) {
838         return PyLong_FromLong(0);
839     }
840     res = PyNumber_Index(args[0]);
841     if (res == NULL) {
842         return NULL;
843     }
844     if (nargs == 1) {
845         Py_SETREF(res, PyNumber_Absolute(res));
846         return res;
847     }
848 
849     PyObject *one = _PyLong_GetOne();  // borrowed ref
850     for (i = 1; i < nargs; i++) {
851         x = _PyNumber_Index(args[i]);
852         if (x == NULL) {
853             Py_DECREF(res);
854             return NULL;
855         }
856         if (res == one) {
857             /* Fast path: just check arguments.
858                It is okay to use identity comparison here. */
859             Py_DECREF(x);
860             continue;
861         }
862         Py_SETREF(res, _PyLong_GCD(res, x));
863         Py_DECREF(x);
864         if (res == NULL) {
865             return NULL;
866         }
867     }
868     return res;
869 }
870 
871 PyDoc_STRVAR(math_gcd_doc,
872 "gcd($module, *integers)\n"
873 "--\n"
874 "\n"
875 "Greatest Common Divisor.");
876 
877 
878 static PyObject *
long_lcm(PyObject * a,PyObject * b)879 long_lcm(PyObject *a, PyObject *b)
880 {
881     PyObject *g, *m, *f, *ab;
882 
883     if (Py_SIZE(a) == 0 || Py_SIZE(b) == 0) {
884         return PyLong_FromLong(0);
885     }
886     g = _PyLong_GCD(a, b);
887     if (g == NULL) {
888         return NULL;
889     }
890     f = PyNumber_FloorDivide(a, g);
891     Py_DECREF(g);
892     if (f == NULL) {
893         return NULL;
894     }
895     m = PyNumber_Multiply(f, b);
896     Py_DECREF(f);
897     if (m == NULL) {
898         return NULL;
899     }
900     ab = PyNumber_Absolute(m);
901     Py_DECREF(m);
902     return ab;
903 }
904 
905 
906 static PyObject *
math_lcm(PyObject * module,PyObject * const * args,Py_ssize_t nargs)907 math_lcm(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
908 {
909     PyObject *res, *x;
910     Py_ssize_t i;
911 
912     if (nargs == 0) {
913         return PyLong_FromLong(1);
914     }
915     res = PyNumber_Index(args[0]);
916     if (res == NULL) {
917         return NULL;
918     }
919     if (nargs == 1) {
920         Py_SETREF(res, PyNumber_Absolute(res));
921         return res;
922     }
923 
924     PyObject *zero = _PyLong_GetZero();  // borrowed ref
925     for (i = 1; i < nargs; i++) {
926         x = PyNumber_Index(args[i]);
927         if (x == NULL) {
928             Py_DECREF(res);
929             return NULL;
930         }
931         if (res == zero) {
932             /* Fast path: just check arguments.
933                It is okay to use identity comparison here. */
934             Py_DECREF(x);
935             continue;
936         }
937         Py_SETREF(res, long_lcm(res, x));
938         Py_DECREF(x);
939         if (res == NULL) {
940             return NULL;
941         }
942     }
943     return res;
944 }
945 
946 
947 PyDoc_STRVAR(math_lcm_doc,
948 "lcm($module, *integers)\n"
949 "--\n"
950 "\n"
951 "Least Common Multiple.");
952 
953 
954 /* Call is_error when errno != 0, and where x is the result libm
955  * returned.  is_error will usually set up an exception and return
956  * true (1), but may return false (0) without setting up an exception.
957  */
958 static int
is_error(double x)959 is_error(double x)
960 {
961     int result = 1;     /* presumption of guilt */
962     assert(errno);      /* non-zero errno is a precondition for calling */
963     if (errno == EDOM)
964         PyErr_SetString(PyExc_ValueError, "math domain error");
965 
966     else if (errno == ERANGE) {
967         /* ANSI C generally requires libm functions to set ERANGE
968          * on overflow, but also generally *allows* them to set
969          * ERANGE on underflow too.  There's no consistency about
970          * the latter across platforms.
971          * Alas, C99 never requires that errno be set.
972          * Here we suppress the underflow errors (libm functions
973          * should return a zero on underflow, and +- HUGE_VAL on
974          * overflow, so testing the result for zero suffices to
975          * distinguish the cases).
976          *
977          * On some platforms (Ubuntu/ia64) it seems that errno can be
978          * set to ERANGE for subnormal results that do *not* underflow
979          * to zero.  So to be safe, we'll ignore ERANGE whenever the
980          * function result is less than 1.5 in absolute value.
981          *
982          * bpo-46018: Changed to 1.5 to ensure underflows in expm1()
983          * are correctly detected, since the function may underflow
984          * toward -1.0 rather than 0.0.
985          */
986         if (fabs(x) < 1.5)
987             result = 0;
988         else
989             PyErr_SetString(PyExc_OverflowError,
990                             "math range error");
991     }
992     else
993         /* Unexpected math error */
994         PyErr_SetFromErrno(PyExc_ValueError);
995     return result;
996 }
997 
998 /*
999    math_1 is used to wrap a libm function f that takes a double
1000    argument and returns a double.
1001 
1002    The error reporting follows these rules, which are designed to do
1003    the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1004    platforms.
1005 
1006    - a NaN result from non-NaN inputs causes ValueError to be raised
1007    - an infinite result from finite inputs causes OverflowError to be
1008      raised if can_overflow is 1, or raises ValueError if can_overflow
1009      is 0.
1010    - if the result is finite and errno == EDOM then ValueError is
1011      raised
1012    - if the result is finite and nonzero and errno == ERANGE then
1013      OverflowError is raised
1014 
1015    The last rule is used to catch overflow on platforms which follow
1016    C89 but for which HUGE_VAL is not an infinity.
1017 
1018    For the majority of one-argument functions these rules are enough
1019    to ensure that Python's functions behave as specified in 'Annex F'
1020    of the C99 standard, with the 'invalid' and 'divide-by-zero'
1021    floating-point exceptions mapping to Python's ValueError and the
1022    'overflow' floating-point exception mapping to OverflowError.
1023    math_1 only works for functions that don't have singularities *and*
1024    the possibility of overflow; fortunately, that covers everything we
1025    care about right now.
1026 */
1027 
1028 static PyObject *
math_1_to_whatever(PyObject * arg,double (* func)(double),PyObject * (* from_double_func)(double),int can_overflow)1029 math_1_to_whatever(PyObject *arg, double (*func) (double),
1030                    PyObject *(*from_double_func) (double),
1031                    int can_overflow)
1032 {
1033     double x, r;
1034     x = PyFloat_AsDouble(arg);
1035     if (x == -1.0 && PyErr_Occurred())
1036         return NULL;
1037     errno = 0;
1038     r = (*func)(x);
1039     if (Py_IS_NAN(r) && !Py_IS_NAN(x)) {
1040         PyErr_SetString(PyExc_ValueError,
1041                         "math domain error"); /* invalid arg */
1042         return NULL;
1043     }
1044     if (Py_IS_INFINITY(r) && Py_IS_FINITE(x)) {
1045         if (can_overflow)
1046             PyErr_SetString(PyExc_OverflowError,
1047                             "math range error"); /* overflow */
1048         else
1049             PyErr_SetString(PyExc_ValueError,
1050                             "math domain error"); /* singularity */
1051         return NULL;
1052     }
1053     if (Py_IS_FINITE(r) && errno && is_error(r))
1054         /* this branch unnecessary on most platforms */
1055         return NULL;
1056 
1057     return (*from_double_func)(r);
1058 }
1059 
1060 /* variant of math_1, to be used when the function being wrapped is known to
1061    set errno properly (that is, errno = EDOM for invalid or divide-by-zero,
1062    errno = ERANGE for overflow). */
1063 
1064 static PyObject *
math_1a(PyObject * arg,double (* func)(double))1065 math_1a(PyObject *arg, double (*func) (double))
1066 {
1067     double x, r;
1068     x = PyFloat_AsDouble(arg);
1069     if (x == -1.0 && PyErr_Occurred())
1070         return NULL;
1071     errno = 0;
1072     r = (*func)(x);
1073     if (errno && is_error(r))
1074         return NULL;
1075     return PyFloat_FromDouble(r);
1076 }
1077 
1078 /*
1079    math_2 is used to wrap a libm function f that takes two double
1080    arguments and returns a double.
1081 
1082    The error reporting follows these rules, which are designed to do
1083    the right thing on C89/C99 platforms and IEEE 754/non IEEE 754
1084    platforms.
1085 
1086    - a NaN result from non-NaN inputs causes ValueError to be raised
1087    - an infinite result from finite inputs causes OverflowError to be
1088      raised.
1089    - if the result is finite and errno == EDOM then ValueError is
1090      raised
1091    - if the result is finite and nonzero and errno == ERANGE then
1092      OverflowError is raised
1093 
1094    The last rule is used to catch overflow on platforms which follow
1095    C89 but for which HUGE_VAL is not an infinity.
1096 
1097    For most two-argument functions (copysign, fmod, hypot, atan2)
1098    these rules are enough to ensure that Python's functions behave as
1099    specified in 'Annex F' of the C99 standard, with the 'invalid' and
1100    'divide-by-zero' floating-point exceptions mapping to Python's
1101    ValueError and the 'overflow' floating-point exception mapping to
1102    OverflowError.
1103 */
1104 
1105 static PyObject *
math_1(PyObject * arg,double (* func)(double),int can_overflow)1106 math_1(PyObject *arg, double (*func) (double), int can_overflow)
1107 {
1108     return math_1_to_whatever(arg, func, PyFloat_FromDouble, can_overflow);
1109 }
1110 
1111 static PyObject *
math_2(PyObject * const * args,Py_ssize_t nargs,double (* func)(double,double),const char * funcname)1112 math_2(PyObject *const *args, Py_ssize_t nargs,
1113        double (*func) (double, double), const char *funcname)
1114 {
1115     double x, y, r;
1116     if (!_PyArg_CheckPositional(funcname, nargs, 2, 2))
1117         return NULL;
1118     x = PyFloat_AsDouble(args[0]);
1119     if (x == -1.0 && PyErr_Occurred()) {
1120         return NULL;
1121     }
1122     y = PyFloat_AsDouble(args[1]);
1123     if (y == -1.0 && PyErr_Occurred()) {
1124         return NULL;
1125     }
1126     errno = 0;
1127     r = (*func)(x, y);
1128     if (Py_IS_NAN(r)) {
1129         if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
1130             errno = EDOM;
1131         else
1132             errno = 0;
1133     }
1134     else if (Py_IS_INFINITY(r)) {
1135         if (Py_IS_FINITE(x) && Py_IS_FINITE(y))
1136             errno = ERANGE;
1137         else
1138             errno = 0;
1139     }
1140     if (errno && is_error(r))
1141         return NULL;
1142     else
1143         return PyFloat_FromDouble(r);
1144 }
1145 
1146 #define FUNC1(funcname, func, can_overflow, docstring)                  \
1147     static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1148         return math_1(args, func, can_overflow);                            \
1149     }\
1150     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1151 
1152 #define FUNC1A(funcname, func, docstring)                               \
1153     static PyObject * math_##funcname(PyObject *self, PyObject *args) { \
1154         return math_1a(args, func);                                     \
1155     }\
1156     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1157 
1158 #define FUNC2(funcname, func, docstring) \
1159     static PyObject * math_##funcname(PyObject *self, PyObject *const *args, Py_ssize_t nargs) { \
1160         return math_2(args, nargs, func, #funcname); \
1161     }\
1162     PyDoc_STRVAR(math_##funcname##_doc, docstring);
1163 
1164 FUNC1(acos, acos, 0,
1165       "acos($module, x, /)\n--\n\n"
1166       "Return the arc cosine (measured in radians) of x.\n\n"
1167       "The result is between 0 and pi.")
1168 FUNC1(acosh, m_acosh, 0,
1169       "acosh($module, x, /)\n--\n\n"
1170       "Return the inverse hyperbolic cosine of x.")
1171 FUNC1(asin, asin, 0,
1172       "asin($module, x, /)\n--\n\n"
1173       "Return the arc sine (measured in radians) of x.\n\n"
1174       "The result is between -pi/2 and pi/2.")
1175 FUNC1(asinh, m_asinh, 0,
1176       "asinh($module, x, /)\n--\n\n"
1177       "Return the inverse hyperbolic sine of x.")
1178 FUNC1(atan, atan, 0,
1179       "atan($module, x, /)\n--\n\n"
1180       "Return the arc tangent (measured in radians) of x.\n\n"
1181       "The result is between -pi/2 and pi/2.")
1182 FUNC2(atan2, m_atan2,
1183       "atan2($module, y, x, /)\n--\n\n"
1184       "Return the arc tangent (measured in radians) of y/x.\n\n"
1185       "Unlike atan(y/x), the signs of both x and y are considered.")
1186 FUNC1(atanh, m_atanh, 0,
1187       "atanh($module, x, /)\n--\n\n"
1188       "Return the inverse hyperbolic tangent of x.")
1189 
1190 /*[clinic input]
1191 math.ceil
1192 
1193     x as number: object
1194     /
1195 
1196 Return the ceiling of x as an Integral.
1197 
1198 This is the smallest integer >= x.
1199 [clinic start generated code]*/
1200 
1201 static PyObject *
math_ceil(PyObject * module,PyObject * number)1202 math_ceil(PyObject *module, PyObject *number)
1203 /*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
1204 {
1205     _Py_IDENTIFIER(__ceil__);
1206 
1207     if (!PyFloat_CheckExact(number)) {
1208         PyObject *method = _PyObject_LookupSpecial(number, &PyId___ceil__);
1209         if (method != NULL) {
1210             PyObject *result = _PyObject_CallNoArg(method);
1211             Py_DECREF(method);
1212             return result;
1213         }
1214         if (PyErr_Occurred())
1215             return NULL;
1216     }
1217     double x = PyFloat_AsDouble(number);
1218     if (x == -1.0 && PyErr_Occurred())
1219         return NULL;
1220 
1221     return PyLong_FromDouble(ceil(x));
1222 }
1223 
1224 FUNC2(copysign, copysign,
1225       "copysign($module, x, y, /)\n--\n\n"
1226        "Return a float with the magnitude (absolute value) of x but the sign of y.\n\n"
1227       "On platforms that support signed zeros, copysign(1.0, -0.0)\n"
1228       "returns -1.0.\n")
1229 FUNC1(cos, cos, 0,
1230       "cos($module, x, /)\n--\n\n"
1231       "Return the cosine of x (measured in radians).")
1232 FUNC1(cosh, cosh, 1,
1233       "cosh($module, x, /)\n--\n\n"
1234       "Return the hyperbolic cosine of x.")
1235 FUNC1A(erf, m_erf,
1236        "erf($module, x, /)\n--\n\n"
1237        "Error function at x.")
1238 FUNC1A(erfc, m_erfc,
1239        "erfc($module, x, /)\n--\n\n"
1240        "Complementary error function at x.")
1241 FUNC1(exp, exp, 1,
1242       "exp($module, x, /)\n--\n\n"
1243       "Return e raised to the power of x.")
1244 FUNC1(expm1, m_expm1, 1,
1245       "expm1($module, x, /)\n--\n\n"
1246       "Return exp(x)-1.\n\n"
1247       "This function avoids the loss of precision involved in the direct "
1248       "evaluation of exp(x)-1 for small x.")
1249 FUNC1(fabs, fabs, 0,
1250       "fabs($module, x, /)\n--\n\n"
1251       "Return the absolute value of the float x.")
1252 
1253 /*[clinic input]
1254 math.floor
1255 
1256     x as number: object
1257     /
1258 
1259 Return the floor of x as an Integral.
1260 
1261 This is the largest integer <= x.
1262 [clinic start generated code]*/
1263 
1264 static PyObject *
math_floor(PyObject * module,PyObject * number)1265 math_floor(PyObject *module, PyObject *number)
1266 /*[clinic end generated code: output=c6a65c4884884b8a input=63af6b5d7ebcc3d6]*/
1267 {
1268     double x;
1269 
1270     _Py_IDENTIFIER(__floor__);
1271 
1272     if (PyFloat_CheckExact(number)) {
1273         x = PyFloat_AS_DOUBLE(number);
1274     }
1275     else
1276     {
1277         PyObject *method = _PyObject_LookupSpecial(number, &PyId___floor__);
1278         if (method != NULL) {
1279             PyObject *result = _PyObject_CallNoArg(method);
1280             Py_DECREF(method);
1281             return result;
1282         }
1283         if (PyErr_Occurred())
1284             return NULL;
1285         x = PyFloat_AsDouble(number);
1286         if (x == -1.0 && PyErr_Occurred())
1287             return NULL;
1288     }
1289     return PyLong_FromDouble(floor(x));
1290 }
1291 
1292 FUNC1A(gamma, m_tgamma,
1293       "gamma($module, x, /)\n--\n\n"
1294       "Gamma function at x.")
1295 FUNC1A(lgamma, m_lgamma,
1296       "lgamma($module, x, /)\n--\n\n"
1297       "Natural logarithm of absolute value of Gamma function at x.")
1298 FUNC1(log1p, m_log1p, 0,
1299       "log1p($module, x, /)\n--\n\n"
1300       "Return the natural logarithm of 1+x (base e).\n\n"
1301       "The result is computed in a way which is accurate for x near zero.")
1302 FUNC2(remainder, m_remainder,
1303       "remainder($module, x, y, /)\n--\n\n"
1304       "Difference between x and the closest integer multiple of y.\n\n"
1305       "Return x - n*y where n*y is the closest integer multiple of y.\n"
1306       "In the case where x is exactly halfway between two multiples of\n"
1307       "y, the nearest even value of n is used. The result is always exact.")
1308 FUNC1(sin, sin, 0,
1309       "sin($module, x, /)\n--\n\n"
1310       "Return the sine of x (measured in radians).")
1311 FUNC1(sinh, sinh, 1,
1312       "sinh($module, x, /)\n--\n\n"
1313       "Return the hyperbolic sine of x.")
1314 FUNC1(sqrt, sqrt, 0,
1315       "sqrt($module, x, /)\n--\n\n"
1316       "Return the square root of x.")
1317 FUNC1(tan, tan, 0,
1318       "tan($module, x, /)\n--\n\n"
1319       "Return the tangent of x (measured in radians).")
1320 FUNC1(tanh, tanh, 0,
1321       "tanh($module, x, /)\n--\n\n"
1322       "Return the hyperbolic tangent of x.")
1323 
1324 /* Precision summation function as msum() by Raymond Hettinger in
1325    <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
1326    enhanced with the exact partials sum and roundoff from Mark
1327    Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
1328    See those links for more details, proofs and other references.
1329 
1330    Note 1: IEEE 754R floating point semantics are assumed,
1331    but the current implementation does not re-establish special
1332    value semantics across iterations (i.e. handling -Inf + Inf).
1333 
1334    Note 2:  No provision is made for intermediate overflow handling;
1335    therefore, sum([1e+308, 1e-308, 1e+308]) returns 1e+308 while
1336    sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the
1337    overflow of the first partial sum.
1338 
1339    Note 3: The intermediate values lo, yr, and hi are declared volatile so
1340    aggressive compilers won't algebraically reduce lo to always be exactly 0.0.
1341    Also, the volatile declaration forces the values to be stored in memory as
1342    regular doubles instead of extended long precision (80-bit) values.  This
1343    prevents double rounding because any addition or subtraction of two doubles
1344    can be resolved exactly into double-sized hi and lo values.  As long as the
1345    hi value gets forced into a double before yr and lo are computed, the extra
1346    bits in downstream extended precision operations (x87 for example) will be
1347    exactly zero and therefore can be losslessly stored back into a double,
1348    thereby preventing double rounding.
1349 
1350    Note 4: A similar implementation is in Modules/cmathmodule.c.
1351    Be sure to update both when making changes.
1352 
1353    Note 5: The signature of math.fsum() differs from builtins.sum()
1354    because the start argument doesn't make sense in the context of
1355    accurate summation.  Since the partials table is collapsed before
1356    returning a result, sum(seq2, start=sum(seq1)) may not equal the
1357    accurate result returned by sum(itertools.chain(seq1, seq2)).
1358 */
1359 
1360 #define NUM_PARTIALS  32  /* initial partials array size, on stack */
1361 
1362 /* Extend the partials array p[] by doubling its size. */
1363 static int                          /* non-zero on error */
_fsum_realloc(double ** p_ptr,Py_ssize_t n,double * ps,Py_ssize_t * m_ptr)1364 _fsum_realloc(double **p_ptr, Py_ssize_t  n,
1365              double  *ps,    Py_ssize_t *m_ptr)
1366 {
1367     void *v = NULL;
1368     Py_ssize_t m = *m_ptr;
1369 
1370     m += m;  /* double */
1371     if (n < m && (size_t)m < ((size_t)PY_SSIZE_T_MAX / sizeof(double))) {
1372         double *p = *p_ptr;
1373         if (p == ps) {
1374             v = PyMem_Malloc(sizeof(double) * m);
1375             if (v != NULL)
1376                 memcpy(v, ps, sizeof(double) * n);
1377         }
1378         else
1379             v = PyMem_Realloc(p, sizeof(double) * m);
1380     }
1381     if (v == NULL) {        /* size overflow or no memory */
1382         PyErr_SetString(PyExc_MemoryError, "math.fsum partials");
1383         return 1;
1384     }
1385     *p_ptr = (double*) v;
1386     *m_ptr = m;
1387     return 0;
1388 }
1389 
1390 /* Full precision summation of a sequence of floats.
1391 
1392    def msum(iterable):
1393        partials = []  # sorted, non-overlapping partial sums
1394        for x in iterable:
1395            i = 0
1396            for y in partials:
1397                if abs(x) < abs(y):
1398                    x, y = y, x
1399                hi = x + y
1400                lo = y - (hi - x)
1401                if lo:
1402                    partials[i] = lo
1403                    i += 1
1404                x = hi
1405            partials[i:] = [x]
1406        return sum_exact(partials)
1407 
1408    Rounded x+y stored in hi with the roundoff stored in lo.  Together hi+lo
1409    are exactly equal to x+y.  The inner loop applies hi/lo summation to each
1410    partial so that the list of partial sums remains exact.
1411 
1412    Sum_exact() adds the partial sums exactly and correctly rounds the final
1413    result (using the round-half-to-even rule).  The items in partials remain
1414    non-zero, non-special, non-overlapping and strictly increasing in
1415    magnitude, but possibly not all having the same sign.
1416 
1417    Depends on IEEE 754 arithmetic guarantees and half-even rounding.
1418 */
1419 
1420 /*[clinic input]
1421 math.fsum
1422 
1423     seq: object
1424     /
1425 
1426 Return an accurate floating point sum of values in the iterable seq.
1427 
1428 Assumes IEEE-754 floating point arithmetic.
1429 [clinic start generated code]*/
1430 
1431 static PyObject *
math_fsum(PyObject * module,PyObject * seq)1432 math_fsum(PyObject *module, PyObject *seq)
1433 /*[clinic end generated code: output=ba5c672b87fe34fc input=c51b7d8caf6f6e82]*/
1434 {
1435     PyObject *item, *iter, *sum = NULL;
1436     Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
1437     double x, y, t, ps[NUM_PARTIALS], *p = ps;
1438     double xsave, special_sum = 0.0, inf_sum = 0.0;
1439     volatile double hi, yr, lo;
1440 
1441     iter = PyObject_GetIter(seq);
1442     if (iter == NULL)
1443         return NULL;
1444 
1445     for(;;) {           /* for x in iterable */
1446         assert(0 <= n && n <= m);
1447         assert((m == NUM_PARTIALS && p == ps) ||
1448                (m >  NUM_PARTIALS && p != NULL));
1449 
1450         item = PyIter_Next(iter);
1451         if (item == NULL) {
1452             if (PyErr_Occurred())
1453                 goto _fsum_error;
1454             break;
1455         }
1456         ASSIGN_DOUBLE(x, item, error_with_item);
1457         Py_DECREF(item);
1458 
1459         xsave = x;
1460         for (i = j = 0; j < n; j++) {       /* for y in partials */
1461             y = p[j];
1462             if (fabs(x) < fabs(y)) {
1463                 t = x; x = y; y = t;
1464             }
1465             hi = x + y;
1466             yr = hi - x;
1467             lo = y - yr;
1468             if (lo != 0.0)
1469                 p[i++] = lo;
1470             x = hi;
1471         }
1472 
1473         n = i;                              /* ps[i:] = [x] */
1474         if (x != 0.0) {
1475             if (! Py_IS_FINITE(x)) {
1476                 /* a nonfinite x could arise either as
1477                    a result of intermediate overflow, or
1478                    as a result of a nan or inf in the
1479                    summands */
1480                 if (Py_IS_FINITE(xsave)) {
1481                     PyErr_SetString(PyExc_OverflowError,
1482                           "intermediate overflow in fsum");
1483                     goto _fsum_error;
1484                 }
1485                 if (Py_IS_INFINITY(xsave))
1486                     inf_sum += xsave;
1487                 special_sum += xsave;
1488                 /* reset partials */
1489                 n = 0;
1490             }
1491             else if (n >= m && _fsum_realloc(&p, n, ps, &m))
1492                 goto _fsum_error;
1493             else
1494                 p[n++] = x;
1495         }
1496     }
1497 
1498     if (special_sum != 0.0) {
1499         if (Py_IS_NAN(inf_sum))
1500             PyErr_SetString(PyExc_ValueError,
1501                             "-inf + inf in fsum");
1502         else
1503             sum = PyFloat_FromDouble(special_sum);
1504         goto _fsum_error;
1505     }
1506 
1507     hi = 0.0;
1508     if (n > 0) {
1509         hi = p[--n];
1510         /* sum_exact(ps, hi) from the top, stop when the sum becomes
1511            inexact. */
1512         while (n > 0) {
1513             x = hi;
1514             y = p[--n];
1515             assert(fabs(y) < fabs(x));
1516             hi = x + y;
1517             yr = hi - x;
1518             lo = y - yr;
1519             if (lo != 0.0)
1520                 break;
1521         }
1522         /* Make half-even rounding work across multiple partials.
1523            Needed so that sum([1e-16, 1, 1e16]) will round-up the last
1524            digit to two instead of down to zero (the 1e-16 makes the 1
1525            slightly closer to two).  With a potential 1 ULP rounding
1526            error fixed-up, math.fsum() can guarantee commutativity. */
1527         if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) ||
1528                       (lo > 0.0 && p[n-1] > 0.0))) {
1529             y = lo * 2.0;
1530             x = hi + y;
1531             yr = x - hi;
1532             if (y == yr)
1533                 hi = x;
1534         }
1535     }
1536     sum = PyFloat_FromDouble(hi);
1537 
1538   _fsum_error:
1539     Py_DECREF(iter);
1540     if (p != ps)
1541         PyMem_Free(p);
1542     return sum;
1543 
1544   error_with_item:
1545     Py_DECREF(item);
1546     goto _fsum_error;
1547 }
1548 
1549 #undef NUM_PARTIALS
1550 
1551 
1552 static unsigned long
count_set_bits(unsigned long n)1553 count_set_bits(unsigned long n)
1554 {
1555     unsigned long count = 0;
1556     while (n != 0) {
1557         ++count;
1558         n &= n - 1; /* clear least significant bit */
1559     }
1560     return count;
1561 }
1562 
1563 /* Integer square root
1564 
1565 Given a nonnegative integer `n`, we want to compute the largest integer
1566 `a` for which `a * a <= n`, or equivalently the integer part of the exact
1567 square root of `n`.
1568 
1569 We use an adaptive-precision pure-integer version of Newton's iteration. Given
1570 a positive integer `n`, the algorithm produces at each iteration an integer
1571 approximation `a` to the square root of `n >> s` for some even integer `s`,
1572 with `s` decreasing as the iterations progress. On the final iteration, `s` is
1573 zero and we have an approximation to the square root of `n` itself.
1574 
1575 At every step, the approximation `a` is strictly within 1.0 of the true square
1576 root, so we have
1577 
1578     (a - 1)**2 < (n >> s) < (a + 1)**2
1579 
1580 After the final iteration, a check-and-correct step is needed to determine
1581 whether `a` or `a - 1` gives the desired integer square root of `n`.
1582 
1583 The algorithm is remarkable in its simplicity. There's no need for a
1584 per-iteration check-and-correct step, and termination is straightforward: the
1585 number of iterations is known in advance (it's exactly `floor(log2(log2(n)))`
1586 for `n > 1`). The only tricky part of the correctness proof is in establishing
1587 that the bound `(a - 1)**2 < (n >> s) < (a + 1)**2` is maintained from one
1588 iteration to the next. A sketch of the proof of this is given below.
1589 
1590 In addition to the proof sketch, a formal, computer-verified proof
1591 of correctness (using Lean) of an equivalent recursive algorithm can be found
1592 here:
1593 
1594     https://github.com/mdickinson/snippets/blob/master/proofs/isqrt/src/isqrt.lean
1595 
1596 
1597 Here's Python code equivalent to the C implementation below:
1598 
1599     def isqrt(n):
1600         """
1601         Return the integer part of the square root of the input.
1602         """
1603         n = operator.index(n)
1604 
1605         if n < 0:
1606             raise ValueError("isqrt() argument must be nonnegative")
1607         if n == 0:
1608             return 0
1609 
1610         c = (n.bit_length() - 1) // 2
1611         a = 1
1612         d = 0
1613         for s in reversed(range(c.bit_length())):
1614             # Loop invariant: (a-1)**2 < (n >> 2*(c - d)) < (a+1)**2
1615             e = d
1616             d = c >> s
1617             a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1618 
1619         return a - (a*a > n)
1620 
1621 
1622 Sketch of proof of correctness
1623 ------------------------------
1624 
1625 The delicate part of the correctness proof is showing that the loop invariant
1626 is preserved from one iteration to the next. That is, just before the line
1627 
1628     a = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1629 
1630 is executed in the above code, we know that
1631 
1632     (1)  (a - 1)**2 < (n >> 2*(c - e)) < (a + 1)**2.
1633 
1634 (since `e` is always the value of `d` from the previous iteration). We must
1635 prove that after that line is executed, we have
1636 
1637     (a - 1)**2 < (n >> 2*(c - d)) < (a + 1)**2
1638 
1639 To facilitate the proof, we make some changes of notation. Write `m` for
1640 `n >> 2*(c-d)`, and write `b` for the new value of `a`, so
1641 
1642     b = (a << d - e - 1) + (n >> 2*c - e - d + 1) // a
1643 
1644 or equivalently:
1645 
1646     (2)  b = (a << d - e - 1) + (m >> d - e + 1) // a
1647 
1648 Then we can rewrite (1) as:
1649 
1650     (3)  (a - 1)**2 < (m >> 2*(d - e)) < (a + 1)**2
1651 
1652 and we must show that (b - 1)**2 < m < (b + 1)**2.
1653 
1654 From this point on, we switch to mathematical notation, so `/` means exact
1655 division rather than integer division and `^` is used for exponentiation. We
1656 use the `√` symbol for the exact square root. In (3), we can remove the
1657 implicit floor operation to give:
1658 
1659     (4)  (a - 1)^2 < m / 4^(d - e) < (a + 1)^2
1660 
1661 Taking square roots throughout (4), scaling by `2^(d-e)`, and rearranging gives
1662 
1663     (5)  0 <= | 2^(d-e)a - √m | < 2^(d-e)
1664 
1665 Squaring and dividing through by `2^(d-e+1) a` gives
1666 
1667     (6)  0 <= 2^(d-e-1) a + m / (2^(d-e+1) a) - √m < 2^(d-e-1) / a
1668 
1669 We'll show below that `2^(d-e-1) <= a`. Given that, we can replace the
1670 right-hand side of (6) with `1`, and now replacing the central
1671 term `m / (2^(d-e+1) a)` with its floor in (6) gives
1672 
1673     (7) -1 < 2^(d-e-1) a + m // 2^(d-e+1) a - √m < 1
1674 
1675 Or equivalently, from (2):
1676 
1677     (7) -1 < b - √m < 1
1678 
1679 and rearranging gives that `(b-1)^2 < m < (b+1)^2`, which is what we needed
1680 to prove.
1681 
1682 We're not quite done: we still have to prove the inequality `2^(d - e - 1) <=
1683 a` that was used to get line (7) above. From the definition of `c`, we have
1684 `4^c <= n`, which implies
1685 
1686     (8)  4^d <= m
1687 
1688 also, since `e == d >> 1`, `d` is at most `2e + 1`, from which it follows
1689 that `2d - 2e - 1 <= d` and hence that
1690 
1691     (9)  4^(2d - 2e - 1) <= m
1692 
1693 Dividing both sides by `4^(d - e)` gives
1694 
1695     (10)  4^(d - e - 1) <= m / 4^(d - e)
1696 
1697 But we know from (4) that `m / 4^(d-e) < (a + 1)^2`, hence
1698 
1699     (11)  4^(d - e - 1) < (a + 1)^2
1700 
1701 Now taking square roots of both sides and observing that both `2^(d-e-1)` and
1702 `a` are integers gives `2^(d - e - 1) <= a`, which is what we needed. This
1703 completes the proof sketch.
1704 
1705 */
1706 
1707 
1708 /* Approximate square root of a large 64-bit integer.
1709 
1710    Given `n` satisfying `2**62 <= n < 2**64`, return `a`
1711    satisfying `(a - 1)**2 < n < (a + 1)**2`. */
1712 
1713 static uint64_t
_approximate_isqrt(uint64_t n)1714 _approximate_isqrt(uint64_t n)
1715 {
1716     uint32_t u = 1U + (n >> 62);
1717     u = (u << 1) + (n >> 59) / u;
1718     u = (u << 3) + (n >> 53) / u;
1719     u = (u << 7) + (n >> 41) / u;
1720     return (u << 15) + (n >> 17) / u;
1721 }
1722 
1723 /*[clinic input]
1724 math.isqrt
1725 
1726     n: object
1727     /
1728 
1729 Return the integer part of the square root of the input.
1730 [clinic start generated code]*/
1731 
1732 static PyObject *
math_isqrt(PyObject * module,PyObject * n)1733 math_isqrt(PyObject *module, PyObject *n)
1734 /*[clinic end generated code: output=35a6f7f980beab26 input=5b6e7ae4fa6c43d6]*/
1735 {
1736     int a_too_large, c_bit_length;
1737     size_t c, d;
1738     uint64_t m, u;
1739     PyObject *a = NULL, *b;
1740 
1741     n = _PyNumber_Index(n);
1742     if (n == NULL) {
1743         return NULL;
1744     }
1745 
1746     if (_PyLong_Sign(n) < 0) {
1747         PyErr_SetString(
1748             PyExc_ValueError,
1749             "isqrt() argument must be nonnegative");
1750         goto error;
1751     }
1752     if (_PyLong_Sign(n) == 0) {
1753         Py_DECREF(n);
1754         return PyLong_FromLong(0);
1755     }
1756 
1757     /* c = (n.bit_length() - 1) // 2 */
1758     c = _PyLong_NumBits(n);
1759     if (c == (size_t)(-1)) {
1760         goto error;
1761     }
1762     c = (c - 1U) / 2U;
1763 
1764     /* Fast path: if c <= 31 then n < 2**64 and we can compute directly with a
1765        fast, almost branch-free algorithm. In the final correction, we use `u*u
1766        - 1 >= m` instead of the simpler `u*u > m` in order to get the correct
1767        result in the corner case where `u=2**32`. */
1768     if (c <= 31U) {
1769         m = (uint64_t)PyLong_AsUnsignedLongLong(n);
1770         Py_DECREF(n);
1771         if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1772             return NULL;
1773         }
1774         u = _approximate_isqrt(m << (62U - 2U*c)) >> (31U - c);
1775         u -= u * u - 1U >= m;
1776         return PyLong_FromUnsignedLongLong((unsigned long long)u);
1777     }
1778 
1779     /* Slow path: n >= 2**64. We perform the first five iterations in C integer
1780        arithmetic, then switch to using Python long integers. */
1781 
1782     /* From n >= 2**64 it follows that c.bit_length() >= 6. */
1783     c_bit_length = 6;
1784     while ((c >> c_bit_length) > 0U) {
1785         ++c_bit_length;
1786     }
1787 
1788     /* Initialise d and a. */
1789     d = c >> (c_bit_length - 5);
1790     b = _PyLong_Rshift(n, 2U*c - 62U);
1791     if (b == NULL) {
1792         goto error;
1793     }
1794     m = (uint64_t)PyLong_AsUnsignedLongLong(b);
1795     Py_DECREF(b);
1796     if (m == (uint64_t)(-1) && PyErr_Occurred()) {
1797         goto error;
1798     }
1799     u = _approximate_isqrt(m) >> (31U - d);
1800     a = PyLong_FromUnsignedLongLong((unsigned long long)u);
1801     if (a == NULL) {
1802         goto error;
1803     }
1804 
1805     for (int s = c_bit_length - 6; s >= 0; --s) {
1806         PyObject *q;
1807         size_t e = d;
1808 
1809         d = c >> s;
1810 
1811         /* q = (n >> 2*c - e - d + 1) // a */
1812         q = _PyLong_Rshift(n, 2U*c - d - e + 1U);
1813         if (q == NULL) {
1814             goto error;
1815         }
1816         Py_SETREF(q, PyNumber_FloorDivide(q, a));
1817         if (q == NULL) {
1818             goto error;
1819         }
1820 
1821         /* a = (a << d - 1 - e) + q */
1822         Py_SETREF(a, _PyLong_Lshift(a, d - 1U - e));
1823         if (a == NULL) {
1824             Py_DECREF(q);
1825             goto error;
1826         }
1827         Py_SETREF(a, PyNumber_Add(a, q));
1828         Py_DECREF(q);
1829         if (a == NULL) {
1830             goto error;
1831         }
1832     }
1833 
1834     /* The correct result is either a or a - 1. Figure out which, and
1835        decrement a if necessary. */
1836 
1837     /* a_too_large = n < a * a */
1838     b = PyNumber_Multiply(a, a);
1839     if (b == NULL) {
1840         goto error;
1841     }
1842     a_too_large = PyObject_RichCompareBool(n, b, Py_LT);
1843     Py_DECREF(b);
1844     if (a_too_large == -1) {
1845         goto error;
1846     }
1847 
1848     if (a_too_large) {
1849         Py_SETREF(a, PyNumber_Subtract(a, _PyLong_GetOne()));
1850     }
1851     Py_DECREF(n);
1852     return a;
1853 
1854   error:
1855     Py_XDECREF(a);
1856     Py_DECREF(n);
1857     return NULL;
1858 }
1859 
1860 /* Divide-and-conquer factorial algorithm
1861  *
1862  * Based on the formula and pseudo-code provided at:
1863  * http://www.luschny.de/math/factorial/binarysplitfact.html
1864  *
1865  * Faster algorithms exist, but they're more complicated and depend on
1866  * a fast prime factorization algorithm.
1867  *
1868  * Notes on the algorithm
1869  * ----------------------
1870  *
1871  * factorial(n) is written in the form 2**k * m, with m odd.  k and m are
1872  * computed separately, and then combined using a left shift.
1873  *
1874  * The function factorial_odd_part computes the odd part m (i.e., the greatest
1875  * odd divisor) of factorial(n), using the formula:
1876  *
1877  *   factorial_odd_part(n) =
1878  *
1879  *        product_{i >= 0} product_{0 < j <= n / 2**i, j odd} j
1880  *
1881  * Example: factorial_odd_part(20) =
1882  *
1883  *        (1) *
1884  *        (1) *
1885  *        (1 * 3 * 5) *
1886  *        (1 * 3 * 5 * 7 * 9) *
1887  *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1888  *
1889  * Here i goes from large to small: the first term corresponds to i=4 (any
1890  * larger i gives an empty product), and the last term corresponds to i=0.
1891  * Each term can be computed from the last by multiplying by the extra odd
1892  * numbers required: e.g., to get from the penultimate term to the last one,
1893  * we multiply by (11 * 13 * 15 * 17 * 19).
1894  *
1895  * To see a hint of why this formula works, here are the same numbers as above
1896  * but with the even parts (i.e., the appropriate powers of 2) included.  For
1897  * each subterm in the product for i, we multiply that subterm by 2**i:
1898  *
1899  *   factorial(20) =
1900  *
1901  *        (16) *
1902  *        (8) *
1903  *        (4 * 12 * 20) *
1904  *        (2 * 6 * 10 * 14 * 18) *
1905  *        (1 * 3 * 5 * 7 * 9 * 11 * 13 * 15 * 17 * 19)
1906  *
1907  * The factorial_partial_product function computes the product of all odd j in
1908  * range(start, stop) for given start and stop.  It's used to compute the
1909  * partial products like (11 * 13 * 15 * 17 * 19) in the example above.  It
1910  * operates recursively, repeatedly splitting the range into two roughly equal
1911  * pieces until the subranges are small enough to be computed using only C
1912  * integer arithmetic.
1913  *
1914  * The two-valuation k (i.e., the exponent of the largest power of 2 dividing
1915  * the factorial) is computed independently in the main math_factorial
1916  * function.  By standard results, its value is:
1917  *
1918  *    two_valuation = n//2 + n//4 + n//8 + ....
1919  *
1920  * It can be shown (e.g., by complete induction on n) that two_valuation is
1921  * equal to n - count_set_bits(n), where count_set_bits(n) gives the number of
1922  * '1'-bits in the binary expansion of n.
1923  */
1924 
1925 /* factorial_partial_product: Compute product(range(start, stop, 2)) using
1926  * divide and conquer.  Assumes start and stop are odd and stop > start.
1927  * max_bits must be >= bit_length(stop - 2). */
1928 
1929 static PyObject *
factorial_partial_product(unsigned long start,unsigned long stop,unsigned long max_bits)1930 factorial_partial_product(unsigned long start, unsigned long stop,
1931                           unsigned long max_bits)
1932 {
1933     unsigned long midpoint, num_operands;
1934     PyObject *left = NULL, *right = NULL, *result = NULL;
1935 
1936     /* If the return value will fit an unsigned long, then we can
1937      * multiply in a tight, fast loop where each multiply is O(1).
1938      * Compute an upper bound on the number of bits required to store
1939      * the answer.
1940      *
1941      * Storing some integer z requires floor(lg(z))+1 bits, which is
1942      * conveniently the value returned by bit_length(z).  The
1943      * product x*y will require at most
1944      * bit_length(x) + bit_length(y) bits to store, based
1945      * on the idea that lg product = lg x + lg y.
1946      *
1947      * We know that stop - 2 is the largest number to be multiplied.  From
1948      * there, we have: bit_length(answer) <= num_operands *
1949      * bit_length(stop - 2)
1950      */
1951 
1952     num_operands = (stop - start) / 2;
1953     /* The "num_operands <= 8 * SIZEOF_LONG" check guards against the
1954      * unlikely case of an overflow in num_operands * max_bits. */
1955     if (num_operands <= 8 * SIZEOF_LONG &&
1956         num_operands * max_bits <= 8 * SIZEOF_LONG) {
1957         unsigned long j, total;
1958         for (total = start, j = start + 2; j < stop; j += 2)
1959             total *= j;
1960         return PyLong_FromUnsignedLong(total);
1961     }
1962 
1963     /* find midpoint of range(start, stop), rounded up to next odd number. */
1964     midpoint = (start + num_operands) | 1;
1965     left = factorial_partial_product(start, midpoint,
1966                                      _Py_bit_length(midpoint - 2));
1967     if (left == NULL)
1968         goto error;
1969     right = factorial_partial_product(midpoint, stop, max_bits);
1970     if (right == NULL)
1971         goto error;
1972     result = PyNumber_Multiply(left, right);
1973 
1974   error:
1975     Py_XDECREF(left);
1976     Py_XDECREF(right);
1977     return result;
1978 }
1979 
1980 /* factorial_odd_part:  compute the odd part of factorial(n). */
1981 
1982 static PyObject *
factorial_odd_part(unsigned long n)1983 factorial_odd_part(unsigned long n)
1984 {
1985     long i;
1986     unsigned long v, lower, upper;
1987     PyObject *partial, *tmp, *inner, *outer;
1988 
1989     inner = PyLong_FromLong(1);
1990     if (inner == NULL)
1991         return NULL;
1992     outer = inner;
1993     Py_INCREF(outer);
1994 
1995     upper = 3;
1996     for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
1997         v = n >> i;
1998         if (v <= 2)
1999             continue;
2000         lower = upper;
2001         /* (v + 1) | 1 = least odd integer strictly larger than n / 2**i */
2002         upper = (v + 1) | 1;
2003         /* Here inner is the product of all odd integers j in the range (0,
2004            n/2**(i+1)].  The factorial_partial_product call below gives the
2005            product of all odd integers j in the range (n/2**(i+1), n/2**i]. */
2006         partial = factorial_partial_product(lower, upper, _Py_bit_length(upper-2));
2007         /* inner *= partial */
2008         if (partial == NULL)
2009             goto error;
2010         tmp = PyNumber_Multiply(inner, partial);
2011         Py_DECREF(partial);
2012         if (tmp == NULL)
2013             goto error;
2014         Py_DECREF(inner);
2015         inner = tmp;
2016         /* Now inner is the product of all odd integers j in the range (0,
2017            n/2**i], giving the inner product in the formula above. */
2018 
2019         /* outer *= inner; */
2020         tmp = PyNumber_Multiply(outer, inner);
2021         if (tmp == NULL)
2022             goto error;
2023         Py_DECREF(outer);
2024         outer = tmp;
2025     }
2026     Py_DECREF(inner);
2027     return outer;
2028 
2029   error:
2030     Py_DECREF(outer);
2031     Py_DECREF(inner);
2032     return NULL;
2033 }
2034 
2035 
2036 /* Lookup table for small factorial values */
2037 
2038 static const unsigned long SmallFactorials[] = {
2039     1, 1, 2, 6, 24, 120, 720, 5040, 40320,
2040     362880, 3628800, 39916800, 479001600,
2041 #if SIZEOF_LONG >= 8
2042     6227020800, 87178291200, 1307674368000,
2043     20922789888000, 355687428096000, 6402373705728000,
2044     121645100408832000, 2432902008176640000
2045 #endif
2046 };
2047 
2048 /*[clinic input]
2049 math.factorial
2050 
2051     x as arg: object
2052     /
2053 
2054 Find x!.
2055 
2056 Raise a ValueError if x is negative or non-integral.
2057 [clinic start generated code]*/
2058 
2059 static PyObject *
math_factorial(PyObject * module,PyObject * arg)2060 math_factorial(PyObject *module, PyObject *arg)
2061 /*[clinic end generated code: output=6686f26fae00e9ca input=6d1c8105c0d91fb4]*/
2062 {
2063     long x, two_valuation;
2064     int overflow;
2065     PyObject *result, *odd_part;
2066 
2067     x = PyLong_AsLongAndOverflow(arg, &overflow);
2068     if (x == -1 && PyErr_Occurred()) {
2069         return NULL;
2070     }
2071     else if (overflow == 1) {
2072         PyErr_Format(PyExc_OverflowError,
2073                      "factorial() argument should not exceed %ld",
2074                      LONG_MAX);
2075         return NULL;
2076     }
2077     else if (overflow == -1 || x < 0) {
2078         PyErr_SetString(PyExc_ValueError,
2079                         "factorial() not defined for negative values");
2080         return NULL;
2081     }
2082 
2083     /* use lookup table if x is small */
2084     if (x < (long)Py_ARRAY_LENGTH(SmallFactorials))
2085         return PyLong_FromUnsignedLong(SmallFactorials[x]);
2086 
2087     /* else express in the form odd_part * 2**two_valuation, and compute as
2088        odd_part << two_valuation. */
2089     odd_part = factorial_odd_part(x);
2090     if (odd_part == NULL)
2091         return NULL;
2092     two_valuation = x - count_set_bits(x);
2093     result = _PyLong_Lshift(odd_part, two_valuation);
2094     Py_DECREF(odd_part);
2095     return result;
2096 }
2097 
2098 
2099 /*[clinic input]
2100 math.trunc
2101 
2102     x: object
2103     /
2104 
2105 Truncates the Real x to the nearest Integral toward 0.
2106 
2107 Uses the __trunc__ magic method.
2108 [clinic start generated code]*/
2109 
2110 static PyObject *
math_trunc(PyObject * module,PyObject * x)2111 math_trunc(PyObject *module, PyObject *x)
2112 /*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
2113 {
2114     _Py_IDENTIFIER(__trunc__);
2115     PyObject *trunc, *result;
2116 
2117     if (PyFloat_CheckExact(x)) {
2118         return PyFloat_Type.tp_as_number->nb_int(x);
2119     }
2120 
2121     if (Py_TYPE(x)->tp_dict == NULL) {
2122         if (PyType_Ready(Py_TYPE(x)) < 0)
2123             return NULL;
2124     }
2125 
2126     trunc = _PyObject_LookupSpecial(x, &PyId___trunc__);
2127     if (trunc == NULL) {
2128         if (!PyErr_Occurred())
2129             PyErr_Format(PyExc_TypeError,
2130                          "type %.100s doesn't define __trunc__ method",
2131                          Py_TYPE(x)->tp_name);
2132         return NULL;
2133     }
2134     result = _PyObject_CallNoArg(trunc);
2135     Py_DECREF(trunc);
2136     return result;
2137 }
2138 
2139 
2140 /*[clinic input]
2141 math.frexp
2142 
2143     x: double
2144     /
2145 
2146 Return the mantissa and exponent of x, as pair (m, e).
2147 
2148 m is a float and e is an int, such that x = m * 2.**e.
2149 If x is 0, m and e are both 0.  Else 0.5 <= abs(m) < 1.0.
2150 [clinic start generated code]*/
2151 
2152 static PyObject *
math_frexp_impl(PyObject * module,double x)2153 math_frexp_impl(PyObject *module, double x)
2154 /*[clinic end generated code: output=03e30d252a15ad4a input=96251c9e208bc6e9]*/
2155 {
2156     int i;
2157     /* deal with special cases directly, to sidestep platform
2158        differences */
2159     if (Py_IS_NAN(x) || Py_IS_INFINITY(x) || !x) {
2160         i = 0;
2161     }
2162     else {
2163         x = frexp(x, &i);
2164     }
2165     return Py_BuildValue("(di)", x, i);
2166 }
2167 
2168 
2169 /*[clinic input]
2170 math.ldexp
2171 
2172     x: double
2173     i: object
2174     /
2175 
2176 Return x * (2**i).
2177 
2178 This is essentially the inverse of frexp().
2179 [clinic start generated code]*/
2180 
2181 static PyObject *
math_ldexp_impl(PyObject * module,double x,PyObject * i)2182 math_ldexp_impl(PyObject *module, double x, PyObject *i)
2183 /*[clinic end generated code: output=b6892f3c2df9cc6a input=17d5970c1a40a8c1]*/
2184 {
2185     double r;
2186     long exp;
2187     int overflow;
2188 
2189     if (PyLong_Check(i)) {
2190         /* on overflow, replace exponent with either LONG_MAX
2191            or LONG_MIN, depending on the sign. */
2192         exp = PyLong_AsLongAndOverflow(i, &overflow);
2193         if (exp == -1 && PyErr_Occurred())
2194             return NULL;
2195         if (overflow)
2196             exp = overflow < 0 ? LONG_MIN : LONG_MAX;
2197     }
2198     else {
2199         PyErr_SetString(PyExc_TypeError,
2200                         "Expected an int as second argument to ldexp.");
2201         return NULL;
2202     }
2203 
2204     if (x == 0. || !Py_IS_FINITE(x)) {
2205         /* NaNs, zeros and infinities are returned unchanged */
2206         r = x;
2207         errno = 0;
2208     } else if (exp > INT_MAX) {
2209         /* overflow */
2210         r = copysign(Py_HUGE_VAL, x);
2211         errno = ERANGE;
2212     } else if (exp < INT_MIN) {
2213         /* underflow to +-0 */
2214         r = copysign(0., x);
2215         errno = 0;
2216     } else {
2217         errno = 0;
2218         r = ldexp(x, (int)exp);
2219         if (Py_IS_INFINITY(r))
2220             errno = ERANGE;
2221     }
2222 
2223     if (errno && is_error(r))
2224         return NULL;
2225     return PyFloat_FromDouble(r);
2226 }
2227 
2228 
2229 /*[clinic input]
2230 math.modf
2231 
2232     x: double
2233     /
2234 
2235 Return the fractional and integer parts of x.
2236 
2237 Both results carry the sign of x and are floats.
2238 [clinic start generated code]*/
2239 
2240 static PyObject *
math_modf_impl(PyObject * module,double x)2241 math_modf_impl(PyObject *module, double x)
2242 /*[clinic end generated code: output=90cee0260014c3c0 input=b4cfb6786afd9035]*/
2243 {
2244     double y;
2245     /* some platforms don't do the right thing for NaNs and
2246        infinities, so we take care of special cases directly. */
2247     if (!Py_IS_FINITE(x)) {
2248         if (Py_IS_INFINITY(x))
2249             return Py_BuildValue("(dd)", copysign(0., x), x);
2250         else if (Py_IS_NAN(x))
2251             return Py_BuildValue("(dd)", x, x);
2252     }
2253 
2254     errno = 0;
2255     x = modf(x, &y);
2256     return Py_BuildValue("(dd)", x, y);
2257 }
2258 
2259 
2260 /* A decent logarithm is easy to compute even for huge ints, but libm can't
2261    do that by itself -- loghelper can.  func is log or log10, and name is
2262    "log" or "log10".  Note that overflow of the result isn't possible: an int
2263    can contain no more than INT_MAX * SHIFT bits, so has value certainly less
2264    than 2**(2**64 * 2**16) == 2**2**80, and log2 of that is 2**80, which is
2265    small enough to fit in an IEEE single.  log and log10 are even smaller.
2266    However, intermediate overflow is possible for an int if the number of bits
2267    in that int is larger than PY_SSIZE_T_MAX. */
2268 
2269 static PyObject*
loghelper(PyObject * arg,double (* func)(double),const char * funcname)2270 loghelper(PyObject* arg, double (*func)(double), const char *funcname)
2271 {
2272     /* If it is int, do it ourselves. */
2273     if (PyLong_Check(arg)) {
2274         double x, result;
2275         Py_ssize_t e;
2276 
2277         /* Negative or zero inputs give a ValueError. */
2278         if (Py_SIZE(arg) <= 0) {
2279             PyErr_SetString(PyExc_ValueError,
2280                             "math domain error");
2281             return NULL;
2282         }
2283 
2284         x = PyLong_AsDouble(arg);
2285         if (x == -1.0 && PyErr_Occurred()) {
2286             if (!PyErr_ExceptionMatches(PyExc_OverflowError))
2287                 return NULL;
2288             /* Here the conversion to double overflowed, but it's possible
2289                to compute the log anyway.  Clear the exception and continue. */
2290             PyErr_Clear();
2291             x = _PyLong_Frexp((PyLongObject *)arg, &e);
2292             if (x == -1.0 && PyErr_Occurred())
2293                 return NULL;
2294             /* Value is ~= x * 2**e, so the log ~= log(x) + log(2) * e. */
2295             result = func(x) + func(2.0) * e;
2296         }
2297         else
2298             /* Successfully converted x to a double. */
2299             result = func(x);
2300         return PyFloat_FromDouble(result);
2301     }
2302 
2303     /* Else let libm handle it by itself. */
2304     return math_1(arg, func, 0);
2305 }
2306 
2307 
2308 /*[clinic input]
2309 math.log
2310 
2311     x:    object
2312     [
2313     base: object(c_default="NULL") = math.e
2314     ]
2315     /
2316 
2317 Return the logarithm of x to the given base.
2318 
2319 If the base not specified, returns the natural logarithm (base e) of x.
2320 [clinic start generated code]*/
2321 
2322 static PyObject *
math_log_impl(PyObject * module,PyObject * x,int group_right_1,PyObject * base)2323 math_log_impl(PyObject *module, PyObject *x, int group_right_1,
2324               PyObject *base)
2325 /*[clinic end generated code: output=7b5a39e526b73fc9 input=0f62d5726cbfebbd]*/
2326 {
2327     PyObject *num, *den;
2328     PyObject *ans;
2329 
2330     num = loghelper(x, m_log, "log");
2331     if (num == NULL || base == NULL)
2332         return num;
2333 
2334     den = loghelper(base, m_log, "log");
2335     if (den == NULL) {
2336         Py_DECREF(num);
2337         return NULL;
2338     }
2339 
2340     ans = PyNumber_TrueDivide(num, den);
2341     Py_DECREF(num);
2342     Py_DECREF(den);
2343     return ans;
2344 }
2345 
2346 
2347 /*[clinic input]
2348 math.log2
2349 
2350     x: object
2351     /
2352 
2353 Return the base 2 logarithm of x.
2354 [clinic start generated code]*/
2355 
2356 static PyObject *
math_log2(PyObject * module,PyObject * x)2357 math_log2(PyObject *module, PyObject *x)
2358 /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
2359 {
2360     return loghelper(x, m_log2, "log2");
2361 }
2362 
2363 
2364 /*[clinic input]
2365 math.log10
2366 
2367     x: object
2368     /
2369 
2370 Return the base 10 logarithm of x.
2371 [clinic start generated code]*/
2372 
2373 static PyObject *
math_log10(PyObject * module,PyObject * x)2374 math_log10(PyObject *module, PyObject *x)
2375 /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
2376 {
2377     return loghelper(x, m_log10, "log10");
2378 }
2379 
2380 
2381 /*[clinic input]
2382 math.fmod
2383 
2384     x: double
2385     y: double
2386     /
2387 
2388 Return fmod(x, y), according to platform C.
2389 
2390 x % y may differ.
2391 [clinic start generated code]*/
2392 
2393 static PyObject *
math_fmod_impl(PyObject * module,double x,double y)2394 math_fmod_impl(PyObject *module, double x, double y)
2395 /*[clinic end generated code: output=7559d794343a27b5 input=4f84caa8cfc26a03]*/
2396 {
2397     double r;
2398     /* fmod(x, +/-Inf) returns x for finite x. */
2399     if (Py_IS_INFINITY(y) && Py_IS_FINITE(x))
2400         return PyFloat_FromDouble(x);
2401     errno = 0;
2402     r = fmod(x, y);
2403     if (Py_IS_NAN(r)) {
2404         if (!Py_IS_NAN(x) && !Py_IS_NAN(y))
2405             errno = EDOM;
2406         else
2407             errno = 0;
2408     }
2409     if (errno && is_error(r))
2410         return NULL;
2411     else
2412         return PyFloat_FromDouble(r);
2413 }
2414 
2415 /*
2416 Given a *vec* of values, compute the vector norm:
2417 
2418     sqrt(sum(x ** 2 for x in vec))
2419 
2420 The *max* variable should be equal to the largest fabs(x).
2421 The *n* variable is the length of *vec*.
2422 If n==0, then *max* should be 0.0.
2423 If an infinity is present in the vec, *max* should be INF.
2424 The *found_nan* variable indicates whether some member of
2425 the *vec* is a NaN.
2426 
2427 To avoid overflow/underflow and to achieve high accuracy giving results
2428 that are almost always correctly rounded, four techniques are used:
2429 
2430 * lossless scaling using a power-of-two scaling factor
2431 * accurate squaring using Veltkamp-Dekker splitting [1]
2432 * compensated summation using a variant of the Neumaier algorithm [2]
2433 * differential correction of the square root [3]
2434 
2435 The usual presentation of the Neumaier summation algorithm has an
2436 expensive branch depending on which operand has the larger
2437 magnitude.  We avoid this cost by arranging the calculation so that
2438 fabs(csum) is always as large as fabs(x).
2439 
2440 To establish the invariant, *csum* is initialized to 1.0 which is
2441 always larger than x**2 after scaling or after division by *max*.
2442 After the loop is finished, the initial 1.0 is subtracted out for a
2443 net zero effect on the final sum.  Since *csum* will be greater than
2444 1.0, the subtraction of 1.0 will not cause fractional digits to be
2445 dropped from *csum*.
2446 
2447 To get the full benefit from compensated summation, the largest
2448 addend should be in the range: 0.5 <= |x| <= 1.0.  Accordingly,
2449 scaling or division by *max* should not be skipped even if not
2450 otherwise needed to prevent overflow or loss of precision.
2451 
2452 The assertion that hi*hi <= 1.0 is a bit subtle.  Each vector element
2453 gets scaled to a magnitude below 1.0.  The Veltkamp-Dekker splitting
2454 algorithm gives a *hi* value that is correctly rounded to half
2455 precision.  When a value at or below 1.0 is correctly rounded, it
2456 never goes above 1.0.  And when values at or below 1.0 are squared,
2457 they remain at or below 1.0, thus preserving the summation invariant.
2458 
2459 Another interesting assertion is that csum+lo*lo == csum. In the loop,
2460 each scaled vector element has a magnitude less than 1.0.  After the
2461 Veltkamp split, *lo* has a maximum value of 2**-27.  So the maximum
2462 value of *lo* squared is 2**-54.  The value of ulp(1.0)/2.0 is 2**-53.
2463 Given that csum >= 1.0, we have:
2464     lo**2 <= 2**-54 < 2**-53 == 1/2*ulp(1.0) <= ulp(csum)/2
2465 Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
2466 
2467 To minimize loss of information during the accumulation of fractional
2468 values, each term has a separate accumulator.  This also breaks up
2469 sequential dependencies in the inner loop so the CPU can maximize
2470 floating point throughput. [4]  On a 2.6 GHz Haswell, adding one
2471 dimension has an incremental cost of only 5ns -- for example when
2472 moving from hypot(x,y) to hypot(x,y,z).
2473 
2474 The square root differential correction is needed because a
2475 correctly rounded square root of a correctly rounded sum of
2476 squares can still be off by as much as one ulp.
2477 
2478 The differential correction starts with a value *x* that is
2479 the difference between the square of *h*, the possibly inaccurately
2480 rounded square root, and the accurately computed sum of squares.
2481 The correction is the first order term of the Maclaurin series
2482 expansion of sqrt(h**2 + x) == h + x/(2*h) + O(x**2). [5]
2483 
2484 Essentially, this differential correction is equivalent to one
2485 refinement step in Newton's divide-and-average square root
2486 algorithm, effectively doubling the number of accurate bits.
2487 This technique is used in Dekker's SQRT2 algorithm and again in
2488 Borges' ALGORITHM 4 and 5.
2489 
2490 Without proof for all cases, hypot() cannot claim to be always
2491 correctly rounded.  However for n <= 1000, prior to the final addition
2492 that rounds the overall result, the internal accuracy of "h" together
2493 with its correction of "x / (2.0 * h)" is at least 100 bits. [6]
2494 Also, hypot() was tested against a Decimal implementation with
2495 prec=300.  After 100 million trials, no incorrectly rounded examples
2496 were found.  In addition, perfect commutativity (all permutations are
2497 exactly equal) was verified for 1 billion random inputs with n=5. [7]
2498 
2499 References:
2500 
2501 1. Veltkamp-Dekker splitting: http://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
2502 2. Compensated summation:  http://www.ti3.tu-harburg.de/paper/rump/Ru08b.pdf
2503 3. Square root differential correction:  https://arxiv.org/pdf/1904.09481.pdf
2504 4. Data dependency graph:  https://bugs.python.org/file49439/hypot.png
2505 5. https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
2506 6. Analysis of internal accuracy:  https://bugs.python.org/file49484/best_frac.py
2507 7. Commutativity test:  https://bugs.python.org/file49448/test_hypot_commutativity.py
2508 
2509 */
2510 
2511 static inline double
vector_norm(Py_ssize_t n,double * vec,double max,int found_nan)2512 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
2513 {
2514     const double T27 = 134217729.0;     /* ldexp(1.0, 27) + 1.0) */
2515     double x, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0, frac3 = 0.0;
2516     double t, hi, lo, h;
2517     int max_e;
2518     Py_ssize_t i;
2519 
2520     if (Py_IS_INFINITY(max)) {
2521         return max;
2522     }
2523     if (found_nan) {
2524         return Py_NAN;
2525     }
2526     if (max == 0.0 || n <= 1) {
2527         return max;
2528     }
2529     frexp(max, &max_e);
2530     if (max_e >= -1023) {
2531         scale = ldexp(1.0, -max_e);
2532         assert(max * scale >= 0.5);
2533         assert(max * scale < 1.0);
2534         for (i=0 ; i < n ; i++) {
2535             x = vec[i];
2536             assert(Py_IS_FINITE(x) && fabs(x) <= max);
2537 
2538             x *= scale;
2539             assert(fabs(x) < 1.0);
2540 
2541             t = x * T27;
2542             hi = t - (t - x);
2543             lo = x - hi;
2544             assert(hi + lo == x);
2545 
2546             x = hi * hi;
2547             assert(x <= 1.0);
2548             assert(fabs(csum) >= fabs(x));
2549             oldcsum = csum;
2550             csum += x;
2551             frac1 += (oldcsum - csum) + x;
2552 
2553             x = 2.0 * hi * lo;
2554             assert(fabs(csum) >= fabs(x));
2555             oldcsum = csum;
2556             csum += x;
2557             frac2 += (oldcsum - csum) + x;
2558 
2559             assert(csum + lo * lo == csum);
2560             frac3 += lo * lo;
2561         }
2562         h = sqrt(csum - 1.0 + (frac1 + frac2 + frac3));
2563 
2564         x = h;
2565         t = x * T27;
2566         hi = t - (t - x);
2567         lo = x - hi;
2568         assert (hi + lo == x);
2569 
2570         x = -hi * hi;
2571         assert(fabs(csum) >= fabs(x));
2572         oldcsum = csum;
2573         csum += x;
2574         frac1 += (oldcsum - csum) + x;
2575 
2576         x = -2.0 * hi * lo;
2577         assert(fabs(csum) >= fabs(x));
2578         oldcsum = csum;
2579         csum += x;
2580         frac2 += (oldcsum - csum) + x;
2581 
2582         x = -lo * lo;
2583         assert(fabs(csum) >= fabs(x));
2584         oldcsum = csum;
2585         csum += x;
2586         frac3 += (oldcsum - csum) + x;
2587 
2588         x = csum - 1.0 + (frac1 + frac2 + frac3);
2589         return (h + x / (2.0 * h)) / scale;
2590     }
2591     /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
2592        So instead of multiplying by a scale, we just divide by *max*.
2593     */
2594     for (i=0 ; i < n ; i++) {
2595         x = vec[i];
2596         assert(Py_IS_FINITE(x) && fabs(x) <= max);
2597         x /= max;
2598         x = x*x;
2599         assert(x <= 1.0);
2600         assert(fabs(csum) >= fabs(x));
2601         oldcsum = csum;
2602         csum += x;
2603         frac1 += (oldcsum - csum) + x;
2604     }
2605     return max * sqrt(csum - 1.0 + frac1);
2606 }
2607 
2608 #define NUM_STACK_ELEMS 16
2609 
2610 /*[clinic input]
2611 math.dist
2612 
2613     p: object
2614     q: object
2615     /
2616 
2617 Return the Euclidean distance between two points p and q.
2618 
2619 The points should be specified as sequences (or iterables) of
2620 coordinates.  Both inputs must have the same dimension.
2621 
2622 Roughly equivalent to:
2623     sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q)))
2624 [clinic start generated code]*/
2625 
2626 static PyObject *
math_dist_impl(PyObject * module,PyObject * p,PyObject * q)2627 math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
2628 /*[clinic end generated code: output=56bd9538d06bbcfe input=74e85e1b6092e68e]*/
2629 {
2630     PyObject *item;
2631     double max = 0.0;
2632     double x, px, qx, result;
2633     Py_ssize_t i, m, n;
2634     int found_nan = 0, p_allocated = 0, q_allocated = 0;
2635     double diffs_on_stack[NUM_STACK_ELEMS];
2636     double *diffs = diffs_on_stack;
2637 
2638     if (!PyTuple_Check(p)) {
2639         p = PySequence_Tuple(p);
2640         if (p == NULL) {
2641             return NULL;
2642         }
2643         p_allocated = 1;
2644     }
2645     if (!PyTuple_Check(q)) {
2646         q = PySequence_Tuple(q);
2647         if (q == NULL) {
2648             if (p_allocated) {
2649                 Py_DECREF(p);
2650             }
2651             return NULL;
2652         }
2653         q_allocated = 1;
2654     }
2655 
2656     m = PyTuple_GET_SIZE(p);
2657     n = PyTuple_GET_SIZE(q);
2658     if (m != n) {
2659         PyErr_SetString(PyExc_ValueError,
2660                         "both points must have the same number of dimensions");
2661         return NULL;
2662 
2663     }
2664     if (n > NUM_STACK_ELEMS) {
2665         diffs = (double *) PyObject_Malloc(n * sizeof(double));
2666         if (diffs == NULL) {
2667             return PyErr_NoMemory();
2668         }
2669     }
2670     for (i=0 ; i<n ; i++) {
2671         item = PyTuple_GET_ITEM(p, i);
2672         ASSIGN_DOUBLE(px, item, error_exit);
2673         item = PyTuple_GET_ITEM(q, i);
2674         ASSIGN_DOUBLE(qx, item, error_exit);
2675         x = fabs(px - qx);
2676         diffs[i] = x;
2677         found_nan |= Py_IS_NAN(x);
2678         if (x > max) {
2679             max = x;
2680         }
2681     }
2682     result = vector_norm(n, diffs, max, found_nan);
2683     if (diffs != diffs_on_stack) {
2684         PyObject_Free(diffs);
2685     }
2686     if (p_allocated) {
2687         Py_DECREF(p);
2688     }
2689     if (q_allocated) {
2690         Py_DECREF(q);
2691     }
2692     return PyFloat_FromDouble(result);
2693 
2694   error_exit:
2695     if (diffs != diffs_on_stack) {
2696         PyObject_Free(diffs);
2697     }
2698     if (p_allocated) {
2699         Py_DECREF(p);
2700     }
2701     if (q_allocated) {
2702         Py_DECREF(q);
2703     }
2704     return NULL;
2705 }
2706 
2707 /* AC: cannot convert yet, waiting for *args support */
2708 static PyObject *
math_hypot(PyObject * self,PyObject * const * args,Py_ssize_t nargs)2709 math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
2710 {
2711     Py_ssize_t i;
2712     PyObject *item;
2713     double max = 0.0;
2714     double x, result;
2715     int found_nan = 0;
2716     double coord_on_stack[NUM_STACK_ELEMS];
2717     double *coordinates = coord_on_stack;
2718 
2719     if (nargs > NUM_STACK_ELEMS) {
2720         coordinates = (double *) PyObject_Malloc(nargs * sizeof(double));
2721         if (coordinates == NULL) {
2722             return PyErr_NoMemory();
2723         }
2724     }
2725     for (i = 0; i < nargs; i++) {
2726         item = args[i];
2727         ASSIGN_DOUBLE(x, item, error_exit);
2728         x = fabs(x);
2729         coordinates[i] = x;
2730         found_nan |= Py_IS_NAN(x);
2731         if (x > max) {
2732             max = x;
2733         }
2734     }
2735     result = vector_norm(nargs, coordinates, max, found_nan);
2736     if (coordinates != coord_on_stack) {
2737         PyObject_Free(coordinates);
2738     }
2739     return PyFloat_FromDouble(result);
2740 
2741   error_exit:
2742     if (coordinates != coord_on_stack) {
2743         PyObject_Free(coordinates);
2744     }
2745     return NULL;
2746 }
2747 
2748 #undef NUM_STACK_ELEMS
2749 
2750 PyDoc_STRVAR(math_hypot_doc,
2751              "hypot(*coordinates) -> value\n\n\
2752 Multidimensional Euclidean distance from the origin to a point.\n\
2753 \n\
2754 Roughly equivalent to:\n\
2755     sqrt(sum(x**2 for x in coordinates))\n\
2756 \n\
2757 For a two dimensional point (x, y), gives the hypotenuse\n\
2758 using the Pythagorean theorem:  sqrt(x*x + y*y).\n\
2759 \n\
2760 For example, the hypotenuse of a 3/4/5 right triangle is:\n\
2761 \n\
2762     >>> hypot(3.0, 4.0)\n\
2763     5.0\n\
2764 ");
2765 
2766 /* pow can't use math_2, but needs its own wrapper: the problem is
2767    that an infinite result can arise either as a result of overflow
2768    (in which case OverflowError should be raised) or as a result of
2769    e.g. 0.**-5. (for which ValueError needs to be raised.)
2770 */
2771 
2772 /*[clinic input]
2773 math.pow
2774 
2775     x: double
2776     y: double
2777     /
2778 
2779 Return x**y (x to the power of y).
2780 [clinic start generated code]*/
2781 
2782 static PyObject *
math_pow_impl(PyObject * module,double x,double y)2783 math_pow_impl(PyObject *module, double x, double y)
2784 /*[clinic end generated code: output=fff93e65abccd6b0 input=c26f1f6075088bfd]*/
2785 {
2786     double r;
2787     int odd_y;
2788 
2789     /* deal directly with IEEE specials, to cope with problems on various
2790        platforms whose semantics don't exactly match C99 */
2791     r = 0.; /* silence compiler warning */
2792     if (!Py_IS_FINITE(x) || !Py_IS_FINITE(y)) {
2793         errno = 0;
2794         if (Py_IS_NAN(x))
2795             r = y == 0. ? 1. : x; /* NaN**0 = 1 */
2796         else if (Py_IS_NAN(y))
2797             r = x == 1. ? 1. : y; /* 1**NaN = 1 */
2798         else if (Py_IS_INFINITY(x)) {
2799             odd_y = Py_IS_FINITE(y) && fmod(fabs(y), 2.0) == 1.0;
2800             if (y > 0.)
2801                 r = odd_y ? x : fabs(x);
2802             else if (y == 0.)
2803                 r = 1.;
2804             else /* y < 0. */
2805                 r = odd_y ? copysign(0., x) : 0.;
2806         }
2807         else if (Py_IS_INFINITY(y)) {
2808             if (fabs(x) == 1.0)
2809                 r = 1.;
2810             else if (y > 0. && fabs(x) > 1.0)
2811                 r = y;
2812             else if (y < 0. && fabs(x) < 1.0) {
2813                 r = -y; /* result is +inf */
2814                 if (x == 0.) /* 0**-inf: divide-by-zero */
2815                     errno = EDOM;
2816             }
2817             else
2818                 r = 0.;
2819         }
2820     }
2821     else {
2822         /* let libm handle finite**finite */
2823         errno = 0;
2824         r = pow(x, y);
2825         /* a NaN result should arise only from (-ve)**(finite
2826            non-integer); in this case we want to raise ValueError. */
2827         if (!Py_IS_FINITE(r)) {
2828             if (Py_IS_NAN(r)) {
2829                 errno = EDOM;
2830             }
2831             /*
2832                an infinite result here arises either from:
2833                (A) (+/-0.)**negative (-> divide-by-zero)
2834                (B) overflow of x**y with x and y finite
2835             */
2836             else if (Py_IS_INFINITY(r)) {
2837                 if (x == 0.)
2838                     errno = EDOM;
2839                 else
2840                     errno = ERANGE;
2841             }
2842         }
2843     }
2844 
2845     if (errno && is_error(r))
2846         return NULL;
2847     else
2848         return PyFloat_FromDouble(r);
2849 }
2850 
2851 
2852 static const double degToRad = Py_MATH_PI / 180.0;
2853 static const double radToDeg = 180.0 / Py_MATH_PI;
2854 
2855 /*[clinic input]
2856 math.degrees
2857 
2858     x: double
2859     /
2860 
2861 Convert angle x from radians to degrees.
2862 [clinic start generated code]*/
2863 
2864 static PyObject *
math_degrees_impl(PyObject * module,double x)2865 math_degrees_impl(PyObject *module, double x)
2866 /*[clinic end generated code: output=7fea78b294acd12f input=81e016555d6e3660]*/
2867 {
2868     return PyFloat_FromDouble(x * radToDeg);
2869 }
2870 
2871 
2872 /*[clinic input]
2873 math.radians
2874 
2875     x: double
2876     /
2877 
2878 Convert angle x from degrees to radians.
2879 [clinic start generated code]*/
2880 
2881 static PyObject *
math_radians_impl(PyObject * module,double x)2882 math_radians_impl(PyObject *module, double x)
2883 /*[clinic end generated code: output=34daa47caf9b1590 input=91626fc489fe3d63]*/
2884 {
2885     return PyFloat_FromDouble(x * degToRad);
2886 }
2887 
2888 
2889 /*[clinic input]
2890 math.isfinite
2891 
2892     x: double
2893     /
2894 
2895 Return True if x is neither an infinity nor a NaN, and False otherwise.
2896 [clinic start generated code]*/
2897 
2898 static PyObject *
math_isfinite_impl(PyObject * module,double x)2899 math_isfinite_impl(PyObject *module, double x)
2900 /*[clinic end generated code: output=8ba1f396440c9901 input=46967d254812e54a]*/
2901 {
2902     return PyBool_FromLong((long)Py_IS_FINITE(x));
2903 }
2904 
2905 
2906 /*[clinic input]
2907 math.isnan
2908 
2909     x: double
2910     /
2911 
2912 Return True if x is a NaN (not a number), and False otherwise.
2913 [clinic start generated code]*/
2914 
2915 static PyObject *
math_isnan_impl(PyObject * module,double x)2916 math_isnan_impl(PyObject *module, double x)
2917 /*[clinic end generated code: output=f537b4d6df878c3e input=935891e66083f46a]*/
2918 {
2919     return PyBool_FromLong((long)Py_IS_NAN(x));
2920 }
2921 
2922 
2923 /*[clinic input]
2924 math.isinf
2925 
2926     x: double
2927     /
2928 
2929 Return True if x is a positive or negative infinity, and False otherwise.
2930 [clinic start generated code]*/
2931 
2932 static PyObject *
math_isinf_impl(PyObject * module,double x)2933 math_isinf_impl(PyObject *module, double x)
2934 /*[clinic end generated code: output=9f00cbec4de7b06b input=32630e4212cf961f]*/
2935 {
2936     return PyBool_FromLong((long)Py_IS_INFINITY(x));
2937 }
2938 
2939 
2940 /*[clinic input]
2941 math.isclose -> bool
2942 
2943     a: double
2944     b: double
2945     *
2946     rel_tol: double = 1e-09
2947         maximum difference for being considered "close", relative to the
2948         magnitude of the input values
2949     abs_tol: double = 0.0
2950         maximum difference for being considered "close", regardless of the
2951         magnitude of the input values
2952 
2953 Determine whether two floating point numbers are close in value.
2954 
2955 Return True if a is close in value to b, and False otherwise.
2956 
2957 For the values to be considered close, the difference between them
2958 must be smaller than at least one of the tolerances.
2959 
2960 -inf, inf and NaN behave similarly to the IEEE 754 Standard.  That
2961 is, NaN is not close to anything, even itself.  inf and -inf are
2962 only close to themselves.
2963 [clinic start generated code]*/
2964 
2965 static int
math_isclose_impl(PyObject * module,double a,double b,double rel_tol,double abs_tol)2966 math_isclose_impl(PyObject *module, double a, double b, double rel_tol,
2967                   double abs_tol)
2968 /*[clinic end generated code: output=b73070207511952d input=f28671871ea5bfba]*/
2969 {
2970     double diff = 0.0;
2971 
2972     /* sanity check on the inputs */
2973     if (rel_tol < 0.0 || abs_tol < 0.0 ) {
2974         PyErr_SetString(PyExc_ValueError,
2975                         "tolerances must be non-negative");
2976         return -1;
2977     }
2978 
2979     if ( a == b ) {
2980         /* short circuit exact equality -- needed to catch two infinities of
2981            the same sign. And perhaps speeds things up a bit sometimes.
2982         */
2983         return 1;
2984     }
2985 
2986     /* This catches the case of two infinities of opposite sign, or
2987        one infinity and one finite number. Two infinities of opposite
2988        sign would otherwise have an infinite relative tolerance.
2989        Two infinities of the same sign are caught by the equality check
2990        above.
2991     */
2992 
2993     if (Py_IS_INFINITY(a) || Py_IS_INFINITY(b)) {
2994         return 0;
2995     }
2996 
2997     /* now do the regular computation
2998        this is essentially the "weak" test from the Boost library
2999     */
3000 
3001     diff = fabs(b - a);
3002 
3003     return (((diff <= fabs(rel_tol * b)) ||
3004              (diff <= fabs(rel_tol * a))) ||
3005             (diff <= abs_tol));
3006 }
3007 
3008 static inline int
_check_long_mult_overflow(long a,long b)3009 _check_long_mult_overflow(long a, long b) {
3010 
3011     /* From Python2's int_mul code:
3012 
3013     Integer overflow checking for * is painful:  Python tried a couple ways, but
3014     they didn't work on all platforms, or failed in endcases (a product of
3015     -sys.maxint-1 has been a particular pain).
3016 
3017     Here's another way:
3018 
3019     The native long product x*y is either exactly right or *way* off, being
3020     just the last n bits of the true product, where n is the number of bits
3021     in a long (the delivered product is the true product plus i*2**n for
3022     some integer i).
3023 
3024     The native double product (double)x * (double)y is subject to three
3025     rounding errors:  on a sizeof(long)==8 box, each cast to double can lose
3026     info, and even on a sizeof(long)==4 box, the multiplication can lose info.
3027     But, unlike the native long product, it's not in *range* trouble:  even
3028     if sizeof(long)==32 (256-bit longs), the product easily fits in the
3029     dynamic range of a double.  So the leading 50 (or so) bits of the double
3030     product are correct.
3031 
3032     We check these two ways against each other, and declare victory if they're
3033     approximately the same.  Else, because the native long product is the only
3034     one that can lose catastrophic amounts of information, it's the native long
3035     product that must have overflowed.
3036 
3037     */
3038 
3039     long longprod = (long)((unsigned long)a * b);
3040     double doubleprod = (double)a * (double)b;
3041     double doubled_longprod = (double)longprod;
3042 
3043     if (doubled_longprod == doubleprod) {
3044         return 0;
3045     }
3046 
3047     const double diff = doubled_longprod - doubleprod;
3048     const double absdiff = diff >= 0.0 ? diff : -diff;
3049     const double absprod = doubleprod >= 0.0 ? doubleprod : -doubleprod;
3050 
3051     if (32.0 * absdiff <= absprod) {
3052         return 0;
3053     }
3054 
3055     return 1;
3056 }
3057 
3058 /*[clinic input]
3059 math.prod
3060 
3061     iterable: object
3062     /
3063     *
3064     start: object(c_default="NULL") = 1
3065 
3066 Calculate the product of all the elements in the input iterable.
3067 
3068 The default start value for the product is 1.
3069 
3070 When the iterable is empty, return the start value.  This function is
3071 intended specifically for use with numeric values and may reject
3072 non-numeric types.
3073 [clinic start generated code]*/
3074 
3075 static PyObject *
math_prod_impl(PyObject * module,PyObject * iterable,PyObject * start)3076 math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
3077 /*[clinic end generated code: output=36153bedac74a198 input=4c5ab0682782ed54]*/
3078 {
3079     PyObject *result = start;
3080     PyObject *temp, *item, *iter;
3081 
3082     iter = PyObject_GetIter(iterable);
3083     if (iter == NULL) {
3084         return NULL;
3085     }
3086 
3087     if (result == NULL) {
3088         result = _PyLong_GetOne();
3089     }
3090     Py_INCREF(result);
3091 #ifndef SLOW_PROD
3092     /* Fast paths for integers keeping temporary products in C.
3093      * Assumes all inputs are the same type.
3094      * If the assumption fails, default to use PyObjects instead.
3095     */
3096     if (PyLong_CheckExact(result)) {
3097         int overflow;
3098         long i_result = PyLong_AsLongAndOverflow(result, &overflow);
3099         /* If this already overflowed, don't even enter the loop. */
3100         if (overflow == 0) {
3101             Py_DECREF(result);
3102             result = NULL;
3103         }
3104         /* Loop over all the items in the iterable until we finish, we overflow
3105          * or we found a non integer element */
3106         while (result == NULL) {
3107             item = PyIter_Next(iter);
3108             if (item == NULL) {
3109                 Py_DECREF(iter);
3110                 if (PyErr_Occurred()) {
3111                     return NULL;
3112                 }
3113                 return PyLong_FromLong(i_result);
3114             }
3115             if (PyLong_CheckExact(item)) {
3116                 long b = PyLong_AsLongAndOverflow(item, &overflow);
3117                 if (overflow == 0 && !_check_long_mult_overflow(i_result, b)) {
3118                     long x = i_result * b;
3119                     i_result = x;
3120                     Py_DECREF(item);
3121                     continue;
3122                 }
3123             }
3124             /* Either overflowed or is not an int.
3125              * Restore real objects and process normally */
3126             result = PyLong_FromLong(i_result);
3127             if (result == NULL) {
3128                 Py_DECREF(item);
3129                 Py_DECREF(iter);
3130                 return NULL;
3131             }
3132             temp = PyNumber_Multiply(result, item);
3133             Py_DECREF(result);
3134             Py_DECREF(item);
3135             result = temp;
3136             if (result == NULL) {
3137                 Py_DECREF(iter);
3138                 return NULL;
3139             }
3140         }
3141     }
3142 
3143     /* Fast paths for floats keeping temporary products in C.
3144      * Assumes all inputs are the same type.
3145      * If the assumption fails, default to use PyObjects instead.
3146     */
3147     if (PyFloat_CheckExact(result)) {
3148         double f_result = PyFloat_AS_DOUBLE(result);
3149         Py_DECREF(result);
3150         result = NULL;
3151         while(result == NULL) {
3152             item = PyIter_Next(iter);
3153             if (item == NULL) {
3154                 Py_DECREF(iter);
3155                 if (PyErr_Occurred()) {
3156                     return NULL;
3157                 }
3158                 return PyFloat_FromDouble(f_result);
3159             }
3160             if (PyFloat_CheckExact(item)) {
3161                 f_result *= PyFloat_AS_DOUBLE(item);
3162                 Py_DECREF(item);
3163                 continue;
3164             }
3165             if (PyLong_CheckExact(item)) {
3166                 long value;
3167                 int overflow;
3168                 value = PyLong_AsLongAndOverflow(item, &overflow);
3169                 if (!overflow) {
3170                     f_result *= (double)value;
3171                     Py_DECREF(item);
3172                     continue;
3173                 }
3174             }
3175             result = PyFloat_FromDouble(f_result);
3176             if (result == NULL) {
3177                 Py_DECREF(item);
3178                 Py_DECREF(iter);
3179                 return NULL;
3180             }
3181             temp = PyNumber_Multiply(result, item);
3182             Py_DECREF(result);
3183             Py_DECREF(item);
3184             result = temp;
3185             if (result == NULL) {
3186                 Py_DECREF(iter);
3187                 return NULL;
3188             }
3189         }
3190     }
3191 #endif
3192     /* Consume rest of the iterable (if any) that could not be handled
3193      * by specialized functions above.*/
3194     for(;;) {
3195         item = PyIter_Next(iter);
3196         if (item == NULL) {
3197             /* error, or end-of-sequence */
3198             if (PyErr_Occurred()) {
3199                 Py_DECREF(result);
3200                 result = NULL;
3201             }
3202             break;
3203         }
3204         temp = PyNumber_Multiply(result, item);
3205         Py_DECREF(result);
3206         Py_DECREF(item);
3207         result = temp;
3208         if (result == NULL)
3209             break;
3210     }
3211     Py_DECREF(iter);
3212     return result;
3213 }
3214 
3215 
3216 /*[clinic input]
3217 math.perm
3218 
3219     n: object
3220     k: object = None
3221     /
3222 
3223 Number of ways to choose k items from n items without repetition and with order.
3224 
3225 Evaluates to n! / (n - k)! when k <= n and evaluates
3226 to zero when k > n.
3227 
3228 If k is not specified or is None, then k defaults to n
3229 and the function returns n!.
3230 
3231 Raises TypeError if either of the arguments are not integers.
3232 Raises ValueError if either of the arguments are negative.
3233 [clinic start generated code]*/
3234 
3235 static PyObject *
math_perm_impl(PyObject * module,PyObject * n,PyObject * k)3236 math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
3237 /*[clinic end generated code: output=e021a25469653e23 input=5311c5a00f359b53]*/
3238 {
3239     PyObject *result = NULL, *factor = NULL;
3240     int overflow, cmp;
3241     long long i, factors;
3242 
3243     if (k == Py_None) {
3244         return math_factorial(module, n);
3245     }
3246     n = PyNumber_Index(n);
3247     if (n == NULL) {
3248         return NULL;
3249     }
3250     k = PyNumber_Index(k);
3251     if (k == NULL) {
3252         Py_DECREF(n);
3253         return NULL;
3254     }
3255 
3256     if (Py_SIZE(n) < 0) {
3257         PyErr_SetString(PyExc_ValueError,
3258                         "n must be a non-negative integer");
3259         goto error;
3260     }
3261     if (Py_SIZE(k) < 0) {
3262         PyErr_SetString(PyExc_ValueError,
3263                         "k must be a non-negative integer");
3264         goto error;
3265     }
3266 
3267     cmp = PyObject_RichCompareBool(n, k, Py_LT);
3268     if (cmp != 0) {
3269         if (cmp > 0) {
3270             result = PyLong_FromLong(0);
3271             goto done;
3272         }
3273         goto error;
3274     }
3275 
3276     factors = PyLong_AsLongLongAndOverflow(k, &overflow);
3277     if (overflow > 0) {
3278         PyErr_Format(PyExc_OverflowError,
3279                      "k must not exceed %lld",
3280                      LLONG_MAX);
3281         goto error;
3282     }
3283     else if (factors == -1) {
3284         /* k is nonnegative, so a return value of -1 can only indicate error */
3285         goto error;
3286     }
3287 
3288     if (factors == 0) {
3289         result = PyLong_FromLong(1);
3290         goto done;
3291     }
3292 
3293     result = n;
3294     Py_INCREF(result);
3295     if (factors == 1) {
3296         goto done;
3297     }
3298 
3299     factor = Py_NewRef(n);
3300     PyObject *one = _PyLong_GetOne();  // borrowed ref
3301     for (i = 1; i < factors; ++i) {
3302         Py_SETREF(factor, PyNumber_Subtract(factor, one));
3303         if (factor == NULL) {
3304             goto error;
3305         }
3306         Py_SETREF(result, PyNumber_Multiply(result, factor));
3307         if (result == NULL) {
3308             goto error;
3309         }
3310     }
3311     Py_DECREF(factor);
3312 
3313 done:
3314     Py_DECREF(n);
3315     Py_DECREF(k);
3316     return result;
3317 
3318 error:
3319     Py_XDECREF(factor);
3320     Py_XDECREF(result);
3321     Py_DECREF(n);
3322     Py_DECREF(k);
3323     return NULL;
3324 }
3325 
3326 
3327 /*[clinic input]
3328 math.comb
3329 
3330     n: object
3331     k: object
3332     /
3333 
3334 Number of ways to choose k items from n items without repetition and without order.
3335 
3336 Evaluates to n! / (k! * (n - k)!) when k <= n and evaluates
3337 to zero when k > n.
3338 
3339 Also called the binomial coefficient because it is equivalent
3340 to the coefficient of k-th term in polynomial expansion of the
3341 expression (1 + x)**n.
3342 
3343 Raises TypeError if either of the arguments are not integers.
3344 Raises ValueError if either of the arguments are negative.
3345 
3346 [clinic start generated code]*/
3347 
3348 static PyObject *
math_comb_impl(PyObject * module,PyObject * n,PyObject * k)3349 math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
3350 /*[clinic end generated code: output=bd2cec8d854f3493 input=9a05315af2518709]*/
3351 {
3352     PyObject *result = NULL, *factor = NULL, *temp;
3353     int overflow, cmp;
3354     long long i, factors;
3355 
3356     n = PyNumber_Index(n);
3357     if (n == NULL) {
3358         return NULL;
3359     }
3360     k = PyNumber_Index(k);
3361     if (k == NULL) {
3362         Py_DECREF(n);
3363         return NULL;
3364     }
3365 
3366     if (Py_SIZE(n) < 0) {
3367         PyErr_SetString(PyExc_ValueError,
3368                         "n must be a non-negative integer");
3369         goto error;
3370     }
3371     if (Py_SIZE(k) < 0) {
3372         PyErr_SetString(PyExc_ValueError,
3373                         "k must be a non-negative integer");
3374         goto error;
3375     }
3376 
3377     /* k = min(k, n - k) */
3378     temp = PyNumber_Subtract(n, k);
3379     if (temp == NULL) {
3380         goto error;
3381     }
3382     if (Py_SIZE(temp) < 0) {
3383         Py_DECREF(temp);
3384         result = PyLong_FromLong(0);
3385         goto done;
3386     }
3387     cmp = PyObject_RichCompareBool(temp, k, Py_LT);
3388     if (cmp > 0) {
3389         Py_SETREF(k, temp);
3390     }
3391     else {
3392         Py_DECREF(temp);
3393         if (cmp < 0) {
3394             goto error;
3395         }
3396     }
3397 
3398     factors = PyLong_AsLongLongAndOverflow(k, &overflow);
3399     if (overflow > 0) {
3400         PyErr_Format(PyExc_OverflowError,
3401                      "min(n - k, k) must not exceed %lld",
3402                      LLONG_MAX);
3403         goto error;
3404     }
3405     if (factors == -1) {
3406         /* k is nonnegative, so a return value of -1 can only indicate error */
3407         goto error;
3408     }
3409 
3410     if (factors == 0) {
3411         result = PyLong_FromLong(1);
3412         goto done;
3413     }
3414 
3415     result = n;
3416     Py_INCREF(result);
3417     if (factors == 1) {
3418         goto done;
3419     }
3420 
3421     factor = Py_NewRef(n);
3422     PyObject *one = _PyLong_GetOne();  // borrowed ref
3423     for (i = 1; i < factors; ++i) {
3424         Py_SETREF(factor, PyNumber_Subtract(factor, one));
3425         if (factor == NULL) {
3426             goto error;
3427         }
3428         Py_SETREF(result, PyNumber_Multiply(result, factor));
3429         if (result == NULL) {
3430             goto error;
3431         }
3432 
3433         temp = PyLong_FromUnsignedLongLong((unsigned long long)i + 1);
3434         if (temp == NULL) {
3435             goto error;
3436         }
3437         Py_SETREF(result, PyNumber_FloorDivide(result, temp));
3438         Py_DECREF(temp);
3439         if (result == NULL) {
3440             goto error;
3441         }
3442     }
3443     Py_DECREF(factor);
3444 
3445 done:
3446     Py_DECREF(n);
3447     Py_DECREF(k);
3448     return result;
3449 
3450 error:
3451     Py_XDECREF(factor);
3452     Py_XDECREF(result);
3453     Py_DECREF(n);
3454     Py_DECREF(k);
3455     return NULL;
3456 }
3457 
3458 
3459 /*[clinic input]
3460 math.nextafter
3461 
3462     x: double
3463     y: double
3464     /
3465 
3466 Return the next floating-point value after x towards y.
3467 [clinic start generated code]*/
3468 
3469 static PyObject *
math_nextafter_impl(PyObject * module,double x,double y)3470 math_nextafter_impl(PyObject *module, double x, double y)
3471 /*[clinic end generated code: output=750c8266c1c540ce input=02b2d50cd1d9f9b6]*/
3472 {
3473 #if defined(_AIX)
3474     if (x == y) {
3475         /* On AIX 7.1, libm nextafter(-0.0, +0.0) returns -0.0.
3476            Bug fixed in bos.adt.libm 7.2.2.0 by APAR IV95512. */
3477         return PyFloat_FromDouble(y);
3478     }
3479     if (Py_IS_NAN(x)) {
3480         return PyFloat_FromDouble(x);
3481     }
3482     if (Py_IS_NAN(y)) {
3483         return PyFloat_FromDouble(y);
3484     }
3485 #endif
3486     return PyFloat_FromDouble(nextafter(x, y));
3487 }
3488 
3489 
3490 /*[clinic input]
3491 math.ulp -> double
3492 
3493     x: double
3494     /
3495 
3496 Return the value of the least significant bit of the float x.
3497 [clinic start generated code]*/
3498 
3499 static double
math_ulp_impl(PyObject * module,double x)3500 math_ulp_impl(PyObject *module, double x)
3501 /*[clinic end generated code: output=f5207867a9384dd4 input=31f9bfbbe373fcaa]*/
3502 {
3503     if (Py_IS_NAN(x)) {
3504         return x;
3505     }
3506     x = fabs(x);
3507     if (Py_IS_INFINITY(x)) {
3508         return x;
3509     }
3510     double inf = m_inf();
3511     double x2 = nextafter(x, inf);
3512     if (Py_IS_INFINITY(x2)) {
3513         /* special case: x is the largest positive representable float */
3514         x2 = nextafter(x, -inf);
3515         return x - x2;
3516     }
3517     return x2 - x;
3518 }
3519 
3520 static int
math_exec(PyObject * module)3521 math_exec(PyObject *module)
3522 {
3523     if (PyModule_AddObject(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
3524         return -1;
3525     }
3526     if (PyModule_AddObject(module, "e", PyFloat_FromDouble(Py_MATH_E)) < 0) {
3527         return -1;
3528     }
3529     // 2pi
3530     if (PyModule_AddObject(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
3531         return -1;
3532     }
3533     if (PyModule_AddObject(module, "inf", PyFloat_FromDouble(m_inf())) < 0) {
3534         return -1;
3535     }
3536 #if !defined(PY_NO_SHORT_FLOAT_REPR) || defined(Py_NAN)
3537     if (PyModule_AddObject(module, "nan", PyFloat_FromDouble(m_nan())) < 0) {
3538         return -1;
3539     }
3540 #endif
3541     return 0;
3542 }
3543 
3544 static PyMethodDef math_methods[] = {
3545     {"acos",            math_acos,      METH_O,         math_acos_doc},
3546     {"acosh",           math_acosh,     METH_O,         math_acosh_doc},
3547     {"asin",            math_asin,      METH_O,         math_asin_doc},
3548     {"asinh",           math_asinh,     METH_O,         math_asinh_doc},
3549     {"atan",            math_atan,      METH_O,         math_atan_doc},
3550     {"atan2",           (PyCFunction)(void(*)(void))math_atan2,     METH_FASTCALL,  math_atan2_doc},
3551     {"atanh",           math_atanh,     METH_O,         math_atanh_doc},
3552     MATH_CEIL_METHODDEF
3553     {"copysign",        (PyCFunction)(void(*)(void))math_copysign,  METH_FASTCALL,  math_copysign_doc},
3554     {"cos",             math_cos,       METH_O,         math_cos_doc},
3555     {"cosh",            math_cosh,      METH_O,         math_cosh_doc},
3556     MATH_DEGREES_METHODDEF
3557     MATH_DIST_METHODDEF
3558     {"erf",             math_erf,       METH_O,         math_erf_doc},
3559     {"erfc",            math_erfc,      METH_O,         math_erfc_doc},
3560     {"exp",             math_exp,       METH_O,         math_exp_doc},
3561     {"expm1",           math_expm1,     METH_O,         math_expm1_doc},
3562     {"fabs",            math_fabs,      METH_O,         math_fabs_doc},
3563     MATH_FACTORIAL_METHODDEF
3564     MATH_FLOOR_METHODDEF
3565     MATH_FMOD_METHODDEF
3566     MATH_FREXP_METHODDEF
3567     MATH_FSUM_METHODDEF
3568     {"gamma",           math_gamma,     METH_O,         math_gamma_doc},
3569     {"gcd",             (PyCFunction)(void(*)(void))math_gcd,       METH_FASTCALL,  math_gcd_doc},
3570     {"hypot",           (PyCFunction)(void(*)(void))math_hypot,     METH_FASTCALL,  math_hypot_doc},
3571     MATH_ISCLOSE_METHODDEF
3572     MATH_ISFINITE_METHODDEF
3573     MATH_ISINF_METHODDEF
3574     MATH_ISNAN_METHODDEF
3575     MATH_ISQRT_METHODDEF
3576     {"lcm",             (PyCFunction)(void(*)(void))math_lcm,       METH_FASTCALL,  math_lcm_doc},
3577     MATH_LDEXP_METHODDEF
3578     {"lgamma",          math_lgamma,    METH_O,         math_lgamma_doc},
3579     MATH_LOG_METHODDEF
3580     {"log1p",           math_log1p,     METH_O,         math_log1p_doc},
3581     MATH_LOG10_METHODDEF
3582     MATH_LOG2_METHODDEF
3583     MATH_MODF_METHODDEF
3584     MATH_POW_METHODDEF
3585     MATH_RADIANS_METHODDEF
3586     {"remainder",       (PyCFunction)(void(*)(void))math_remainder, METH_FASTCALL,  math_remainder_doc},
3587     {"sin",             math_sin,       METH_O,         math_sin_doc},
3588     {"sinh",            math_sinh,      METH_O,         math_sinh_doc},
3589     {"sqrt",            math_sqrt,      METH_O,         math_sqrt_doc},
3590     {"tan",             math_tan,       METH_O,         math_tan_doc},
3591     {"tanh",            math_tanh,      METH_O,         math_tanh_doc},
3592     MATH_TRUNC_METHODDEF
3593     MATH_PROD_METHODDEF
3594     MATH_PERM_METHODDEF
3595     MATH_COMB_METHODDEF
3596     MATH_NEXTAFTER_METHODDEF
3597     MATH_ULP_METHODDEF
3598     {NULL,              NULL}           /* sentinel */
3599 };
3600 
3601 static PyModuleDef_Slot math_slots[] = {
3602     {Py_mod_exec, math_exec},
3603     {0, NULL}
3604 };
3605 
3606 PyDoc_STRVAR(module_doc,
3607 "This module provides access to the mathematical functions\n"
3608 "defined by the C standard.");
3609 
3610 static struct PyModuleDef mathmodule = {
3611     PyModuleDef_HEAD_INIT,
3612     .m_name = "math",
3613     .m_doc = module_doc,
3614     .m_size = 0,
3615     .m_methods = math_methods,
3616     .m_slots = math_slots,
3617 };
3618 
3619 PyMODINIT_FUNC
PyInit_math(void)3620 PyInit_math(void)
3621 {
3622     return PyModuleDef_Init(&mathmodule);
3623 }
3624